在PyTorch的简单GAN中使用未标记的自定义图像代替MNIST和CIFAR



我试图在简单的GAN中用png格式的未标记自定义图像替换PyTorch的标准化数据集(如MNIST和CIFAR)。不幸的是,大多数例子总是使用这样的数据集,并没有展示在GAN中准备和使用自定义数据的过程。我把png图像(336×336,RGB)存储在VS Code的工作目录中。你能给我一个下一步怎么做的建议吗?下面是我想用自己的图像替换MNIST以生成新图像的当前代码(从 "# PREPARING TRAINING DATA" 到 "# PLOTTING SAMPLES"):
import torch
from torch import nn
import math
import matplotlib.pyplot as plt
import torchvision
import torchvision.transforms as transforms
# Fix the RNG seed so runs are reproducible.
torch.manual_seed(111)

# DEVICE: prefer CUDA when available, otherwise fall back to CPU.
# (The original initialized `device = ""` first, which is never used.)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# PREPARING TRAINING DATA
# (removed the stray `***` markdown residue, which is a Python syntax error)
# Scale images to [-1, 1] so they match the generator's Tanh output range.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# LOADING DATA: MNIST is downloaded into the working directory on first run.
train_set = torchvision.datasets.MNIST(
    root=".", train=True, download=True, transform=transform
)

# CREATE DATALOADER
batch_size = 32
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True
)
# PLOTTING SAMPLES: show a 4x4 grid of real training images.
# (removed the stray trailing `´` character, which is a syntax error)
real_samples, mnist_labels = next(iter(train_loader))
for i in range(16):
    ax = plt.subplot(4, 4, i + 1)
    plt.imshow(real_samples[i].reshape(28, 28), cmap="gray_r")
    plt.xticks([])
    plt.yticks([])
plt.show()
# IMPLEMENTING DISCRIMINATOR AND GENERATOR

class Discriminator(nn.Module):
    """MLP discriminator: flattens a 1x28x28 image into 784 features and
    outputs the probability (sigmoid) that the image is real."""

    def __init__(self):
        super().__init__()
        # Build the same 784 -> 1024 -> 512 -> 256 -> 1 stack layer by layer.
        widths = [784, 1024, 512, 256]
        stages = []
        for n_in, n_out in zip(widths, widths[1:]):
            stages += [nn.Linear(n_in, n_out), nn.ReLU(), nn.Dropout(0.3)]
        stages += [nn.Linear(widths[-1], 1), nn.Sigmoid()]
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        # Flatten each image into a 784-dim vector before the MLP.
        flat = x.view(x.size(0), 784)
        return self.model(flat)

# Move the discriminator onto the selected device (GPU if available).
discriminator = Discriminator().to(device=device)

class Generator(nn.Module):
    """MLP generator: maps a 100-dim latent vector to a 1x28x28 image
    with values in [-1, 1] (Tanh, matching the normalized training data)."""

    def __init__(self):
        super().__init__()
        # Same 100 -> 256 -> 512 -> 1024 -> 784 stack, assembled in a loop.
        widths = [100, 256, 512, 1024]
        stages = []
        for n_in, n_out in zip(widths, widths[1:]):
            stages += [nn.Linear(n_in, n_out), nn.ReLU()]
        stages += [nn.Linear(widths[-1], 784), nn.Tanh()]
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        flat = self.model(x)
        # Reshape the 784-dim output into image form (channels, height, width).
        return flat.view(x.size(0), 1, 28, 28)

# Move the generator onto the selected device.
generator = Generator().to(device=device)
# TRAINING PARAMS
# Learning rate shared by both optimizers.
lr = 0.0001
num_epochs = 100
# Binary cross-entropy on the discriminator's sigmoid output.
loss_function = nn.BCELoss()
# Each network gets its own optimizer so the two can be stepped independently.
optimizer_discriminator = torch.optim.Adam(discriminator.parameters(), lr=lr)
optimizer_generator = torch.optim.Adam(generator.parameters(), lr=lr)
# TRAINING LOOP
# TRAINING LOOP
for epoch in range(num_epochs):
    for n, (real_samples, mnist_labels) in enumerate(train_loader):
        # Data for training the discriminator.
        # Bug fix: use the actual batch size instead of the global
        # `batch_size` — the last batch of an epoch may be smaller, and
        # fixed-size label tensors would then make torch.cat / BCELoss fail.
        current_batch = real_samples.size(0)
        real_samples = real_samples.to(device=device)
        real_samples_labels = torch.ones((current_batch, 1)).to(device=device)
        latent_space_samples = torch.randn((current_batch, 100)).to(device=device)
        generated_samples = generator(latent_space_samples)
        generated_samples_labels = torch.zeros((current_batch, 1)).to(device=device)
        # Real and fake samples are scored in a single forward pass.
        all_samples = torch.cat((real_samples, generated_samples))
        all_samples_labels = torch.cat(
            (real_samples_labels, generated_samples_labels)
        )

        # Training the discriminator.
        discriminator.zero_grad()
        output_discriminator = discriminator(all_samples)
        loss_discriminator = loss_function(output_discriminator, all_samples_labels)
        loss_discriminator.backward()
        optimizer_discriminator.step()

        # Fresh latent noise for the generator step.
        latent_space_samples = torch.randn((current_batch, 100)).to(device=device)

        # Training the generator: it is rewarded when the discriminator
        # labels its output as real (hence `real_samples_labels`).
        generator.zero_grad()
        generated_samples = generator(latent_space_samples)
        output_discriminator_generated = discriminator(generated_samples)
        loss_generator = loss_function(output_discriminator_generated, real_samples_labels)
        loss_generator.backward()
        optimizer_generator.step()

        # Show loss once per epoch (at batch index batch_size - 1,
        # as in the original tutorial).
        if n == batch_size - 1:
            print(f"Epoch: {epoch} Loss D.: {loss_discriminator}")
            print(f"Epoch: {epoch} Loss G.: {loss_generator}")
# SAMPLES: generate a batch of images from random noise and plot 16 of them.
# (removed the stray trailing `´´´` characters, which are a syntax error)
latent_space_samples = torch.randn(batch_size, 100).to(device=device)
generated_samples = generator(latent_space_samples)
# Detach from the autograd graph and move to CPU so matplotlib can read the data.
generated_samples = generated_samples.cpu().detach()
for i in range(16):
    ax = plt.subplot(4, 4, i + 1)
    plt.imshow(generated_samples[i].reshape(28, 28), cmap="gray_r")
    plt.xticks([])
    plt.yticks([])
plt.show()

在您上面分享的示例中,您正在尝试在单通道图像上训练生成器。具体来说,你的生成器和鉴别器层是用来处理维度1x28x28的图像的,这是MNIST或Fashion-MNIST数据集的维度。

我假设你正在尝试训练彩色图像(3通道)或不同的维度,在你的情况下-3x336x336。在您的示例中,我添加了一个tensor transform,它首先将任何维度的输入图像转换为维度-3x28x28的图像。

下面是创建自定义数据集和自定义数据加载器的代码示例。

from glob import glob
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from skimage import io
# Point this at the folder that holds your images.
path = 'your/image/path'
# Bug fix: the question uses PNG images, but the original globbed only
# '*.jpg' and would find nothing. Accept both extensions.
image_paths = glob(path + '/*.png') + glob(path + '/*.jpg')

img_size = 28
batch_size = 32

# ToPILImage converts the raw skimage array to PIL; Resize + CenterCrop
# force every image to img_size x img_size; Normalize maps all three
# channels to [-1, 1] to match the generator's Tanh output.
transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize(img_size),
        transforms.CenterCrop(img_size),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ]
)
class ImageDataset(Dataset):
    """Dataset over a list of image file paths; yields transformed image
    tensors only (no labels — the GAN is trained unsupervised).

    Args:
        paths: list of image file paths.
        transform: optional callable applied to each loaded image.
    """

    def __init__(self, paths, transform=None):
        self.paths = paths
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        image_path = self.paths[index]
        image = io.imread(image_path)
        # Bug fix: the original returned `image_tensor` even when no
        # transform was given, raising UnboundLocalError. Fall back to
        # the raw loaded image instead.
        if self.transform:
            return self.transform(image)
        return image
# Build the dataset and a shuffled loader; each batch has shape
# (batch_size, 3, img_size, img_size).
dataset = ImageDataset(image_paths, transform)

train_loader = DataLoader(dataset, batch_size=batch_size, num_workers=1, shuffle=True)

数据加载器生成维度为-batch_size x img_channels x img_dim x img_dim的图像张量,在本例中为-32x3x28x28

import torch
import torch.nn as nn
# Select the GPU when available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
class Discriminator(nn.Module):
    """MLP discriminator for 3x28x28 RGB images; outputs the probability
    (sigmoid) that the input image is real."""

    def __init__(self):
        super().__init__()
        # Same 2352 -> 2048 -> 1024 -> 512 -> 256 -> 1 stack, built in a loop.
        widths = [784 * 3, 2048, 1024, 512, 256]
        stages = []
        for n_in, n_out in zip(widths, widths[1:]):
            stages += [nn.Linear(n_in, n_out), nn.ReLU(), nn.Dropout(0.3)]
        stages += [nn.Linear(widths[-1], 1), nn.Sigmoid()]
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        # Flatten all 3 channels into one 2352-dim vector
        # (change required for 3-channel images).
        flat = x.view(x.size(0), 784 * 3)
        return self.model(flat)

# Move the 3-channel discriminator onto the selected device.
discriminator = Discriminator().to(device=device)

class Generator(nn.Module):
    """MLP generator: maps a 100-dim latent vector to a 3x28x28 RGB image
    with values in [-1, 1] (Tanh, matching the normalized training data)."""

    def __init__(self):
        super().__init__()
        # Same 100 -> 256 -> 512 -> 1024 -> 2048 -> 2352 stack, built in a loop.
        widths = [100, 256, 512, 1024, 2048]
        stages = []
        for n_in, n_out in zip(widths, widths[1:]):
            stages += [nn.Linear(n_in, n_out), nn.ReLU()]
        stages += [nn.Linear(widths[-1], 784 * 3), nn.Tanh()]
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        flat = self.model(x)
        # Reshape the flat 2352-dim output into 3-channel image form.
        return flat.view(x.size(0), 3, 28, 28)

# Move the 3-channel generator onto the selected device.
generator = Generator().to(device=device)
# TRAINING PARAMS
# Learning rate shared by both optimizers.
lr = 0.0001
num_epochs = 100
# Binary cross-entropy on the discriminator's sigmoid output.
loss_function = nn.BCELoss()
# Each network gets its own optimizer so the two can be stepped independently.
optimizer_discriminator = torch.optim.Adam(discriminator.parameters(), lr=lr)
optimizer_generator = torch.optim.Adam(generator.parameters(), lr=lr)

这是生成器和鉴别器的代码。我对生成器和鉴别器做了一些修改。注意,在Discriminator

中添加了以下层
nn.Linear(784*3, 2048),
nn.ReLU(),
nn.Dropout(0.3),
nn.Linear(2048, 1024),

和这些在Generator

nn.Linear(1024, 2048),
nn.ReLU(),
nn.Linear(2048, 784*3)

这是生成和区分正确尺寸的图像所必需的。

最后,这是你的训练循环-

# TRAINING LOOP (unlabeled custom images: the loader yields image tensors only).
for epoch in range(num_epochs):
    for n, real_samples in enumerate(train_loader):
        # Bug fix: use the actual batch size instead of the global
        # `batch_size`. With a custom dataset the last batch of an epoch
        # is usually smaller, and fixed-size label tensors would make
        # torch.cat / BCELoss fail there.
        current_batch = real_samples.size(0)
        # Data for training the discriminator.
        real_samples = real_samples.to(device=device)
        real_samples_labels = torch.ones((current_batch, 1)).to(device=device)
        latent_space_samples = torch.randn((current_batch, 100)).to(device=device)
        print(f'Latent space samples : {latent_space_samples.shape}')
        generated_samples = generator(latent_space_samples)
        generated_samples_labels = torch.zeros((current_batch, 1)).to(device=device)
        all_samples = torch.cat((real_samples, generated_samples))
        print(f'Real samples : {real_samples.shape}, generated samples : {generated_samples.shape}')
        all_samples_labels = torch.cat(
            (real_samples_labels, generated_samples_labels)
        )

        # Training the discriminator on real + fake samples together.
        discriminator.zero_grad()
        output_discriminator = discriminator(all_samples)
        loss_discriminator = loss_function(output_discriminator, all_samples_labels)
        loss_discriminator.backward()
        optimizer_discriminator.step()

        # Fresh latent noise for the generator step.
        latent_space_samples = torch.randn((current_batch, 100)).to(device=device)

        # Training the generator: it is rewarded when the discriminator
        # labels its output as real (hence `real_samples_labels`).
        generator.zero_grad()
        generated_samples = generator(latent_space_samples)
        output_discriminator_generated = discriminator(generated_samples)
        loss_generator = loss_function(output_discriminator_generated, real_samples_labels)
        loss_generator.backward()
        optimizer_generator.step()

        # Show loss once per epoch (at batch index batch_size - 1,
        # mirroring the original example).
        if n == batch_size - 1:
            print(f"Epoch: {epoch} Loss D.: {loss_discriminator}")
            print(f"Epoch: {epoch} Loss G.: {loss_generator}")

这是有效的,因为图像从784*3维度重塑到3*28*28维度。

这将工作,但如果你正在处理3通道的图像,你需要在你的生成器和鉴别器中分别使用 ConvTranspose2d 和 Conv2d 操作来对图像进行上采样和下采样。

如果您对使用 ConvTranspose2d / Conv2d 处理多维图像的示例感兴趣,请访问- https://drive.google.com/file/d/1gYiBHPu-r3kialO0klsTdE2RjBR50rMs/view?usp=sharing。要处理不同维度的图像,您必须修改Generator和Discriminator类中的图层。

最新更新