What are the best practices for increasing the input size of a GAN?



I need to scale my GAN's input from 32 x 32 images up to 128 x 128. How can I do that? I didn't write all of this code myself, and it confuses me a bit.

# -*- coding: utf-8 -*-
"""gans.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/18pkC8YJZRV0K0gqgixHupy55Kwhs1QK3
"""
# Commented out IPython magic to ensure Python compatibility.
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torch.nn import Module, Sequential, Conv2d, ConvTranspose2d, LeakyReLU, BatchNorm2d, ReLU, Tanh, Sigmoid, BCELoss 
# %matplotlib inline
# plot images in an n x n grid
k = 0
def plot_images(imgs, grid_size = 5):
    """
    imgs: vector containing all the numpy images
    grid_size: 2x2 or 5x5 grid containing images
    """
    fig = plt.figure(figsize = (8, 8))
    columns = rows = grid_size
    plt.title("Training Images")
    for i in range(1, columns*rows + 1):
        fig.add_subplot(rows, columns, i)
        plt.axis("off") # must come after add_subplot so it applies to the newly created axes
        plt.imshow(imgs[i])
    plt.savefig("Figure" + str(k) + ".jpg") # save the finished grid once per call; k is the global iteration counter
# load the numpy vector containing image representations
imgs = np.load('32x32imgcompressed.npz')
# to check all the files contained in it
imgs.files
# this is where all our images are saved
imgs['arr_0'].shape
# pls ignore the poor quality of the images as we are working with 32x32 sized images.
plot_images(imgs['arr_0'], 3)
# Always good to check if gpu support is available or not
dev = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(dev)
# To check the device name
# print('Current cuda device name ', torch.cuda.get_device_name())
# Preparing custom dataset class - https://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class

class ArtDataset(Dataset):

    def __init__(self, npz_imgs):
        self.imgs = npz_imgs

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        image = self.imgs[idx]
        return image
imgs['arr_0'][0].dtype # it will output float 64 i.e. double
# must convert it to float 32 (which is same as model weights)
np.float32(imgs['arr_0']).dtype

# Preparing dataloader for training
transpose_imgs = np.transpose(
    np.float32(imgs['arr_0']), # imp step to convert double -> float (by default numpy uses double as data type)
    (0, 3, 1, 2) # tuple to describe how to rearrange the dimensions
)
dset = ArtDataset(transpose_imgs) # passing the npz variable to the constructor class
batch_size = 32
shuffle = True
dataloader = DataLoader(dataset = dset, batch_size = batch_size, shuffle = shuffle)
# Defining the Generator class
class Generator(Module):
    def __init__(self):
        # calling constructor of parent class
        super().__init__()
        self.gen = Sequential(
            ConvTranspose2d(in_channels = 100, out_channels = 512, kernel_size = 4, stride = 1, padding = 0, bias = False),
            # the output from the above will be b_size, 512, 4, 4
            BatchNorm2d(num_features = 512), # From an input of size (b_size, C, H, W), pick num_features = C
            ReLU(inplace = True),
            ConvTranspose2d(in_channels = 512, out_channels = 256, kernel_size = 4, stride = 2, padding = 1, bias = False),
            # the output from the above will be b_size, 256, 8, 8
            BatchNorm2d(num_features = 256),
            ReLU(inplace = True),
            ConvTranspose2d(in_channels = 256, out_channels = 128, kernel_size = 4, stride = 2, padding = 1, bias = False),
            # the output from the above will be b_size, 128, 16, 16
            BatchNorm2d(num_features = 128),
            ReLU(inplace = True),
            ConvTranspose2d(in_channels = 128, out_channels = 3, kernel_size = 4, stride = 2, padding = 1, bias = False),
            # the output from the above will be b_size, 3, 32, 32
            Tanh()
        )

    def forward(self, input):
        return self.gen(input)
netG = Generator().to(device)
print(netG)

t = torch.randn(2, 100, 1, 1)
netG(t.to(device)).shape
def init_weights(m):
    # DCGAN-style init: conv layers get N(0, 0.02), batch-norm weights get N(1, 0.02)
    # (including Conv2d here so the Discriminator's layers are initialized too)
    if type(m) in (ConvTranspose2d, Conv2d):
        nn.init.normal_(m.weight, 0.0, 0.02)
    elif type(m) == BatchNorm2d:
        nn.init.normal_(m.weight, 1.0, 0.02)
        nn.init.constant_(m.bias, 0)

netG.apply(init_weights)
print(netG)
class Discriminator(Module):
    def __init__(self):
        super().__init__()
        self.dis = Sequential(
            # input is (3, 32, 32)
            Conv2d(in_channels = 3, out_channels = 32, kernel_size = 4, stride = 2, padding = 1, bias = False),
            # output from above layer is b_size, 32, 16, 16
            LeakyReLU(0.2, inplace = True),
            Conv2d(in_channels = 32, out_channels = 32*2, kernel_size = 4, stride = 2, padding = 1, bias = False),
            # output from above layer is b_size, 32*2, 8, 8
            BatchNorm2d(32 * 2),
            LeakyReLU(0.2, inplace = True),
            Conv2d(in_channels = 32*2, out_channels = 32*4, kernel_size = 4, stride = 2, padding = 1, bias = False),
            # output from above layer is b_size, 32*4, 4, 4
            BatchNorm2d(32 * 4),
            LeakyReLU(0.2, inplace = True),
            Conv2d(in_channels = 32*4, out_channels = 32*8, kernel_size = 4, stride = 2, padding = 1, bias = False),
            # output from above layer is b_size, 32*8, 2, 2
            # NOTE: spatial size of this layer is 2x2, hence in the final layer the kernel size must be 2 (or anything smaller than 4)
            BatchNorm2d(32 * 8),
            LeakyReLU(0.2, inplace = True),
            Conv2d(in_channels = 32*8, out_channels = 1, kernel_size = 2, stride = 2, padding = 0, bias = False),
            # output from above layer is b_size, 1, 1, 1
            Sigmoid()
        )

    def forward(self, input):
        return self.dis(input)

# As an example, to show the shape of the output from the discriminator - it must come out as (b_size, 1, 1, 1)
t = torch.randn(2, 3, 32, 32)
netD = Discriminator().to(device)
netD(t.to(device)).shape
# initializing the weights
netD.apply(init_weights)
print(netD)
# Setting up optimizers for both Generator and Discriminator
opt_D = optim.Adam(netD.parameters(), lr = 0.0002, betas= (0.5, 0.999))
opt_G = optim.Adam(netG.parameters(), lr = 0.0002, betas= (0.5, 0.999))
# Setting up the loss function - BCELoss (to check how far the predicted value is from real value)
loss = BCELoss()
# TRAINING GANS
epochs = 1000
# going over the entire dataset 1000 times
for e in range(epochs):

    # pick each batch b of input images: shape of each batch is (32, 3, 32, 32)
    for i, b in enumerate(dataloader):

        ##########################
        ## Update Discriminator ##
        ##########################

        # Loss on real images

        # clear the gradient
        opt_D.zero_grad() # set the gradients to 0 at start of each loop because gradients are accumulated on subsequent backward passes
        # compute the D model output
        yhat = netD(b.to(device)).view(-1) # view(-1) reshapes a 4-d tensor of shape (2,1,1,1) to a 1-d tensor with 2 values only
        # specify target labels or true labels
        target = torch.ones(len(b), dtype = torch.float, device = device)
        # calculate loss
        loss_real = loss(yhat, target)
        # calculate gradients - or rather accumulation of gradients on loss tensor
        loss_real.backward()

        # Loss on fake images

        # generate a batch of fake images using G
        # Step 1: create noise to be fed as input to G
        noise = torch.randn(len(b), 100, 1, 1, device = device)
        # Step 2: feed noise to G to create a fake img (this will be reused when updating G)
        fake_img = netG(noise)
        # compute the D model output on fake images
        # (keep netD on `device`; the original netD.cpu()(...) call silently moved the model off the GPU)
        yhat = netD(fake_img.detach()).view(-1) # detach() so G's gradients are not computed during D's update
        # specify target labels
        target = torch.zeros(len(b), dtype = torch.float, device = device)
        # calculate loss
        loss_fake = loss(yhat, target)
        # calculate gradients
        loss_fake.backward()
        # total error on D
        loss_disc = loss_real + loss_fake
        # Update weights of D
        opt_D.step()

        ##########################
        #### Update Generator ####
        ##########################

        # clear gradient
        opt_G.zero_grad()
        # pass fake image through D
        yhat = netD(fake_img).view(-1)
        # specify target variables - remember G wants D *to think* these are real images, so the label is 1
        target = torch.ones(len(b), dtype = torch.float, device = device)
        # calculate loss
        loss_gen = loss(yhat, target)
        # calculate gradients
        loss_gen.backward()
        # update weights on G
        opt_G.step()

        k += 1

        ####################################
        #### Plot some Generator images ####
        ####################################
        print(" Epoch %d and iteration %d " % (e, i))
        # during every epoch, plot images at every 100th iteration
        if i % 100 == 0:
            # convert the fake images from (b_size, 3, 32, 32) to (b_size, 32, 32, 3) for plotting
            img_plot = np.transpose(fake_img.detach().cpu(), (0, 2, 3, 1)) # .detach().cpu() copies the fake_img tensor to host memory first
            plot_images(img_plot)
            print("********************")


I've tried adjusting the channels of the different networks, but I only end up with errors. I tried to look for help elsewhere, but I can't seem to find anyone with expertise in this area. I've been told I may need to rework all of the networks to fit the new image size.

Your generator uses ConvTranspose2d layers and your discriminator uses Conv2d layers. You need to understand that the stride parameter of these two layers multiplies (for ConvTranspose2d) or divides (for Conv2d) the spatial size of the image. Note that the output size of a layer is also affected by the padding parameter: with kernel_size=4 and stride=2, you need padding=1 on both Conv2d and ConvTranspose2d for the size to be exactly divided or multiplied by the stride.
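For reference, these are the output-size formulas from the PyTorch documentation, written out as small helpers (simplified by assuming dilation=1 and output_padding=0, which is what your code uses):

# Output spatial size for Conv2d and ConvTranspose2d
# (simplified: dilation = 1, output_padding = 0)
def conv2d_out(size, kernel_size, stride, padding):
    return (size + 2 * padding - kernel_size) // stride + 1

def convtranspose2d_out(size, kernel_size, stride, padding):
    return (size - 1) * stride - 2 * padding + kernel_size

print(conv2d_out(32, kernel_size=4, stride=2, padding=1))           # 16: exactly halved
print(convtranspose2d_out(32, kernel_size=4, stride=2, padding=1))  # 64: exactly doubled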

For example, a 32 x 32 image passed through a Conv2d with stride=2 gives a 16 x 16 image. Since your discriminator uses five Conv2d layers, each with stride=2, you end up with a 1x1 image, or in other words a single vector (whose dimension is the number of channels of the last layer). Conversely, a 32 x 32 image passed through a ConvTranspose2d with stride=2 comes out as 64 x 64.
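You can verify this directly with throwaway layers (a minimal check, using the same kernel_size/stride/padding settings as your networks):

import torch
from torch.nn import Conv2d, ConvTranspose2d

x = torch.randn(1, 3, 32, 32)
down = Conv2d(in_channels=3, out_channels=32, kernel_size=4, stride=2, padding=1, bias=False)
up = ConvTranspose2d(in_channels=3, out_channels=32, kernel_size=4, stride=2, padding=1, bias=False)
print(down(x).shape)  # torch.Size([1, 32, 16, 16])
print(up(x).shape)    # torch.Size([1, 32, 64, 64])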

To handle 128x128 images, one option would be to change the stride of some of the layers, but I wouldn't recommend it.

The other option is to add new stride=2 layers to both the generator and the discriminator. Don't forget to add BatchNorm2d and ReLU (LeakyReLU in the discriminator) after them. I prefer this option, because a 128x128 image is more complex than a 32x32 one, so you need more parameters, and merely changing strides adds none.

class Generator(Module):
    def __init__(self):
        # calling constructor of parent class
        super().__init__()
        self.gen = Sequential(
            ConvTranspose2d(in_channels=100, out_channels=512, kernel_size=4, stride=1, padding=0, bias=False),
            # the output from the above will be b_size, 512, 4, 4
            BatchNorm2d(num_features=512),  # From an input of size (b_size, C, H, W), pick num_features = C
            ReLU(inplace=True),
            ############################ new layer here ############################
            ConvTranspose2d(in_channels=512, out_channels=512, kernel_size=4, stride=2, padding=1, bias=False),
            # output: b_size, 512, 8, 8
            BatchNorm2d(num_features=512),
            ReLU(inplace=True),
            #########################################################################
            ############################ new layer here ############################
            ConvTranspose2d(in_channels=512, out_channels=512, kernel_size=4, stride=2, padding=1, bias=False),
            # output: b_size, 512, 16, 16
            BatchNorm2d(num_features=512),
            ReLU(inplace=True),
            #########################################################################
            ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1, bias=False),
            # output: b_size, 256, 32, 32
            BatchNorm2d(num_features=256),
            ReLU(inplace=True),
            ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1, bias=False),
            # output: b_size, 128, 64, 64
            BatchNorm2d(num_features=128),
            ReLU(inplace=True),
            ConvTranspose2d(in_channels=128, out_channels=3, kernel_size=4, stride=2, padding=1, bias=False),
            # output: b_size, 3, 128, 128
            Tanh()
        )

    def forward(self, input):
        return self.gen(input)

class Discriminator(Module):
    def __init__(self):
        super().__init__()
        self.dis = Sequential(
            # input is (3, 128, 128)
            Conv2d(in_channels=3, out_channels=32, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 32, 64, 64
            LeakyReLU(0.2, inplace=True),
            Conv2d(in_channels=32, out_channels=32 * 2, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 32*2, 32, 32
            BatchNorm2d(32 * 2),
            LeakyReLU(0.2, inplace=True),
            Conv2d(in_channels=32 * 2, out_channels=32 * 4, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 32*4, 16, 16
            BatchNorm2d(32 * 4),
            LeakyReLU(0.2, inplace=True),
            ############################ new layer here ############################
            Conv2d(in_channels=32 * 4, out_channels=32 * 4, kernel_size=4, stride=2, padding=0, bias=False),
            # output from above layer is b_size, 32*4, 7, 7 (padding=0 here, so not an exact halving)
            BatchNorm2d(32 * 4),
            LeakyReLU(0.2, inplace=True),
            #########################################################################
            Conv2d(in_channels=32 * 4, out_channels=32 * 8, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 32*8, 3, 3
            # NOTE: spatial size of this layer is 3x3, hence in the final layer the kernel size must be smaller than 4
            BatchNorm2d(32 * 8),
            LeakyReLU(0.2, inplace=True),
            Conv2d(in_channels=32 * 8, out_channels=1, kernel_size=2, stride=2, padding=0, bias=False),
            # output from above layer is b_size, 1, 1, 1
            Sigmoid()
        )

    def forward(self, input):
        return self.dis(input)
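As in your original notebook, a quick shape check (a sketch, reusing the device variable from your code) confirms the new sizes:

netG = Generator().to(device)
netD = Discriminator().to(device)

noise = torch.randn(2, 100, 1, 1, device=device)
fake = netG(noise)
print(fake.shape)        # torch.Size([2, 3, 128, 128])
print(netD(fake).shape)  # torch.Size([2, 1, 1, 1])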

This should work. You may want to change the number of channels to reduce the number of parameters. I haven't changed the rest of your code, but a few small adjustments will be needed there too (nothing crazy); for instance, the real images fed to the discriminator must now be 128x128.
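If you do tweak the channel counts, counting parameters is a cheap way to compare configurations (a standard PyTorch idiom, nothing specific to this code):

def count_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print("G parameters:", count_params(netG))
print("D parameters:", count_params(netD))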
