
我需要将GAN的输入图像尺寸从32 × 32扩展到128 × 128,应该怎样修改?这段代码并非完全由我自己编写,所以我对它有些困惑。

# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from torch.nn import Module, Sequential, Conv2d, ConvTranspose2d, LeakyReLU, BatchNorm2d, ReLU, Tanh, Sigmoid, BCELoss 
# %matplotlib inline
# plot images in a nxn grid
k = 0


def plot_images(imgs, grid_size=5):
    """Draw the first ``grid_size * grid_size`` images in a square grid and save it.

    Args:
        imgs: indexable collection of images in a layout ``plt.imshow`` accepts
            (presumably H x W x C arrays -- TODO confirm against the caller).
        grid_size: number of rows and of columns, e.g. 2 for a 2x2 grid or
            5 for a 5x5 grid.

    Side effects:
        Writes the finished grid to ``Figure<grid_size ** 2>.jpg`` in the
        current working directory and then closes the figure, so repeated
        calls (e.g. from a training loop) do not accumulate open figures.
    """
    fig = plt.figure(figsize=(8, 8))
    columns = rows = grid_size
    n_cells = columns * rows
    plt.title("Training Images")
    for i in range(1, n_cells + 1):
        # Stop early when there are fewer images than grid cells.
        if i > len(imgs):
            break
        fig.add_subplot(rows, columns, i)
        # Subplot positions are 1-based while the image collection is 0-based.
        plt.imshow(imgs[i - 1])
    # Save once, after the grid is complete; the name carries the cell count.
    plt.savefig("Figure" + str(n_cells) + ".jpg")
    plt.close(fig)
# Load the .npz archive that holds the training images.
imgs = np.load('32x32imgcompressed.npz')
# The images are read from the archive under the key 'arr_0'
# (imgs.files lists every array stored in the archive).
# Preview a 3x3 grid of them; expect low quality, the images are only 32x32.
plot_images(imgs['arr_0'], 3)
# The device is hard-coded to the CPU here; no GPU availability check is made.
dev = 'cpu'
device = torch.device(dev)
# To check the device name when running on CUDA:
#print ('Current cuda device name ', torch.cuda.get_device_name())
# Preparing custom dataset class - https://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class

class ArtDataset(Dataset):
    """Map-style dataset that serves images from an in-memory collection.

    Args:
        npz_imgs: indexable, sized collection of images. The script passes a
            float32 numpy array; each item is returned unchanged.
    """

    def __init__(self, npz_imgs):
        self.imgs = npz_imgs

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.imgs)

    def __getitem__(self, idx):
        """Return the image at ``idx``.

        A tensor index is converted to plain Python value(s) first so that it
        can be used to index the underlying collection.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        return self.imgs[idx]
imgs['arr_0'][0].dtype  # it will output float 64 i.e. double
# must convert it to float 32 (which is same as model weights)

# Preparing dataloader for training
transpose_imgs = np.transpose(
    # important: convert double -> float32 so the data matches the model weights
    np.float32(imgs['arr_0']),
    # move the last axis to position 1 (channels-last -> channels-first,
    # assuming the stored layout is N x H x W x C -- TODO confirm)
    (0, 3, 1, 2),
)
dset = ArtDataset(transpose_imgs)  # wrap the converted array in the dataset class
batch_size = 32
shuffle = True
dataloader = DataLoader(dataset=dset, batch_size=batch_size, shuffle=shuffle)
# Defining the Generator class
class Generator(Module):
    """DCGAN-style generator mapping a latent vector to a 3 x 32 x 32 image.

    Input:  tensor of shape (b_size, 100, 1, 1).
    Output: tensor of shape (b_size, 3, 32, 32) with values in [-1, 1].
    """

    def __init__(self):
        # The parent constructor must run before any sub-module is assigned,
        # otherwise nn.Module refuses to register ``self.gen``.
        super().__init__()
        self.gen = Sequential(
            ConvTranspose2d(in_channels=100, out_channels=512, kernel_size=4, stride=1, padding=0, bias=False),
            # the output from the above will be b_size, 512, 4, 4
            BatchNorm2d(num_features=512),  # for an input of size (b_size, C, H, W), num_features = C
            ReLU(inplace=True),
            ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1, bias=False),
            # the output from the above will be b_size, 256, 8, 8
            BatchNorm2d(num_features=256),
            ReLU(inplace=True),
            ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1, bias=False),
            # the output from the above will be b_size, 128, 16, 16
            BatchNorm2d(num_features=128),
            ReLU(inplace=True),
            ConvTranspose2d(in_channels=128, out_channels=3, kernel_size=4, stride=2, padding=1, bias=False),
            # the output from the above will be b_size, 3, 32, 32
            # NOTE(review): Tanh bounds the image to [-1, 1]; this assumes the
            # training images are scaled to the same range -- confirm.
            Tanh(),
        )

    def forward(self, input):
        """Generate a batch of images from a batch of latent vectors."""
        return self.gen(input)
# Instantiate the generator and move it to the selected device.
netG = Generator().to(device)

# Example latent batch: 2 noise vectors with 100 channels and 1x1 spatial size.
t = torch.randn(2, 100, 1, 1)
def init_weights(m):
    """Initialise one layer in place; intended for use with ``Module.apply``.

    Convolution and transposed-convolution weights are drawn from N(0, 0.02).
    Batch-norm scales are drawn from N(1, 0.02) and batch-norm biases are set
    to 0. Every other module type is left untouched.

    Args:
        m: the module to initialise.
    """
    # Conv2d is covered as well so that the discriminator's layers receive the
    # same initialisation as the generator's transposed convolutions.
    if isinstance(m, (Conv2d, ConvTranspose2d)):
        nn.init.normal_(m.weight, 0.0, 0.02)
    elif isinstance(m, BatchNorm2d):
        nn.init.normal_(m.weight, 1.0, 0.02)
        nn.init.constant_(m.bias, 0)
class Discriminator(Module):
    """DCGAN-style discriminator scoring 3 x 32 x 32 images as real or fake.

    Input:  tensor of shape (b_size, 3, 32, 32).
    Output: tensor of shape (b_size, 1, 1, 1) holding probabilities in [0, 1].
    """

    def __init__(self):
        # The parent constructor must run before any sub-module is assigned,
        # otherwise nn.Module refuses to register ``self.dis``.
        super().__init__()
        self.dis = Sequential(
            # input is (3, 32, 32)
            Conv2d(in_channels=3, out_channels=32, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 32, 16, 16
            LeakyReLU(0.2, inplace=True),
            Conv2d(in_channels=32, out_channels=32 * 2, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 32*2, 8, 8
            BatchNorm2d(32 * 2),
            LeakyReLU(0.2, inplace=True),
            Conv2d(in_channels=32 * 2, out_channels=32 * 4, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 32*4, 4, 4
            BatchNorm2d(32 * 4),
            LeakyReLU(0.2, inplace=True),
            Conv2d(in_channels=32 * 4, out_channels=32 * 8, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 256, 2, 2
            # NOTE: the spatial size here is 2x2, hence the final layer must use
            # a kernel size of 2 (it cannot be larger than the feature map)
            BatchNorm2d(32 * 8),
            LeakyReLU(0.2, inplace=True),
            Conv2d(in_channels=32 * 8, out_channels=1, kernel_size=2, stride=2, padding=0, bias=False),
            # output from above layer is b_size, 1, 1, 1
            # BCELoss expects probabilities, so squash the score into [0, 1]
            Sigmoid(),
        )

    def forward(self, input):
        """Return the probability that each image in the batch is real."""
        return self.dis(input)
# Example input batch for the discriminator: 2 images of shape 3 x 32 x 32
t = torch.randn(2, 3, 32, 32)
netD = Discriminator().to(device)
# initializing the weights of both networks (apply() visits every sub-module)
netD.apply(init_weights)
netG.apply(init_weights)
# Setting up optimizers for both Generator and Discriminator
opt_D = optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
opt_G = optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))
# Setting up the loss function - BCELoss (to check how far the predicted value is from real value)
loss = BCELoss()
epochs = 1000
# going over the entire dataset `epochs` times
for e in range(epochs):

    # pick each batch b of input images: shape of each full batch is (32, 3, 32, 32)
    for i, b in enumerate(dataloader):
        ## Update Discriminator ##
        # Loss on real images

        # gradients accumulate across backward passes, so clear them first
        opt_D.zero_grad()
        # compute the D model output; view(-1) flattens (b_size, 1, 1, 1) to (b_size,)
        yhat = netD(b.to(device)).view(-1)
        # real images carry the label 1
        target = torch.ones(len(b), dtype=torch.float, device=device)
        # calculate loss
        loss_real = loss(yhat, target)
        # accumulate the gradients of the real-image loss
        loss_real.backward()

        # Loss on fake images
        # Step 1: create noise to be fed as input to G
        noise = torch.randn(len(b), 100, 1, 1, device=device)
        # Step 2: feed noise to G to create fake images (reused when updating G)
        fake_img = netG(noise)
        # detach() keeps this backward pass from reaching G's parameters;
        # netD is called directly so it stays on `device`
        yhat = netD(fake_img.detach()).view(-1)
        # fake images carry the label 0
        target = torch.zeros(len(b), dtype=torch.float, device=device)
        # calculate loss
        loss_fake = loss(yhat, target)
        # accumulate the gradients of the fake-image loss
        loss_fake.backward()
        # total error on D (kept for inspection; gradients were already accumulated)
        loss_disc = loss_real + loss_fake
        # Update weights of D
        opt_D.step()

        #### Update Generator ####
        # clear gradient
        opt_G.zero_grad()
        # pass the fake images through the freshly updated D
        yhat = netD(fake_img).view(-1)
        # G wants D *to think* these are real images, so the label is 1
        target = torch.ones(len(b), dtype=torch.float, device=device)
        # calculate loss
        loss_gen = loss(yhat, target)
        # calculate gradients
        loss_gen.backward()
        # update weights on G
        opt_G.step()
        k += 1

        #### Plot some Generator images ####
        print(" Epoch %d and iteration %d " % (e, i))
        # during every epoch, plot images at every 100th iteration
        if i % 100 == 0:
            # convert the fake images from (b_size, 3, 32, 32) to (b_size, 32, 32, 3);
            # .detach().cpu() copies the tensor to host memory before the numpy conversion
            img_plot = fake_img.detach().cpu().permute(0, 2, 3, 1).numpy()
            plot_images(img_plot)



例如,一个32 × 32的图像通过一个stride=2的Conv2d会得到一个16 × 16的图像。由于鉴别器使用5个stride=2的Conv2d层,因此最终得到1 × 1的图像,换句话说就是单个向量(其维度是最后一层的通道数)。反过来,一个32 × 32的图像通过一个stride=2的ConvTranspose2d会变成64 × 64。因此,要把尺寸从32 × 32扩展到128 × 128(即两次翻倍),需要在生成器和鉴别器中相应地增加stride=2的层,如下所示。



class Generator(Module):
    """DCGAN-style generator mapping a latent vector to a 3 x 128 x 128 image.

    Compared with the 32 x 32 version, two extra stride-2 transposed
    convolutions are inserted; each one doubles the spatial size.

    Input:  tensor of shape (b_size, 100, 1, 1).
    Output: tensor of shape (b_size, 3, 128, 128) with values in [-1, 1].
    """

    def __init__(self):
        # The parent constructor must run before any sub-module is assigned,
        # otherwise nn.Module refuses to register ``self.gen``.
        super().__init__()
        self.gen = Sequential(
            ConvTranspose2d(in_channels=100, out_channels=512, kernel_size=4, stride=1, padding=0, bias=False),
            # the output from the above will be b_size, 512, 4, 4
            BatchNorm2d(num_features=512),  # for an input of size (b_size, C, H, W), num_features = C
            ReLU(inplace=True),
            ############################ new layer here ###########################
            ConvTranspose2d(in_channels=512, out_channels=512, kernel_size=4, stride=2, padding=1, bias=False),
            # the output from the above will be b_size, 512, 8, 8
            BatchNorm2d(num_features=512),
            ReLU(inplace=True),
            ############################ new layer here ###########################
            ConvTranspose2d(in_channels=512, out_channels=512, kernel_size=4, stride=2, padding=1, bias=False),
            # the output from the above will be b_size, 512, 16, 16
            BatchNorm2d(num_features=512),
            ReLU(inplace=True),
            ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1, bias=False),
            # the output from the above will be b_size, 256, 32, 32
            BatchNorm2d(num_features=256),
            ReLU(inplace=True),
            ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1, bias=False),
            # the output from the above will be b_size, 128, 64, 64
            BatchNorm2d(num_features=128),
            ReLU(inplace=True),
            ConvTranspose2d(in_channels=128, out_channels=3, kernel_size=4, stride=2, padding=1, bias=False),
            # the output from the above will be b_size, 3, 128, 128
            # NOTE(review): Tanh bounds the image to [-1, 1]; this assumes the
            # training images are scaled to the same range -- confirm.
            Tanh(),
        )

    def forward(self, input):
        """Generate a batch of images from a batch of latent vectors."""
        return self.gen(input)

class Discriminator(Module):
    """DCGAN-style discriminator scoring 3 x 128 x 128 images as real or fake.

    Input:  tensor of shape (b_size, 3, 128, 128).
    Output: tensor of shape (b_size, 1, 1, 1) holding probabilities in [0, 1].
    """

    def __init__(self):
        # The parent constructor must run before any sub-module is assigned,
        # otherwise nn.Module refuses to register ``self.dis``.
        super().__init__()
        self.dis = Sequential(
            # input is (3, 128, 128)
            Conv2d(in_channels=3, out_channels=32, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 32, 64, 64
            LeakyReLU(0.2, inplace=True),
            Conv2d(in_channels=32, out_channels=32 * 2, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 32*2, 32, 32
            BatchNorm2d(32 * 2),
            LeakyReLU(0.2, inplace=True),
            Conv2d(in_channels=32 * 2, out_channels=32 * 4, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 32*4, 16, 16
            BatchNorm2d(32 * 4),
            LeakyReLU(0.2, inplace=True),
            ############################ new layer here ###########################
            # no padding here: 16x16 -> floor((16 - 4) / 2) + 1 = 7
            Conv2d(in_channels=32 * 4, out_channels=32 * 4, kernel_size=4, stride=2, padding=0, bias=False),
            # output from above layer is b_size, 32*4, 7, 7
            BatchNorm2d(32 * 4),
            LeakyReLU(0.2, inplace=True),
            Conv2d(in_channels=32 * 4, out_channels=32 * 8, kernel_size=4, stride=2, padding=1, bias=False),
            # output from above layer is b_size, 256, 3, 3
            BatchNorm2d(32 * 8),
            LeakyReLU(0.2, inplace=True),
            # NOTE: a 2x2 kernel with stride 2 on a 3x3 map produces a single
            # output and does not read the last row and column of that map
            Conv2d(in_channels=32 * 8, out_channels=1, kernel_size=2, stride=2, padding=0, bias=False),
            # output from above layer is b_size, 1, 1, 1
            # BCELoss expects probabilities, so squash the score into [0, 1]
            Sigmoid(),
        )

    def forward(self, input):
        """Return the probability that each image in the batch is real."""
        return self.dis(input)


