I am trying to train an autoencoder on the MNIST dataset with Gaussian noise added. I use a DataLoader over the noisy MNIST dataset, but after adding the noise, the model that worked on the original MNIST dataset no longer works.
gauss_train_loader = torch.utils.data.DataLoader(train_gauss, batch_size=BATCH_SIZE, shuffle=True, drop_last=DROP_LAST)
gauss_test_loader = torch.utils.data.DataLoader(test_gauss, batch_size=BATCH_SIZE, shuffle=True, drop_last=DROP_LAST)
# length = 150
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(28 * 28, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 12),
            nn.ReLU(inplace=True),
            nn.Linear(12, 10))
        self.decoder = nn.Sequential(
            nn.Linear(10, 12),
            nn.ReLU(inplace=True),
            nn.Linear(12, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 28 * 28),
            nn.Tanh())

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
def train(train_loader, model, criterion, optimizer, num_epochs):
    epoch_losses = []
    for epoch in trange(num_epochs):
        for data in gauss_train_loader:
            img = data[0].to(device)
            # torch.Size of img = [400, 784] and of data[0] = [400, 1, 28, 28]
            # We don't use the target data[1]
            img = img.view(img.size(0), -1)
            output = model(img)
            loss = criterion(output, img)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        loss_value = loss.item()
        epoch_losses.append(loss_value)
    return epoch_losses
# Train the autoencoder using the mnist train set
epoch_losses = train(gauss_train_loader, model, criterion, optimizer, NUM_EPOCHS)
I don't see what the difference is between the original dataset and the noisy one, or how I should adjust the autoencoder. This is how I create the noisy dataset:
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

def add_noise(dataset):
    noisy_data = []
    for data in dataset:
        img, _ = data[0], data[1]
        noisy_data += torch.tensor(random_noise(img, mode='gaussian', mean=0, var=0.05, clip=True))
    return noisy_data

train_gauss = add_noise(mnist_trainset)
test_gauss = add_noise(mnist_testset)
The easiest way to do what you need is to extend the MNIST dataset class. I made a couple of small changes to your implementation and it worked.
import torch
from torch import nn
from torchvision import datasets
from torchvision import transforms
from torch.optim import Adam
from PIL import Image
from typing import Optional
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(28 * 28, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 12),
            nn.ReLU(inplace=True),
            nn.Linear(12, 10))
        self.decoder = nn.Sequential(
            nn.Linear(10, 12),
            nn.ReLU(inplace=True),
            nn.Linear(12, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 28 * 28),
            nn.Tanh())

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
def train(train_loader, model, criterion, optimizer, num_epochs):
    epoch_losses = []
    for epoch in range(num_epochs):
        for data in train_loader:
            img = data[0].to(device)
            img = img.view(img.size(0), -1)
            output = model(img)
            loss = criterion(output, img)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        loss_value = loss.item()
        epoch_losses.append(loss_value)
    return epoch_losses
class your_noisy_dataset(datasets.MNIST):
    def __getitem__(self, index: int):
        img, target = self.data[index], int(self.targets[index])
        img = Image.fromarray(img.numpy(), mode='L')
        if self.transform is not None:
            img = self.transform(img)
        # add Gaussian noise on top of the transformed image
        img += torch.normal(0, 0.05, size=img.size())
        if self.target_transform is not None:
            target = self.target_transform(target)
        return img, target
mnist_trainset = your_noisy_dataset(root='./data', train=True, download=True, transform=transforms.ToTensor())
mnist_testset = your_noisy_dataset(root='./data', train=False, download=True, transform=transforms.ToTensor())

model = Autoencoder()
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=0.05)
device = torch.device("cpu")
# the dataset is passed directly, so the loop sees one [1, 28, 28] image at a time;
# wrapping it in a DataLoader works just as well
epoch_losses = train(mnist_trainset, model, criterion, optimizer, 1)
The main difference is in the __getitem__ method, which now returns the image with the noise mask added instead of the plain image. (You can read more about the Dataset class in the PyTorch documentation.)
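As a quick sanity check (my own addition, not part of the original answer), you can index the noisy dataset next to a plain MNIST dataset and compare one sample:

clean_set = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
clean_img, clean_target = clean_set[0]
noisy_img, noisy_target = mnist_trainset[0]   # mnist_trainset is the your_noisy_dataset instance from above
assert clean_target == noisy_target           # the labels are untouched
print((noisy_img - clean_img).abs().mean())   # roughly the scale of the added noise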
Instead of generating a new dataset, you can also apply the transformation on the fly by defining your own RandomNoise transform, like this:
from skimage.util import random_noise

class RandomNoise(object):
    def __init__(self, mode="gaussian", mean=0, var=0.05, clip=True):
        self.mode = mode
        self.mean = mean
        self.var = var
        self.clip = clip

    def __call__(self, image):
        image = random_noise(image, mode=self.mode, mean=self.mean, var=self.var, clip=self.clip)
        # random_noise returns a float64 array, so cast back to float32 for the model
        return torch.tensor(image, dtype=torch.float32)
Then define:
t = transforms.Compose([transforms.ToTensor(), RandomNoise(mean=0, var=0.05)])
train_gauss = datasets.MNIST(root='./data', train=True, download=True, transform=t)
val_gauss = datasets.MNIST(root='./data', train=False, download=True, transform=t)
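From there the same training loop from above applies unchanged. A minimal usage sketch (the batch size and epoch count here are placeholder values of my own, not part of the original answer):

train_gauss_loader = torch.utils.data.DataLoader(train_gauss, batch_size=128, shuffle=True)
val_gauss_loader = torch.utils.data.DataLoader(val_gauss, batch_size=128, shuffle=False)
epoch_losses = train(train_gauss_loader, model, criterion, optimizer, 5)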