在MNIST数据集上运行卷积(conv)自动编码器



我编写了以下卷积自动编码器的实现:

class ConvolutionEncoder(nn.Module):
    """Encoder of the convolutional autoencoder (version posted in the question).

    Intended pipeline: three stages of 3x3 convolution -> ReLU -> 2x2 max-pool.
    Each pool is built with ``return_indices=True`` and the indices of the most
    recent forward pass are kept on the module (``maxpoolN_index``).

    NOTE(review): ``forward`` contains the bug this page is about -- see the
    comment on the second max-pool below.
    """

    def __init__(self):
        super().__init__()
        # 3x3 kernels with stride 1 and padding 1; 1 input channel, 32 feature channels.
        self.conv1    = nn.Conv2d(1,32,3,stride=1,padding=1,dilation=1)
        self.maxpool1 = nn.MaxPool2d(2,padding=0,return_indices=True)
        self.conv2    = nn.Conv2d(32,32,3,stride=1,padding=1,dilation=1)
        self.maxpool2 = nn.MaxPool2d(2,padding=0,return_indices=True)
        self.conv3    = nn.Conv2d(32,32,3,stride=1,padding=1,dilation=1)
        self.maxpool3 = nn.MaxPool2d(2,padding=0,return_indices=True)
        self.relu     = nn.ReLU()
        # Intermediate activations recorded by the most recent forward() call.
        self.middle   = []
        # Pooling indices recorded by the most recent forward() call.
        self.maxpool1_index = None
        self.maxpool2_index = None
        self.maxpool3_index = None

    def forward(self, image):
        """Encode ``image`` and return the output of the third max-pool.

        Side effects: overwrites ``self.middle`` and the three
        ``self.maxpoolN_index`` attributes.
        """
        temp = self.conv1(image)
        temp = self.relu(temp)
        self.middle = [temp]
        temp, maxpool1_index = self.maxpool1(temp)
        self.middle.append(temp)
        temp = self.conv2(temp)
        temp = self.relu(temp)
        self.middle.append(temp)
        # NOTE(review): BUG -- the pooled tensor is bound to ``feature_map``
        # rather than ``temp``.  ``temp`` still holds the un-pooled conv2
        # activation, so conv3 below runs on that tensor and this
        # ``feature_map`` is overwritten by the third max-pool.
        feature_map, maxpool2_index = self.maxpool2(temp)
        temp = self.conv3(temp)
        temp = self.relu(temp)
        self.middle.append(temp)
        feature_map, maxpool3_index = self.maxpool3(temp)
        # Publish the indices only after the whole pass has completed.
        self.maxpool1_index = maxpool1_index
        self.maxpool2_index = maxpool2_index
        self.maxpool3_index = maxpool3_index
        return feature_map
class ConvolutionDecoder(nn.Module):
    """Decoder of the convolutional autoencoder.

    Mirrors a three-stage conv/max-pool encoder.  Each stage un-pools with the
    indices recorded by the matching encoder pool, applies ReLU and then a
    3x3 transposed convolution (stride 1, padding 1, so the spatial size is
    unchanged).  The last stage is followed by a sigmoid.
    """

    def __init__(self):
        super().__init__()
        self.unmaxpool1    = nn.MaxUnpool2d(2,padding=0)
        self.conv1         = nn.ConvTranspose2d(32,32,3,stride=1,padding=1,dilation=1)
        self.unmaxpool2    = nn.MaxUnpool2d(2,padding=0)
        self.conv2         = nn.ConvTranspose2d(32,32,3,stride=1,padding=1,dilation=1)
        self.unmaxpool3    = nn.MaxUnpool2d(2,padding=0)
        self.conv3         = nn.ConvTranspose2d(32,1,3,stride=1,padding=1,dilation=1)
        self.relu          = nn.ReLU()
        self.sigmoid       = nn.Sigmoid()
        # Intermediate activations recorded by the most recent forward() call.
        self.middle        = []

    def forward(self, feature_map, maxpool_index, output_size=None):
        """Reconstruct an image from ``feature_map``.

        Args:
            feature_map: output of the encoder's last max-pool.
            maxpool_index: the three pooling-index tensors, deepest pool first
                (``[pool3, pool2, pool1]``).
            output_size: optional spatial size ``(H, W)`` for the final
                un-pool.  ``None`` keeps ``MaxUnpool2d``'s default of twice
                the input size, which is only right when the original image
                height and width are even.

        Returns:
            The sigmoid-activated reconstruction.

        Side effects: overwrites ``self.middle``.
        """
        # A 2x2 max-pool floors odd sizes (7x7 -> 3x3) while MaxUnpool2d by
        # default restores only twice its input (3x3 -> 6x6), which no longer
        # matches the next set of indices.  The indices of the next-shallower
        # pool have exactly the size that has to be restored, provided the
        # encoder's convolutions between pools keep the spatial size (true for
        # 3x3 / stride 1 / padding 1 convolutions), so use them as the target.
        size_after_unpool1 = tuple(maxpool_index[1].shape[-2:])
        size_after_unpool2 = tuple(maxpool_index[2].shape[-2:])

        temp = self.unmaxpool1(feature_map, maxpool_index[0], output_size=size_after_unpool1)
        self.middle = [temp]
        temp = self.relu(temp)
        temp = self.conv1(temp)
        self.middle.append(temp)
        temp = self.unmaxpool2(temp, maxpool_index[1], output_size=size_after_unpool2)
        self.middle.append(temp)
        temp = self.relu(temp)
        temp = self.conv2(temp)
        temp = self.unmaxpool3(temp, maxpool_index[2], output_size=output_size)
        self.middle.append(temp)
        temp = self.relu(temp)
        temp = self.conv3(temp)
        reconstructed_image = self.sigmoid(temp)
        return reconstructed_image
class ConvolutionAutoencoder(nn.Module):
    """Chains a ``ConvolutionEncoder`` and a ``ConvolutionDecoder``."""

    def __init__(self):
        super().__init__()
        self.encoder = ConvolutionEncoder()
        self.decoder = ConvolutionDecoder()

    def forward(self, image):
        """Encode ``image`` and return the decoder's reconstruction of it."""
        encoder = self.encoder
        # The encoder must run first: it records the pooling indices as a
        # side effect of its forward pass.
        latent = encoder(image)
        # The decoder consumes the indices deepest pool first.
        pool_indices = [
            encoder.maxpool3_index,
            encoder.maxpool2_index,
            encoder.maxpool1_index,
        ]
        return self.decoder(latent, pool_indices)
# Load the training dataloader
train_data_transformed = torchvision.datasets.MNIST(root="/MNIST", train=True, download=True,transform=torchvision.transforms.ToTensor())
train_dataloader = DataLoader(train_data_transformed, batch_size=1024)
# Load the test dataloader
test_data_transformed = torchvision.datasets.MNIST(root="/MNIST", train=False, download=True,transform=torchvision.transforms.ToTensor())
test_dataloader = DataLoader(test_data_transformed, batch_size=1024)
conv_autoencoder = ConvolutionAutoencoder()
conv_optimizer   = torch.optim.AdamW(conv_autoencoder.parameters())
conv_MSELoss     = nn.MSELoss()
# NOTE(review): ``epochs`` is not defined in this snippet; it must be set
# before this loop runs.
for conv_epoch_idx in tqdm(range(epochs)):
    for conv_batch_idx, (imgs, _) in enumerate(train_dataloader):
        conv_optimizer.zero_grad()
        reconstructed = conv_autoencoder(imgs)
        # An autoencoder is trained to reproduce its own input, so the
        # reconstruction target is the image batch itself.
        conv_loss = conv_MSELoss(reconstructed, imgs)
        conv_loss.backward()
        conv_optimizer.step()

我得到以下错误:

期望索引(indices)的形状与输入张量([1024,32,14,14])相同,但得到的索引张量形状为[1024,32,7,7]

据我所知,这个错误出自 temp = self.unmaxpool2(temp, maxpool_index[1]) 这一行。我希望网络有三个Conv2d层和三个最大池化(max pool)层,并想在MNIST数据集上运行这个自动编码器。我猜是我提供的张量尺寸不匹配,但我不知道该如何修正尺寸才能让它正常工作。能否建议一下如何解决这个问题?

编码器的前向传播(forward)中有一个错误:您把第二个maxpool的输出命名为feature_map,而不是temp,而后面的层使用的是temp这个名字。因此conv3接收到的是第二次池化之前的张量,第二次池化的结果被丢弃了。

这样做可以使您的编码器按预期工作(注意:对于28×28的MNIST图像,第三次池化会把7×7变为3×3,而MaxUnpool2d默认只会还原为6×6,因此解码器中的反池化还需要通过output_size参数指定目标尺寸):

class ConvolutionEncoder(nn.Module):
    """Encoder of the convolutional autoencoder (corrected version).

    Three stages of 3x3 convolution -> ReLU -> 2x2 max-pool.  The pooling
    indices and a few intermediate activations of the most recent forward
    pass are kept on the module.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3,
                               stride=1, padding=1, dilation=1)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, padding=0, return_indices=True)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3,
                               stride=1, padding=1, dilation=1)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, padding=0, return_indices=True)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3,
                               stride=1, padding=1, dilation=1)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, padding=0, return_indices=True)
        self.relu = nn.ReLU()
        # Activations recorded by the most recent forward() call.
        self.middle = []
        # Pooling indices recorded by the most recent forward() call.
        self.maxpool1_index = self.maxpool2_index = self.maxpool3_index = None

    def forward(self, image):
        """Encode ``image`` and return the output of the third max-pool.

        Side effects: overwrites ``self.middle`` and the three
        ``self.maxpoolN_index`` attributes.
        """
        # Stage 1
        temp = self.relu(self.conv1(image))
        self.middle = [temp]
        temp, first_indices = self.maxpool1(temp)
        self.middle.append(temp)

        # Stage 2
        temp = self.relu(self.conv2(temp))
        self.middle.append(temp)
        temp, second_indices = self.maxpool2(temp)

        # Stage 3
        temp = self.relu(self.conv3(temp))
        self.middle.append(temp)
        feature_map, third_indices = self.maxpool3(temp)

        # Publish the indices only once the whole pass has succeeded.
        self.maxpool1_index, self.maxpool2_index, self.maxpool3_index = (
            first_indices,
            second_indices,
            third_indices,
        )
        return feature_map

相关内容

  • 没有找到相关文章

最新更新