I want to create and train an AutoEncoder to extract features and then use those features in a clustering algorithm. Right now I get an error when computing the loss:

RuntimeError: The size of tensor a (224) must match the size of tensor b (244) at non-singleton dimension 3

and a warning:

UserWarning: Using a target size (torch.Size([1, 3, 224, 244])) that is different to the input size (torch.Size([1, 3, 224, 224])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
  return F.mse_loss(input, target, reduction=self.reduction)
I am using PyTorch.
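For reference, a minimal sketch of my own (using only the shapes quoted in the messages above) that triggers the same warning followed by the same RuntimeError:

import torch
import torch.nn as nn

criterion = nn.MSELoss()
output = torch.randn(1, 3, 224, 224)  # shape of the model output
target = torch.randn(1, 3, 224, 244)  # shape of the input image
loss = criterion(output, target)      # emits the UserWarning, then raises the RuntimeError

Since 224 and 244 cannot be broadcast against each other in dimension 3, F.mse_loss warns about the size mismatch and then fails.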
Can someone tell me what is going on here? As far as I can tell, the input and output in the warning and the error are the same size, yet it says they are different. The summary of the input and output image sizes is as follows:
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 16, 112, 112] 448
ReLU-2 [-1, 16, 112, 112] 0
Conv2d-3 [-1, 32, 56, 56] 4,640
ReLU-4 [-1, 32, 56, 56] 0
Conv2d-5 [-1, 64, 18, 18] 100,416
ReLU-6 [-1, 64, 18, 18] 0
Conv2d-7 [-1, 128, 3, 3] 401,536
ReLU-8 [-1, 128, 3, 3] 0
Conv2d-9 [-1, 256, 1, 1] 295,168
ConvTranspose2d-10 [-1, 128, 3, 3] 295,040
ReLU-11 [-1, 128, 3, 3] 0
ConvTranspose2d-12 [-1, 64, 12, 12] 401,472
ReLU-13 [-1, 64, 12, 12] 0
ConvTranspose2d-14 [-1, 24, 28, 28] 75,288
ReLU-15 [-1, 24, 28, 28] 0
ConvTranspose2d-16 [-1, 16, 56, 56] 3,472
ReLU-17 [-1, 16, 56, 56] 0
ConvTranspose2d-18 [-1, 8, 111, 111] 1,160
ReLU-19 [-1, 8, 111, 111] 0
ConvTranspose2d-20 [-1, 3, 224, 224] 603
Sigmoid-21 [-1, 3, 224, 224] 0
================================================================
Total params: 1,579,243
Trainable params: 1,579,243
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 9.94
Params size (MB): 6.02
Estimated Total Size (MB): 16.54
----------------------------------------------------------------
Min Value of input Image = tensor(0.0627)
Max Value of input Image = tensor(0.5098)
Input Image shape = torch.Size([1, 3, 224, 244])
Output Image shape = torch.Size([1, 3, 224, 224])
My autoencoder class is:
import torch
import torch.nn as nn

class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=2, padding=1),    # b, 16, 112, 112
            nn.ReLU(True),
            nn.Conv2d(16, 32, 3, stride=2, padding=1),   # b, 32, 56, 56
            nn.ReLU(True),
            nn.Conv2d(32, 64, 7, stride=3, padding=1),   # b, 64, 18, 18
            nn.ReLU(True),
            nn.Conv2d(64, 128, 7, stride=5, padding=1),  # b, 128, 3, 3
            nn.ReLU(True),
            nn.Conv2d(128, 256, 3, stride=5, padding=1)  # b, 256, 1, 1
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, 3),                                        # b, 128, 3, 3
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, 7, stride=3, padding=1, output_padding=1),  # b, 64, 12, 12
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 24, 7, stride=2, padding=1, output_padding=1),   # b, 24, 28, 28
            nn.ReLU(True),
            nn.ConvTranspose2d(24, 16, 3, stride=2, padding=1, output_padding=1),   # b, 16, 56, 56
            nn.ReLU(True),
            nn.ConvTranspose2d(16, 8, 3, stride=2, padding=1),                      # b, 8, 111, 111
            nn.ReLU(True),
            nn.ConvTranspose2d(8, 3, 5, stride=2, padding=1, output_padding=1),     # b, 3, 224, 224
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
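As a sanity check, a small shape-tracing snippet of my own (not part of the original code) that pushes a dummy 224x224 image through every layer and prints the resulting shape; its output matches the summary table above:

import torch

model = autoencoder()
x = torch.randn(1, 3, 224, 224)  # dummy RGB image of the expected size
for layer in list(model.encoder) + list(model.decoder):
    x = layer(x)
    print(type(layer).__name__, tuple(x.shape))

Note that the strided convolutions collapse both 224- and 244-wide inputs to a width of 1 at the bottleneck, so the decoder always emits a 224x224 image regardless of the input width.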
The training code is as follows:
from torch.autograd import Variable
from torchsummary import summary
from torchvision.utils import save_image

dataset = DatasetLoader('E:/DAL/Dataset/Images', get_transform(train=True))
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
# Build the test split from the full dataset before reassigning `dataset`,
# otherwise the test indices point past the end of the 50-image-smaller subset.
dataset_test = torch.utils.data.Subset(dataset, indices[-50:])
dataset = torch.utils.data.Subset(dataset, indices[:-50])
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=True, num_workers=0)
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=0)

model = autoencoder().cuda()
summary(model, (3, 224, 224))
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

total_loss = 0
for epoch in range(num_epochs):
    for data in data_loader:
        img = data
        print("Min Value of input Image = ", torch.min(img))
        print("Max Value of input Image = ", torch.max(img))
        img = Variable(img).cuda()
        # ===================forward=====================
        output = model(img)
        print("Input Image shape = ", img.shape)
        print("Output Image shape = ", output.shape)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # ===================log========================
        total_loss += loss.item()
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch + 1, num_epochs, total_loss))
    if epoch % 10 == 0:
        pic = to_img(output.cpu().data)  # to_img: helper that reshapes the tensor for saving
        save_image(pic, './dc_img/image_{}.png'.format(epoch))
torch.save(model.state_dict(), './conv_autoencoder.pth')
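Since the stated goal is to feed the learned features into a clustering algorithm, here is one possible sketch of that step once training succeeds (with the resize typo fixed, see the answer below). The use of scikit-learn's KMeans and the n_clusters value are my own illustrative assumptions, not part of the original code:

import numpy as np
from sklearn.cluster import KMeans

model.eval()
feature_list = []
with torch.no_grad():
    for img in data_loader:
        z = model.encoder(img.cuda())                    # bottleneck: [1, 256, 1, 1]
        feature_list.append(z.flatten(1).cpu().numpy())  # -> [1, 256]
features = np.concatenate(feature_list, axis=0)          # [N, 256] feature matrix

kmeans = KMeans(n_clusters=10, n_init=10)  # n_clusters=10 is an arbitrary placeholder
labels = kmeans.fit_predict(features)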
The DatasetLoader class and the transform function are as follows:
import os
import torch
import torchvision.transforms as T
from PIL import Image

def get_transform(train):
    transforms = []
    transforms.append(T.Resize((224,244)))
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
        transforms.append(T.RandomVerticalFlip(0.5))
    transforms.append(T.ToTensor())
    return T.Compose(transforms)

class DatasetLoader(torch.utils.data.Dataset):
    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(root)))

    def __getitem__(self, idx):
        img_path = os.path.join(self.root, self.imgs[idx])
        img = Image.open(img_path).convert("RGB")
        if self.transforms is not None:
            img = self.transforms(img)
        return img

    def __len__(self):
        return len(self.imgs)
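A quick way to surface the offending shape straight from the dataset (a small check of my own, reusing the path from the training snippet):

ds = DatasetLoader('E:/DAL/Dataset/Images', get_transform(train=True))
print(ds[0].shape)  # torch.Size([3, 224, 244]): height 224, width 244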
I am pretty sure there is a typo in your get_transform function:

transforms.append(T.Resize((224,244)))

You probably want to resize to (224, 224) instead of (224, 244). Resize takes (height, width), so every image comes out 244 pixels wide, while your decoder always produces 224x224 outputs; MSELoss then sees a mismatch in dimension 3 (the width), which is exactly what the warning and the RuntimeError report.
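For completeness, the corrected function (only the Resize tuple changes):

def get_transform(train):
    transforms = []
    transforms.append(T.Resize((224, 224)))  # (height, width), both 224 now
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
        transforms.append(T.RandomVerticalFlip(0.5))
    transforms.append(T.ToTensor())
    return T.Compose(transforms)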