到目前为止,我已经创建了一个继承自 PyTorch Lightning 模块(LightningModule)的分类器类。使用之前创建的 train_loader 和 val_loader,训练和验证都能顺利进行。现在我想用模型做预测,但不知道正确的做法是什么:是应该通过 DataLoader 来完成,还是应该遍历目录中的图像?如果是后者,我应该对图像应用哪些变换,才能使它们符合模型预测所需的格式?
# Settings shared by the training and validation loaders.
_loader_kwargs = dict(batch_size=32, collate_fn=collator, num_workers=4)

# Only the training data is shuffled; `shuffle` is mutually exclusive with
# `sampler`.
train_loader = DataLoader(train_ds, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_ds, **_loader_kwargs)
class ImageClassifier(pl.LightningModule):
    """Image classifier: a pretrained ResNet-50 backbone plus a linear head.

    The backbone output is passed to ``finetune_layer``, a linear layer that
    produces one score per class.

    Args:
        num_classes: Output width of the linear head.
        lr: Saved to ``self.hparams`` by ``save_hyperparameters()``.
        weight_decay: Saved to ``self.hparams`` by ``save_hyperparameters()``.
        start_finetuning_backbone_at_epoch: During training, the backbone is
            run under ``torch.no_grad()`` for every epoch before this one.
    """

    def __init__(self, num_classes=3, lr=1e-3, weight_decay=2e-4, start_finetuning_backbone_at_epoch=5):
        super().__init__()
        # Makes the constructor arguments available under self.hparams.
        self.save_hyperparameters()
        self.backbone = models.resnet50(pretrained=True)
        # The head's input width is the backbone's output width:
        #   .fc.out_features              when the backbone is "resnet50"
        #   .classifier[-1].out_features  when the backbone is "vgg16"
        backbone_out = self.backbone.fc.out_features
        self.finetune_layer = torch.nn.Linear(backbone_out, self.hparams.num_classes)

    def forward(self, x):
        """Return the per-class scores for ``x``; used for inference/prediction.

        Args:
            x: Input image tensor. It must be 4-dimensional: the backbone's
                first convolution has a 4-dimensional weight [64, 3, 7, 7].

        Returns:
            The output of ``finetune_layer`` applied to the backbone features.
        """
        # No autograd graph is built for the backbone pass.
        with torch.no_grad():
            features = self.backbone(x)
        return self.finetune_layer(features)

    def training_step(self, batch):
        """Compute the loss for one training batch and log loss and accuracy.

        Args:
            batch: Mapping providing ``"pixel_values"`` (model inputs) and
                ``"labels"`` (targets).

        Returns:
            The cross-entropy loss; it keeps its computational graph so it can
            be optimized.
        """
        pixel_values, labels = batch["pixel_values"], batch["labels"]

        warmup_epochs = self.hparams.start_finetuning_backbone_at_epoch
        backbone_frozen = self.trainer.current_epoch < warmup_epochs
        if backbone_frozen:
            # Early epochs: no gradients flow into the backbone, so only the
            # linear head is trained.
            with torch.no_grad():
                features = self.backbone(pixel_values)
        else:
            features = self.backbone(pixel_values)

        logits = self.finetune_layer(features)
        loss = cross_entropy(logits, labels)
        # Logging uses a detached value of the loss, not its graph.
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('train_acc', accuracy(logits, labels))
        return loss
...
现在,我正在尝试用下面的代码进行预测,但我不确定应该如何构造输入 "x" 的格式:
import torch
from torchvision import transforms
from PIL import Image
# Path of the single image to classify.
img_dir = "my/image/path"
img = Image.open(img_dir)
# ToTensor converts the PIL image to a tensor; no batch axis is added here.
x = transforms.ToTensor()(img)
model = ImageClassifier()
# Calling the model runs its forward method on x.
model(x)
当我运行该代码时,我得到以下错误消息:
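RuntimeError: Expected 4-dimensional input for 4-dimensional weight [64, 3, 7, 7], but got 3-dimensional input of size [3, 669, 503] instead(运行时错误:权重 [64, 3, 7, 7] 为 4 维,需要 4 维输入,但实际得到的是大小为 [3, 669, 503] 的 3 维输入)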
如果你能就这方面的最佳实践提供一些指导,我将不胜感激!
我已经找到了答案,所以如果它对任何人都有用,我会把它发布在这里:
from torch.autograd import Variable
import torch
from torchvision import transforms
from PIL import Image
def image_loader(img_path, imsize=256):
    """Load an image file and return it as a batched float tensor.

    The tensor is not moved to any device here; transferring it to a GPU is
    left to the caller.

    Args:
        img_path: Path of the image file to open.
        imsize: Size handed to ``transforms.Resize``. Defaults to 256.

    Returns:
        A float tensor with a leading batch axis of size 1 and gradient
        tracking enabled.
    """
    preprocess = transforms.Compose([transforms.Resize(imsize), transforms.ToTensor()])
    # Convert to RGB so grayscale, palette or RGBA files still yield exactly
    # 3 channels; the context manager closes the underlying file afterwards.
    with Image.open(img_path) as img:
        image = preprocess(img.convert("RGB")).float()
    # Plain tensors can track gradients themselves, so the deprecated
    # torch.autograd.Variable wrapper is not needed.
    image.requires_grad_(True)
    # Add the batch axis: a 4-dimensional convolution weight needs a
    # 4-dimensional input.
    return image.unsqueeze(0)
# Checkpoint to restore (alternative: epoch=9-step=419.ckpt).
ckpt_path = "lightning_logs/epoch=2-step=125.ckpt"
# Restore the classifier and put it in evaluation mode, then freeze it for
# inference.
model = ImageClassifier.load_from_checkpoint(ckpt_path).eval()
model.freeze()
# Calling the model, e.g. model(x), runs its .forward method.
def predict_from_img_path(img_path):
    """Predict the class label of the image stored at ``img_path``.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        The ``id2label`` entry for the highest-scoring class.
    """
    image = image_loader(img_path)
    # Inference only: skip autograd regardless of the input's requires_grad
    # flag, and run the input on whatever device the model lives on.
    with torch.no_grad():
        # The model returns raw per-class scores (the output of a linear
        # layer), not probabilities.
        logits = model(image.to(model.device))
    # torch.argmax works on any device; np.argmax on a tensor only works for
    # CPU tensors detached from autograd.
    pred = torch.argmax(logits).item()
    return id2label[str(pred)]