Pytorch U-net分割模型"ValueError: axes don't match array error"的原因可能是什么?



我正试图为kaggle数据集实现一个名为";Carvana Image Masking Chalange";。

我搜索了很多,但仍然不知道是什么原因导致我出现了这个错误。有人建议检查图像尺寸,可以是灰度格式,但我似乎有3个通道用于原始图像和掩模图像。我感谢你们对的支持

我的代码如下:

图书馆

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
from PIL import Image
import numpy as np
import cv2
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset
import albumentations as albu
import torch
import numpy as np
import segmentation_models_pytorch as smp

数据路径

DATA_DIR = 'D:/Users/eugur/Belgeler/Jupyter/Segmentation_Kaggle'
x_train_dir = os.path.join(DATA_DIR, 'train')
y_train_dir = os.path.join(DATA_DIR, 'train_masks')
x_valid_dir = os.path.join(DATA_DIR, 'valid')
y_valid_dir = os.path.join(DATA_DIR, 'valid_masks')
x_test_dir = os.path.join(DATA_DIR, 'test')

用于数据可视化的助手函数

def visualize(**images):
"""PLot images in one row."""
n = len(images)
plt.figure(figsize=(16, 5))
for i, (name, image) in enumerate(images.items()):
plt.subplot(1, n, i + 1)
plt.xticks([])
plt.yticks([])
plt.title(' '.join(name.split('_')).title())
plt.imshow(image)
plt.show()

数据集类

class Dataset(BaseDataset):
"""

Args:
images_dir (str): path to images folder
masks_dir (str): path to segmentation masks folder
class_values (list): values of classes to extract from segmentation mask
augmentation (albumentations.Compose): data transfromation pipeline 
(e.g. flip, scale, etc.)
preprocessing (albumentations.Compose): data preprocessing 
(e.g. noralization, shape manipulation, etc.)

"""

CLASSES = ['car']

def __init__(
self, 
images_dir, 
masks_dir, 
classes=None, 
augmentation=None, 
preprocessing=None,
):
self.ids = os.listdir(images_dir)
self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
self.masks_fps = [os.path.join(masks_dir, image_id.split('.')[0]+'_mask.gif') for image_id in self.ids]

# convert str names to class values on masks
self.class_values = [self.CLASSES.index(cls.lower()) for cls in classes]

self.augmentation = augmentation
self.preprocessing = preprocessing

def __getitem__(self, i):

# read data
image = cv2.imread(self.images_fps[i])
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#         mask = cv2.imread(self.masks_fps[i], 0)
mask = cv2.VideoCapture(self.masks_fps[i],0)
ret,mask = mask.read()
mask = mask/255

# extract certain classes from mask (e.g. cars)
masks = [(mask == v) for v in self.class_values]
mask = np.stack(masks, axis=-1).astype('float')

# apply augmentations
if self.augmentation:
sample = self.augmentation(image=image, mask=mask)
image, mask = sample['image'], sample['mask']

# apply preprocessing
if self.preprocessing:
sample = self.preprocessing(image=image, mask=mask)
image, mask = sample['image'], sample['mask']

return image, np.squeeze(mask,axis=3)


def __len__(self):
return len(self.ids)

预处理和增强

def get_training_augmentation():
train_transform = [
albu.HorizontalFlip(p=0.5),
albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1, p=1, border_mode=0),
albu.PadIfNeeded(min_height=320, min_width=320, always_apply=True, border_mode=0),
albu.RandomCrop(height=320, width=320, always_apply=True),
albu.IAAAdditiveGaussianNoise(p=0.2),
albu.IAAPerspective(p=0.5),
albu.OneOf(
[
albu.CLAHE(p=1),
albu.RandomBrightness(p=1),
albu.RandomGamma(p=1),
],
p=0.9,
),
albu.OneOf(
[
albu.IAASharpen(p=1),
albu.Blur(blur_limit=3, p=1),
albu.MotionBlur(blur_limit=3, p=1),
],
p=0.9,
),
albu.OneOf(
[
albu.RandomContrast(p=1),
albu.HueSaturationValue(p=1),
],
p=0.9,
),
]
return albu.Compose(train_transform)

def get_validation_augmentation():
"""Add paddings to make image shape divisible by 32"""
test_transform = [
albu.PadIfNeeded(384, 480)
]
return albu.Compose(test_transform)

def to_tensor(x, **kwargs):

return x.transpose(0,2,1).astype('float32')

def get_preprocessing(preprocessing_fn):
"""Construct preprocessing transform

Args:
preprocessing_fn (callbale): data normalization function 
(can be specific for each pretrained neural network)
Return:
transform: albumentations.Compose

"""

_transform = [
albu.Lambda(image=preprocessing_fn),
albu.Lambda(image=to_tensor, mask=to_tensor),
]
return albu.Compose(_transform)

模型定义

ENCODER = 'se_resnext50_32x4d'
ENCODER_WEIGHTS = 'imagenet'
CLASSES = ['car']
ACTIVATION = 'sigmoid' # could be None for logits or 'softmax2d' for multicalss segmentation
DEVICE = 'cuda'
# create segmentation model with pretrained encoder
model = smp.FPN(
encoder_name=ENCODER, 
encoder_weights=ENCODER_WEIGHTS, 
classes=len(CLASSES), 
in_channels=3,
activation=ACTIVATION,
)
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

数据加载器

train_dataset = Dataset(
x_train_dir, 
y_train_dir, 
preprocessing=get_preprocessing(preprocessing_fn),
classes=CLASSES,
)
valid_dataset = Dataset(
x_valid_dir, 
y_valid_dir, 
preprocessing=get_preprocessing(preprocessing_fn),
classes=CLASSES,
)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=1, shuffle=False, num_workers=0)

Optimer定义

loss = smp.utils.losses.DiceLoss()
metrics = [
smp.utils.metrics.IoU(threshold=0.5),
]
optimizer = torch.optim.Adam([ 
dict(params=model.parameters(), lr=0.0001),
])

培训

train_epoch = smp.utils.train.TrainEpoch(
model, 
loss=loss, 
metrics=metrics, 
optimizer=optimizer,
device=DEVICE,
verbose=True,
)
valid_epoch = smp.utils.train.ValidEpoch(
model, 
loss=loss, 
metrics=metrics, 
device=DEVICE,
verbose=True,
)
max_score = 0
for i in range(0, 20):

print('nEpoch: {}'.format(i))
train_logs = train_epoch.run(train_loader)
valid_logs = valid_epoch.run(valid_loader)

# do something (save model, change lr, etc.)
if max_score < valid_logs['iou_score']:
max_score = valid_logs['iou_score']
torch.save(model, './best_model.pth')
print('Model saved!')

if i == 25:
optimizer.param_groups[0]['lr'] = 1e-5
print('Decrease decoder learning rate to 1e-5!')

错误

> Epoch: 0 train:   0%|          | 0/510 [00:00<?, ?it/s]
> 
> --------------------------------------------------------------------------- ValueError                                Traceback (most recent call
> last) <ipython-input-208-d2306c5ca0ea> in <module>
>       6 
>       7     print('nEpoch: {}'.format(i))
> ----> 8     train_logs = train_epoch.run(train_loader)
>       9     valid_logs = valid_epoch.run(valid_loader)
>      10 
> 
> C:ProgramDataAnaconda3envssegmentationlibsite-packagessegmentation_models_pytorchutilstrain.py
> in run(self, dataloader)
>      43 
>      44         with tqdm(dataloader, desc=self.stage_name, file=sys.stdout, disable=not (self.verbose)) as iterator:
> ---> 45             for x, y in iterator:
>      46                 x, y = x.to(self.device), y.to(self.device)
>      47                 loss, y_pred = self.batch_update(x, y)
> 
> C:ProgramDataAnaconda3envssegmentationlibsite-packagestqdmstd.py
> in __iter__(self)    1169     1170         try:
> -> 1171             for obj in iterable:    1172                 yield obj    1173                 # Update and possibly print the
> progressbar.
> 
> C:ProgramDataAnaconda3envssegmentationlibsite-packagestorchutilsdatadataloader.py
> in __next__(self)
>     433         if self._sampler_iter is None:
>     434             self._reset()
> --> 435         data = self._next_data()
>     436         self._num_yielded += 1
>     437         if self._dataset_kind == _DatasetKind.Iterable and 
> 
> C:ProgramDataAnaconda3envssegmentationlibsite-packagestorchutilsdatadataloader.py
> in _next_data(self)
>     473     def _next_data(self):
>     474         index = self._next_index()  # may raise StopIteration
> --> 475         data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
>     476         if self._pin_memory:
>     477             data = _utils.pin_memory.pin_memory(data)
> 
> C:ProgramDataAnaconda3envssegmentationlibsite-packagestorchutilsdata_utilsfetch.py
> in fetch(self, possibly_batched_index)
>      42     def fetch(self, possibly_batched_index):
>      43         if self.auto_collation:
> ---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
>      45         else:
>      46             data = self.dataset[possibly_batched_index]
> 
> C:ProgramDataAnaconda3envssegmentationlibsite-packagestorchutilsdata_utilsfetch.py
> in <listcomp>(.0)
>      42     def fetch(self, possibly_batched_index):
>      43         if self.auto_collation:
> ---> 44             data = [self.dataset[idx] for idx in possibly_batched_index]
>      45         else:
>      46             data = self.dataset[possibly_batched_index]
> 
> <ipython-input-146-65256f8f536d> in __getitem__(self, i)
>      54         # apply preprocessing
>      55         if self.preprocessing:
> ---> 56             sample = self.preprocessing(image=image, mask=mask)
>      57             image, mask = sample['image'], sample['mask']
>      58 
> 
> C:ProgramDataAnaconda3envssegmentationlibsite-packagesalbumentationscorecomposition.py
> in __call__(self, force_apply, *args, **data)
>     180                     p.preprocess(data)
>     181 
> --> 182             data = t(force_apply=force_apply, **data)
>     183 
>     184             if dual_start_end is not None and idx == dual_start_end[1]:
> 
> C:ProgramDataAnaconda3envssegmentationlibsite-packagesalbumentationscoretransforms_interface.py
> in __call__(self, force_apply, *args, **kwargs)
>      87                     )
>      88                 kwargs[self.save_key][id(self)] = deepcopy(params)
> ---> 89             return self.apply_with_params(params, **kwargs)
>      90 
>      91         return kwargs
> 
> C:ProgramDataAnaconda3envssegmentationlibsite-packagesalbumentationscoretransforms_interface.py
> in apply_with_params(self, params, force_apply, **kwargs)
>     100                 target_function = self._get_target_function(key)
>     101                 target_dependencies = {k: kwargs[k] for k in self.target_dependence.get(key, [])}
> --> 102                 res[key] = target_function(arg, **dict(params, **target_dependencies))
>     103             else:
>     104                 res[key] = None
> 
> C:ProgramDataAnaconda3envssegmentationlibsite-packagesalbumentationsaugmentationstransforms.py
> in apply_to_mask(self, mask, **params)    3068     def
> apply_to_mask(self, mask, **params):    3069         fn =
> self.custom_apply_fns["mask"]
> -> 3070         return fn(mask, **params)    3071     3072     def apply_to_bbox(self, bbox, **params):
> 
> <ipython-input-186-4f194a842931> in to_tensor(x, **kwargs)
>      52 
>      53 
> ---> 54     return x.transpose(0,2,1).astype('float32')
>      55 
>      56 
> 
> ValueError: axes don't match array

上面的代码有2个问题;

  1. 掩码图像大小错误,应为(x,y,1(,但它是(x,y,3(

  2. 模型需要大小相等的行和列。

经过上述更改后,代码运行良好。

相关内容

最新更新