IndexError: index 39092 is out of bounds for axis 0 with size 39092; I am trying to train a multi-label classifier



I think the problem is caused by the way I load the data from the csv file, but I don't know how to fix it.

Here is a small excerpt of my train csv file: train dataset (I use 15 labels, from column 1 to the last column).

The error is as follows:

IndexError: index 39092 is out of bounds for axis 0 with size 39092

Code ===>

import csv
import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
class FashionData(torch.utils.data.Dataset):
    def __init__(self, csv_file, mode='train', transform=None):
        self.mode = mode
        # label(img1) = [0, 0, 0, 1], label(img3) = [1, 0, 1, 0], ...
        self.transform = transform

        self.data_info = pd.read_csv(csv_file, header=None)
        #print(self.data_info)

        # First column contains the image paths
        self.image_arr = np.asarray(self.data_info.iloc[1:, 0])
        if mode != 'test':
            self.label_arr = np.asarray(self.data_info.iloc[1:, 2:])  # label columns (index 2 onward)
            self.label_arr = self.label_arr.astype('float32')

        # Calculate len
        self.data_len = len(self.data_info.index)

    def __getitem__(self, index):
        # Get image name from the pandas df
        single_image_name = self.image_arr[index]
        # Open image
        img_as_img = Image.open(single_image_name)

        if self.transform is not None:
            img_as_img = self.transform(img_as_img)
        if self.mode == 'test':
            return img_as_img
        # Get label(class) of the image based on the cropped pandas column
        single_image_label = self.label_arr[index]
        #single_image_label = torch.from_numpy(self.label_arr[index]).float()
        #img = torch.from_numpy(img).float().to(device)
        #label = torch.tensor(int(self.labels[index]))
        return (img_as_img, single_image_label)

    def __len__(self):
        return self.data_len
transforms_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
transforms_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
dataset_train = FashionData('./deep_fashion/train.csv', mode='train', transform=transforms_train)
dataset_val = FashionData('./deep_fashion/val.csv', mode='val', transform=transforms_test)
dataset_test = FashionData('./deep_fashion/test.csv', mode='test', transform=transforms_test)
from torch.utils.data import DataLoader
train_loader = DataLoader(dataset_train, batch_size=128, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=128, shuffle=False)
test_loader = DataLoader(dataset_test, batch_size=128, shuffle=False)
model=models.resnet50(pretrained=True)
for params in model.parameters():
    params.requires_grad = False

model.fc = nn.Sequential(
    nn.Linear(2048, 15),
    nn.Sigmoid()
)
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model=model.to(device)
print(model)
criterion=nn.BCELoss()
#criterion=nn.BCEWithLogitsLoss()
optimizer=optim.Adam(model.parameters(),lr=0.001)
criterion=criterion.to(device)
def train(train_loader, model, criterion, optimizer):
    model.train()
    loss_list = []
    total_count = 0
    acc_count = 0

    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)
        optimizer.zero_grad()
        output = model(x)

        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

        #_, predicted = torch.max(output, 1)
        predicted = (output > 0.5).float()
        total_count += y.size(0)
        acc_count += (predicted == y).sum().item()

        loss_list.append(loss.item())
    acc = acc_count / total_count
    loss = sum(loss_list) / len(loss_list)
    return acc, loss
def val(valid_loader, model, criterion):
    model.eval()
    loss_list = []
    total_count = 0
    acc_count = 0
    with torch.no_grad():
        for x, y in valid_loader:
            x = x.to(device)
            y = y.to(device)

            output = model(x)
            loss = criterion(output, y)
            #_, predicted = torch.max(output, 1)
            predicted = (output > 0.5).float()

            total_count += y.size(0)
            acc_count += (predicted == y).sum().item()
            loss_list.append(loss.item())
    acc = acc_count / total_count
    loss = sum(loss_list) / len(loss_list)
    return acc, loss
train_acc_list = []
train_loss_list = []
val_acc_list = []
val_loss_list = []
for epoch in range(10):
    train_acc, train_loss = train(train_loader, model, criterion, optimizer)
    val_acc, val_loss = val(val_loader, model, criterion)
    train_acc_list.append(train_acc)
    train_loss_list.append(train_loss)
    val_acc_list.append(val_acc)
    val_loss_list.append(val_loss)
    print('epoch', epoch)
    print('Train Acc: {:.6f} Train Loss: {:.6f}'.format(train_acc, train_loss))
    print('  Val Acc: {:.6f}   Val Loss: {:.6f}'.format(val_acc, val_loss))

Am I doing something wrong in the data loading part, or is the problem somewhere else?

Answer:

Your __len__ is exactly 1 larger than the actual data, because you loaded the df with header=None: the header row is read in as a data row, so len(self.data_info.index) counts one row more than self.image_arr (which you slice with iloc[1:, ...]) actually holds. The DataLoader then asks for indices 0 up to __len__() - 1, and the last one (39092) falls one past the end of image_arr, whose size is 39092, which is exactly the IndexError you see.
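
A minimal sketch of that off-by-one (the column names here are invented, not taken from your actual CSV):

import io
import pandas as pd

csv_text = (
    "image_name,category,label_0,label_1\n"
    "img_001.jpg,shirt,0,1\n"
    "img_002.jpg,dress,1,0\n"
)

df = pd.read_csv(io.StringIO(csv_text), header=None)  # header row is parsed as data
print(len(df.index))         # 3 -> what your current __len__ reports
print(len(df.iloc[1:, 0]))   # 2 -> how many rows image_arr really holds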

Just change the last line of __init__ to self.data_len = len(self.image_arr). That should fix your problem with minimal changes.

(Alternatively, load the df with header=0 instead of header=None, in which case you have to change iloc[1:, ...] to iloc[:, ...], since you no longer need to skip the first row.)
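
For reference, an untested sketch of __init__ with the header-aware variant applied (the rest of the class stays as in the question):

import numpy as np
import pandas as pd
import torch

class FashionData(torch.utils.data.Dataset):
    def __init__(self, csv_file, mode='train', transform=None):
        self.mode = mode
        self.transform = transform

        # Let pandas treat the first CSV row as a header, so no data rows are skipped.
        self.data_info = pd.read_csv(csv_file, header=0)

        # First column contains the image paths, label columns start at index 2.
        self.image_arr = np.asarray(self.data_info.iloc[:, 0])
        if mode != 'test':
            self.label_arr = np.asarray(self.data_info.iloc[:, 2:]).astype('float32')

        # Length taken from the same array that __getitem__ indexes,
        # so the DataLoader can never ask for an index past the end.
        self.data_len = len(self.image_arr)

Whichever variant you pick, the key point is that __len__ must report exactly the number of rows that image_arr and label_arr actually contain.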
