从一周开始,我就遇到了这个错误,我尝试了所有的方法,所以事实是我对正在发生的事情没有深入了解(我是pytorch实现的新手(。无论如何,我试图实现一个Bert分类器来区分2个序列类,并调整AX超参数。这是我的所有代码,预期由我的数据集样本实现(我有3 csv,train-test-val(。非常感谢!
0 1
M A T T D R P T P D G T D A I D L T T R V R R... 1
M K K L F Q T E P L L E L F N C N E L R I I G... 0
M L V A A A V C P H P P L L I P E L A A G A A... 1
M I V A W G N S G S G L L I L I L S L A V S A... 0
M V E E G R R L A A L H P N I V V K L P T T E... 1
M G S K V S K N A L V F N V L Q A L R E G L T... 1
M P S K E T S P A E R M A R D E Y Y M R L A M... 1
M V K E Y A L E W I D G Y R E R L V K V S D A... 1
M G T A A S Q D R A A M A E A A Q R V G D S F... 0
df_train=pd.read_csv('CLASSIFIER_train',sep=',',header=None)
df_train
class SequenceDataset(Dataset):
def __init__(self, sequences, targets, tokenizer, max_len):
self.sequences = sequences
self.targets = targets
self.tokenizer = tokenizer
self.max_len = max_len
def __len__(self):
return len(self.sequences)
def __getitem__(self, item):
sequences = str(self.sequences[item])
target = self.targets[item]
encoding = self.tokenizer.encode_plus(
sequences,
add_special_tokens=True,
max_length=self.max_len,
return_token_type_ids=False,
pad_to_max_length=True,
return_attention_mask=True,
return_tensors='pt',
)
return {
'sequences_text': sequences,
'input_ids': encoding['input_ids'].flatten(),
'attention_mask': encoding['attention_mask'].flatten(),
'targets': torch.tensor(target, dtype=torch.long)
}
def create_data_loader(df, tokenizer, max_len, batch_size):
ds = SequenceDataset(
sequences=df[0].to_numpy(),
targets=df[1].to_numpy(),
tokenizer=tokenizer,
max_len=max_len
)
return DataLoader(
ds,
batch_size=batch_size,
num_workers=2,
shuffle=True
)
BATCH_SIZE = 16
train_data_loader = create_data_loader(df_train, tokenizer, MAX_LEN, BATCH_SIZE)
val_data_loader = create_data_loader(df_val, tokenizer, MAX_LEN, BATCH_SIZE)
test_data_loader = create_data_loader(df_test, tokenizer, MAX_LEN, BATCH_SIZE)
def net_train(net, train_data_loader, parameters, dtype, device):
net.to(dtype=dtype, device=device)
# Define loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), # or any optimizer you prefer
lr=parameters.get("lr", 0.001), # 0.001 is used if no lr is specified
momentum=parameters.get("momentum", 0.9)
)
scheduler = optim.lr_scheduler.StepLR(
optimizer,
step_size=int(parameters.get("step_size", 30)),
gamma=parameters.get("gamma", 1.0), # default is no learning rate decay
)
num_epochs = parameters.get("num_epochs", 3) # Play around with epoch number
# Train Network
for _ in range(num_epochs):
for inputs, labels in train_data_loader:
# move data to proper dtype and device
inputs = inputs.to(dtype=dtype, device=device)
labels = labels.to(device=device)
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
scheduler.step()
return net
def init_net(parameterization):
model = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
# The depth of unfreezing is also a hyperparameter
for param in model.parameters():
param.requires_grad = False # Freeze feature extractor
Hs = 512 # Hidden layer size; you can optimize this as well
model.fc = nn.Sequential(nn.Linear(2048, Hs), # attach trainable classifier
nn.ReLU(),
nn.Dropout(0.2),
nn.Linear(Hs, 10),
nn.LogSoftmax(dim=1))
return model # return untrained model
def train_evaluate(parameterization):
# constructing a new training data loader allows us to tune the batch size
train_data_loader=create_data_loader(df_train, tokenizer, MAX_LEN, batch_size=parameterization.get("batchsize", 32))
# Get neural net
untrained_net = init_net(parameterization)
# train
trained_net = net_train(net=untrained_net, train_data_loader=train_data_loader,
parameters=parameterization, dtype=dtype, device=device)
# return the accuracy of the model as it was trained in this run
return evaluate(
net=trained_net,
data_loader=test_data_loader,
dtype=dtype,
device=device,
)
classes=('0','1')
dtype = torch.float
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
best_parameters, values, experiment, model = optimize(
parameters=[
{"name": "lr", "type": "range", "bounds": [1e-6, 0.4], "log_scale": True},
{"name": "batchsize", "type": "range", "bounds": [16, 128]},
{"name": "momentum", "type": "range", "bounds": [0.0, 1.0]},
#{"name": "max_epoch", "type": "range", "bounds": [1, 30]},
#{"name": "stepsize", "type": "range", "bounds": [20, 40]},
],
evaluation_function=train_evaluate,
objective_name='accuracy',
)
print(best_parameters)
means, covariances = values
print(means)
print(covariances)
File "<ipython-input-71-e52ebc0d7b5b>", line 14, in train_evaluate
parameters=parameterization, dtype=dtype, device=device)
File "<ipython-input-61-66c57e7138fa>", line 20, in net_train
for inputs, labels in train_data_loader:
ValueError: too many values to unpack (expected 2)
您的数据加载器返回一个字典,因此循环和访问它的方式是错误的,应该这样做:
# Train Network
for _ in range(num_epochs):
# Your dataloader returns a dictionary
# so access it as such
for batch in train_data_loader:
# move data to proper dtype and device
labels = batch['targets'].to(device=device)
atten_mask = batch['attention_mask'].to(device=device)
input_ids = batch['input_ids'].to(device=device)
# zero the parameter gradients
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(input_ids, attention_mask=atten_mask)