Model not computing a loss during training returns ValueError (Huggingface/BERT)



I can't get my encoded data (with hidden states) to pass correctly through the Hugging Face Trainer. Below is the call to Trainer with its arguments, followed by the full traceback. I'm honestly not sure where to start with this error, because I believe I've met all of the requirements for passing in the encoded data, unless the inputs that are passed are also supposed to include labels.

from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1}
from transformers import Trainer, TrainingArguments

batch_size = 10
logging_steps = len(transcripts_encoded["train"]) // batch_size
model_name = f"{model_checkpoint}-finetuned-transcripts"
training_args = TrainingArguments(output_dir=model_name,
                                  num_train_epochs=2,
                                  learning_rate=2e-5,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  disable_tqdm=False,
                                  logging_steps=logging_steps,
                                  push_to_hub=False,
                                  log_level="error")

trainer = Trainer(model=model, args=training_args,
                  compute_metrics=compute_metrics,
                  train_dataset=transcripts_encoded["train"],
                  eval_dataset=transcripts_encoded["valid"],
                  tokenizer=tokenizer)
trainer.train();

Here is the full traceback:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-124-76d295da3120> in <module>
24                  tokenizer=tokenizer)
25 
---> 26 trainer.train();
/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1503             resume_from_checkpoint=resume_from_checkpoint,
1504             trial=trial,
-> 1505             ignore_keys_for_eval=ignore_keys_for_eval,
1506         )
1507 
/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1747                         tr_loss_step = self.training_step(model, inputs)
1748                 else:
-> 1749                     tr_loss_step = self.training_step(model, inputs)
1750 
1751                 if (
/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in training_step(self, model, inputs)
2506 
2507         with self.compute_loss_context_manager():
-> 2508             loss = self.compute_loss(model, inputs)
2509 
2510         if self.args.n_gpu > 1:
/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
2552             if isinstance(outputs, dict) and "loss" not in outputs:
2553                 raise ValueError(
-> 2554                     "The model did not return a loss from the inputs, only the following keys: "
2555                     f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}."
2556                 )
ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask.

I was expecting it to report training details (f1, loss, accuracy, etc.). My assumption is that the structure of the encoded data with hidden states isn't suitable for the model to train on with the arguments I've set.

Updated model code: here is where I load and split the data

from datasets import load_dataset
from datasets.dataset_dict import DatasetDict

category_data = load_dataset("csv", data_files="testdatafinal.csv")
category_data = category_data.remove_columns(["someid", "someid", "somedimension"])
category_data = category_data['train']
train_testvalid = category_data.train_test_split(test_size=0.3)
test_valid = train_testvalid['test'].train_test_split(test_size=0.5)
cd = DatasetDict({
    'train': train_testvalid['train'],
    'test': test_valid['test'],
    'valid': test_valid['train']})
print(cd)
DatasetDict({
    train: Dataset({
        features: ['Transcript', 'Primary Label'],
        num_rows: 646
    })
    test: Dataset({
        features: ['Transcript', 'Primary Label'],
        num_rows: 139
    })
    valid: Dataset({
        features: ['Transcript', 'Primary Label'],
        num_rows: 139
    })
})

Here is where I grab the model checkpoint

import torch
from transformers import AutoModel

model_checkpoint = 'distilbert-base-uncased'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained(model_checkpoint).to(device)

Here is where I map the encoded text

transcripts_encoded_one = transcripts_encoded.set_format("torch",
                                                         columns=["input_ids", "attention_mask", "Primary Label"])

Here is where I extract the hidden states and then map them

def extract_hidden_states(batch):
    # Place model inputs on the GPU/CPU
    inputs = {k: v.to(device) for k, v in batch.items()
              if k in tokenizer.model_input_names}
    # Extract last hidden states
    with torch.no_grad():
        last_hidden_state = model(**inputs).last_hidden_state
    # Return vector for the [CLS] token
    return {"hidden_state": last_hidden_state[:, 0].cpu().numpy()}

transcripts_hidden = transcripts_encoded.map(extract_hidden_states, batched=True)

Calling AutoModelForSequenceClassification

from transformers import AutoModelForSequenceClassification

num_labels = 10
model = (AutoModelForSequenceClassification
         .from_pretrained(model_checkpoint, num_labels=num_labels)
         .to(device))

Accuracy metrics

from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1}

Trainer

from transformers import Trainer, TrainingArguments

batch_size = 10
logging_steps = len(transcripts_encoded_one["train"]) // batch_size
model_name = f"{model_checkpoint}-finetuned-transcripts"
training_args = TrainingArguments(output_dir=model_name,
                                  num_train_epochs=2,
                                  learning_rate=2e-5,
                                  per_device_train_batch_size=batch_size,
                                  per_device_eval_batch_size=batch_size,
                                  weight_decay=0.01,
                                  evaluation_strategy="epoch",
                                  disable_tqdm=False,
                                  logging_steps=logging_steps,
                                  push_to_hub=False,
                                  log_level="error")

trainer = Trainer(model=model, args=training_args,
                  compute_metrics=compute_metrics,
                  train_dataset=transcripts_encoded_one["train"],
                  eval_dataset=transcripts_encoded_one["valid"],
                  tokenizer=tokenizer)
trainer.train();

I have tried passing both transcripts_encoded (without hidden states) and transcripts_hidden (with hidden states) as the train and validation splits, and both produce the same error.

trainer.train_dataset[0]
{'Primary Label': 'cancel',
'input_ids': tensor([  101,  2047,  3446,  2003,  2205,  6450,  2005,  1996,  2051,  1045,
2064,  5247,  3752,  4790,  1012,  2009,  2001,  2026,  5165,  2000,
6509,  2017,  2651,   999,  4067,  2017,  2005,  3967,  2075,  1996,
2047,  2259,  2335,   999,  2031,  1037,  6919,  2717,  1997,  1996,
2154,   999,  2994,  3647,  1998,  7965,   999,  2065,  2045,  2003,
2505,  2842,  2057,  2089,  2022,  2583,  2000,  6509,  2017,  2007,
3531,  2514,  2489,  2000,  3967,  2149,  2153,  1012,  1045,  2001,
2074,  2667,  2000, 17542,  2026, 15002,  1012,  2038,  2009,  2042,
13261,  1029,  7632,  1010,  2045,   999,  1045,  3246,  2017,  1005,
2128,  2725,  2092,  2651,  1012,  4067,  2017,  2005,  3967,  2075,
102]),
'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1])}

Could you add your model code, if possible? Based on your instructions and description, you should be using BartForSequenceClassification. If you are using BartForSequenceClassification, I think the most likely cause is that your training dataset has no labels. In the BartForSequenceClassification forward pass, the loss is only computed when labels are provided:

loss = None
if labels is not None:
    ...

if not return_dict:
    output = (logits,) + outputs[1:]
    return ((loss,) + output) if loss is not None else output

return Seq2SeqSequenceClassifierOutput(
    loss=loss,
    logits=logits,
    past_key_values=outputs.past_key_values,
    decoder_hidden_states=outputs.decoder_hidden_states,
    decoder_attentions=outputs.decoder_attentions,
    cross_attentions=outputs.cross_attentions,
    encoder_last_hidden_state=outputs.encoder_last_hidden_state,
    encoder_hidden_states=outputs.encoder_hidden_states,
    encoder_attentions=outputs.encoder_attentions,
)

The ModelOutput classes in transformers' modeling_outputs drop any keys whose values are None, which is what then raises the ValueError you describe.
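As a quick illustration of that behaviour (a minimal sketch; SequenceClassifierOutput stands in here for the Seq2Seq variant, and the tensor shape is made up), a None loss simply disappears from the output's keys:

import torch
from transformers.modeling_outputs import SequenceClassifierOutput

# With no labels, the model leaves loss=None; ModelOutput drops None-valued
# fields, so "loss" never appears among the keys and Trainer.compute_loss
# raises the ValueError quoted in the traceback above.
out = SequenceClassifierOutput(loss=None, logits=torch.randn(1, 10))
print(out.keys())  # odict_keys(['logits'])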


Thank you for such detailed code. I found the problem. You should either set TrainingArguments.label_names to ["Primary Label"], or rename Primary Label to any label string that contains the lowercase word "label", for example Primary label. See transformers.utils.generic.find_labels for details. Otherwise the Trainer will use its default label names rather than Primary Label. In addition, you must map the labels to consecutive integers rather than strings like cancel!
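For illustration, here is a minimal sketch of the second option (an assumption-laden example: it presumes transcripts_encoded still carries the raw "Primary Label" strings, i.e. before set_format is applied, and the helper name add_label_column is made up):

# Build a string-to-id mapping from the labels present in the training split
label_names = sorted(set(transcripts_encoded["train"]["Primary Label"]))
label2id = {name: i for i, name in enumerate(label_names)}

def add_label_column(batch):
    # A column literally named "label" (lowercase) is picked up by the Trainer's
    # default collator and forwarded to the model as integer class ids,
    # instead of strings like "cancel"
    return {"label": [label2id[name] for name in batch["Primary Label"]]}

transcripts_encoded = transcripts_encoded.map(add_label_column, batched=True)
transcripts_encoded = transcripts_encoded.remove_columns(["Primary Label"])
transcripts_encoded.set_format("torch", columns=["input_ids", "attention_mask", "label"])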
