How to save and load a custom Hugging Face model created with PyTorch, including its config.json file



Model description

I have added a simple custom pytorch-crf layer on top of a TokenClassification model. This makes the model more robust.

Training succeeds, but when I save the model there is no config.json file inside the output folder. How do I save a config.json file for this custom model?

And when I load the custom trained model, the last CRF layer is missing?

from torchcrf import CRF
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import (BertTokenizer, BertForTokenClassification,
                          TrainingArguments, Trainer)

model_checkpoint = "dslim/bert-base-NER"
tokenizer = BertTokenizer.from_pretrained(model_checkpoint, add_prefix_space=True)
bert_model = BertForTokenClassification.from_pretrained(
    model_checkpoint, id2label=id2label, label2id=label2id)
bert_model.config.output_hidden_states = True

class BERT_CRF(nn.Module):

    def __init__(self, bert_model, num_labels):
        super(BERT_CRF, self).__init__()
        self.bert = bert_model
        self.dropout = nn.Dropout(0.25)
        self.classifier = nn.Linear(768, num_labels)
        self.crf = CRF(num_labels, batch_first=True)

    def forward(self, input_ids, attention_mask, labels=None, token_type_ids=None):
        outputs = self.bert(input_ids, attention_mask=attention_mask)

        # Average the last four hidden states (available because
        # output_hidden_states=True was set on the config above)
        sequence_output = torch.stack(
            (outputs[1][-1], outputs[1][-2], outputs[1][-3], outputs[1][-4])).mean(dim=0)
        sequence_output = self.dropout(sequence_output)

        emission = self.classifier(sequence_output)  # [batch, seq_len, num_labels], e.g. [32, 256, 17]
        mask = attention_mask.type(torch.uint8)

        if labels is not None:
            # Reshape only when labels are present; doing this unconditionally
            # crashes at inference time, when labels is None.
            labels = labels.reshape(attention_mask.size()[0], attention_mask.size()[1])
            loss = -self.crf(F.log_softmax(emission, dim=2), labels, mask=mask, reduction='mean')
            prediction = self.crf.decode(emission, mask=mask)
            return [loss, prediction]
        else:
            prediction = self.crf.decode(emission, mask=mask)
            return prediction

model = BERT_CRF(bert_model, num_labels=len(label2id))
model.to(device)
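
As an aside, Trainer works with this wrapper because, when a model returns a list or tuple, it takes the first element of the output as the loss, which matches the [loss, prediction] return above. A quick smoke test of the inference path (hypothetical input sentence; assumes device and the label mappings are already set up):

# Hypothetical smoke test: with labels omitted, forward() returns the
# CRF-decoded tag-id sequences (one list of ids per sentence).
model.eval()
with torch.no_grad():
    enc = tokenizer("John lives in Berlin", return_tensors="pt").to(device)
    tag_ids = model(enc["input_ids"], enc["attention_mask"])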
args = TrainingArguments(
    "spanbert_crf_ner2",
    # evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    num_train_epochs=1,
    weight_decay=0.01,
    per_device_train_batch_size=8,
    # per_device_eval_batch_size=32,
    fp16=True,
    # bf16=True  # Ampere GPU
)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_data,
    # eval_dataset=train_data,
    # data_collator=data_collator,
    # compute_metrics=compute_metrics,
    tokenizer=tokenizer)
trainer.train()
trainer.save_model("model_spanbert_ner")
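
Note the warning in the log below: Trainer.model is not a PreTrainedModel, so save_model only writes the state dict, and that is why no config.json appears in the folder. As a workaround (a sketch, not part of the original run), the wrapped BERT's config can be written into the same folder by hand:

# Sketch: PretrainedConfig.save_pretrained writes config.json
# into the output directory alongside pytorch_model.bin.
bert_model.config.save_pretrained("model_spanbert_ner")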
Saving the model
Saving model checkpoint to spanbert_crf_ner2/checkpoint-62500
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
tokenizer config file saved in spanbert_crf_ner2/checkpoint-62500/tokenizer_config.json
Special tokens file saved in spanbert_crf_ner2/checkpoint-62500/special_tokens_map.json

Training completed. Do not forget to share your model on huggingface.co/models =)

100%|██████████| 62500/62500 [15:30:27<00:00,  1.12it/s]
Saving model checkpoint to model_spanbert_ner
Trainer.model is not a `PreTrainedModel`, only saving its state dict.
{'train_runtime': 55837.6817, 'train_samples_per_second': 17.909, 'train_steps_per_second': 1.119, 'train_loss': 1.8942613859863282, 'epoch': 2.0}
tokenizer config file saved in model_spanbert_ner/tokenizer_config.json
Special tokens file saved in model_spanbert_ner/special_tokens_map.json

Last layers of the trained model

          (11): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
              (intermediate_act_fn): GELUActivation()
            )
            (output): BertOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
    )
    (dropout): Dropout(p=0.1, inplace=False)
    (classifier): Linear(in_features=768, out_features=21, bias=True)
  )
  (dropout): Dropout(p=0.25, inplace=False)
  (classifier): Linear(in_features=768, out_features=21, bias=True)
  (crf): CRF(num_tags=21)
)

When I load the model after training:

from transformers import AutoModelForTokenClassification, AutoTokenizer

model = AutoModelForTokenClassification.from_pretrained("model_spanbert_ner", ignore_mismatched_sizes=True)
tokenizer = AutoTokenizer.from_pretrained("model_spanbert_ner", model_max_length=512)

        (11): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (intermediate): BertIntermediate(
            (dense): Linear(in_features=768, out_features=3072, bias=True)
            (intermediate_act_fn): GELUActivation()
          )
          (output): BertOutput(
            (dense): Linear(in_features=3072, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
  )
  (dropout): Dropout(p=0.1, inplace=False)
  (classifier): Linear(in_features=768, out_features=21, bias=True)

The CRF layer is missing??

You are loading the checkpoint into the original model architecture, so only the layers that architecture knows about are rebuilt. Try loading the checkpoint with your own model class instead:

model = BERT_CRF(bert_model, num_labels=len(label2id))
model.load_state_dict(torch.load("model_spanbert_ner/pytorch_model.bin"))
model.eval()
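
If you also want the saved folder to be self-contained, including the config.json asked about above, one option is to persist the inner BERT config next to the state dict and rebuild the wrapper on load. A minimal sketch, assuming BERT_CRF stays a plain nn.Module as defined above:

import torch
from transformers import BertConfig, BertForTokenClassification

# Save: config.json for the inner BERT plus the full wrapper state dict
# (BERT weights, classifier, and CRF transitions all live in the state dict).
bert_model.config.save_pretrained("model_spanbert_ner")
torch.save(model.state_dict(), "model_spanbert_ner/pytorch_model.bin")

# Load: rebuild the inner BERT from config.json, wrap it, then restore weights.
config = BertConfig.from_pretrained("model_spanbert_ner")
bert_model = BertForTokenClassification(config)  # architecture only, no pretrained weights
bert_model.config.output_hidden_states = True
model = BERT_CRF(bert_model, num_labels=config.num_labels)
model.load_state_dict(torch.load("model_spanbert_ner/pytorch_model.bin", map_location="cpu"))
model.eval()

Because id2label and label2id were passed to from_pretrained during training, they are stored in the config, so config.num_labels recovers the label count without any external mapping.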
