TypeError: 'int' object is not callable when calling the Bert method to generate embeddings



I have the following code and I get "TypeError: 'tuple' object is not callable" (in new_time), but I don't understand why. I wrote it based on this tutorial https://jalammar.github.io/a-visual-guide-to-using-bert-for-the-first-time/ and on https://github.com/getalp/Flaubert

My code:

#torch == 1.8.1
#numpy == 1.20.2
#pandas == 1.0.3
#transformers == 4.6.1

from transformers import logging
logging.set_verbosity_warning()
import numpy as np
import torch
from transformers import FlaubertModel, FlaubertTokenizer
language_model_dir = 'flaubert/flaubert_small_cased' # version > 2.0.0
flaubert, info = FlaubertModel.from_pretrained(language_model_dir, output_loading_info=True)
flaubert_tokenizer = FlaubertTokenizer.from_pretrained(language_model_dir)
# f_verbatim is a <class 'pandas.core.series.Series'> containing the sentences
tokenized = f_verbatim.apply(lambda x: flaubert_tokenizer.encode(x, add_special_tokens=True, max_length=512, padding=True, truncation=True))
#print(tokenized)
# Padding
max_len = 0
for i in tokenized.values:
    if len(i) > max_len:
        max_len = len(i)
padded = np.array([i + [0] * (max_len - len(i)) for i in tokenized.values])
# set data to tensor format
input_ids = torch.tensor(padded)
print(type(input_ids)) #<class 'torch.Tensor'>
attention_mask = np.where(padded != 0, 1, 0)
print(type(attention_mask))  #<class 'numpy.ndarray'>
# this line is causing the error
hidden_state = flaubert(input_ids, attention_mask=attention_mask) 

The error (stack trace):

--------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-7-8d53b819c31a> in <module>
1 print(flaubert)
----> 2 hidden_state = flaubert(input_ids, attention_mask=attention_mask)
~\Anaconda3\envs\bert\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
887             result = self._slow_forward(*input, **kwargs)
888         else:
--> 889             result = self.forward(*input, **kwargs)
890         for hook in itertools.chain(
891                 _global_forward_hooks.values(),
~\Anaconda3\envs\bert\lib\site-packages\transformers\models\flaubert\modeling_flaubert.py in forward(self, input_ids, attention_mask, langs, token_type_ids, position_ids, lengths, cache, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict)
195 
196         # generate masks
--> 197         mask, attn_mask = get_masks(slen, lengths, self.causal, padding_mask=attention_mask)
198         # if self.is_decoder and src_enc is not None:
199         #     src_mask = torch.arange(src_len.max(), dtype=torch.long, device=lengths.device) < src_len[:, None]
~\Anaconda3\envs\bert\lib\site-packages\transformers\models\xlm\modeling_xlm.py in get_masks(slen, lengths, causal, padding_mask)
104 
105     # sanity check
--> 106     assert mask.size() == (bs, slen)
107     assert causal is False or attn_mask.size() == (bs, slen, slen)
108 
TypeError: 'int' object is not callable

As far as I can tell, this kind of problem is caused by a missing comma, but I can't figure it out. Printing the "flaubert" object gives:

(FlaubertModel(
  (position_embeddings): Embedding(512, 512)
  (embeddings): Embedding(68729, 512, padding_idx=2)
  (layer_norm_emb): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
  (attentions): ModuleList(
    (0): MultiHeadAttention(
      (q_lin): Linear(in_features=512, out_features=512, bias=True)
      (k_lin): Linear(in_features=512, out_features=512, bias=True)
      (v_lin): Linear(in_features=512, out_features=512, bias=True)
      (out_lin): Linear(in_features=512, out_features=512, bias=True)
    )
    (1): MultiHeadAttention(
      (q_lin): Linear(in_features=512, out_features=512, bias=True)
      (k_lin): Linear(in_features=512, out_features=512, bias=True)
      (v_lin): Linear(in_features=512, out_features=512, bias=True)
      (out_lin): Linear(in_features=512, out_features=512, bias=True)
    )
    (2): MultiHeadAttention(
      (q_lin): Linear(in_features=512, out_features=512, bias=True)
      (k_lin): Linear(in_features=512, out_features=512, bias=True)
      (v_lin): Linear(in_features=512, out_features=512, bias=True)
      (out_lin): Linear(in_features=512, out_features=512, bias=True)
    )
    (3): MultiHeadAttention(
      (q_lin): Linear(in_features=512, out_features=512, bias=True)
      (k_lin): Linear(in_features=512, out_features=512, bias=True)
      (v_lin): Linear(in_features=512, out_features=512, bias=True)
      (out_lin): Linear(in_features=512, out_features=512, bias=True)
    )
    (4): MultiHeadAttention(
      (q_lin): Linear(in_features=512, out_features=512, bias=True)
      (k_lin): Linear(in_features=512, out_features=512, bias=True)
      (v_lin): Linear(in_features=512, out_features=512, bias=True)
      (out_lin): Linear(in_features=512, out_features=512, bias=True)
    )
    (5): MultiHeadAttention(
      (q_lin): Linear(in_features=512, out_features=512, bias=True)
      (k_lin): Linear(in_features=512, out_features=512, bias=True)
      (v_lin): Linear(in_features=512, out_features=512, bias=True)
      (out_lin): Linear(in_features=512, out_features=512, bias=True)
    )
  )
  (layer_norm1): ModuleList(
    (0): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
    (1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
    (2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
    (3): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
    (4): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
    (5): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
  )
  (ffns): ModuleList(
    (0): TransformerFFN(
      (lin1): Linear(in_features=512, out_features=2048, bias=True)
      (lin2): Linear(in_features=2048, out_features=512, bias=True)
    )
    (1): TransformerFFN(
      (lin1): Linear(in_features=512, out_features=2048, bias=True)
      (lin2): Linear(in_features=2048, out_features=512, bias=True)
    )
    (2): TransformerFFN(
      (lin1): Linear(in_features=512, out_features=2048, bias=True)
      (lin2): Linear(in_features=2048, out_features=512, bias=True)
    )
    (3): TransformerFFN(
      (lin1): Linear(in_features=512, out_features=2048, bias=True)
      (lin2): Linear(in_features=2048, out_features=512, bias=True)
    )
    (4): TransformerFFN(
      (lin1): Linear(in_features=512, out_features=2048, bias=True)
      (lin2): Linear(in_features=2048, out_features=512, bias=True)
    )
    (5): TransformerFFN(
      (lin1): Linear(in_features=512, out_features=2048, bias=True)
      (lin2): Linear(in_features=2048, out_features=512, bias=True)
    )
  )
  (layer_norm2): ModuleList(
    (0): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
    (1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
    (2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
    (3): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
    (4): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
    (5): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
  )
), {'missing_keys': [], 'unexpected_keys': ['pred_layer.proj.bias', 'pred_layer.proj.weight'], 'error_msgs': []})

f_verbatim looks like this:

<class 'pandas.core.series.Series'>
0      Dans le cadre de l’ATEX, il y a certains types de départ moteur qu’on va mesurer la température de pot du moteur et en cas d’anomalie il faut absolument couper le moteur. 
1      moi ce qui me dérange. C’est quand on a des enfants en bas âge. C’est dangereux, c’est trop facile                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
2      par rapport à une… enfin, à ce qui existe actuellement, si on parle du Tesys U… Enfin, sur Canopen, par exemple.                                                                                                                                                                       
3      Je ne verrais pas ça pour une machine, on va dire, une application. Ce serait pour plusieurs machines.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           
4      Spécifique : Pas n’importe qui pourrait le prendre  

input_ids looks like this:

tensor([[  0, 156,  20,  ...,   0,   0,   0],
[  0, 253,  45,  ...,   0,   0,   0],
[  0,  38, 243,  ...,   0,   0,   0],
...,
[  0, 141, 104,  ...,   0,   0,   0],
[  0,  59, 178,  ...,   0,   0,   0],
[  0, 141, 432,  ...,   0,   0,   0]], dtype=torch.int32)

attention_mask looks like this:

[[0 1 1 ... 0 0 0]
[0 1 1 ... 0 0 0]
[0 1 1 ... 0 0 0]
...
[0 1 1 ... 0 0 0]
[0 1 1 ... 0 0 0]
[0 1 1 ... 0 0 0]]

This is because the from_pretrained function gives you a tuple of the model and a dictionary, and you are not unpacking it. Change the code like this (add another variable):

flaubert, info = FlaubertModel.from_pretrained(language_model_dir, output_loading_info=True)

You have set output_loading_info to True, so from_pretrained also returns a dictionary of loading information. If you don't unpack the result into separate variables, the flaubert variable ends up holding a tuple (model, dictionary). Since flaubert is then a tuple, you cannot call it.
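For illustration, a minimal sketch of two equivalent ways to handle that return value when output_loading_info=True (the names result and info are just examples):

from transformers import FlaubertModel

language_model_dir = 'flaubert/flaubert_small_cased'

# Option 1: unpack the (model, loading_info) pair directly
flaubert, info = FlaubertModel.from_pretrained(language_model_dir, output_loading_info=True)

# Option 2: keep the whole return value and index into it
result = FlaubertModel.from_pretrained(language_model_dir, output_loading_info=True)
flaubert = result[0]  # the FlaubertModel instance
info = result[1]      # the loading-info dict (missing_keys, unexpected_keys, error_msgs)

print(type(flaubert))  # now a FlaubertModel, not a tuple, so it can be called

Either way, flaubert is now the model itself.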

UPDATE: attention_mask is a numpy array, but your model expects a torch tensor. (On a numpy array, size is an int attribute rather than a method, so the assert mask.size() == (bs, slen) inside get_masks fails with "'int' object is not callable".) So convert it to a torch tensor before passing it to the model:

attention_mask = torch.from_numpy(attention_mask)
hidden_state = flaubert(input_ids, attention_mask=attention_mask)
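Once this runs, you still need to turn the model output into embeddings. A minimal sketch following the linked tutorial, assuming transformers 4.6.1, where the first element of the model output is the last hidden layer; the name features is just an example:

import torch

with torch.no_grad():  # no gradients needed for feature extraction
    hidden_state = flaubert(input_ids, attention_mask=attention_mask)

# last hidden layer, shape (number_of_sentences, sequence_length, 512) for flaubert_small_cased
last_hidden_state = hidden_state[0]  # equivalently hidden_state.last_hidden_state

# As in the tutorial, take the first token's vector as a fixed-size sentence embedding
features = last_hidden_state[:, 0, :].numpy()
print(features.shape)  # (number_of_sentences, 512)

As a side note, the manual padding loop and the np.where call could probably be replaced by a single batch call such as flaubert_tokenizer(list(f_verbatim), padding=True, truncation=True, max_length=512, return_tensors='pt'), which already returns input_ids and attention_mask as torch tensors.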
