BERT model throws InvalidArgumentError: Condition x <= y did not hold element-wise



I am training a BERT model.

Can someone clarify what the following error message means?

Condition x <= y did not hold element-wise

This is the reference Colab notebook.

My code:

!pip install bert-for-tf2

import math
import os

import numpy as np   # needed below for np.array in FakeNewsData
import pandas as pd  # needed below for pd.DataFrame in data_augumentation
from tqdm import tqdm

import tensorflow as tf
from tensorflow import keras

import bert
from bert import BertModelLayer
from bert.loader import StockBertConfig, map_stock_config_to_params, load_stock_weights
from bert.tokenization.bert_tokenization import FullTokenizer

%%time
bert_ckpt_dir    = "gs://bert_models/2018_10_18/uncased_L-12_H-768_A-12/"
bert_ckpt_file   = bert_ckpt_dir + "bert_model.ckpt"
bert_config_file = bert_ckpt_dir + "bert_config.json"

bert_model_dir  = "2018_10_18"
bert_model_name = "uncased_L-12_H-768_A-12"

!mkdir -p .model .model/$bert_model_name

for fname in ["bert_config.json", "vocab.txt", "bert_model.ckpt.meta", "bert_model.ckpt.index", "bert_model.ckpt.data-00000-of-00001"]:
    cmd = f"gsutil cp gs://bert_models/{bert_model_dir}/{bert_model_name}/{fname} .model/{bert_model_name}"
    !$cmd

!ls -la .model .model/$bert_model_name

bert_ckpt_dir = os.path.join(".model/",bert_model_name)
bert_ckpt_file = os.path.join(bert_ckpt_dir, "bert_model.ckpt")
bert_config_file = os.path.join(bert_ckpt_dir, "bert_config.json")
class FakeNewsData:
    """
    Preprocess the texts into BERT features.
    max_seq_len: the specified maximum sequence length
    tokenizer: the BERT tokenizer
    """
    DATA_COLUMN = "text"
    LABEL_COLUMN = "label"

    def __init__(self, tokenizer, train, validation, test, max_seq_len=150):
        self.tokenizer = tokenizer
        self.max_seq_len = max_seq_len
        ((self.train_x, self.train_y),
         (self.val_x, self.val_y),
         (self.test_x, self.test_y)) = map(self._prepare, [train, validation, test])
        ((self.train_x, self.train_x_token_types),
         (self.val_x, self.val_x_token_types),
         (self.test_x, self.test_x_token_types)) = map(self._pad,
                                                       [self.train_x, self.val_x, self.test_x])

    def _prepare(self, df):
        """
        Add the start and end tokens to each sequence and convert the text to token ids.
        """
        x, y = [], []
        with tqdm(total=df.shape[0], unit_scale=True) as pbar:
            for ndx, row in df.iterrows():
                text, label = row[FakeNewsData.DATA_COLUMN], row[FakeNewsData.LABEL_COLUMN]
                tokens = self.tokenizer.tokenize(text)
                tokens = ["[CLS]"] + tokens + ["[SEP]"]
                token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
                self.max_seq_len = max(self.max_seq_len, len(token_ids))
                x.append(token_ids)
                y.append(int(label))
                pbar.update()
        return np.array(x), np.array(y)

    def _pad(self, ids):
        """
        Pad each sequence with [0] up to the specified maximum sequence length.
        """
        x, t = [], []
        token_type_ids = [0] * self.max_seq_len
        for input_ids in ids:
            input_ids = input_ids[:min(len(input_ids), self.max_seq_len - 2)]
            input_ids = input_ids + [0] * (self.max_seq_len - len(input_ids))
            x.append(np.array(input_ids))
            t.append(token_type_ids)
        return np.array(x), np.array(t)
%%time
tokenizer = FullTokenizer(vocab_file=os.path.join(bert_ckpt_dir, "vocab.txt"))
data = FakeNewsData(tokenizer,
                    train=train_df,
                    validation=val_df,
                    test=test_df,
                    max_seq_len=150)
def create_model(max_seq_len, lr=1e-5):
    """
    Create a BERT classification model.
    Model architecture: raw input -> BERT layer -> dropout layer to prevent overfitting
    -> dense layer that outputs the predicted probabilities.
    max_seq_len: maximum sequence length
    lr: learning rate of the optimizer
    """
    # create the bert layer
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
        bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    # Dropout layer
    cls_out = keras.layers.Dropout(0.8)(cls_out)
    # Dense layer with probability output
    logits = keras.layers.Dense(units=2, activation="softmax")(cls_out)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    # load the pre-trained model weights
    load_stock_weights(bert, bert_ckpt_file)

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),
                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])
    model.summary()

    return model

model = create_model(max_seq_len=data.max_seq_len, lr=1e-5)

Here is the output:

InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-25-578d63d49a0e> in <module>()
39   return model
40 
---> 41 model = create_model(max_seq_len = data.max_seq_len, lr = 1e-5)
3 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
256       except Exception as e:  # pylint:disable=broad-except
257         if hasattr(e, 'ag_error_metadata'):
--> 258           raise e.ag_error_metadata.to_exception(e)
259         else:
260           raise
InvalidArgumentError: in user code:
/usr/local/lib/python3.6/dist-packages/bert/model.py:79 call  *
embedding_output = self.embeddings_layer(inputs, mask=mask, training=training)
/usr/local/lib/python3.6/dist-packages/bert/embeddings.py:223 call  *
pos_embeddings = self.position_embeddings_layer(seq_len)
/usr/local/lib/python3.6/dist-packages/bert/embeddings.py:48 call  *
assert_op = tf.compat.v2.debugging.assert_less_equal(seq_len, self.params.max_position_embeddings)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper  **
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/check_ops.py:938 assert_less_equal_v2
summarize=summarize, message=message, name=name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper
return target(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/check_ops.py:947 assert_less_equal
np.less_equal, x, y, data, summarize, message, name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/check_ops.py:372 _binary_assert
_assert_static(condition_static, data)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/check_ops.py:87 _assert_static
message='\n'.join(data_static))
InvalidArgumentError: Condition x <= y did not hold element-wise:
x (bert/embeddings/Const_2:0) = 
9755
y (bert/embeddings/position_embeddings/assert_less_equal_2/y:0) = 
512

Long document preprocessing:

def get_split(text):
    """
    Split each news text into subtexts no longer than 150 words.
    """
    l_total = []
    l_parcial = []
    if len(text.split())//120 > 0:
        n = len(text.split())//120
    else:
        n = 1
    for w in range(n):
        if w == 0:
            l_parcial = text.split()[:150]
            l_total.append(" ".join(l_parcial))
        else:
            l_parcial = text.split()[w*120:w*120 + 150]
            l_total.append(" ".join(l_parcial))
    return l_total

train['text_split'] = train['text'].apply(get_split)
val['text_split'] = val['text'].apply(get_split)
test['text_split'] = test['text'].apply(get_split)
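For illustration (a made-up example, not from my data), a 300-word text is split into two overlapping 150-word windows with a stride of 120 words:

# Hypothetical check of get_split on a synthetic 300-word text.
dummy_text = " ".join(f"w{i}" for i in range(300))
chunks = get_split(dummy_text)
print(len(chunks))             # 2
print(len(chunks[0].split()))  # 150 -> words 0..149
print(len(chunks[1].split()))  # 150 -> words 120..269 (words 270..299 fall outside both windows)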

def data_augumentation(df, df_name):
    """
    Create a new dataframe from the original one, because one text may now contain
    multiple subtexts of at most 150 words.
    'text' holds the subtexts extracted from the original text, while 'index' holds
    the index of the original row.
    """
    text_l = []
    label_l = []
    index_l = []
    for idx, row in df.iterrows():
        for l in row['text_split']:
            text_l.append(l)
            label_l.append(row['label'])
            index_l.append(idx)
    new_df = pd.DataFrame({'text': text_l, 'label': label_l, 'index': index_l})
    print("The " + df_name + " set now has " + str(len(new_df)) + ' subtexts extracted from ' + str(len(df)) + ' texts.')
    return new_df

train_df = data_augumentation(train, df_name='training')
val_df = data_augumentation(val, df_name='validation')
test_df = data_augumentation(test, df_name='testing')

When I feed short sequences into my model, it works. Could the long sequences be coming from the long-document preprocessing? I cannot find the problem. Please help. Thank you.
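For reference, a check along these lines (a hypothetical diagnostic of my own, assuming tokenizer and train_df as defined above) should reveal whether any split text still tokenizes to more than 512 wordpieces:

# Hypothetical diagnostic, not part of my original notebook: find the longest
# tokenized subtext after splitting (must stay <= 512 for BERT-base).
max_len = 0
for text in train_df["text"]:
    n_tokens = len(tokenizer.tokenize(text)) + 2   # +2 for [CLS] and [SEP]
    max_len = max(max_len, n_tokens)
print("longest tokenized sequence:", max_len)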

BERT's maximum sequence length is 512; that is the size of its position-embedding table, which is what the failed assertion compares against. Try feeding your model short sequences. If that works, check your data: there is an over-long sequence somewhere. In your case the traceback shows a sequence length of 9755 reaching the embedding layer; note that _prepare does self.max_seq_len = max(self.max_seq_len, len(token_ids)), so max_seq_len grows to the length of the longest tokenized text instead of staying at 150.
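One possible remedy (a minimal sketch with my own helper name, not code from this post) is to cap the sequence length at 512 and truncate every example before padding:

# A minimal sketch: cap the sequence length at BERT's 512-token position-embedding
# limit and truncate/pad every example to that length before building the model.
import numpy as np

MAX_BERT_LEN = 512  # uncased_L-12_H-768_A-12 has 512 position embeddings

def clamp_and_pad(token_ids_list, max_seq_len):
    """Truncate every token-id list to max_seq_len and zero-pad the shorter ones."""
    max_seq_len = min(max_seq_len, MAX_BERT_LEN)
    out = []
    for ids in token_ids_list:
        ids = list(ids)[:max_seq_len]                 # truncate over-long sequences
        ids = ids + [0] * (max_seq_len - len(ids))    # pad short ones with 0
        out.append(ids)
    return np.array(out)

# usage with made-up data: one short and one over-long sequence
example = [[101, 2023, 2003, 102], list(range(1000))]
print(clamp_and_pad(example, max_seq_len=512).shape)  # (2, 512)

With the length capped this way, the model can then be built with max_seq_len no larger than 512.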
