无法在Keras中为NER任务在BERT之上添加CRF层



我在为NER训练BERT-CRF模型时遇到未知问题。我使用keras.contrib作为CRF模型。

以下是导入的库。

!pip install transformers
!pip install git+https://www.github.com/keras-team/keras-contrib.git
import pandas as pd
import numpy as np
from transformers import TFBertModel, BertTokenizer, BertConfig
import tensorflow as tf
from tensorflow import keras
from keras_contrib.layers import CRF
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

用于创建模型的代码。

# Three integer inputs; each example is a sequence of MAX_LEN values.
input_ids = keras.layers.Input(shape=(MAX_LEN,), dtype=tf.int32)
token_type_ids = keras.layers.Input(shape=(MAX_LEN,), dtype=tf.int32)
attention_mask = keras.layers.Input(shape=(MAX_LEN,), dtype=tf.int32)
# `bert` is created outside this snippet (presumably a TFBertModel, given the
# imports -- TODO confirm). Only the first element of its output is kept.
bert_output = bert(
[input_ids,
attention_mask,
token_type_ids]
)[0]    
bert_output = keras.layers.Dropout(0.3)(bert_output)
# NOTE(review): the Dense layer emits num_classes+1 softmax values while the
# CRF below is built with num_classes -- confirm the mismatch is intended.
dense_layer_output = keras.layers.Dense(num_classes+1, activation='softmax', name='output')(bert_output)
crf = CRF(num_classes)
outputs = crf(dense_layer_output)
# NOTE(review): the input order here (ids, token types, mask) differs from the
# order passed to `bert` above (ids, mask, token types); verify that x_train
# is arranged to match the order used here.
model = keras.Model(
inputs=[input_ids, token_type_ids, attention_mask],
outputs=[outputs],
)
# Loss and metric both come from the keras_contrib CRF layer object; the
# traceback shown below is raised from keras_contrib's crf_loss when it reads
# y_pred._keras_history.
model.compile(
loss=crf.loss_function,
metrics=[crf.accuracy],
optimizer=keras.optimizers.Adam(5e-5)
)
model.fit(
x_train,
y_train,
epochs=1,
verbose=1,
batch_size=32,
validation_data=(x_test, y_test)
)

在尝试训练模型时,我遇到了这个错误。我无法理解它的来源和原因。

WARNING:tensorflow:The parameters `output_attentions`, `output_hidden_states` and `use_cache` cannot be updated when calling a model.They have to be set to True/False in the config object (i.e.: `config=XConfig.from_pretrained('name', output_attentions=True)`).
WARNING:tensorflow:The parameter `return_dict` cannot be set in graph mode and will always be set to `True`.
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-18-f369b38eb91d> in <module>()
5     verbose=1,
6     batch_size=32,
----> 7     validation_data=(x_test, y_test)
8 )
9 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
975           except Exception as e:  # pylint:disable=broad-except
976             if hasattr(e, "ag_error_metadata"):
--> 977               raise e.ag_error_metadata.to_exception(e)
978             else:
979               raise
AttributeError: in user code:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
return step_function(self, iterator)
/usr/local/lib/python3.7/dist-packages/keras_contrib/losses/crf_losses.py:54 crf_loss  *
crf, idx = y_pred._keras_history[:2]
AttributeError: 'Tensor' object has no attribute '_keras_history'

我在网上读到keras.contrib很复杂,但我不知道如何在BERT上使用CRF层。如果有更好的方法在keras做这件事,那么请建议我。

我不确定这个问题是否表述清楚,但如果能得到任何帮助,我将不胜感激。提前感谢!

最简单的方法是使用TensorFlow Addons(tensorflow_addons)提供的CRF层,然后利用它的输出来计算损失。

import tensorflow_addons as tfa

# `num_classes` is the integer label count used by the model code earlier in
# this file; the extra unit matches the num_classes+1 width of the Dense layer
# that feeds the CRF.
crf = tfa.layers.CRF(num_classes + 1)

此外,您还可以自定义一个Model类来封装基础模型,并在其中计算CRF损失。

from tensorflow_addons.text.crf import crf_log_likelihood
def unpack_data(data):
    """Split a training/evaluation batch into ``(x, y, sample_weight)``.

    Args:
        data: A tuple or list of the form ``(x, y)`` or
            ``(x, y, sample_weight)``.

    Returns:
        A 3-tuple ``(x, y, sample_weight)``; ``sample_weight`` is ``None``
        when ``data`` has only two elements.

    Raises:
        TypeError: If ``data`` is not a tuple/list of size 2 or 3.
    """
    # Reject objects that merely support len()/indexing (a string, or a single
    # array whose first dimension happens to be 2 or 3): unpacking those would
    # silently produce a wrong (x, y) pair.
    if not isinstance(data, (tuple, list)) or len(data) not in (2, 3):
        raise TypeError("Expected data to be a tuple of size 2 or 3.")
    if len(data) == 2:
        x, y = data
        return x, y, None
    x, y, sample_weight = data
    return x, y, sample_weight

class ModelWithCRFLoss(tf.keras.Model):
    """Wrapper around the base model for custom training logic.

    The wrapped ``base_model`` must return four values in the order
    ``(decoded_sequence, potentials, sequence_length, chain_kernel)``,
    because ``compute_loss`` unpacks its output that way.
    """

    def __init__(self, base_model):
        """Store the model whose output feeds the CRF loss."""
        super().__init__()
        self.base_model = base_model

    def call(self, inputs, training=None):
        """Run the wrapped model, forwarding the ``training`` flag."""
        return self.base_model(inputs, training=training)

    def compute_loss(self, x, y, sample_weight, training=False):
        """Return ``(mean CRF loss, sum of the model's internal losses)``.

        Args:
            x: Inputs passed to the wrapped model.
            y: Target tag sequences handed to ``crf_log_likelihood``.
            sample_weight: Optional weights multiplied into the CRF loss
                before averaging, or ``None``.
            training: Forwarded to the model call.
        """
        y_pred = self(x, training=training)
        # The decoded sequence is not needed for the loss.
        _, potentials, sequence_length, chain_kernel = y_pred

        # The loss is the negated first element returned by
        # crf_log_likelihood.
        crf_loss = -crf_log_likelihood(potentials, y, sequence_length, chain_kernel)[0]

        if sample_weight is not None:
            crf_loss = crf_loss * sample_weight

        return tf.reduce_mean(crf_loss), sum(self.losses)

    def train_step(self, data):
        """Run one optimisation step and report both loss components."""
        x, y, sample_weight = unpack_data(data)

        # Only the forward pass and loss are recorded on the tape.
        with tf.GradientTape() as tape:
            crf_loss, internal_losses = self.compute_loss(
                x, y, sample_weight, training=True
            )
            total_loss = crf_loss + internal_losses

        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        return {"crf_loss": crf_loss, "internal_losses": internal_losses}

    def test_step(self, data):
        """Evaluate one batch (training=False) and report both loss components."""
        x, y, sample_weight = unpack_data(data)
        crf_loss, internal_losses = self.compute_loss(x, y, sample_weight)
        return {"crf_loss_val": crf_loss, "internal_losses_val": internal_losses}

你可以按照类似下面的思路编写代码:

# Calling the CRF layer yields four tensors; ModelWithCRFLoss.compute_loss
# unpacks the base model's output in exactly this order.
decoded_sequence, potentials, sequence_length, chain_kernel = crf(
    dense_layer_output, mask=attention_mask
)
base_model = tf.keras.Model(
    # token_type_ids is listed because dense_layer_output, as built earlier in
    # this file, is computed from all three Input tensors.
    inputs=[input_ids, token_type_ids, attention_mask],
    # Pass the four tensors unpacked above (there is no `crf_layer_outputs`
    # variable in this code).
    outputs=[decoded_sequence, potentials, sequence_length, chain_kernel],
)
model = ModelWithCRFLoss(base_model)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=5e-3, epsilon=1e-08),
    metrics=tf.metrics.SparseCategoricalAccuracy(),
)

最新更新