我对Keras/Tensorflow的输入形状有问题。
EDIT-我发现,当我只为我的度量指定"准确性"时,它运行良好,我可以训练我的模型,但当我通过增加精度以另一种方式进行时,它会因错误而失败。
My Model summary is like this
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, 128, 64) 2251520
_________________________________________________________________
bidirectional (Bidirectional (None, 128, 128) 66048
_________________________________________________________________
time_distributed (TimeDistri (None, 128, 18) 2322
=================================================================
Total params: 2,319,890
Trainable params: 2,319,890
Non-trainable params: 0
我在做NER,把我的句子增加到128个单词。
我的代码如下,数据集来自这里
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, KFold
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
physical_devices = tf.config.list_physical_devices()
tf.config.experimental.set_memory_growth(physical_devices[1], enable=True)
df = pd.read_csv("ner_dataset.csv", encoding="latin1")
# fill NaN - propogate non null values forward
df = df.fillna(method="ffill")
sent_count = len(set(df["Sentence #"].values))
print(sent_count)
words = list(set(df["Word"].values))
words_count = len(words)
print(words_count)
word2idx = {}
# add the padding and unknown token
word2idx["PAD_TOKEN"] = len(word2idx)
word2idx["UNK_TOKEN"] = len(word2idx)
# add the rest
for i in range(0, len(words)):
word2idx[words[i]] = len(word2idx)
# index to word mapping
idx2word = {i: w for w, i in word2idx.items()}
# number of unique tags
tags = list(set(df["Tag"].values))
tags_count = len(tags)
print(tags_count)
tag2idx = {}
tag2idx['PAD_TAG'] = 0 # this is the tag that is assigned to the pad tokens 'PAD_TOKEN'
for i in range(0, len(tags)):
tag2idx[tags[i]] = len(tag2idx)
# index to tag mapping
idx2tag = {i: w for w, i in tag2idx.items()}
def getSentences(dataframe):
sentences = []
groups = dataframe.groupby("Sentence #")
for name, group in groups:
zipped = zip(group["Word"], group["Tag"])
sentences.append(list(zipped))
return sentences
sents = getSentences(df)
len(sents[0]), len(sents[1]) # sentences are of different lengths
max_len = 128
y = [[tag2idx[word[1]] for word in s] for s in sents]
y = pad_sequences(maxlen=max_len, sequences=y, value=tag2idx["PAD_TAG"], padding='post', truncating='post')
x = [[word2idx[word[0]] for word in s] for s in sents]
x = pad_sequences(maxlen=max_len, sequences=x, value=word2idx["PAD_TOKEN"], padding='post', truncating='post')
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
vocab_size = len(word2idx)
vocab_size
batch_size = 32
model = models.Sequential()
embeddinglayer = layers.Embedding(input_dim=vocab_size, output_dim=64, input_length=max_len)
model.add(embeddinglayer)
bilstm = layers.Bidirectional(layers.LSTM(64, return_sequences=True))
model.add(bilstm)
num_tags = len(tag2idx)
timedist = layers.TimeDistributed(layers.Dense(num_tags, activation="softmax"))
model.add(timedist)
model.summary()
METRICS = [
'accuracy',
tf.keras.metrics.Precision(name='precision'),
tf.keras.metrics.Recall(name='recall')
]
model.compile(optimizer="adam",
loss="sparse_categorical_crossentropy",
metrics=METRICS)
history = model.fit(x_train, np.array(y_train), batch_size=batch_size, epochs=25, validation_data=(x_test, y_test), verbose=1)
拆分后的类型均为np阵列
type(x_train), type(x_test), type(y_train), type(y_test)
(numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)
形状是
((38367, 128), (9592, 128), (38367, 128), (9592, 128))
每个训练项目(句子(是长度为128的单词索引的数组,例如x_train[0]看起来像下面的数组(y值相似(相同的长度128(,除了它们是每个单词的标签/标签的索引。(
array([25653, 1878, 26510, 12653, 33524, 15752, 30488, 14594, 33943,
3656, 22478, 596, 13235, 10080, 16432, 18190, 20273, 10254,
34463, 15526, 24899, 4359, 30488, 10525, 19165, 30439, 16205,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0], dtype=int32)
编辑:下方的错误
Epoch 1/25
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-1-35a0fcfc79ab> in <module>
111
112
--> 113 history = model.fit(x_train, np.array(y_train), batch_size=batch_size, epochs=25, validation_data=(x_test, y_test), verbose=1)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginetraining.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1098 _r=1):
1099 callbacks.on_train_batch_begin(step)
-> 1100 tmp_logs = self.train_function(iterator)
1101 if data_handler.should_sync:
1102 context.async_wait()
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerdef_function.py in __call__(self, *args, **kwds)
826 tracing_count = self.experimental_get_tracing_count()
827 with trace.Trace(self._name) as tm:
--> 828 result = self._call(*args, **kwds)
829 compiler = "xla" if self._experimental_compile else "nonXla"
830 new_tracing_count = self.experimental_get_tracing_count()
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerdef_function.py in _call(self, *args, **kwds)
869 # This is the first call of __call__, so we have to initialize.
870 initializers = []
--> 871 self._initialize(args, kwds, add_initializers_to=initializers)
872 finally:
873 # At this point we know that the initialization is complete (or less
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerdef_function.py in _initialize(self, args, kwds, add_initializers_to)
724 self._concrete_stateful_fn = (
725 self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
--> 726 *args, **kwds))
727
728 def invalid_creator_scope(*unused_args, **unused_kwds):
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerfunction.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2967 args, kwargs = None, None
2968 with self._lock:
-> 2969 graph_function, _ = self._maybe_define_function(args, kwargs)
2970 return graph_function
2971
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerfunction.py in _maybe_define_function(self, args, kwargs)
3359
3360 self._function_cache.missed.add(call_context_key)
-> 3361 graph_function = self._create_graph_function(args, kwargs)
3362 self._function_cache.primary[cache_key] = graph_function
3363
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerfunction.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3204 arg_names=arg_names,
3205 override_flat_arg_shapes=override_flat_arg_shapes,
-> 3206 capture_by_value=self._capture_by_value),
3207 self._function_attributes,
3208 function_spec=self.function_spec,
c:miniconda3envsnerlibsite-packagestensorflowpythonframeworkfunc_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
988 _, original_func = tf_decorator.unwrap(python_func)
989
--> 990 func_outputs = python_func(*func_args, **func_kwargs)
991
992 # invariant: `func_outputs` contains only Tensors, CompositeTensors,
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerdef_function.py in wrapped_fn(*args, **kwds)
632 xla_context.Exit()
633 else:
--> 634 out = weak_wrapped_fn().__wrapped__(*args, **kwds)
635 return out
636
c:miniconda3envsnerlibsite-packagestensorflowpythonframeworkfunc_graph.py in wrapper(*args, **kwargs)
975 except Exception as e: # pylint:disable=broad-except
976 if hasattr(e, "ag_error_metadata"):
--> 977 raise e.ag_error_metadata.to_exception(e)
978 else:
979 raise
ValueError: in user code:
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginetraining.py:805 train_function *
return step_function(self, iterator)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginetraining.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
c:miniconda3envsnerlibsite-packagestensorflowpythondistributedistribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
c:miniconda3envsnerlibsite-packagestensorflowpythondistributedistribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
c:miniconda3envsnerlibsite-packagestensorflowpythondistributedistribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginetraining.py:788 run_step **
outputs = model.train_step(data)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginetraining.py:758 train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginecompile_utils.py:408 update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasutilsmetrics_utils.py:90 decorated
update_op = update_state_fn(*args, **kwargs)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasmetrics.py:177 update_state_fn
return ag_update_state(*args, **kwargs)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasmetrics.py:1291 update_state **
sample_weight=sample_weight)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasutilsmetrics_utils.py:354 update_confusion_matrix_variables
y_pred.shape.assert_is_compatible_with(y_true.shape)
c:miniconda3envsnerlibsite-packagestensorflowpythonframeworktensor_shape.py:1134 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 128, 18) and (None, 128) are incompatible
我认为model.fit()
的x
、y
参数不一致:x
是一个列表,y
是一个numpy数组。尝试:
history = model.fit(np_array(x_train), np.array(y_train))