ValueError:形状(无,128,18)和(无,28)不兼容



我对Keras/Tensorflow的输入形状有问题。

EDIT-我发现,当我只为我的度量指定"准确性"时,它运行良好,我可以训练我的模型,但当我通过增加精度以另一种方式进行时,它会因错误而失败。

My Model summary is like this
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding (Embedding)        (None, 128, 64)           2251520   
_________________________________________________________________
bidirectional (Bidirectional (None, 128, 128)          66048     
_________________________________________________________________
time_distributed (TimeDistri (None, 128, 18)           2322      
=================================================================
Total params: 2,319,890
Trainable params: 2,319,890
Non-trainable params: 0

我在做NER,把我的句子增加到128个单词。

我的代码如下,数据集来自这里

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold, KFold
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
physical_devices = tf.config.list_physical_devices()
tf.config.experimental.set_memory_growth(physical_devices[1], enable=True)
df = pd.read_csv("ner_dataset.csv", encoding="latin1")
# fill NaN - propogate non null values forward
df = df.fillna(method="ffill")
sent_count = len(set(df["Sentence #"].values))
print(sent_count)
words = list(set(df["Word"].values))
words_count = len(words)
print(words_count)
word2idx = {}
# add the padding and unknown token 
word2idx["PAD_TOKEN"] = len(word2idx)
word2idx["UNK_TOKEN"] = len(word2idx)
# add the rest
for i in range(0, len(words)):
word2idx[words[i]] = len(word2idx)
# index to word mapping
idx2word = {i: w for w, i in word2idx.items()}
# number of unique tags
tags = list(set(df["Tag"].values))
tags_count = len(tags)
print(tags_count)

tag2idx = {}
tag2idx['PAD_TAG'] = 0 # this is the tag that is assigned to the pad tokens 'PAD_TOKEN'
for i in range(0, len(tags)):
tag2idx[tags[i]] = len(tag2idx)

# index to tag mapping
idx2tag = {i: w for w, i in tag2idx.items()}
def getSentences(dataframe):

sentences = []

groups = dataframe.groupby("Sentence #")
for name, group in groups:
zipped = zip(group["Word"], group["Tag"])
sentences.append(list(zipped))

return sentences
sents = getSentences(df)
len(sents[0]), len(sents[1]) # sentences are of different lengths
max_len = 128
y = [[tag2idx[word[1]] for word in s] for s in sents]
y = pad_sequences(maxlen=max_len, sequences=y, value=tag2idx["PAD_TAG"], padding='post', truncating='post')
x = [[word2idx[word[0]] for word in s] for s in sents]
x = pad_sequences(maxlen=max_len, sequences=x, value=word2idx["PAD_TOKEN"], padding='post', truncating='post')

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
vocab_size = len(word2idx)
vocab_size
batch_size = 32

model = models.Sequential()

embeddinglayer = layers.Embedding(input_dim=vocab_size, output_dim=64, input_length=max_len)
model.add(embeddinglayer)
bilstm = layers.Bidirectional(layers.LSTM(64, return_sequences=True))
model.add(bilstm)
num_tags = len(tag2idx)
timedist = layers.TimeDistributed(layers.Dense(num_tags, activation="softmax"))
model.add(timedist)
model.summary()
METRICS = [
'accuracy',
tf.keras.metrics.Precision(name='precision'),
tf.keras.metrics.Recall(name='recall')
]
model.compile(optimizer="adam",
loss="sparse_categorical_crossentropy",
metrics=METRICS)

history = model.fit(x_train, np.array(y_train), batch_size=batch_size, epochs=25, validation_data=(x_test, y_test), verbose=1)

拆分后的类型均为np阵列

type(x_train), type(x_test), type(y_train), type(y_test)
(numpy.ndarray, numpy.ndarray, numpy.ndarray, numpy.ndarray)

形状是

((38367, 128), (9592, 128), (38367, 128), (9592, 128))

每个训练项目(句子(是长度为128的单词索引的数组,例如x_train[0]看起来像下面的数组(y值相似(相同的长度128(,除了它们是每个单词的标签/标签的索引。(

array([25653,  1878, 26510, 12653, 33524, 15752, 30488, 14594, 33943,
3656, 22478,   596, 13235, 10080, 16432, 18190, 20273, 10254,
34463, 15526, 24899,  4359, 30488, 10525, 19165, 30439, 16205,
0,     0,     0,     0,     0,     0,     0,     0,     0,
0,     0,     0,     0,     0,     0,     0,     0,     0,
0,     0,     0,     0,     0,     0,     0,     0,     0,
0,     0,     0,     0,     0,     0,     0,     0,     0,
0,     0,     0,     0,     0,     0,     0,     0,     0,
0,     0,     0,     0,     0,     0,     0,     0,     0,
0,     0,     0,     0,     0,     0,     0,     0,     0,
0,     0,     0,     0,     0,     0,     0,     0,     0,
0,     0,     0,     0,     0,     0,     0,     0,     0,
0,     0,     0,     0,     0,     0,     0,     0,     0,
0,     0,     0,     0,     0,     0,     0,     0,     0,
0,     0], dtype=int32)

编辑:下方的错误

Epoch 1/25
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-1-35a0fcfc79ab> in <module>
111 
112 
--> 113 history = model.fit(x_train, np.array(y_train), batch_size=batch_size, epochs=25, validation_data=(x_test, y_test), verbose=1)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginetraining.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1098                 _r=1):
1099               callbacks.on_train_batch_begin(step)
-> 1100               tmp_logs = self.train_function(iterator)
1101               if data_handler.should_sync:
1102                 context.async_wait()
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerdef_function.py in __call__(self, *args, **kwds)
826     tracing_count = self.experimental_get_tracing_count()
827     with trace.Trace(self._name) as tm:
--> 828       result = self._call(*args, **kwds)
829       compiler = "xla" if self._experimental_compile else "nonXla"
830       new_tracing_count = self.experimental_get_tracing_count()
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerdef_function.py in _call(self, *args, **kwds)
869       # This is the first call of __call__, so we have to initialize.
870       initializers = []
--> 871       self._initialize(args, kwds, add_initializers_to=initializers)
872     finally:
873       # At this point we know that the initialization is complete (or less
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerdef_function.py in _initialize(self, args, kwds, add_initializers_to)
724     self._concrete_stateful_fn = (
725         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
--> 726             *args, **kwds))
727 
728     def invalid_creator_scope(*unused_args, **unused_kwds):
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerfunction.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
2967       args, kwargs = None, None
2968     with self._lock:
-> 2969       graph_function, _ = self._maybe_define_function(args, kwargs)
2970     return graph_function
2971 
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerfunction.py in _maybe_define_function(self, args, kwargs)
3359 
3360           self._function_cache.missed.add(call_context_key)
-> 3361           graph_function = self._create_graph_function(args, kwargs)
3362           self._function_cache.primary[cache_key] = graph_function
3363 
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerfunction.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3204             arg_names=arg_names,
3205             override_flat_arg_shapes=override_flat_arg_shapes,
-> 3206             capture_by_value=self._capture_by_value),
3207         self._function_attributes,
3208         function_spec=self.function_spec,
c:miniconda3envsnerlibsite-packagestensorflowpythonframeworkfunc_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
988         _, original_func = tf_decorator.unwrap(python_func)
989 
--> 990       func_outputs = python_func(*func_args, **func_kwargs)
991 
992       # invariant: `func_outputs` contains only Tensors, CompositeTensors,
c:miniconda3envsnerlibsite-packagestensorflowpythoneagerdef_function.py in wrapped_fn(*args, **kwds)
632             xla_context.Exit()
633         else:
--> 634           out = weak_wrapped_fn().__wrapped__(*args, **kwds)
635         return out
636 
c:miniconda3envsnerlibsite-packagestensorflowpythonframeworkfunc_graph.py in wrapper(*args, **kwargs)
975           except Exception as e:  # pylint:disable=broad-except
976             if hasattr(e, "ag_error_metadata"):
--> 977               raise e.ag_error_metadata.to_exception(e)
978             else:
979               raise
ValueError: in user code:
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginetraining.py:805 train_function  *
return step_function(self, iterator)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginetraining.py:795 step_function  **
outputs = model.distribute_strategy.run(run_step, args=(data,))
c:miniconda3envsnerlibsite-packagestensorflowpythondistributedistribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
c:miniconda3envsnerlibsite-packagestensorflowpythondistributedistribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
c:miniconda3envsnerlibsite-packagestensorflowpythondistributedistribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginetraining.py:788 run_step  **
outputs = model.train_step(data)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginetraining.py:758 train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasenginecompile_utils.py:408 update_state
metric_obj.update_state(y_t, y_p, sample_weight=mask)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasutilsmetrics_utils.py:90 decorated
update_op = update_state_fn(*args, **kwargs)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasmetrics.py:177 update_state_fn
return ag_update_state(*args, **kwargs)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasmetrics.py:1291 update_state  **
sample_weight=sample_weight)
c:miniconda3envsnerlibsite-packagestensorflowpythonkerasutilsmetrics_utils.py:354 update_confusion_matrix_variables
y_pred.shape.assert_is_compatible_with(y_true.shape)
c:miniconda3envsnerlibsite-packagestensorflowpythonframeworktensor_shape.py:1134 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 128, 18) and (None, 128) are incompatible

我认为model.fit()xy参数不一致:x是一个列表,y是一个numpy数组。尝试:

history = model.fit(np_array(x_train), np.array(y_train))

相关内容

  • 没有找到相关文章

最新更新