Keras Tuner 的 tuner.search 报错:形状 (None, 11, 11) 和 (None, 1000, 11) 不兼容



我正在开发一个LSTM模型,用于11个类别的多标签文本分类。
使用 Keras Tuner 进行调参时报错,而未进行调优的模型运行良好。

如果有人能指出这些错误背后的问题,我将不胜感激。我尝试将loss更改为sparse_categorical_crossentropy,但它会给我带来其他错误。

这是我的设置。tensorflow version:2.5.0, keras version:2.4.3

文本清理

import re
df = data  # second name for the same object (no copy is made); `data` is assumed to be loaded earlier, outside this snippet
# Separator-like punctuation that clean_text turns into a space.
# '[' and ']' must be escaped to be members of the character class; without
# the escapes the pattern parses as the class [/(){}[] OR the literal text
# "@,;]", so ']', '|', '@', ',' and ';' are never replaced on their own.
REPLACE_BY_SPACE_RE = re.compile(r'[/(){}\[\]|@,;]')
# Everything except digits, lowercase ASCII letters, space, '#', '+' and '_'.
BAD_SYMBOLS_RE = re.compile(r'[^0-9a-z #+_]')
# Stored as a set so the per-word membership test in clean_text is a hash lookup.
# NOTE(review): `stopwords` is presumably nltk.corpus.stopwords -- confirm the import, it is not shown here.
STOPWORDS = set(stopwords.words('english'))
def clean_text(text):
    """Normalise one raw document string before tokenisation.

    text: a string

    return: the modified string -- lowercased, punctuation replaced or
    removed, and words found in STOPWORDS dropped. Words are re-joined
    with single spaces.
    """
    text = text.lower()
    # Separator-like punctuation becomes a space so neighbouring words stay apart.
    text = REPLACE_BY_SPACE_RE.sub(' ', text)
    # Any remaining character outside the allowed set is deleted outright.
    text = BAD_SYMBOLS_RE.sub('', text)
    # NOTE(review): this deletes every literal 'x', including inside ordinary
    # words ("text" -> "tet"). Presumably meant to strip 'xxxx' anonymisation
    # masks -- confirm against the dataset before narrowing it.
    text = text.replace('x', '')
    # Remove stop words; split()/join also collapses repeated whitespace.
    text = ' '.join(word for word in text.split() if word not in STOPWORDS)
    return text
# Clean every document with clean_text.
df['text'] = df['text'].apply(clean_text)
# Drop runs of digits. The pattern needs the backslash (r'\d+'); a bare 'd+'
# does not match digits at all. regex=True is passed explicitly because the
# default of Series.str.replace differs between pandas versions.
df['text'] = df['text'].str.replace(r'\d+', '', regex=True)

嵌入

# Vocabulary cap for the tokenizer, padded sequence length, embedding width.
MAX_NB_WORDS = 70000
MAX_SEQUENCE_LENGTH = 1000
EMBEDDING_DIM = 200

tokenizer = Tokenizer(
    num_words=MAX_NB_WORDS,
    filters='!"#$%&()*+,-./:;<=>?@[]^_`{|}~',
    lower=True,
)

# Fit the vocabulary on the cleaned texts, then report its size.
corpus = df['text'].values
tokenizer.fit_on_texts(corpus)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

# Convert each text to a sequence of word ids and pad to a common length.
X = pad_sequences(tokenizer.texts_to_sequences(corpus), maxlen=MAX_SEQUENCE_LENGTH)
print('Shape of data tensor:', X.shape)

Shape of data tensor: (10695, 1000)

# One-hot encode the target column: one indicator column per distinct value.
label_frame = pd.get_dummies(df['condition_predict'])
Y = label_frame.values
print('Shape of label tensor:', Y.shape)

Shape of label tensor: (10695, 11)

训练和测试数据

def get_training_testing_data(dataframe):
    """Split the texts and their one-hot labels into train and test parts.

    dataframe: object whose 'text' column holds the documents to split.

    The labels are NOT taken from ``dataframe``: they come from the
    module-level array ``Y``, so ``dataframe`` is expected to line up
    row-for-row with ``Y``.

    return: (X_train_df, X_test_df, Y_train_df, Y_test_df), where 10% of
    the rows go to the test part. random_state is fixed at 42 so the split
    is reproducible.
    """
    X_train_df, X_test_df, Y_train_df, Y_test_df = train_test_split(
        dataframe['text'], Y, test_size=0.10, random_state=42)
    # Print the shapes of both parts as a quick sanity check.
    print(X_train_df.shape, Y_train_df.shape)
    print(X_test_df.shape, Y_test_df.shape)
    return X_train_df, X_test_df, Y_train_df, Y_test_df

(9625,) (9625, 11)
(1070,) (1070, 11)

标记化

# Encode the training texts with the fitted tokenizer and pad them to
# MAX_SEQUENCE_LENGTH.
train_sequences = tokenizer.texts_to_sequences(X_train_df.values)
X_train_df_t = pad_sequences(train_sequences, maxlen=MAX_SEQUENCE_LENGTH)

构建用于调参的模型

def build_model(hp):
    """Build and compile one candidate model for the tuner (question version).

    hp: the tuner's hyperparameter object; the dropout 'rate' and the
    optimizer 'learning_rate' are picked from it with hp.Choice.

    return: a compiled keras.Sequential model.
    """
    # A single instantiation is enough; the earlier duplicate was overwritten
    # immediately.
    model = keras.Sequential()
    model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]))
    model.add(SpatialDropout1D(hp.Choice('rate', values=[0.1, 0.2, 0.3])))
    # NOTE: no recurrent layer sits between the embedding and this Dense
    # layer, so Dense is applied at every timestep and the model output is
    # (None, 1000, 11) instead of (None, 11). This is kept on purpose: it is
    # the version that reproduces the ValueError quoted below. The fix is to
    # add an LSTM layer before Dense.
    model.add(Dense(11, activation='softmax'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    return model
# Random search over the hyperparameters that build_model draws from `hp`.
tuner = RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=3,
    directory='hypertuning/',
    project_name='MultiClass_classification',
)

# Run the search over the parameter space.
# NOTE(review): batch_size is defined here but never forwarded to
# tuner.search -- confirm whether that is intended.
epochs = 200
batch_size = 64
tuner.search(X_train_df_t, Y_train_df, epochs=epochs, validation_split=0.5)

我得到以下错误

ValueError                                Traceback (most recent call last)
<timed eval> in <module>
~anaconda3envstensorflowlibsite-packageskerastunerenginebase_tuner.py in search(self, *fit_args, **fit_kwargs)
129 
130             self.on_trial_begin(trial)
--> 131             self.run_trial(trial, *fit_args, **fit_kwargs)
132             self.on_trial_end(trial)
133         self.on_search_end()
~anaconda3envstensorflowlibsite-packageskerastunerenginemulti_execution_tuner.py in run_trial(self, trial, *fit_args, **fit_kwargs)
94             copied_fit_kwargs['callbacks'] = callbacks
95 
---> 96             history = self._build_and_fit_model(trial, fit_args, copied_fit_kwargs)
97             for metric, epoch_values in history.history.items():
98                 if self.oracle.objective.direction == 'min':
~anaconda3envstensorflowlibsite-packageskerastunerenginetuner.py in _build_and_fit_model(self, trial, fit_args, fit_kwargs)
139         """
140         model = self.hypermodel.build(trial.hyperparameters)
--> 141         return model.fit(*fit_args, **fit_kwargs)
142 
143     def run_trial(self, trial, *fit_args, **fit_kwargs):
~anaconda3envstensorflowlibsite-packagestensorflowpythonkerasenginetraining.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
1181                 _r=1):
1182               callbacks.on_train_batch_begin(step)
-> 1183               tmp_logs = self.train_function(iterator)
1184               if data_handler.should_sync:
1185                 context.async_wait()
~anaconda3envstensorflowlibsite-packagestensorflowpythoneagerdef_function.py in __call__(self, *args, **kwds)
887 
888       with OptionalXlaContext(self._jit_compile):
--> 889         result = self._call(*args, **kwds)
890 
891       new_tracing_count = self.experimental_get_tracing_count()
~anaconda3envstensorflowlibsite-packagestensorflowpythoneagerdef_function.py in _call(self, *args, **kwds)
931       # This is the first call of __call__, so we have to initialize.
932       initializers = []
--> 933       self._initialize(args, kwds, add_initializers_to=initializers)
934     finally:
935       # At this point we know that the initialization is complete (or less
~anaconda3envstensorflowlibsite-packagestensorflowpythoneagerdef_function.py in _initialize(self, args, kwds, add_initializers_to)
762     self._concrete_stateful_fn = (
763         self._stateful_fn._get_concrete_function_internal_garbage_collected(  # pylint: disable=protected-access
--> 764             *args, **kwds))
765 
766     def invalid_creator_scope(*unused_args, **unused_kwds):
~anaconda3envstensorflowlibsite-packagestensorflowpythoneagerfunction.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
3048       args, kwargs = None, None
3049     with self._lock:
-> 3050       graph_function, _ = self._maybe_define_function(args, kwargs)
3051     return graph_function
3052 
~anaconda3envstensorflowlibsite-packagestensorflowpythoneagerfunction.py in _maybe_define_function(self, args, kwargs)
3442 
3443           self._function_cache.missed.add(call_context_key)
-> 3444           graph_function = self._create_graph_function(args, kwargs)
3445           self._function_cache.primary[cache_key] = graph_function
3446 
~anaconda3envstensorflowlibsite-packagestensorflowpythoneagerfunction.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
3287             arg_names=arg_names,
3288             override_flat_arg_shapes=override_flat_arg_shapes,
-> 3289             capture_by_value=self._capture_by_value),
3290         self._function_attributes,
3291         function_spec=self.function_spec,
~anaconda3envstensorflowlibsite-packagestensorflowpythonframeworkfunc_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
997         _, original_func = tf_decorator.unwrap(python_func)
998 
--> 999       func_outputs = python_func(*func_args, **func_kwargs)
1000 
1001       # invariant: `func_outputs` contains only Tensors, CompositeTensors,
~anaconda3envstensorflowlibsite-packagestensorflowpythoneagerdef_function.py in wrapped_fn(*args, **kwds)
670         # the function a weak reference to itself to avoid a reference cycle.
671         with OptionalXlaContext(compile_with_xla):
--> 672           out = weak_wrapped_fn().__wrapped__(*args, **kwds)
673         return out
674 
~anaconda3envstensorflowlibsite-packagestensorflowpythonframeworkfunc_graph.py in wrapper(*args, **kwargs)
984           except Exception as e:  # pylint:disable=broad-except
985             if hasattr(e, "ag_error_metadata"):
--> 986               raise e.ag_error_metadata.to_exception(e)
987             else:
988               raise
ValueError: in user code:
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonkerasenginetraining.py:855 train_function  *
return step_function(self, iterator)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonkerasenginetraining.py:845 step_function  **
outputs = model.distribute_strategy.run(run_step, args=(data,))
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythondistributedistribute_lib.py:1285 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythondistributedistribute_lib.py:2833 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythondistributedistribute_lib.py:3608 _call_for_each_replica
return fn(*args, **kwargs)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonkerasenginetraining.py:838 run_step  **
outputs = model.train_step(data)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonkerasenginetraining.py:797 train_step
y, y_pred, sample_weight, regularization_losses=self.losses)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonkerasenginecompile_utils.py:204 __call__
loss_value = loss_obj(y_t, y_p, sample_weight=sw)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonkeraslosses.py:155 __call__
losses = call_fn(y_true, y_pred)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonkeraslosses.py:259 call  **
return ag_fn(y_true, y_pred, **self._fn_kwargs)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonutildispatch.py:206 wrapper
return target(*args, **kwargs)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonkeraslosses.py:1644 categorical_crossentropy
y_true, y_pred, from_logits=from_logits)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonutildispatch.py:206 wrapper
return target(*args, **kwargs)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonkerasbackend.py:4862 categorical_crossentropy
target.shape.assert_is_compatible_with(output.shape)
C:Usersfalcoanaconda3envstensorflowlibsite-packagestensorflowpythonframeworktensor_shape.py:1161 assert_is_compatible_with
raise ValueError("Shapes %s and %s are incompatible" % (self, other))
ValueError: Shapes (None, 11, 11) and (None, 1000, 11) are incompatible

看起来我已经解决了这个问题。不知怎的,我在模型构建函数 build_model 中完全漏掉了(或者说误删了)LSTM 层。没有 LSTM 层时,Dense 层会作用于嵌入层输出的每一个时间步,模型输出的形状是 (None, 1000, 11),与标签的形状不兼容。经过以下更改后,随机搜索工作顺利。

def build_model(hp):
    """Build and compile one candidate LSTM classifier for the tuner.

    hp: the tuner's hyperparameter object; the dropout 'rate', the LSTM
    'units' and the optimizer 'learning_rate' are picked from it with
    hp.Choice.

    return: a compiled keras.Sequential model with an 11-way softmax output.
    """
    # A single instantiation is enough; the earlier duplicate was overwritten
    # immediately.
    model = keras.Sequential()
    model.add(Embedding(MAX_NB_WORDS, EMBEDDING_DIM, input_length=X.shape[1]))
    model.add(SpatialDropout1D(hp.Choice('rate', values=[0.1, 0.2, 0.3])))
    # The LSTM (return_sequences left at its default) reduces the sequence to
    # one vector per sample, so the Dense layer below outputs (None, 11).
    model.add(LSTM(hp.Choice('units', values=[100, 300, 500]),
                   dropout=0.2, recurrent_dropout=0))
    model.add(Dense(11, activation='softmax'))
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])),
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    return model

最新更新