TF 2.0 - estimator.model_to_estimator() 方法失败,但 model.fit() 可以正常运行



在使用TF 2.0 Alpha并将工作TF Keras模型转换为TF估计器时,我遇到了一个奇怪的错误。

下面的代码取自 "Tensorflow without a PhD" 系列中的 RNN 时间序列预测示例,未做任何修改。

当我对 model_fn_keras() 返回的模型调用 model.fit() 时,运行完全正常;但在将其转换为 TF 估计器时却失败了。

这也在Tensorflow问题板上提出,但他们似乎并不认为这是一个错误 - https://github.com/tensorflow/tensorflow/issues/27750

def compile_keras_sequential_model(list_of_layers, msg):
    """Build, compile and summarize a ``tf.keras.Sequential`` model.

    The model is compiled with mean-squared-error loss, an SGD optimizer
    (Nesterov momentum) and a custom RMSE metric, then its summary is printed.

    Args:
        list_of_layers: Keras layers, in order, passed to ``tf.keras.Sequential``.
        msg: Label printed ahead of the model summary.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    # a tf.keras.Sequential model is a sequence of layers
    model = tf.keras.Sequential(list_of_layers)

    # keras does not have a pre-defined metric for Root Mean Square Error. Let's define one.
    def rmse(y_true, y_pred):  # Root Mean Squared Error
        return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))

    # Leading newline separates this header from any previous output.
    print('\nModel ', msg)

    # Optimizer
    sgd = tf.keras.optimizers.SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)

    # to finalize the model, specify the loss, the optimizer and metrics
    model.compile(
        loss='mean_squared_error',
        optimizer=sgd,
        #         optimizer=tf.keras.optimizers.SGD(lr=0.0001, momentum=0.9),
        metrics=[rmse])

    # this prints a description of the model
    model.summary()
    return model
#Create Keras model
def model_fn_keras():
    """Create and compile the two-layer GRU regression model.

    Returns:
        The compiled Keras model produced by
        ``compile_keras_sequential_model`` for the RNN layer stack.
    """
    # RNN model (RMSE: 0.164 after 10 epochs)
    # Layers are instantiated in stack order so auto-generated layer names
    # keep the same numbering as a literal list would give.
    # [BATCHSIZE, SEQLEN, 1] is necessary for RNN model
    as_sequence = l.Reshape([SEQLEN, 1], input_shape=[SEQLEN,])
    # output shape [BATCHSIZE, SEQLEN, RNN_CELLSIZE]
    gru_all_steps = l.GRU(RNN_CELLSIZE, return_sequences=True)
    # keep only last output in sequence: output shape [BATCHSIZE, RNN_CELLSIZE]
    gru_last_step = l.GRU(RNN_CELLSIZE)
    # output shape [BATCHSIZE, 1]
    regression_head = l.Dense(1)

    rnn_stack = [as_sequence, gru_all_steps, gru_last_step, regression_head]
    return compile_keras_sequential_model(rnn_stack, "RNN")
# Convert the compiled Keras model to a TF Estimator
rnn_keras_model = model_fn_keras()
estimator = tf.keras.estimator.model_to_estimator(keras_model=rnn_keras_model)

错误输出为:

Model  RNN
Model: "sequential_27"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
reshape_27 (Reshape)         (None, 16, 1)             0         
_________________________________________________________________
unified_gru_57 (UnifiedGRU)  (None, 16, 32)            3360      
_________________________________________________________________
unified_gru_58 (UnifiedGRU)  (None, 32)                6336      
_________________________________________________________________
dense_27 (Dense)             (None, 1)                 33        
=================================================================
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
_________________________________________________________________
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-52-05ea50da2f1c> in <module>()
5 #Convert Keras model to Estimator
6 # tf.disable_eager_execution()
----> 7 estimator = tf.keras.estimator.model_to_estimator(keras_model=model_fn_keras())
8 # estimator = model_fn_keras()
9 
c:\users\hrafiq\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\keras\estimator\__init__.py in model_to_estimator(keras_model, keras_model_path, custom_objects, model_dir, config)
71       custom_objects=custom_objects,
72       model_dir=model_dir,
---> 73       config=config)
74 
75 # LINT.ThenChange(//tensorflow_estimator/python/estimator/keras.py)
c:\users\hrafiq\appdata\local\programs\python\python35\lib\site-packages\tensorflow_estimator\python\estimator\keras.py in model_to_estimator(keras_model, keras_model_path, custom_objects, model_dir, config)
488   if keras_model._is_graph_network:
489     warm_start_path = _save_first_checkpoint(keras_model, custom_objects,
--> 490                                              config)
491   elif keras_model.built:
492     logging.warning('You are creating an Estimator from a Keras model manually '
c:\users\hrafiq\appdata\local\programs\python\python35\lib\site-packages\tensorflow_estimator\python\estimator\keras.py in _save_first_checkpoint(keras_model, custom_objects, config)
365           # pylint: disable=protected-access
366           model._make_train_function()
--> 367           K._initialize_variables(sess)
368           # pylint: enable=protected-access
369         saver = saver_lib.Saver()
c:\users\hrafiq\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\keras\backend.py in _initialize_variables(session)
760     # marked as initialized.
761     is_initialized = session.run(
--> 762         [variables_module.is_variable_initialized(v) for v in candidate_vars])
763     uninitialized_vars = []
764     for flag, v in zip(is_initialized, candidate_vars):
c:\users\hrafiq\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
928     try:
929       result = self._run(None, fetches, feed_dict, options_ptr,
--> 930                          run_metadata_ptr)
931       if run_metadata:
932         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
c:\users\hrafiq\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1151     if final_fetches or final_targets or (handle and feed_dict_tensor):
1152       results = self._do_run(handle, final_targets, final_fetches,
-> 1153                              feed_dict_tensor, options, run_metadata)
1154     else:
1155       results = []
c:\users\hrafiq\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1327     if handle is None:
1328       return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1329                            run_metadata)
1330     else:
1331       return self._do_call(_prun_fn, handle, feeds, fetches)
c:\users\hrafiq\appdata\local\programs\python\python35\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1347           pass
1348       message = error_interpolation.interpolate(message, self._graph)
-> 1349       raise type(e)(node_def, op, message)
1350 
1351   def _extend_graph(self):
InvalidArgumentError: Node 'training/SGD/gradients/unified_gru_58/StatefulPartitionedCall_grad/StatefulPartitionedCall': Connecting to invalid output 4 of source node unified_gru_58/StatefulPartitionedCall which has 4 outputs

正如这里所讨论的,这确实是一个 bug:它是由 Keras 层中的 tf.function 与 model_to_estimator 所使用的 v1 tf.Session 组合使用引起的。

您也可以参考以下链接,其中错误已得到修复。谢谢!

如果您觉得答案有帮助,请投赞成票。谢谢!

最新更新