为Tensorflow 2 Keras模型提供特征列和预处理(从tf 1.x估计器迁移而来)

我正在将目前基于估计器(Estimator)构建的 TensorFlow 1.x 模型迁移到 TensorFlow 2.0 Keras。迁移过程一直比较顺利，直到需要将模型序列化以用于服务(serving)时才遇到问题。

模型定义如下：

# Feed-forward network: a feature-column input layer, one ReLU Dense layer per
# entry in `hidden_layers`, then a 2-unit output layer with no activation.
model = tf.keras.Sequential(
    [
        tf.keras.layers.DenseFeatures(feature_columns),
        *(
            tf.keras.layers.Dense(width, activation='relu')
            for width in hidden_layers
        ),
        tf.keras.layers.Dense(2, activation=None),
    ]
)

我使用的是 TensorFlow 的特征列(feature columns) API，它期望的输入是一个特征字典；并且这些特征在传入模型之前会先经过一次转换。

例如，在训练时

def dataset_transformation_function(feature_dict: Dict[str, tf.Tensor]):
    """Return a copy of ``feature_dict`` extended with a ``'logx1'`` entry.

    The added entry is ``tf.math.log`` applied to ``feature_dict['x1']``. The
    mapping passed in is not modified.
    """
    augmented = feature_dict.copy()
    augmented.update(logx1=tf.math.log(feature_dict['x1']))
    return augmented
# Training input pipeline. Each element produced by from_tensor_slices is a
# (features, label) pair, and Dataset.map passes the members of a tuple element
# as separate positional arguments. The one-argument transformation is
# therefore wrapped so that it only touches the features while the label is
# carried through unchanged.
train_dataset = (
    tf.data.Dataset.from_tensor_slices(
        (train_feature_dict, train_label_vector)
    )
    .shuffle(n_train)
    .batch(batch_size)
    .map(
        lambda features, label: (
            dataset_transformation_function(features),
            label,
        )
    )
    .repeat()
    .prefetch(tf.data.experimental.AUTOTUNE)
)

似乎要在服务(serving)时执行相同的转换，我需要：

# Sketch only: the ellipses are placeholders that the question leaves
# unspecified (the tensor specs and the function body).
input_tensors = [tf.TensorSpec(name=...), ...]


@tf.function(input_signature=input_tensors)
def dataset_transformation_function(args) -> Dict[str, tf.Tensor]:
    ...

和

# Export `model` to MODEL_DIR with an explicit serving signature.
# NOTE(review): `feature_transform` is not defined in the visible snippets;
# presumably it is the tf.function-wrapped transformation - confirm.
tf.saved_model.save(
    obj=model,
    export_dir=MODEL_DIR,
    signatures=feature_transform,
)

然而，我无法确定输入张量或函数的正确签名。

我要迁移的原有(TF 1.x)做法是：

def serving_input_fn():
    """Build the serving input receiver used by the TF 1.x estimator export.

    Creates placeholders for the raw ``x1`` (float32) and ``x2`` (string)
    inputs, passes them through ``dataset_transformation_function`` and
    returns a ``ServingInputReceiver`` that pairs the transformed features
    with the raw placeholders.
    """
    receiver_tensors = {
        'x1': tf.placeholder(dtype=tf.float32, shape=[None], name='x1'),
        'x2': tf.placeholder(dtype=tf.string, shape=[None], name='x2'),
    }
    features = dataset_transformation_function(receiver_tensors)
    return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
# Export the estimator under MODEL_DIR using `serving_input_fn`, restoring
# weights from the checkpoint referenced by `estimator.best_checkpoint`.
estimator.export_savedmodel(
    export_dir_base=MODEL_DIR,
    serving_input_receiver_fn=serving_input_fn,
    as_text=False,
    checkpoint_path=estimator.best_checkpoint,
)

为了回答我自己的问题，解决方案似乎是提供一个函数，当被调用时，该函数同时进行预处理和调用模型。此处示例：

# tensorflow 2.0.0
import tensorflow as tf
import numpy as np
hidden_layers = [4, 4]
# No `fc` alias is imported in this script, so the feature-column API is
# referenced through `tf.feature_column` directly.
feature_columns = [
    tf.feature_column.numeric_column(name) for name in ['x1', 'x2', 'logx1']
]

# construct a simple sequential model
model = tf.keras.Sequential()
model.add(tf.keras.layers.DenseFeatures(feature_columns))
for units in hidden_layers:
    model.add(tf.keras.layers.Dense(units, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation=None))

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss='mae',
    metrics=['mae'],
)

# Toy data: the training inputs already contain the precomputed 'logx1'
# feature, whereas `x_predict` carries only the raw features.
x_train = {'x1': np.arange(10), 'x2': np.arange(10), 'logx1': np.log1p(np.arange(10))}
x_predict = {'x1': np.arange(10), 'x2': np.arange(10)}
y = np.random.random(size=10)

model.fit(x=x_train, y=y)
trained_model_predictions = model.predict(x_train)
# preprocessing function for serving
@tf.function()
def serve_predict(x1, x2):
    """Derive 'logx1' from the raw ``x1`` and return the model's prediction."""
    model_inputs = {
        'x1': x1,
        'x2': x2,
        'logx1': tf.math.log1p(x1),
    }
    return model(model_inputs)


# Trace the function for rank-1 inputs of unknown length and rebind the name
# to the resulting concrete function.
serve_predict = serve_predict.get_concrete_function(
    x1=tf.TensorSpec([None]),
    x2=tf.TensorSpec([None]),
)
tf.saved_model.save(
    model,
    '/tmp/tf',
    signatures=serve_predict,
)

# check the models give the same output
loaded = tf.saved_model.load('/tmp/tf')
# `serve_predict` was exported as a signature rather than attached to the
# model as an attribute, so it is looked up through `signatures`; a single
# function passed as `signatures` is stored under the 'serving_default' key.
serving_fn = loaded.signatures['serving_default']
serving_outputs = serving_fn(
    x1=tf.range(10, dtype=tf.float32),
    x2=tf.range(10, dtype=tf.float32),
)
# A signature call returns a dict of named outputs; this one has exactly one.
(loaded_model_predictions,) = serving_outputs.values()
np.testing.assert_allclose(
    trained_model_predictions,
    loaded_model_predictions.numpy(),
    atol=1e-6,
)

相关内容

最新更新

热门标签：