I tried using tf.function to decorate my gradient-update function, as shown below.
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
from tensorflow.keras.utils import Progbar
# generate data
nb_doc = 100
doc_features = np.random.random((nb_doc, 10))
doc_scores = np.random.randint(2, size=nb_doc).astype(np.float32)
class simple_model(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = [layers.Dense(16, activation=tf.nn.leaky_relu),
                      layers.Dense(8, activation=tf.nn.leaky_relu)]
        self.score = layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        dense_a = self.dense[0](inputs)
        for dense in self.dense[1:]:
            dense_a = dense(dense_a)
        y = self.score(dense_a)
        return y

    def build_graph(self):
        x = tf.keras.Input(shape=(10,))
        return tf.keras.Model(inputs=x, outputs=self.call(x))
batch_size = 1
train = tf.data.Dataset.from_tensor_slices((doc_features, doc_scores)).shuffle(nb_doc).batch(batch_size)
loss_object = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam()
@tf.function
def apply_gradient(optimizer, model, x, y):
    with tf.GradientTape() as tape:
        y_pred = model(x)
        loss_value = loss_object(y, y_pred)
    gradients = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))
    return y_pred, loss_value
def train_data_for_one_epoch(optimizer, model):
    losses = []
    pb_i = Progbar(int(np.ceil(nb_doc / batch_size)), stateful_metrics=['loss'])
    for step, (x, y) in enumerate(train):
        y_pred, loss_value = apply_gradient(optimizer, model, x, y)
        losses.append(loss_value)
        pb_i.update(step+1, values=[('loss', loss_value)], finalize=False)
    pb_i.update(step+1, values=[('loss', np.mean(losses))], finalize=True)
    return losses
The first time I run the code below, the model trains successfully.
But when I use the same code to train a second model, it fails with the error ValueError: tf.function-decorated function tried to create variables on non-first call.
# this succeeds
epochs = 5
_model = simple_model()
loss_history = []
for epoch in range(epochs):
    print('Epoch %d/%d' % (epoch+1, epochs))
    losses_train = train_data_for_one_epoch(optimizer, _model)
    loss_history.append(np.mean(losses_train))
# this fails
epochs = 5
_model_2 = simple_model()
loss_history = []
for epoch in range(epochs):
    print('Epoch %d/%d' % (epoch+1, epochs))
    losses_train = train_data_for_one_epoch(optimizer, _model_2)
    loss_history.append(np.mean(losses_train))
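My understanding of what happens (a guess based on the error message, not something I can confirm from the docs): a tf.function is only allowed to create variables during its very first trace. Since the model is passed in as a plain Python object, a different model instance forces a retrace, and that retrace creates the new model's weights (and the Adam slot variables), which raises the ValueError. The following minimal sketch, with an illustrative function run_layer that is not part of my code above, seems to reproduce the same message without any of the training logic:

# minimal sketch only; run_layer is an illustrative name
@tf.function
def run_layer(layer, x):
    # the first call of a freshly constructed layer creates its weight variables
    return layer(x)

x = tf.ones((1, 8))
run_layer(tf.keras.layers.Dense(4), x)  # first trace: variable creation is allowed
run_layer(tf.keras.layers.Dense(4), x)  # new layer object -> retrace -> new variables
                                        # -> ValueError ... on non-first call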
This seems to be a known issue, as described here.
Workaround:
# removed @tf.function decorator
def apply_gradient(optimizer, loss_object, model, x, y):
    with tf.GradientTape() as tape:
        y_pred = model(x)
        loss_value = loss_object(y, y_pred)
    gradients = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))
    return y_pred, loss_value
def train_data_for_one_epoch(optimizer, loss_object, model):
    losses = []
    # added tf.function here
    apply_grads = tf.function(apply_gradient)
    pb_i = Progbar(int(np.ceil(nb_doc / batch_size)), stateful_metrics=['loss'])
    for step, (x, y) in enumerate(train):
        y_pred, loss_value = apply_grads(optimizer, loss_object, model, x, y)
        losses.append(loss_value)
        pb_i.update(step+1, values=[('loss', loss_value)], finalize=False)
    pb_i.update(step+1, values=[('loss', np.mean(losses))], finalize=True)
    return losses
With this change, both models train without error.
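One side note on this workaround (my own observation, not from the linked issue): apply_grads is rebuilt inside train_data_for_one_epoch, so the function is retraced once per epoch. A sketch of an alternative, assuming the same undecorated apply_gradient and the same dataset/optimizer as above, is to create one tf.function per model outside the epoch loop, so each model gets its own first trace and tracing happens once per model rather than once per epoch. The names make_train_step, train_step_3 and _model_3 below are illustrative only:

# sketch only: one tf.function per model, created once outside the epoch loop
def make_train_step(optimizer, loss_object, model):
    @tf.function
    def train_step(x, y):
        with tf.GradientTape() as tape:
            y_pred = model(x)
            loss_value = loss_object(y, y_pred)
        gradients = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(gradients, model.trainable_weights))
        return y_pred, loss_value
    return train_step

_model_3 = simple_model()
train_step_3 = make_train_step(optimizer, loss_object, _model_3)
for epoch in range(epochs):
    for x, y in train:
        y_pred, loss_value = train_step_3(x, y)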