对于实践,我正在开发Keras中的反馈循环自编码器。我使用的代码是
import tensorflow as tf
import keras
import os
class Linear(keras.layers.Layer):
def __init__(self, units=32):
super(Linear, self).__init__()
self.units = units
def build(self, input_shape):
self.w = self.add_weight(
shape=(input_shape[-1], self.units),
initializer="random_normal",
trainable=True,
)
self.b = self.add_weight(
shape=(self.units,), initializer="random_normal", trainable=True
)
def call(self, inputs):
return tf.matmul(inputs, self.w) + self.b#tf.matmul(inputs, self.w) + self.b
class FRAE(tf.keras.Model):
def __init__(self):
super(FRAE, self).__init__()
self.linear_1 = Linear(4)
self.linear_2 = Linear(3)
self.latent = Linear(1)
self.linear_3 = Linear(3)
self.linear_4 = Linear(2)
self.decoded = tf.zeros(shape=(1, 2))
def call(self, inputs):
#x = self.flatten(inputs)
batch_size = inputs.shape[0]
input_dim = inputs.shape[1]
# output_list = [None] * batch_size #tf.zeros(shape = (batch_size, input_dim))
output_list = tf.TensorArray(tf.float32, size=batch_size, clear_after_read=False)
for i in range(batch_size):
x = tf.concat((tf.expand_dims(inputs[i], axis=0),self.decoded),axis=1)
x = self.linear_1(x)
x = tf.nn.swish(x)
x = self.linear_2(x)
x = tf.nn.swish(x)
x = self.latent(x)
x = tf.nn.swish(x)
x = tf.concat((x,self.decoded),axis=1)
x = self.linear_3(x)
x = tf.nn.swish(x)
x = self.linear_4(x)
x = tf.nn.swish(x)
self.decoded = tf.identity(x)
output_list.write(i, x)
y = output_list.stack()
return y
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
xtrain = tf.random.uniform(shape=(10,2))
model = FRAE()
y = model(xtrain)
optimizer = keras.optimizers.Adam(lr=0.001)
model.compile(optimizer=optimizer,loss="mse")
model.fit(x=xtrain,y=xtrain, epochs=50, batch_size=1)
当我运行这段代码时,我得到错误
ValueError:没有为任何变量提供梯度:(['frae_13/linear_65/variable: 0', 'frae_13/linear_65/variable: 0', 'frae_13/linear_66/variable: 0', 'frae_13/linear_66/variable: 0', 'frae_13/linear_67/variable: 0', 'frae_13/linear_67/variable: 0', 'frae_13/linear_68/variable: 0', 'frae_13/linear_68/variable: 0', 'frae_13/linear_68/variable: 0', 'frae_13/linear_69/variable: 0', 'frae_13/linear_69/variable: 0', 'frae_13/linear_69/variable: 0', 'frae_13/linear_69/variable: 0'],)。假设
grads_and_vars
为((None, ), (None, ), (None, ), (None, ), (None, ), (None, ), (None, ), (None, ), (None, ), (None, )).
这可能源于使用TensorArray
来存储批量样品的输出。不知何故,梯度丢失/无法计算。
有人知道在这种情况下如何计算梯度吗?
我试着用谷歌搜索常见问题,但这个问题比较特殊,所以我找到的解决方案并不是很有帮助。
多亏了Alberto关于使用列表的评论。
我是这样做的:
import tensorflow as tf
import keras
import os
class Linear(keras.layers.Layer):
def __init__(self, units=32):
super(Linear, self).__init__()
self.units = units
def build(self, input_shape):
self.w = self.add_weight(
shape=(input_shape[-1], self.units),
initializer="random_normal",
trainable=True,
)
self.b = self.add_weight(
shape=(self.units,), initializer="random_normal", trainable=True
)
def call(self, inputs):
return tf.matmul(inputs, self.w) + self.b#tf.matmul(inputs, self.w) + self.b
class FRAE(tf.keras.Model):
def __init__(self):
super(FRAE, self).__init__()
self.linear_1 = Linear(4)
self.linear_2 = Linear(3)
self.latent = Linear(1)
self.linear_3 = Linear(3)
self.linear_4 = Linear(2)
self.decoded = [[0,0]] # tf.zeros(shape=(1, 2))
def call(self, inputs):
#x = self.flatten(inputs)
batch_size = inputs.shape[0]
input_dim = inputs.shape[1]
output_list = [None]*batch_size #tf.TensorArray(tf.float32, size=batch_size, clear_after_read=False)
#y = tf.Variable()
for i in range(batch_size):
x = tf.concat((tf.expand_dims(inputs[i], axis=0),tf.convert_to_tensor(self.decoded, dtype=tf.float32)),axis=1)
x = self.linear_1(x)
x = tf.nn.swish(x)
x = self.linear_2(x)
x = tf.nn.swish(x)
x = self.latent(x)
x = tf.nn.swish(x)
x = tf.concat((x,tf.convert_to_tensor(self.decoded, dtype=tf.float32)),axis=1)
x = self.linear_3(x)
x = tf.nn.swish(x)
x = self.linear_4(x)
x = tf.nn.swish(x)
self.decoded = x.numpy().tolist() #tf.identity(x)
output_list[i] = x #output_list.write(i, x)
y = tf.convert_to_tensor(output_list) # output_list.stack()
return y
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
xtrain = tf.random.uniform(shape=(10,2))
model = FRAE()
y = model(xtrain)
optimizer = keras.optimizers.Adam(lr=0.001)
model.compile(optimizer=optimizer,loss="mse", run_eagerly=True)
model.fit(x=xtrain,y=xtrain, epochs=50, batch_size=1)
这当然是一个原型,所以对于随机数据,输出的反馈将不会是有益的。对于依赖数据,它应该产生一些好处。