使用 TensorArray 存储中间结果时，出现“没有为任何变量提供梯度”（No gradients provided for any variable）错误

作为练习，我正在用 Keras 开发一个反馈循环自编码器（feedback recurrent autoencoder）。我使用的代码如下：

import tensorflow as tf
import keras
import os
class Linear(keras.layers.Layer):
    """Affine layer computing ``matmul(inputs, w) + b`` without an activation.

    The kernel ``w`` and bias ``b`` are created in ``build`` from the last
    dimension of the input shape.
    """

    def __init__(self, units=32):
        super().__init__()
        # Number of output features produced by the layer.
        self.units = units

    def build(self, input_shape):
        """Create the trainable kernel ``w`` and bias ``b``."""
        in_features = input_shape[-1]
        self.w = self.add_weight(
            shape=(in_features, self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        """Return the affine projection of ``inputs``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b


class FRAE(tf.keras.Model):
    """Prototype feedback recurrent autoencoder.

    Each row of the input is processed one at a time. The previously decoded
    output (``self.decoded``) is concatenated both to the encoder input and
    to the latent code, so every step sees the result of the step before it.
    """

    def __init__(self):
        super().__init__()
        # Encoder: (input + feedback) -> 4 -> 3 -> 1 latent unit.
        self.linear_1 = Linear(4)
        self.linear_2 = Linear(3)
        self.latent = Linear(1)
        # Decoder: (latent + feedback) -> 3 -> 2.
        self.linear_3 = Linear(3)
        self.linear_4 = Linear(2)
        # Feedback state: the most recently decoded sample, initially zeros.
        self.decoded = tf.zeros(shape=(1, 2))

    def call(self, inputs):
        """Run the autoencoder over every row of ``inputs`` sequentially.

        Args:
            inputs: tensor whose first axis indexes the samples; the loop
                below needs that axis to be a static Python int
                (NOTE(review): confirm this holds inside ``fit``).

        Returns:
            The per-sample decoder outputs stacked along a new leading axis.
            For rank-2 inputs each output is ``(1, 2)``, so the result is
            ``(batch_size, 1, 2)``.
        """
        batch_size = inputs.shape[0]
        output_list = tf.TensorArray(
            tf.float32, size=batch_size, clear_after_read=False
        )
        for i in range(batch_size):
            # Encoder input: current sample plus the previous decoded output.
            x = tf.concat(
                (tf.expand_dims(inputs[i], axis=0), self.decoded), axis=1
            )
            x = self.linear_1(x)
            x = tf.nn.swish(x)
            x = self.linear_2(x)
            x = tf.nn.swish(x)
            x = self.latent(x)
            x = tf.nn.swish(x)
            # Decoder input: latent code plus the previous decoded output.
            x = tf.concat((x, self.decoded), axis=1)
            x = self.linear_3(x)
            x = tf.nn.swish(x)
            x = self.linear_4(x)
            x = tf.nn.swish(x)
            # NOTE(review): the feedback tensor lives on the instance, so it
            # also carries over from one call to the next.
            self.decoded = tf.identity(x)
            # TensorArray.write returns the updated array. The result must be
            # rebound, otherwise in graph mode stack() is not connected to
            # the values written here and no gradient reaches the weights.
            output_list = output_list.write(i, x)
        y = output_list.stack()
        return y


# Hide all CUDA devices so the example runs on the CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'


xtrain = tf.random.uniform(shape=(10, 2))
model = FRAE()
# Call the model once up front so the layer weights are created.
y = model(xtrain)
# `learning_rate` is the supported keyword; the deprecated `lr` alias is
# rejected by newer Keras releases.
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss="mse")
# Autoencoder training: the input is also the reconstruction target.
model.fit(x=xtrain, y=xtrain, epochs=50, batch_size=1)

当我运行这段代码时，我得到错误

ValueError: 没有为任何变量提供梯度（No gradients provided for any variable）: ['frae_13/linear_65/Variable:0', 'frae_13/linear_65/Variable:0', 'frae_13/linear_66/Variable:0', 'frae_13/linear_66/Variable:0', 'frae_13/linear_67/Variable:0', 'frae_13/linear_67/Variable:0', 'frae_13/linear_68/Variable:0', 'frae_13/linear_68/Variable:0', 'frae_13/linear_68/Variable:0', 'frae_13/linear_69/Variable:0', 'frae_13/linear_69/Variable:0', 'frae_13/linear_69/Variable:0', 'frae_13/linear_69/Variable:0']。提供的 `grads_and_vars` 为 ((None, ), (None, ), (None, ), (None, ), (None, ), (None, ), (None, ), (None, ), (None, ), (None, ))。

这可能源于使用 TensorArray 来存储批次中各个样本的输出。不知何故，梯度丢失了，或者无法计算。

有人知道在这种情况下如何计算梯度吗?

我试着用谷歌搜索常见问题，但这个问题比较特殊，所以我找到的解决方案并不是很有帮助。

感谢 Alberto 在评论中建议改用列表，问题得以解决。

我是这样做的:

import tensorflow as tf
import keras
import os
class Linear(keras.layers.Layer):
    """Affine layer computing ``matmul(inputs, w) + b`` without an activation.

    The kernel ``w`` and bias ``b`` are created in ``build`` from the last
    dimension of the input shape.
    """

    def __init__(self, units=32):
        super().__init__()
        # Number of output features produced by the layer.
        self.units = units

    def build(self, input_shape):
        """Create the trainable kernel ``w`` and bias ``b``."""
        in_features = input_shape[-1]
        self.w = self.add_weight(
            shape=(in_features, self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        """Return the affine projection of ``inputs``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b


class FRAE(tf.keras.Model):
    """Prototype feedback recurrent autoencoder using a Python list for outputs.

    Rows of the input are processed one by one. The previously decoded output
    is fed back into both the encoder input and the decoder input. The
    feedback state is kept as a plain nested list obtained via ``.numpy()``,
    so ``call`` only works when executed eagerly.
    """

    def __init__(self):
        super().__init__()
        self.linear_1 = Linear(4)
        self.linear_2 = Linear(3)
        self.latent = Linear(1)
        self.linear_3 = Linear(3)
        self.linear_4 = Linear(2)
        # Feedback state: the last decoded sample as a nested Python list.
        self.decoded = [[0, 0]]

    def call(self, inputs):
        """Encode and decode each row of ``inputs`` in order.

        Returns the per-row decoder outputs converted into a single tensor.
        """
        batch_size = inputs.shape[0]
        input_dim = inputs.shape[1]  # read here but not used further below
        outputs = []
        for row in range(batch_size):
            # The stored feedback is plain Python data, so it enters the
            # computation as a constant.
            feedback = tf.convert_to_tensor(self.decoded, dtype=tf.float32)
            sample = tf.expand_dims(inputs[row], axis=0)

            # Encoder: sample + feedback -> latent code.
            hidden = tf.concat((sample, feedback), axis=1)
            for layer in (self.linear_1, self.linear_2, self.latent):
                hidden = tf.nn.swish(layer(hidden))

            # Decoder: latent code + feedback -> reconstruction.
            hidden = tf.concat((hidden, feedback), axis=1)
            for layer in (self.linear_3, self.linear_4):
                hidden = tf.nn.swish(layer(hidden))

            # Remember this reconstruction as feedback for the next row.
            self.decoded = hidden.numpy().tolist()
            outputs.append(hidden)
        return tf.convert_to_tensor(outputs)


# Hide all CUDA devices so the example runs on the CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'


xtrain = tf.random.uniform(shape=(10, 2))
model = FRAE()
# Call the model once up front so the layer weights are created.
y = model(xtrain)
# `learning_rate` is the supported keyword; the deprecated `lr` alias is
# rejected by newer Keras releases.
optimizer = keras.optimizers.Adam(learning_rate=0.001)
# run_eagerly=True is needed because FRAE.call uses `.numpy()`, which is only
# available in eager execution.
model.compile(optimizer=optimizer, loss="mse", run_eagerly=True)
# Autoencoder training: the input is also the reconstruction target.
model.fit(x=xtrain, y=xtrain, epochs=50, batch_size=1)

当然，这只是一个原型：对于随机数据，把输出反馈回输入并不会带来好处；而对于前后样本之间存在依赖关系的数据，它应该会有所帮助。

相关内容

最新更新

热门标签：