Second derivative is None in TensorFlow automatic differentiation



In the code below, I am computing the second derivative (y_xx_lin) of a linear network modelLinear, which has linear activation functions throughout, and the second derivative (y_xx_tanh) of a tanh network modelTanh, which has tanh activations in all layers except the last one, which is linear.

My problem is: y_xx_lin is None, but y_xx_tanh shows some values. Following this Stack Overflow question, my guess is that y_xx_lin is None because the second derivative of a linear function is zero for all inputs and therefore, in some sense, independent of the input. Is that correct?

Even if that is the case, I would like TensorFlow to compute the derivative and return it, rather than returning None. Is that possible?

# Second derivative of a linear network appears to be None
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
import tensorflow.keras.backend as K
import numpy as np
import matplotlib.pyplot as plt

def build_network(activation='linear'):
    input_layer  = Input(1)
    inner_layer  = Dense(6, activation=activation)(input_layer)
    inner_layer1 = Dense(6, activation=activation)(inner_layer)
    inner_layer2 = Dense(6, activation=activation)(inner_layer1)
    output_layer = Dense(1, activation='linear')(inner_layer2)
    model = Model(input_layer, output_layer)
    return model

def get_first_second_derivative(X_train, y_train, model):
    with tf.GradientTape(persistent=True) as tape_second:
        tape_second.watch(X_train)

        with tf.GradientTape(persistent=True) as tape_first:
            # Watch the variables with respect to which we want to compute gradients
            tape_first.watch(X_train)

            # get the output of the NN
            output = model(X_train)

        y_x = tape_first.gradient(output, X_train)

    y_xx = tape_second.gradient(y_x, X_train)

    return y_x, y_xx

modelLinear = build_network(activation='linear')
modelLinear.compile(optimizer=Adam(learning_rate=0.1), loss='mse')

modelTanh = build_network(activation='tanh')
modelTanh.compile(optimizer=Adam(learning_rate=0.1), loss='mse')

X_train = np.linspace(-1, 1, 10).reshape((-1, 1))
y_train = X_train * X_train

X_train = tf.convert_to_tensor(X_train, dtype=tf.float64)
y_train = tf.convert_to_tensor(y_train, dtype=tf.float64)

y_x_lin,  y_xx_lin  = get_first_second_derivative(X_train, y_train, modelLinear)
y_x_tanh, y_xx_tanh = get_first_second_derivative(X_train, y_train, modelTanh)

print('Type of y_xx_lin = ', type(y_xx_lin))
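
For the "return a value instead of None" part, tf.GradientTape.gradient accepts an unconnected_gradients argument that returns explicit zeros when the target is disconnected from the source. A minimal sketch (my addition, assuming TensorFlow 2.x; not from the original post):

import tensorflow as tf

x = tf.constant([[0.5]])
with tf.GradientTape() as tape_second:
    tape_second.watch(x)
    with tf.GradientTape() as tape_first:
        tape_first.watch(x)
        y = 2.0 * x                  # linear in x
    y_x = tape_first.gradient(y, x)  # a constant, disconnected from x
# Plain tape_second.gradient(y_x, x) would return None here;
# the flag asks for explicit zeros instead.
print(tape_second.gradient(y_x, x,
                           unconnected_gradients=tf.UnconnectedGradients.ZERO))
# tf.Tensor([[0.]], shape=(1, 1), dtype=float32)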

It works if you set lambda x: x ** 1 instead of 'linear', like this:

...
id_func = lambda x: x ** 1

def build_network(activation=id_func):
    input_layer  = Input(1)
    inner_layer  = Dense(6, activation=activation)(input_layer)
    inner_layer1 = Dense(6, activation=activation)(inner_layer)
    inner_layer2 = Dense(6, activation=activation)(inner_layer1)
    output_layer = Dense(1, activation=id_func)(inner_layer2)
    model = Model(input_layer, output_layer)
    return model
...
modelLinear = build_network(activation=id_func)
...

The reason this works, and the reason your code fails, is in the answer you already referenced: with 'linear' activations the first derivative is a constant built only from the weights, so the outer tape sees no path from y_x back to X_train and returns None. With this strange implementation of the identity function, TensorFlow backpropagation works fine, because the pow op keeps the first derivative connected to the input.
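
A minimal sketch of that mechanism (my illustration, not part of the original answer): the recorded gradient of x ** 1 is an expression that still references x (1 * x**0), so the outer tape stays connected and returns zeros instead of None:

import tensorflow as tf

x = tf.constant([[0.5]])
with tf.GradientTape() as tape_second:
    tape_second.watch(x)
    with tf.GradientTape() as tape_first:
        tape_first.watch(x)
        y = x ** 1                   # identity, but the pow op records x
    y_x = tape_first.gradient(y, x)  # computed as 1 * x**0, still tied to x
print(tape_second.gradient(y_x, x)) # tf.Tensor([[0.]], ...) rather than None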

Tested with TensorFlow version 2.9.2.

If you want to compute the derivative of the input as a series: I see your question and intent, but you can also do it with a model layer, as in the code example below; adapt it as is convenient.

Sample: << the rate of change is how fast they are growing >>

import tensorflow as tf

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Class / Definition
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
class MyLSTMLayer( tf.keras.layers.LSTM ):
    def __init__(self, units, return_sequences, return_state):
        super(MyLSTMLayer, self).__init__( units, return_sequences=True, return_state=False )
        self.num_units = units

    def build(self, input_shape):
        self.kernel = self.add_weight( "kernel",
                                       shape=[ int(input_shape[-1]), self.num_units ] )

    def call(self, inputs):
        derivative_number = tf.constant([ 2.0 ])

        ZeroPadding1D_front = tf.keras.layers.ZeroPadding1D(padding=( 1, 0 ))
        ZeroPadding1D_back  = tf.keras.layers.ZeroPadding1D(padding=( 0, 1 ))
        reshape = tf.reshape( inputs, shape=(1, 1024, 1), name="Reshape" )
        # Front-padded sequence minus back-padded sequence, divided by 2:
        # X = [ 1, 2, 3, 4, 5 ], Y = 2
        # result = [ (0-1)/2, (1-2)/2, (2-3)/2, (3-4)/2, (4-5)/2, (5-0)/2 ]
        #        = [ -0.5, -0.5, -0.5, -0.5, -0.5, 2.5 ]
        subtract = tf.math.subtract( ZeroPadding1D_front( reshape ), ZeroPadding1D_back( reshape ), name="Subtract" )
        divide = tf.math.divide_no_nan( subtract, derivative_number, name="Divide" )
        return divide

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
start = 3
limit = 3075
delta = 3
sample = tf.range( start, limit, delta )
sample = tf.cast( sample, dtype=tf.float32 )
sample = tf.constant( sample, shape=( 1, 1, 1024 ), dtype=tf.float32 )

layer = MyLSTMLayer( 1024, True, False )
model = tf.keras.Sequential([
    tf.keras.Input(shape=(1, 1024)),
    layer,
])
model.summary()

print( "Sample: " )
print( sample )
print( "Predict: " )
print( model.predict(sample) )

Output:

Model: "sequential"
_________________________________________________________________
Layer (type)                Output Shape              Param #
=================================================================
my_lstm_layer (MyLSTMLayer)  (1, 1025, 1)             1048576
=================================================================
Total params: 1,048,576
Trainable params: 1,048,576
Non-trainable params: 0
_________________________________________________________________
Sample:
tf.Tensor([[[3.000e+00 6.000e+00 9.000e+00 ... 3.066e+03 3.069e+03 3.072e+03]]], shape=(1, 1, 1024), dtype=float32)
Predict:
1/1 [==============================] - 0s 69ms/step
[[[-1.500e+00]
  [-1.500e+00]
  [-1.500e+00]
  ...
  [-1.500e+00]
  [-1.500e+00]
  [ 1.536e+03]]]
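
To see the padding trick in isolation, here is a short check of the same front-pad/back-pad difference on a small vector (my sketch, not part of the original answer):

import tensorflow as tf

x = tf.constant([ 1.0, 2.0, 3.0, 4.0, 5.0 ])
front = tf.pad( x, [[ 1, 0 ]] )   # [0, 1, 2, 3, 4, 5]
back  = tf.pad( x, [[ 0, 1 ]] )   # [1, 2, 3, 4, 5, 0]
print( (front - back) / 2.0 )     # [-0.5 -0.5 -0.5 -0.5 -0.5  2.5]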
