Python 3: How do I evaluate the Adam gradient in TensorFlow 2.0? I would like to replace my own implementation



I have the following code, which runs fine. However, I strongly suspect that the TensorFlow 2.0 implementation of the Adam gradient is more efficient than my naive one. How can I replace my evaluation of the Adam gradient with the TensorFlow 2.0 implementation?

import tensorflow as tf
import numpy as np

def linearModelGenerator(numberSamples):
    # Synthetic data: y = 3 + 5*x plus a little Gaussian noise.
    x = tf.random.normal(shape=(numberSamples,))
    y = 3 * tf.ones(shape=(numberSamples,)) + tf.constant(5.0) * x + tf.random.normal(shape=(numberSamples,), stddev=0.01)
    return x, y

class Adam:
    def __init__(self, shapes, lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-07):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.shapes = shapes
        self.m = np.shape(shapes)[0]
        self.listM = []  # first-moment estimates, one per parameter
        self.listV = []  # second-moment estimates, one per parameter
        self.t = 0       # time step, used for bias correction
        for i in range(self.m):
            if np.isscalar(shapes[i]):
                self.listM.append(0)
                self.listV.append(0)
            else:
                self.listM.append(tf.zeros(shapes[i]))
                self.listV.append(tf.zeros(shapes[i]))

    def evalGradient(self, *args):
        # Turn the raw gradients into bias-corrected Adam directions.
        adamGrad = []
        self.t = self.t + 1
        for i in range(self.m):
            grad = args[i]
            self.listM[i] = self.beta1 * self.listM[i] + (1 - self.beta1) * grad
            self.listV[i] = self.beta2 * self.listV[i] + (1 - self.beta2) * (grad * grad)
            hatM = self.listM[i] / (1 - self.beta1 ** self.t)
            hatV = self.listV[i] / (1 - self.beta2 ** self.t)
            adamGrad.append(hatM / (tf.math.sqrt(hatV) + tf.ones(np.shape(hatV)) * self.epsilon))
        return adamGrad


class LinearModel:
    def __init__(self):
        self.weight = tf.Variable(-1.0)
        self.bias = tf.Variable(-1.0)

    def __call__(self, x):
        return self.weight * x + self.bias

def loss(y, pred):
    return tf.reduce_mean(tf.square(y - pred))

def trainAdam(linear_model, adam, x, y):
    with tf.GradientTape() as t:
        current_loss = loss(y, linear_model(x))
    gradWeight, gradBias = t.gradient(current_loss, [linear_model.weight, linear_model.bias])
    gradAdamList = adam.evalGradient(gradWeight, gradBias)
    gradAdamWeight = gradAdamList[0]
    gradAdamBias = gradAdamList[1]
    linear_model.weight.assign_sub(adam.lr * gradAdamWeight)
    linear_model.bias.assign_sub(adam.lr * gradAdamBias)

if __name__ == "__main__":
    numberSamples = 100
    x, y = linearModelGenerator(numberSamples)
    linear_model = LinearModel()
    epochs = 1000
    shapes = []
    shapes.append(1)
    shapes.append(1)
    adam = Adam(shapes, lr=0.1)
    for epoch_count in range(epochs):
        real_loss = loss(y, linear_model(x))
        trainAdam(linear_model, adam, x, y)
        print('w', linear_model.weight.numpy())
        print('bias', linear_model.bias.numpy())
        print('real_loss', real_loss.numpy())
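
For reference, evalGradient above is a hand-rolled version of the bias-corrected Adam update from Kingma & Ba (2015), and trainAdam's assign_sub calls then apply the step with the learning rate; in formulas:

$$
m_t = \beta_1 m_{t-1} + (1-\beta_1)\,g_t,\qquad
v_t = \beta_2 v_{t-1} + (1-\beta_2)\,g_t^2,\qquad
\hat m_t = \frac{m_t}{1-\beta_1^t},\quad
\hat v_t = \frac{v_t}{1-\beta_2^t},\qquad
\theta_t = \theta_{t-1} - \mathrm{lr}\,\frac{\hat m_t}{\sqrt{\hat v_t}+\epsilon}
$$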

I would like to keep the general structure of the code, but replace the Adam gradient implementation.

The built-in optimizers in TensorFlow 2 can be used not only with tf.keras.Model.fit(), but also with tf.GradientTape(). With the latter, you can just call their apply_gradients() method directly. The optimizer object keeps track of the accumulators (the running moments) internally. Roughly speaking, your code can be modified as follows:

adam = tf.optimizers.Adam(learning_rate)

def trainAdam(linear_model, adam, x, y):
    with tf.GradientTape() as t:
        current_loss = loss(y, linear_model(x))
    gradWeight, gradBias = t.gradient(current_loss, [linear_model.weight, linear_model.bias])
    adam.apply_gradients(zip([gradWeight, gradBias], [linear_model.weight, linear_model.bias]))
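
Note that apply_gradients() both rescales the gradients with the Adam moments and updates the variables in place, so the explicit assign_sub calls are no longer needed and the hand-rolled Adam class (and the shapes list) can be dropped entirely. A minimal end-to-end sketch under that assumption, reusing the LinearModel, loss and linearModelGenerator definitions from the question and the original learning rate of 0.1:

import tensorflow as tf

def trainAdam(linear_model, optimizer, x, y):
    # Record the forward pass, then hand the raw gradients to the optimizer.
    with tf.GradientTape() as t:
        current_loss = loss(y, linear_model(x))
    grads = t.gradient(current_loss, [linear_model.weight, linear_model.bias])
    # The optimizer applies the moment updates, bias correction and learning rate internally.
    optimizer.apply_gradients(zip(grads, [linear_model.weight, linear_model.bias]))

if __name__ == "__main__":
    numberSamples = 100
    x, y = linearModelGenerator(numberSamples)
    linear_model = LinearModel()
    optimizer = tf.optimizers.Adam(learning_rate=0.1)
    epochs = 1000
    for epoch_count in range(epochs):
        real_loss = loss(y, linear_model(x))
        trainAdam(linear_model, optimizer, x, y)
        print('w', linear_model.weight.numpy())
        print('bias', linear_model.bias.numpy())
        print('real_loss', real_loss.numpy())

Keep in mind that TensorFlow's Adam places epsilon slightly differently from the formula above (it is added to the uncorrected square root of the second moment, i.e. the paper's "epsilon hat" form), so the trajectories should be very close to, but not necessarily bit-for-bit identical with, the hand-rolled version.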
