我正在尝试为多元线性回归构建一个小的教育示例,但是损失正在增加,直到它爆炸而不是变小,知道吗?
import tensorflow as tf
tf.__version__
import numpy as np
data = np.array(
[
[100,35,35,12,0.32],
[101,46,35,21,0.34],
[130,56,46,3412,12.42],
[131,58,48,3542,13.43]
]
)
x = data[:,1:-1]
y_target = data[:,-1]
def loss_function(y, pred):
return tf.reduce_mean(tf.square(y - pred))
def train(b, w, x, y, lr=0.012):
with tf.GradientTape() as t:
current_loss = loss_function(y, linear_model(x))
lr_weight, lr_bias = t.gradient(current_loss, [w, b])
w.assign_sub(lr * lr_weight)
b.assign_sub(lr * lr_bias)
epochs = 80
for epoch_count in range(epochs):
real_loss = loss_function(y_target, linear_model(x))
train(b, w, x, y_target, lr=0.12)
print(f"Epoch count {epoch_count}: Loss value: {real_loss.numpy()}")
如果我使用"正确"值初始化权重(通过 scikit-learn 回归器发现(,甚至会发生这种情况
w = tf.Variable([-1.76770250e-04,3.46688912e-01,2.43827475e-03],dtype=tf.float64)
b = tf.Variable(-11.837184241807234,dtype=tf.float64)
以下是将TF2 优化器用于玩具示例的方法(根据评论(。我知道这不是答案,但我不想在评论部分发布这个,因为它会弄乱缩进等等。
tf_x = tf.Variable(tf.constant(2.0,dtype=tf.float32),name='x')
optimizer = tf.optimizers.SGD(learning_rate=0.1)
# Optimizing tf_x using gradient tape
x_series, y_series = [],[]
for step in range(5):
x_series.append(tf_x.numpy().item())
with tf.GradientTape() as tape:
tf_y = tf_x**2
gradients = tape.gradient(tf_y, tf_x)
optimizer.apply_gradients(zip([gradients], [tf_x]))
根据@thushv89的输入,我在这里提供了一个使用 TF2 优化器的中间解决方案,该解决方案正在运行,尽管这不是 100% 回答我的问题
import tensorflow as tf
tf.__version__
import numpy as np
data = np.array(
[
[100,35,35,12,0.32],
[101,46,35,21,0.34],
[130,56,46,3412,12.42],
[131,58,48,3542,13.43]
]
)
x = data[:,1:-1]
y_target = data[:,-1]
w = tf.Variable([1,1,1],dtype=tf.float64)
b = tf.Variable(1,dtype=tf.float64)
def linear_model(x):
return b + tf.tensordot(x,w,axes=1)
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.MeanSquaredLogarithmicError()
def train_step(x, y):
with tf.GradientTape() as tape:
predicted = linear_model(x)
loss_value = loss_object(y, predicted)
print(f"Loss Value:{loss_value}")
grads = tape.gradient(loss_value, [b,w])
optimizer.apply_gradients(zip(grads, [b,w]))
def train(epochs):
for epoch in range(epochs):
train_step(x, y_target)
print ('Epoch {} finished'.format(epoch))
train(epochs = 1000)