TensorFlow 2.0 softmax model cost does not decrease



I have started learning deep learning and I am practicing softmax classification on the MNIST dataset. Training works when I use the sigmoid activation function, but with relu the cost does not decrease.

Below is my complete code.

from tensorflow import keras
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data(path="mnist.npz")
train_labels = tf.one_hot(train_labels, 10)
test_labels = tf.one_hot(test_labels, 10)
train_images, test_images = train_images / 255.0, test_images / 255.0
train_images = tf.reshape(train_images, (len(train_images), 28*28))
test_images = tf.reshape(test_images, (len(test_images), 28*28))
train_images = tf.cast(train_images, tf.float32)
test_images = tf.cast(test_images, tf.float32)
W1 = tf.Variable(tf.random.normal([28*28,50]), name='weight1', dtype=tf.float32)
b1 = tf.Variable(tf.random.normal([50]), name='bias1', dtype=tf.float32)
W2 = tf.Variable(tf.random.normal([50, 50]), name='weight2', dtype=tf.float32)
b2 = tf.Variable(tf.random.normal([50]), name='bias2', dtype=tf.float32)
W3 = tf.Variable(tf.random.normal([50,50]), name='weight3', dtype=tf.float32)
b3 = tf.Variable(tf.random.normal([50]), name='bias3', dtype=tf.float32)
W4 = tf.Variable(tf.random.normal([50,10]), name='weight4', dtype=tf.float32)
b4 = tf.Variable(tf.random.normal([10]), name='bias4', dtype=tf.float32)
train_ds = tf.data.Dataset.from_tensor_slices(
    (train_images, train_labels)).batch(100)

epochs = 10
learning_rate = 0.1
cost_list = []
accuracy_list = []

for epoch in range(epochs):
    avg_cost = 0
    for images, labels in train_ds:
        with tf.GradientTape() as tape:
            layer1 = tf.nn.relu(tf.matmul(images, W1) + b1)
            layer2 = tf.nn.relu(tf.matmul(layer1, W2) + b2)
            layer3 = tf.nn.relu(tf.matmul(layer2, W3) + b3)
            hypothesis = tf.nn.softmax(tf.matmul(layer3, W4) + b4)
            cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=labels))
            predicted = tf.math.argmax(hypothesis, 1)
            accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, tf.math.argmax(labels, 1)), dtype=tf.float32))
        W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad, W4_grad, b4_grad = tape.gradient(cost, [W1, b1, W2, b2, W3, b3, W4, b4])
        # print(W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad, W4_grad, b4_grad)
        W1.assign_sub(learning_rate * W1_grad)
        b1.assign_sub(learning_rate * b1_grad)
        W2.assign_sub(learning_rate * W2_grad)
        b2.assign_sub(learning_rate * b2_grad)
        W3.assign_sub(learning_rate * W3_grad)
        b3.assign_sub(learning_rate * b3_grad)
        W4.assign_sub(learning_rate * W4_grad)
        b4.assign_sub(learning_rate * b4_grad)

        avg_cost += cost / len(train_ds)

    cost_list.append(avg_cost)
    accuracy_list.append(accuracy.numpy())
    print("Epoch: {}, cost: {}, accuracy: {}".format(epoch, avg_cost, accuracy.numpy()))

plt.plot(range(epochs), cost_list)
plt.show()
plt.plot(range(epochs), accuracy_list)
plt.show()

Here are my results.

As you can see, the cost does not decrease and the accuracy fluctuates. Please help!

You should not feed the output of softmax into softmax_cross_entropy_with_logits; you are effectively applying softmax twice. Remove the softmax from the output layer, because softmax_cross_entropy_with_logits performs both the softmax and the cross-entropy internally.
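As a sketch of that fix (reusing the variable names from your code; this is one possible correction, not the only way to write it), the lines inside the with tf.GradientTape() block could look like this, with the last layer producing raw logits and softmax applied only afterwards for predictions:

layer1 = tf.nn.relu(tf.matmul(images, W1) + b1)
layer2 = tf.nn.relu(tf.matmul(layer1, W2) + b2)
layer3 = tf.nn.relu(tf.matmul(layer2, W3) + b3)
logits = tf.matmul(layer3, W4) + b4  # raw logits, no softmax here
# softmax_cross_entropy_with_logits applies softmax internally before the cross-entropy
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
# softmax is only needed if you want probabilities for reporting
hypothesis = tf.nn.softmax(logits)
predicted = tf.math.argmax(hypothesis, 1)
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(predicted, tf.math.argmax(labels, 1)), dtype=tf.float32))

Since softmax does not change which class has the largest value, the accuracy computation gives the same result whether it uses the logits or the probabilities; only the loss needs the raw logits.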
