Neural network cost does not decrease with gradient descent



I'm building a small neural network in Python and I can't figure out why the cost isn't going down. Any comments or hints would be appreciated.

import numpy as np

X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
Y = np.array([[0, 1, 1, 0]])
w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]])
b1 = np.zeros((3, 1))
w2 = np.array([0.1, 0.2, 0.3]).reshape(1, 3)
b2 = np.zeros((1, 1))
cache = {}

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

def forward(X, w1, w2, b1, b2):
    Z1 = np.dot(w1, X) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(w2, A1) + b2
    A2 = sigmoid(Z2)
    return {'x': X, 'Z1': Z1, 'A1': A1, 'Z2': Z2, 'A2': A2}

def backward(X, Z1, A1, Z2, A2, error_gradient, w1, w2, b1, b2, learning_rate=0.01):
    # LAYER 2
    dA2 = np.multiply(error_gradient, sigmoid_prime(Z2))
    dZ2 = np.dot(w2.T, dA2)
    # update w and b for layer 2
    dw2 = np.dot(dA2, A2.T)
    db2 = dA2
    w2 -= dw2 * learning_rate
    b2 -= db2 * learning_rate
    # LAYER 1
    dA1 = np.multiply(dZ2, sigmoid_prime(Z1))
    dZ1 = np.dot(w1.T, dA1)
    # update w and b for layer 1
    dw1 = np.dot(dA1, X.T)
    db1 = dA1
    w1 -= dw1 * learning_rate
    b1 -= db1 * learning_rate

    return {'x': X, 'dZ1': dZ1, 'dA1': dA1, 'dZ2': dZ2, 'dA2': dA2,
            'w2': w2, 'b2': b2, 'w1': w1, 'b1': b1}

def calculate_cost(y, y_guess):
    cost = np.power(y - y_guess, 2)
    return np.squeeze(cost)

def mse_prime(y, y_pred):
    return 2 * (y - y_pred)

def predict(X, w1, w2, b1, b2):
    return forward(X, w1, w2, b1, b2)

def train(X, Y, w1, w2, b1, b2, epochs=100, learning_rate=0.01):
    for epoch in range(epochs):
        cost = 0
        for i, val in enumerate(X):
            x = val.reshape(2, 1)
            out = predict(x, w1, w2, b1, b2)
            y_guess = out["A2"]
            #print(out)
            cost += calculate_cost(Y[0][i], y_guess)
            error_gradient = mse_prime(Y[0][i], y_guess)
            # print(error_gradient)
            back = backward(x, out["Z1"], out["A1"], out["Z2"], out["A2"], error_gradient, w1, w2, b1, b2)
            # update params
            w1 = back["w1"]
            b1 = back["b1"]
            w2 = back["w2"]
            b2 = back["b2"]
        print(f"epoch: {epoch + 1}/{epochs}, cost: {cost/X.shape[0]}")

train(X, Y, w1, w2, b1, b2, epochs=20)

Cost output:

epoch: 1/20, cost: 0.25703296560961486
epoch: 2/20, cost: 0.25718506279033615
epoch: 3/20, cost: 0.25734002245320176
epoch: 4/20, cost: 0.25749789408142415
epoch: 5/20, cost: 0.25765872780276317
epoch: 6/20, cost: 0.25782257438803613
epoch: 7/20, cost: 0.25798948524907084
epoch: 8/20, cost: 0.2581595124360765
epoch: 9/20, cost: 0.2583327086344036
epoch: 10/20, cost: 0.25850912716066776
epoch: 11/20, cost: 0.2586888219582088
epoch: 12/20, cost: 0.25887184759185666
epoch: 13/20, cost: 0.2590582592419748
epoch: 14/20, cost: 0.2592481126977533
epoch: 15/20, cost: 0.2594414643497189
epoch: 16/20, cost: 0.25963837118143357
epoch: 17/20, cost: 0.2598388907603498
epoch: 18/20, cost: 0.2600430812277913
epoch: 19/20, cost: 0.2602510012880266
epoch: 20/20, cost: 0.26046271019640493

It turned out I had these two bugs:

  1. mse_prime: the order of the subtraction was flipped. The squared cost itself doesn't care which way round you subtract, but its derivative does; with 2 * (y - y_pred) the sign is reversed, every update steps uphill, and the cost creeps upward exactly as in the output above. It has to be 2 * (y_pred - y) (see the sketch after this list).
  2. The dw2 dot product used the wrong matrix: the layer-2 weight gradient needs the hidden activations, so dw2 = np.dot(dA2, A1.T), not A2.
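
To make the first bug concrete, here is a minimal scalar sketch (my own illustration, not taken from the code above; mse, mse_prime_wrong and mse_prime_right are made-up names): with the derivative written as 2 * (y - y_pred), the update rule param -= lr * grad walks uphill and the cost grows, just like the output above; with 2 * (y_pred - y) it walks downhill.

def mse(y, y_pred):
    return (y - y_pred) ** 2          # order is irrelevant for the cost itself

def mse_prime_wrong(y, y_pred):
    return 2 * (y - y_pred)           # derivative with the sign reversed

def mse_prime_right(y, y_pred):
    return 2 * (y_pred - y)           # d(cost)/d(y_pred), the sign descent needs

y, lr = 1.0, 0.1
for label, prime in [("wrong sign", mse_prime_wrong), ("right sign", mse_prime_right)]:
    y_pred = 0.0
    for _ in range(20):
        y_pred -= lr * prime(y, y_pred)    # same update rule as the network uses
    print(label, "-> cost:", mse(y, y_pred))
# wrong sign -> cost blows up (gradient ascent)
# right sign -> cost approaches 0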

The complete working code is below.

import numpy as np

np.random.seed(1)
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
#X = np.array([[0, 0]])
Y = np.array([[0, 1, 1, 0]])
w1 = np.random.randn(3, 2) * 0.1
b1 = np.zeros((3, 1))
w2 = np.random.randn(1, 3) * 0.1
b2 = np.zeros((1, 1))
cache = {}

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

def forward(X, w1, w2, b1, b2):
    Z1 = np.dot(w1, X) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(w2, A1) + b2
    A2 = sigmoid(Z2)
    return {'x': X, 'Z1': Z1, 'A1': A1, 'Z2': Z2, 'A2': A2}

def backward(X, Z1, A1, Z2, A2, error_gradient, w1, w2, b1, b2, learning_rate=0.1):
    # LAYER 2
    dA2 = np.multiply(error_gradient, sigmoid_prime(Z2))
    dZ2 = np.dot(w2.T, dA2)
    # update w and b for layer 2
    dw2 = np.dot(dA2, A1.T)  # fix 2: use the hidden activations A1, not A2
    db2 = dA2
    w2 -= dw2 * learning_rate
    b2 -= db2 * learning_rate
    # LAYER 1
    dA1 = np.multiply(dZ2, sigmoid_prime(Z1))
    dZ1 = np.dot(w1.T, dA1)
    # update w and b for layer 1
    dw1 = np.dot(dA1, X.T)
    db1 = dA1
    w1 -= dw1 * learning_rate
    #print(db1 * learning_rate)
    b1 -= db1 * learning_rate

    return {'x': X, 'dZ1': dZ1, 'dA1': dA1, 'dZ2': dZ2, 'dA2': dA2,
            'w2': w2, 'b2': b2, 'w1': w1, 'b1': b1, 'dw2': dw2, 'db2': db2, 'dw1': dw1, 'db1': db1}

def calculate_cost(y, y_guess):
    cost = np.power(y - y_guess, 2)
    return np.squeeze(cost)

def mse_prime(y, y_pred):
    return 2 * (y_pred - y)  # fix 1: subtraction order flipped so updates step downhill

def predict(X, w1, w2, b1, b2):
    return forward(X, w1, w2, b1, b2)

def train(X, Y, w1, w2, b1, b2, epochs=100, learning_rate=0.01):
    for epoch in range(epochs):
        cost = 0
        for i, val in enumerate(X):
            x = val.reshape(2, 1)
            out = predict(x, w1, w2, b1, b2)
            y_guess = out["A2"]
            cost += calculate_cost(Y[0][i], y_guess)
            error_gradient = mse_prime(Y[0][i], y_guess)
            back = backward(x, out["Z1"], out["A1"], out["Z2"], out["A2"], error_gradient, w1, w2, b1, b2)
            # update params
            w1 = back["w1"]
            b1 = back["b1"]
            w2 = back["w2"]
            b2 = back["b2"]
        print(f"epoch: {epoch + 1}/{epochs}, cost: {cost/X.shape[0]}")

train(X, Y, w1, w2, b1, b2, epochs=10000, learning_rate=0.1)
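
As a quick sanity check on the fixed version, one can also look at the actual predictions instead of only the cost. A minimal sketch under the definitions above (train_and_eval and the 0.5 threshold are my additions, not part of the original post): it runs the same training loop, returns the learned parameters, and prints the network's output for each XOR input.

def train_and_eval(X, Y, w1, w2, b1, b2, epochs=10000):
    # same loop as train(), but the learned parameters are returned at the end
    for _ in range(epochs):
        for i, val in enumerate(X):
            x = val.reshape(2, 1)
            out = predict(x, w1, w2, b1, b2)
            error_gradient = mse_prime(Y[0][i], out["A2"])
            back = backward(x, out["Z1"], out["A1"], out["Z2"], out["A2"],
                            error_gradient, w1, w2, b1, b2)
            w1, b1, w2, b2 = back["w1"], back["b1"], back["w2"], back["b2"]
    return w1, w2, b1, b2

w1, w2, b1, b2 = train_and_eval(X, Y, w1, w2, b1, b2)
for i, val in enumerate(X):
    prob = predict(val.reshape(2, 1), w1, w2, b1, b2)["A2"].item()
    print(val, "->", round(prob, 3), "predicted:", int(prob > 0.5), "expected:", Y[0][i])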
