Bug in backpropagation: neural network predicts the same class



I am writing a neural network from scratch using NumPy. However, even after training my network for many epochs, the predicted scores for each class look random and stay the same regardless of the input.

I have checked my understanding against Andrew Ng's Coursera ML course and posts on towardsdatascience.com. I think I am making some conceptual mistake that I cannot figure out.

Here is my code:

import numpy as np


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def dsigmoid(y):
    return y * (1 - y)


class NeuralNetwork:
    def __init__(self, shape):
        self.n_layers = len(shape)
        self.shape = shape
        self.weight = []
        self.bias = []
        i = 0
        while i < self.n_layers - 1:
            self.weight.append(np.random.normal(loc=0.0, scale=0.5,
                                                size=(self.shape[i + 1], self.shape[i])))
            self.bias.append(np.random.normal(loc=0.0, scale=0.3,
                                              size=(self.shape[i + 1], 1)))
            i += 1

    def predict(self, X):
        z = self.weight[0] @ X + self.bias[0]
        a = sigmoid(z)
        i = 1
        while i < self.n_layers - 1:
            z = self.weight[i] @ a + self.bias[i]
            a = sigmoid(z)
            i += 1
        return a

    def predictVerbose(self, X):
        layers = [X]
        z = self.weight[0] @ X + self.bias[0]
        a = sigmoid(z)
        layers.append(a)
        i = 1
        while i < self.n_layers - 1:
            z = self.weight[i] @ a + self.bias[i]
            a = sigmoid(z)
            layers.append(a)
            i += 1
        return layers

    def gradOne(self, X, y):
        layers = self.predictVerbose(X)
        h = layers[-1]
        delta_b = [(h - y) * dsigmoid(h)]
        delta_w = [delta_b[0] @ layers[-2].T]
        i = 1
        while i < self.n_layers - 1:
            buff = delta_b[-1]
            delta_b.append((self.weight[-i].T @ buff) * dsigmoid(layers[-(i + 1)]))
            delta_w.append(delta_b[-1] @ layers[-(i + 2)].T)
            i += 1

        return delta_b[::-1], delta_w[::-1]

    def grad(self, data, l_reg=0):
        # data: x1, x2, x3, ..., xm, y=(0, 1, 2, ...)
        m = len(data)
        delta_b = []
        delta_w = []
        i = 0
        while i < self.n_layers - 1:
            delta_b.append(np.zeros((self.shape[i + 1], 1)))
            delta_w.append(np.zeros((self.shape[i + 1], self.shape[i])))
            i += 1

        for row in data:
            X = np.array(row[:-1])[np.newaxis].T
            y = np.zeros((self.shape[-1], 1))
            # print(row)
            y[row[-1], 0] = 1
            buff1, buff2 = self.gradOne(X, y)
            i = 0
            while i < len(delta_b):
                delta_b[i] += buff1[i] / m
                delta_w[i] += buff2[i] / m
                i += 1
        return delta_b, delta_w

    def train(self, data, batch_size, epoch, alpha, l_reg=0):
        m = len(data)
        for i in range(epoch):
            j = 0
            while j < m:
                delta_b, delta_w = self.grad(data[i: (i + batch_size + 1)])
                i = 0
                while i < len(self.weight):
                    self.weight[i] -= alpha * delta_w[i]
                    self.bias[i] -= alpha * delta_b[i]
                    i += 1
                j += batch_size


if __name__ == "__main__":
    x = NeuralNetwork([2, 2, 2])
    # for y in x.gradOne(np.array([[1], [2], [3]]), np.array([[0], [1]])):
    #     print(y.shape)
    data = [
        [1, 1, 0],
        [0, 0, 0],
        [1, 0, 1],
        [0, 1, 1]
    ]
    x.train(data, 4, 1000, 0.1)
    print(x.predict(np.array([[1], [0]])))
    print(x.predict(np.array([[1], [1]])))

Please point out where I went wrong.

Unfortunately, I don't have enough reputation to comment on your post, but here is a link to a NumPy-only neural network I made (tested on blob data from sklearn and on MNIST):

https://github.com/jaymody/backpropagation/blob/master/old/NeuralNetwork.py

Are you still interested in this question? As I understand it, you are trying to build an XOR perceptron with direct and inverse outputs?
It looks like:
1. You need to change the expression delta_b, delta_w = self.grad(data[i: (i + batch_size + 1)]) in the train function to delta_b, delta_w = self.grad(data[::]) (see the sketch after this list).
2. With some random initial values for the synapse and bias weights, alpha=0.1 needs many more training epochs. Try playing with alpha (I set it to 2) and with the number of epochs (I tried 20000).
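A minimal sketch of what train could look like with that change (keeping your variable names; batch_size is effectively unused here because every update uses the whole dataset, and the inner counter is renamed to k purely for readability):

    def train(self, data, batch_size, epoch, alpha, l_reg=0):
        # Sketch of the suggested fix: compute the gradient over the full
        # dataset instead of slicing with the epoch counter i.
        for _ in range(epoch):
            delta_b, delta_w = self.grad(data[::])
            k = 0
            while k < len(self.weight):
                self.weight[k] -= alpha * delta_w[k]
                self.bias[k] -= alpha * delta_b[k]
                k += 1

With that in place, you can retrain with, e.g., x.train(data, 4, 20000, 2), matching the alpha and epoch values suggested above.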

Also, your code does not work for a 1-layer network. I tried to train 1-layer AND and OR perceptrons and got very strange results (or maybe it just needs more epochs). In the 2-layer case, however, it works fine.
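For reference, a sketch of such a single-weight-layer test (assuming a [2, 2] shape, i.e. one weight matrix, and the same data format with the class index in the last column; this is an illustration, not the exact code I ran):

# Sketch: single weight matrix (shape [2, 2]), AND truth table,
# last column is the class index (1 = AND true, 0 = AND false).
and_net = NeuralNetwork([2, 2])
and_data = [
    [0, 0, 0],
    [0, 1, 0],
    [1, 0, 0],
    [1, 1, 1],
]
and_net.train(and_data, 4, 20000, 2)
print(and_net.predict(np.array([[1], [1]])))  # AND of (1, 1) should be class 1
print(and_net.predict(np.array([[0], [1]])))  # AND of (0, 1) should be class 0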

Latest update