在进行反向传播和梯度下降时遇到 numpy 数组和矩阵的问题

我正在跟着 Dan Shiffman 的视频教程系列学习,该系列讲解如何创建一个小型"玩具"神经网络库。

该教程使用 JavaScript,以及他在本系列前面几集中带着大家编写的矩阵库。但是,我用的是 numpy。

在这个视频中,他编写了梯度下降和反向传播的代码。但是,因为我使用的是 numpy,我的代码无法正常运行(错误信息见下文)。如果有人能帮忙,我将不胜感激!


import numpy as np
import math
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    Args:
        x: A scalar, a NumPy array, or a (nested) sequence of numbers.

    Returns:
        A NumPy float for scalar input, otherwise an array shaped like ``x``.
    """
    # np.asarray lets plain Python lists be negated; arrays pass through as-is.
    return 1 / (1 + np.exp(-np.asarray(x)))
def dsigmoid(x):
    """Return the sigmoid derivative expressed through the sigmoid's output.

    The formula ``x * (1 - x)`` equals the derivative of the sigmoid only when
    ``x`` is already a sigmoid *output* (an activation), which is how the
    network code in this file calls it.

    Args:
        x: A scalar, a NumPy array, or a (nested) sequence of activations.

    Returns:
        ``x * (1 - x)`` computed element-wise.
    """
    # np.asarray makes plain Python lists usable in the arithmetic below.
    activated = np.asarray(x)
    return activated * (1 - activated)
class NeuralNetwork:
    """Network with one hidden layer and sigmoid activations on both layers.

    NOTE(review): this is the version the question is about. Its train()
    method raises the ValueError shown in the traceback further down; the
    statements are left untouched here and only annotated.
    """

    def __init__(self, Inum, Hnum, Onum):
        """Store the layer sizes and create random weights and biases."""
        self.Inum = Inum
        self.Hnum = Hnum
        self.Onum = Onum
        # Learning rate used to scale every update in train().
        self.lr = 0.1
        # np.random.rand samples from [0, 1); "* 2 - 1" rescales to [-1, 1).
        self.weightsIH = np.random.rand(self.Hnum, self.Inum) * 2 - 1
        self.weightsHO = np.random.rand(self.Onum, self.Hnum) * 2 - 1
        self.biasH = np.random.rand(self.Hnum) * 2 - 1
        self.biasO = np.random.rand(self.Onum) * 2 - 1

    def feedForward(self, inputs):
        """Return the output activations for one input vector."""
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)
        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)
        return outputs

    def train(self, inputs, targets):
        """Run one forward pass and one weight/bias update for a sample."""
        # Feed forward (same statements as feedForward, repeated here because
        # the hidden activations are needed below).
        hidden = np.dot(self.weightsIH, np.array(inputs))
        hidden = hidden + self.biasH
        hidden = sigmoid(hidden)
        outputs = np.dot(self.weightsHO, hidden)
        outputs = outputs + self.biasO
        outputs = sigmoid(outputs)
        # Calculate errors
        errorsO = np.array(targets) - outputs
        # Calculate gradients with derivative of sigmoid
        # TODO: Use numpy for gradient calculation (if possible)
        gradients = dsigmoid(outputs)
        gradients = gradients * errorsO
        gradients = gradients * self.lr
        # Calculate deltas
        # hidden[np.newaxis] prepends an axis, so hiddenT is a row (1, Hnum).
        hiddenT = hidden[np.newaxis]
        weightsHODeltas = np.dot(gradients, hiddenT)
        # Adjust weights by deltas
        self.weightsHO = self.weightsHO + weightsHODeltas
        # Adjust bias by gradients
        self.biasO = self.biasO + gradients

        # NOTE(review): "*" is an element-wise (broadcast) product, not a
        # matrix product. With the 2-2-1 network and scalar target used in
        # main(), this presumably yields a (2, 1) array rather than a vector.
        errorsH = np.transpose(self.weightsHO) * errorsO

        # Calculate gradients with derivative of sigmoid
        # TODO: Use numpy for gradient calculation (if possible)
        gradientsH = dsigmoid(hidden)
        # NOTE(review): a 1-D array times a (2, 1) array broadcasts to (2, 2),
        # which matches the "(2,2)" operand in the reported ValueError.
        gradientsH = gradientsH * errorsH
        gradientsH = gradientsH * self.lr
        # Calculate deltas
        inputsT = np.array(inputs)[np.newaxis]
        # This is the line the traceback points at: shapes (2,2) and (1,2).
        weightsIHDeltas = np.dot(gradientsH, inputsT)
        # Adjust weights by deltas
        self.weightsIH = self.weightsIH + weightsIHDeltas
        # Adjust bias by gradients
        # NOTE(review): the hidden gradients are added to biasO; biasH is
        # never updated in this method — looks unintended, confirm.
        self.biasO = self.biasO + gradientsH


from NN import NeuralNetwork
from random import shuffle
def main():
    """Train a 2-2-1 network on the XOR truth table, then print predictions."""
    nn = NeuralNetwork(2, 2, 1)
    # XOR truth table; every sample is a dict with "inputs" and "outputs".
    dataset = [
        {"inputs": [0, 0], "outputs": 0},
        {"inputs": [0, 1], "outputs": 1},
        {"inputs": [1, 0], "outputs": 1},
        {"inputs": [1, 1], "outputs": 0},
    ]

    for _ in range(100):
        for data in dataset:
            nn.train(data["inputs"], data["outputs"])
    # NOTE(review): the body of this loop is missing from the pasted source;
    # printing each prediction is a reconstruction — confirm with the author.
    for data in dataset:
        print(nn.feedForward(data["inputs"]))


if __name__ == '__main__':
    main()


Traceback (most recent call last):
File "c:UsersghostDesktopNotesProgrammingMachine LearningNN From ScratchYet Another Neural Network Librarymain.py", line 38, in <module>
File "c:UsersghostDesktopNotesProgrammingMachine LearningNN From ScratchYet Another Neural Network Librarymain.py", line 30, in main
nn.train(data["inputs"], data["outputs"])
File "c:UsersghostDesktopNotesProgrammingMachine LearningNN From ScratchYet Another Neural Network LibraryNN.py", line 77, in train
weightsIHDeltas = np.dot(gradientsH, inputsT)
ValueError: shapes (2,2) and (1,2) not aligned: 2 (dim 1) != 1 (dim 0)

问题在于您混淆了 numpy 数组的维度。用 numpy 编写机器学习代码时,统一使用列向量会更容易,因为在纸上推导公式时也是这样做的。此外,您的代码中还存在逻辑错误。以下是更正后的代码:

import numpy as np
import math
from random import shuffle
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: A scalar, a NumPy array, or a (nested) sequence of numbers.

    Returns:
        A NumPy float for scalar input, otherwise an array shaped like ``x``.
    """
    # Converting first means a plain list can be negated without a TypeError.
    values = np.asarray(x)
    return 1 / (1 + np.exp(-values))
def dsigmoid(x):
    """Return ``x * (1 - x)``, the sigmoid derivative written via its output.

    This is the derivative of the sigmoid only when ``x`` is already a sigmoid
    activation; the network code in this file passes activations, not raw
    pre-activation sums.

    Args:
        x: A scalar, a NumPy array, or a (nested) sequence of activations.

    Returns:
        The element-wise product ``x * (1 - x)``.
    """
    # Converting first means a plain list works in the arithmetic below.
    activated = np.asarray(x)
    return activated * (1 - activated)
class NeuralNetwork:
    """Small fully connected network: inputs -> sigmoid hidden -> sigmoid output.

    Training is online (one sample per call to ``train``) gradient descent on
    the squared error between targets and outputs.
    """

    def __init__(self, Inum, Hnum, Onum):
        """Create a network with randomly initialised weights and biases.

        Args:
            Inum: Number of input values.
            Hnum: Number of hidden units.
            Onum: Number of output values.
        """
        self.Inum = Inum
        self.Hnum = Hnum
        self.Onum = Onum
        self.lr = 0.1  # learning rate applied to every update in train()
        # np.random.rand samples from [0, 1); "* 2 - 1" rescales to [-1, 1).
        self.weightsIH = np.random.rand(self.Hnum, self.Inum) * 2 - 1
        self.weightsHO = np.random.rand(self.Onum, self.Hnum) * 2 - 1
        self.biasH = np.random.rand(self.Hnum) * 2 - 1
        self.biasO = np.random.rand(self.Onum) * 2 - 1

    def _forward(self, inputs):
        """Return the ``(hidden, outputs)`` activations for one input vector."""
        hidden = sigmoid(np.dot(self.weightsIH, inputs) + self.biasH)
        outputs = sigmoid(np.dot(self.weightsHO, hidden) + self.biasO)
        return hidden, outputs

    def feedForward(self, inputs):
        """Return the output activations, shape ``(Onum,)``, for ``inputs``.

        Args:
            inputs: Sequence or 1-D array holding ``Inum`` numbers.
        """
        _, outputs = self._forward(np.asarray(inputs))
        return outputs

    def train(self, inputs, targets):
        """Perform one gradient-descent step on a single training sample.

        Args:
            inputs: Sequence or 1-D array holding ``Inum`` numbers.
            targets: Desired output; a scalar (broadcast over all outputs) or
                a sequence of ``Onum`` numbers.
        """
        inputs = np.asarray(inputs)
        hidden, outputs = self._forward(inputs)

        # Output layer: error scaled by the sigmoid slope gives the delta.
        errorsO = np.asarray(targets) - outputs
        deltaO = dsigmoid(outputs) * errorsO

        # Hidden layer: propagate the un-scaled delta (no learning rate here)
        # back through the weights that produced this forward pass, i.e.
        # before weightsHO is modified below.
        errorsH = np.dot(self.weightsHO.T, deltaO)
        deltaH = dsigmoid(hidden) * errorsH

        # np.outer(delta, activations) has the weight matrix's own layout
        # (rows = receiving units, columns = sending units), so no reshape is
        # needed; reshaping a transposed product would scramble the entries.
        self.weightsHO = self.weightsHO + self.lr * np.outer(deltaO, hidden)
        self.biasO = self.biasO + self.lr * deltaO
        self.weightsIH = self.weightsIH + self.lr * np.outer(deltaH, inputs)
        self.biasH = self.biasH + self.lr * deltaH
def main():
    """Train a 2-2-1 network on the XOR truth table, then print predictions."""
    nn = NeuralNetwork(2, 2, 1)
    # XOR truth table; every sample is a dict with "inputs" and "outputs".
    dataset = [
        {"inputs": [0, 0], "outputs": 0},
        {"inputs": [0, 1], "outputs": 1},
        {"inputs": [1, 0], "outputs": 1},
        {"inputs": [1, 1], "outputs": 0},
    ]

    for _ in range(100):
        for data in dataset:
            # print(data)
            nn.train(data["inputs"], data["outputs"])
    # NOTE(review): the body of this loop is missing from the pasted source;
    # printing each prediction is a reconstruction — confirm with the author.
    for data in dataset:
        print(nn.feedForward(data["inputs"]))


if __name__ == '__main__':
    main()

PS:此外,train 中的前馈部分与 feedForward 的代码是重复的;去掉这类重复可以提高代码质量。

好的,感谢Zoma_alchemist和玩具神经网络 github 的一些帮助,我已经设法修复了我的代码以使其运行良好!

下面是 NN 类:

import numpy as np
import math
def sigmoid(x):
    """Squash ``x`` into (0, 1) with the logistic function, element-wise.

    Args:
        x: A scalar, a NumPy array, or a (nested) sequence of numbers.

    Returns:
        ``1 / (1 + exp(-x))``: a NumPy float for scalar input, otherwise an
        array shaped like ``x``.
    """
    # np.asarray accepts lists too, which the bare "-x" would reject.
    return 1 / (1 + np.exp(-np.asarray(x)))
def dsigmoid(x):
    """Return the slope of the sigmoid, given the sigmoid's own output.

    ``x * (1 - x)`` is the sigmoid derivative only for values that have
    already been passed through the sigmoid, which is how the network code in
    this file uses it.

    Args:
        x: A scalar, a NumPy array, or a (nested) sequence of activations.

    Returns:
        The element-wise product ``x * (1 - x)``.
    """
    # np.asarray accepts lists too, which plain "x * (1 - x)" would reject.
    activated = np.asarray(x)
    return activated * (1 - activated)
class NeuralNetwork:
    """Small fully connected network: inputs -> sigmoid hidden -> sigmoid output.

    Training is online (one sample per call to ``train``) gradient descent on
    the squared error between targets and outputs.
    """

    def __init__(self, Inum, Hnum, Onum):
        """Create a network with randomly initialised weights and biases.

        Args:
            Inum: Number of input values.
            Hnum: Number of hidden units.
            Onum: Number of output values.
        """
        self.Inum = Inum
        self.Hnum = Hnum
        self.Onum = Onum
        self.lr = 0.1  # learning rate applied to every update in train()
        # np.random.rand samples from [0, 1); "* 2 - 1" rescales to [-1, 1).
        self.weightsIH = np.random.rand(self.Hnum, self.Inum) * 2 - 1
        self.weightsHO = np.random.rand(self.Onum, self.Hnum) * 2 - 1
        self.biasH = np.random.rand(self.Hnum) * 2 - 1
        self.biasO = np.random.rand(self.Onum) * 2 - 1

    def _forward(self, inputs):
        """Return the ``(hidden, outputs)`` activations for one input vector."""
        hidden = sigmoid(np.dot(self.weightsIH, inputs) + self.biasH)
        outputs = sigmoid(np.dot(self.weightsHO, hidden) + self.biasO)
        return hidden, outputs

    def feedForward(self, inputs):
        """Return the output activations, shape ``(Onum,)``, for ``inputs``.

        Args:
            inputs: Sequence or 1-D array holding ``Inum`` numbers.
        """
        _, outputs = self._forward(np.asarray(inputs))
        return outputs

    def train(self, inputs, targets):
        """Perform one gradient-descent step on a single training sample.

        All intermediate quantities are kept as 1-D vectors; the weight
        updates are formed as outer products, so no axis juggling is needed.

        Args:
            inputs: Sequence or 1-D array holding ``Inum`` numbers.
            targets: Desired output; a scalar (broadcast over all outputs) or
                a sequence of ``Onum`` numbers.
        """
        inputs = np.asarray(inputs)
        hidden, outputs = self._forward(inputs)

        # Output layer: error scaled by the sigmoid slope gives the delta.
        errorsO = np.asarray(targets) - outputs
        deltaO = dsigmoid(outputs) * errorsO

        # Hidden layer: by the chain rule the output *delta* (not the raw
        # error, and without the learning rate) is what flows backwards, and
        # it must go through the weights used in this forward pass — so this
        # is computed before weightsHO is modified below.
        errorsH = np.dot(self.weightsHO.T, deltaO)
        deltaH = dsigmoid(hidden) * errorsH

        # np.outer(delta, activations) already has the weight matrix's layout:
        # rows = receiving units, columns = sending units.
        self.weightsHO = self.weightsHO + self.lr * np.outer(deltaO, hidden)
        self.biasO = self.biasO + self.lr * deltaO
        self.weightsIH = self.weightsIH + self.lr * np.outer(deltaH, inputs)
        self.biasH = self.biasH + self.lr * deltaH


from NN import NeuralNetwork
from random import shuffle, choice
def main():
    """Train a 2-2-1 network on XOR and print its four predictions."""
    nn = NeuralNetwork(2, 2, 1)
    # XOR truth table; every sample is a dict with "inputs" and "outputs".
    dataset = [
        {"inputs": [0, 0], "outputs": 0},
        {"inputs": [0, 1], "outputs": 1},
        {"inputs": [1, 0], "outputs": 1},
        {"inputs": [1, 1], "outputs": 0},
    ]

    for _ in range(10000):
        for data in dataset:
            nn.train(data["inputs"], data["outputs"])
    # Predictions should approach 0, 1, 1, 0 once training has converged.
    print(nn.feedForward([0, 0]))
    print(nn.feedForward([0, 1]))
    print(nn.feedForward([1, 0]))
    print(nn.feedForward([1, 1]))


if __name__ == '__main__':
    main()


[ 0.04557893]
[ 0.96304187]
[ 0.96304197]
[ 0.03685831]
