Neural network in Python using only NumPy

I am trying to code two neural networks. The architecture of the first network consists of an input layer, one hidden layer, and an output layer. The input layer is R^2, so it accepts two inputs (x1, x2); the hidden layer has two neurons, and the output layer has one neuron. All neurons use the rectified linear unit (ReLU) activation function. The only difference between the first and second network is that the second has four neurons in the hidden layer. Otherwise they are identical.
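For reference, the forward pass of the second (2-4-1) network can be written compactly in vectorized NumPy. This is just a sketch with hypothetical array names (W1, b1, w2, b2), not the scalar-by-scalar code further down:

import numpy as np

def forward(x, W1, b1, w2, b2):
    # Hypothetical shapes for the 2-4-1 network: x is (2,), W1 is (4, 2),
    # b1 is (4,), w2 is (4,), b2 is a scalar.
    h = np.maximum(0.0, W1 @ x + b1)  # hidden layer: ReLU(W1 @ x + b1)
    return float(w2 @ h + b2 > 0)     # output neuron thresholded at zero, as in the code below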

I finished the code for the first network, and it runs and plots its results. My main goal is to have the neural network learn to separate the two clusters in my dataset. I generate 2000 points to form one cluster and then 2000 more for the second. Ideally, the output of the network will find a separating plane (multiple planes, really) between the two clusters. I have set my plots to trigger when the error in the testing phase is below 0.05. I should also explain that I am trying to find the ideal learning rate and number of training epochs, so I have several loops that iterate over different learning rates (alpha) and epoch counts.

My first network works fine, but when I add the two extra neurons, for some reason my network's error and parameters (weights and biases) become erratic. I cannot get the error of the 4-neuron network below 0.4. I think it has to do with the error and the weights. I have been running the network with print statements to watch what happens to the weights, and I noticed they do not update well: the error gets stuck at 0 during training, so the weights never update. But I am not 100% sure whether this happens every time.
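One way to test that hypothesis is to count how many samples leave every hidden ReLU inactive (so all hidden gradients vanish) and how many already have zero error (so no update happens at all). This is a diagnostic sketch under assumptions, using hypothetical vectorized parameters rather than the scalar names in the code below:

import numpy as np

def diagnose(x, t, W1, b1, w2, b2):
    # Hypothetical shapes: x is (N, 2), t is (N,), W1 is (4, 2), b1 is (4,),
    # w2 is (4,), b2 is a scalar.
    pre = x @ W1.T + b1                      # hidden pre-activations, (N, 4)
    dead = np.all(pre <= 0, axis=1)          # samples where every ReLU derivative is zero
    out = (np.maximum(0.0, pre) @ w2 + b2 > 0).astype(float)
    return int(dead.sum()), int((out == t).sum())  # dead samples, zero-error samples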

If anyone knows why my weights and error are not updating correctly, I would greatly appreciate it. If you run the code and plot the two clusters, you will see that the output of the neural network produces no colored separation between them. The code for the working two-neuron architecture is the same, just with the extra two neurons removed.

Here is the code for the network:

import numpy as np
import random
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

nData = 2000 #2000 points used on each cluster for 4000 points total
nTrain = 1000 #Used for training loop and to create clusters
nEpoch = 1 #Initial epoch value
nTest = 2000 #Used for testing loop
#alpha = 0.001
#Initializing 2D array for x which will carry the x1 and x2 values
#Also creating the radius and theta values for the cluster data
std = 0.5
x = np.zeros((2*nData,2))
t = np.zeros((2*nData))
r = np.random.normal(0,std,2*nData)
theta = 2*np.pi*np.random.rand(2*nData)
#w11f and w12f are used to plot the value of weights w11 and w12 as they update
w11f = np.zeros(nEpoch*nTrain)
w12f = np.zeros(nEpoch*nTrain)
#Creating cluster 1 and target data
h = -6 + 12*np.random.rand(nData)
v = 5 + (h**2)/6
x[0:nData,0] = h + r[0:nData]*np.cos(theta[0:nData])
x[0:nData,1] = v + r[0:nData]*np.sin(theta[0:nData])
t[0:nData] = 0
#Creating cluster 2 and target data
h = -5 + 10*np.random.rand(nData)
v = 10 + (h**2)/4
x[nData:2*nData,0] = h + r[nData:2*nData]*np.cos(theta[nData:2*nData])
x[nData:2*nData,1] = v + r[nData:2*nData]*np.sin(theta[nData:2*nData])
t[nData:2*nData] = 1
#Normalization
x[:,0] = 1 + 0.1*x[:,0]
x[:,1] = 1 + 0.1*x[:,1]
#Parameter Initialization
w11 = 0.5 - np.random.rand()
w12 = 0.5 - np.random.rand()
w21 = 0.5 - np.random.rand()
w22 = 0.5 - np.random.rand()
w31 = 0.5 - np.random.rand()
w32 = 0.5 - np.random.rand()
w41 = 0.5 - np.random.rand()
w42 = 0.5 - np.random.rand()
b4 = 0.5 - np.random.rand()
b3 = 0.5 - np.random.rand()
b2 = 0.5 - np.random.rand()
b1 = 0.5 - np.random.rand()
ww1 = 0.5 - np.random.rand()
ww2 = 0.5 - np.random.rand()
ww3 = 0.5 - np.random.rand()
ww4 = 0.5 - np.random.rand()
bb = 0.5 - np.random.rand()
#Index range from 0 to 3999, used to sample training and testing points
a = range(0,2*nData)
#Creating a 3D array (tensor) to store all the error values at the end of each 50-iteration loop
er_List = np.zeros((14,50,6))
#Creating the final array that counts successful runs, i.e. runs with test error under 0.05.
#Rows correspond to the alpha values from 0.001 to 0.05 and columns to the epoch counts
#from 1 to 6, so you can inspect the 2D array and see which alpha/epoch pair succeeds most often.
nSuccess_Array = np.zeros((14,6))

#Part B - Creating nested loops to train for multiple alpha and epoch value
#pairs
#Training
for l in range(0,14): #loop over the 14 alpha values
    alpha = [0.001, 0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.02, 0.03, 0.04, 0.05]
    nEpoch = 1
    for n in range(0,6): #loop for incrementing epoch values
        nSuccess = 0
        #Initialize these again so the size updates as the epoch changes
        w11f = np.zeros(nEpoch*nTrain)
        w12f = np.zeros(nEpoch*nTrain)
        for j in range(0,50):
            #Initialize the parameters again so they are random for each of the 50 runs (for each new epoch value)
            w11 = 0.5 - np.random.rand()
            w12 = 0.5 - np.random.rand()
            w21 = 0.5 - np.random.rand()
            w22 = 0.5 - np.random.rand()
            w31 = 0.5 - np.random.rand()
            w32 = 0.5 - np.random.rand()
            w41 = 0.5 - np.random.rand()
            w42 = 0.5 - np.random.rand()
            b4 = 0.5 - np.random.rand()
            b3 = 0.5 - np.random.rand()
            b2 = 0.5 - np.random.rand()
            b1 = 0.5 - np.random.rand()
            ww1 = 0.5 - np.random.rand()
            ww2 = 0.5 - np.random.rand()
            ww3 = 0.5 - np.random.rand()
            ww4 = 0.5 - np.random.rand()
            bb = 0.5 - np.random.rand()

            sp = random.sample(a,nTrain + nTest) #random train/test indices
            p = 0
            for epoch in range(0,nEpoch):
                for i in range(0,nTrain):
                    #Neuron dot product
                    y1 = b1 + w11*x[sp[i],0] + w12*x[sp[i],1]
                    y2 = b2 + w21*x[sp[i],0] + w22*x[sp[i],1]
                    y3 = b3 + w31*x[sp[i],0] + w32*x[sp[i],1]
                    y4 = b4 + w41*x[sp[i],0] + w42*x[sp[i],1]
                    #Neuron activation function ReLU
                    dxx1 = y1 > 0
                    xx1 = y1*dxx1

                    dxx2 = y2 > 0
                    xx2 = y2*dxx2

                    dxx3 = y3 > 0
                    xx3 = y3*dxx3

                    dxx4 = y4 > 0
                    xx4 = y4*dxx4
                    #Output of neural network before activation function
                    yy = bb + ww1*xx1 + ww2*xx2 + ww3*xx3 + ww4*xx4
                    yy = yy > 0 #step activation function on the output
                    e = t[sp[i]] - yy #error calculation

                    #Updating parameters
                    ww1 = ww1 + alpha[l]*e*xx1
                    ww2 = ww2 + alpha[l]*e*xx2
                    ww3 = ww3 + alpha[l]*e*xx3
                    ww4 = ww4 + alpha[l]*e*xx4

                    bb = bb + alpha[l]*e

                    w11 = w11 + alpha[l]*e*ww1*dxx1*x[sp[i],0]
                    w12 = w12 + alpha[l]*e*ww1*dxx1*x[sp[i],1]

                    w21 = w21 + alpha[l]*e*ww2*dxx2*x[sp[i],0]
                    w22 = w22 + alpha[l]*e*ww2*dxx2*x[sp[i],1]

                    w31 = w31 + alpha[l]*e*ww3*dxx3*x[sp[i],0]
                    w32 = w32 + alpha[l]*e*ww3*dxx3*x[sp[i],1]

                    w41 = w41 + alpha[l]*e*ww4*dxx4*x[sp[i],0]
                    w42 = w42 + alpha[l]*e*ww4*dxx4*x[sp[i],1]

                    b1 = b1 + alpha[l]*e*ww1*dxx1
                    b2 = b2 + alpha[l]*e*ww2*dxx2
                    b3 = b3 + alpha[l]*e*ww3*dxx3
                    b4 = b4 + alpha[l]*e*ww4*dxx4

                    w11f[p] = w11
                    w12f[p] = w12
                    p = p + 1
            er = 0
            #Testing on the held-out samples (indexed with sp[k])
            for k in range(nTrain,nTrain + nTest):
                y1 = b1 + w11*x[sp[k],0] + w12*x[sp[k],1]
                y2 = b2 + w21*x[sp[k],0] + w22*x[sp[k],1]
                y3 = b3 + w31*x[sp[k],0] + w32*x[sp[k],1]
                y4 = b4 + w41*x[sp[k],0] + w42*x[sp[k],1]

                dxx1 = y1 > 0
                xx1 = y1*dxx1

                dxx2 = y2 > 0
                xx2 = y2*dxx2

                dxx3 = y3 > 0
                xx3 = y3*dxx3

                dxx4 = y4 > 0
                xx4 = y4*dxx4

                yy = bb + ww1*xx1 + ww2*xx2 + ww3*xx3 + ww4*xx4
                yy = yy > 0
                e = abs(t[sp[k]] - yy)
                er = er + e #Accumulates error
            er = er/nTest #Average test error for this run
            er_List[l,j,n] = er

            if er_List[l,j,n] < 0.05:
                nSuccess = nSuccess + 1
        #Part C - Recording the number of successful runs for each
        #alpha and epoch value pair
        nSuccess_Array[l,n] = nSuccess

        if nEpoch < 6:
            nEpoch = nEpoch + 1

print(er)
#Plotting
if er < 0.5:
    plt.figure(1)
    plt.scatter(x[0:nData,0],x[0:nData,1])
    plt.scatter(x[nData:2*nData,0],x[nData:2*nData,1])

    X = np.arange(0.25,1.75,0.02)
    Y = np.arange(1.25,2.75,0.02)
    X, Y = np.meshgrid(X,Y)

    y1 = b1 + w11*X + w12*Y
    y2 = b2 + w21*X + w22*Y
    y3 = b3 + w31*X + w32*Y
    y4 = b4 + w41*X + w42*Y

    dxx1 = y1 > 0
    xx1 = y1*dxx1

    dxx2 = y2 > 0
    xx2 = y2*dxx2

    dxx3 = y3 > 0
    xx3 = y3*dxx3

    dxx4 = y4 > 0
    xx4 = y4*dxx4

    yy = bb + ww1*xx1 + ww2*xx2 + ww3*xx3 + ww4*xx4
    Z = yy > 0
    plt.scatter(X,Y,c=Z+1,alpha=0.3)

    plt.figure(2)
    f = np.arange(0,nEpoch*nTrain,1)
    plt.plot(f,w11f)

    plt.figure(3)
    plt.plot(f,w12f)

    plt.figure(4)
    ax = plt.axes(projection='3d')
    ax.scatter(x[0:nData,0],x[0:nData,1],0,s=30)
    ax.scatter(x[nData:2*nData,0],x[nData:2*nData,1],1,s=30)

    #Plotting the separating planes
    X = np.arange(0.25,1.75,0.02)
    Y = np.arange(1.25,2.75,0.02)
    X, Y = np.meshgrid(X,Y)

    y1 = b1 + w11*X + w12*Y
    y2 = b2 + w21*X + w22*Y
    y3 = b3 + w31*X + w32*Y
    y4 = b4 + w41*X + w42*Y

    dxx1 = y1 > 0
    xx1 = y1*dxx1

    dxx2 = y2 > 0
    xx2 = y2*dxx2

    dxx3 = y3 > 0
    xx3 = y3*dxx3

    dxx4 = y4 > 0
    xx4 = y4*dxx4

    yy = bb + ww1*xx1 + ww2*xx2 + ww3*xx3 + ww4*xx4
    Z = yy > 0
    ax.plot_surface(X,Y,Z,rstride=1, cstride=1,cmap='viridis',alpha=0.5)

    plt.figure(5)
    ax = plt.axes(projection='3d')
    X = np.arange(0,5,0.02)
    Y = np.arange(0,5,0.02)
    X, Y = np.meshgrid(X,Y)

    y1 = b1 + w11*X + w12*Y
    y2 = b2 + w21*X + w22*Y
    y3 = b3 + w31*X + w32*Y
    y4 = b4 + w41*X + w42*Y

    dxx1 = y1 > 0
    xx1 = y1*dxx1

    dxx2 = y2 > 0
    xx2 = y2*dxx2

    dxx3 = y3 > 0
    xx3 = y3*dxx3

    dxx4 = y4 > 0
    xx4 = y4*dxx4

    yy = bb + ww1*xx1 + ww2*xx2 + ww3*xx3 + ww4*xx4
    ax.plot_surface(X, Y, yy, rstride=1, cstride=1,cmap='viridis', edgecolor='none')

Yes, you can compute the gradients manually using np.matmul (or the @ operator). Check out the fastai v3 course, Part 2: https://course.fast.ai/videos/?lesson=8. Jeremy Howard manipulates PyTorch tensors there, but you can do the same thing in NumPy.
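To make that concrete, here is a rough vectorized sketch of one update step for the 2-4-1 network written with @ (np.matmul). It is an illustration under assumptions, not the asker's algorithm: it swaps the hard output threshold for a sigmoid with squared error, since the step function has zero gradient almost everywhere:

import numpy as np

def train_step(x, t, W1, b1, w2, b2, lr=0.01):
    # Assumed shapes: x is (N, 2), t is (N,), W1 is (4, 2), b1 is (4,),
    # w2 is (4,), b2 is a scalar.
    pre = x @ W1.T + b1                      # hidden pre-activations, (N, 4)
    h = np.maximum(0.0, pre)                 # ReLU
    y = 1.0/(1.0 + np.exp(-(h @ w2 + b2)))   # sigmoid output, (N,)
    d_out = (y - t)*y*(1.0 - y)              # error signal at the output, (N,)
    d_h = np.outer(d_out, w2)*(pre > 0)      # backprop through the ReLUs, (N, 4)
    W1 = W1 - lr*(d_h.T @ x)                 # all gradients accumulated via matmul
    b1 = b1 - lr*d_h.sum(axis=0)
    w2 = w2 - lr*(h.T @ d_out)
    b2 = b2 - lr*d_out.sum()
    return W1, b1, w2, b2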
