我的任务是仅使用 NumPy(不使用 TensorFlow、PyTorch 等)在鸢尾花(Iris)数据集上编写一个人工神经网络(ANN)。我训练了 2000 个 epoch,但大约从第 40 个 epoch 开始,网络的准确率就一直停留在 0.66 左右。此外,调试时发现参数值要么非常大,要么非常小(例如,在第 400 个 epoch 时,`self.layers[0]` 的 `self.output` 为 [-59.2447737, -79.13719157, -57.27055739, 117.26796309, 127.71775426])。
我的网络有 4 个输入节点、一个包含 5 个节点的隐藏层,以及一个包含 3 个节点的输出层(对应 3 种鸢尾花)。
我不明白为什么会这样:学习率很低(0.01),`weights` 和 `biases` 的初始值都很小,输入数据也已经过标准化。
非常感谢任何帮助。我的代码如下:
main.py:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from network import NeuralNetwork
from layer import Layer
if __name__ == "__main__":
    # Load the iris samples, their integer class labels and the class names.
    iris = load_iris()
    data, target, target_names = iris.data, iris.target, iris.target_names
    scaler = StandardScaler()

    # One-hot encode the labels so each target row lines up with the
    # three output neurons (one column per class).
    one_hot_targets = np.zeros((len(target), len(target_names)))
    one_hot_targets[np.arange(len(target)), target] = 1

    X_train, X_test, Y_train, Y_test = train_test_split(
        data, one_hot_targets, test_size=0.33, shuffle=True
    )

    # Fit the scaler on the training split only, then apply it to both splits.
    X_train_scaled = scaler.fit(X_train).transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    learning_rate = 0.01

    # Build the network. There is no explicit input layer: the input vector
    # itself feeds the first (hidden) layer.
    network = NeuralNetwork(learning_rate)
    network.add_layer(Layer(4, 5))  # hidden layer
    network.add_layer(Layer(5, 3))  # output layer

    # Train on the scaled training split.
    network.train(X_train_scaled, Y_train, epochs=2000)

    # Evaluate on the held-out split.
    output, accuracy = network.test(X_test_scaled, Y_test)

    # Report every test row (unscaled) with its predicted and true class.
    for i, guess in enumerate(output):
        prediction = target_names[np.argmax(guess)]
        answer = target_names[np.argmax(Y_test[i])]
        print(f"For testing row: {X_test[i]}, the prediction was {prediction} and the answer was {answer}")
    print(f"Network test accuracy: {accuracy:.4f}")
network.py:
import numpy as np
from utils import calc_error
np.random.seed(10)
class NeuralNetwork:
    """Feed-forward network trained one sample at a time by gradient descent.

    Layers are duck-typed. Each one must provide ``forward_propagate(input)``
    and ``back_propagate(error)`` and expose ``weights``, ``biases``,
    ``d_weights`` and ``d_biases`` so the network can apply the update step.
    """

    def __init__(self, learning_rate=0.1):
        """Create an empty network that updates with step size *learning_rate*."""
        self.layers = []
        self.learning_rate = learning_rate

    def add_layer(self, layer):
        """Append *layer*; layers must be added in input-to-output order."""
        self.layers.append(layer)

    def forward_propagate(self, input):
        """Feed *input* through every layer in order and return the last output."""
        output = input
        for layer in self.layers:
            output = layer.forward_propagate(output)
        return output

    def back_propagate(self, error):
        """Pass *error* backwards through the layers, last layer first.

        Each layer receives the value returned by the layer after it.
        """
        for layer in reversed(self.layers):
            error = layer.back_propagate(error)

    def train_iteration(self, input, target):
        """Run one forward/backward pass on a single sample and update all layers."""
        output = self.forward_propagate(input)
        # Difference between the network output and the target.
        error = output - target
        self.back_propagate(error)
        # Gradient-descent step using the gradients each layer just stored.
        for layer in self.layers:
            layer.weights -= self.learning_rate * layer.d_weights
            layer.biases -= self.learning_rate * layer.d_biases

    def train_epoch(self, inputs, targets):
        """Run ``train_iteration`` once for every ``(inputs[i], targets[i])`` pair."""
        for i, x in enumerate(inputs):
            self.train_iteration(x, targets[i])

    def train(self, inputs, targets, epochs=4000):
        """Train for *epochs* passes, printing training accuracy about 100 times.

        The reporting interval is an integer (``epochs // 100``, at least 1).
        The previous float step ``epochs / 100`` combined with ``%`` could fail
        to match for most epochs (e.g. ``epochs=30`` only reported epoch 0).
        """
        log_every = max(1, epochs // 100)
        for epoch in range(epochs):
            self.train_epoch(inputs, targets)
            if epoch % log_every == 0:
                _, accuracy = self.test(inputs, targets)
                print(f"Epoch {epoch} --> Training Accuracy:{accuracy}")

    def predict(self, input):
        """Return the network output for a single *input*."""
        return self.forward_propagate(input)

    def test(self, inputs, targets):
        """Return ``(outputs, accuracy)`` for the given samples.

        A sample counts as correct when its target holds ``1`` at the index of
        the largest network output. Empty *inputs* yields ``([], 0.0)`` rather
        than dividing by zero.
        """
        if len(inputs) == 0:
            return [], 0.0
        output, correct = [], 0
        for i, x in enumerate(inputs):
            guess = self.predict(x)
            correct += int(targets[i][guess.argmax()] == 1)
            output.append(guess)
        return output, correct / len(inputs)
layer.py:
import numpy as np
from utils import sigmoid, deriv_sigmoid
np.random.seed(10)
class Layer:
    """Fully connected layer: ``activation(input @ weights + biases)``.

    ``weights`` has shape ``(num_inputs, num_neurons)`` and starts as small
    Gaussian noise (``randn * 0.01``); ``biases`` has shape
    ``(1, num_neurons)`` and starts at zero.

    ``derivative_activation_function`` is called with the layer's *activated
    output*, which is the convention the default ``deriv_sigmoid`` follows
    (it computes ``x * (1 - x)``).
    """

    def __init__(self, num_inputs, num_neurons, activation_function=sigmoid, derivative_activation_function=deriv_sigmoid):
        self.weights = np.random.randn(num_inputs, num_neurons) * 0.01
        self.biases = np.zeros((1, num_neurons))
        self.activation_function = activation_function
        self.derivative_activation_function = derivative_activation_function

    def forward_propagate(self, input):
        """Compute and cache the pre-activation and activated output for *input*."""
        self.input = input
        self.output = np.dot(input, self.weights) + self.biases
        self.activated_output = self.activation_function(self.output)
        return self.activated_output

    def back_propagate(self, error):
        """Store this layer's gradients and return the error for the previous layer.

        *error* is the gradient of the loss with respect to this layer's
        activated output. Must be called after ``forward_propagate``.
        """
        # Chain rule: scale the incoming error by the activation's slope at
        # the value produced in the forward pass. The earlier code applied the
        # derivative function to the error itself, which is not a gradient and
        # made the weights drift to very large magnitudes.
        delta = error * self.derivative_activation_function(self.activated_output)
        # View the cached input as (samples, num_inputs) so the transpose
        # lines up with delta for both 1-D and 2-D inputs.
        inputs_2d = np.atleast_2d(self.input)
        self.d_weights = np.dot(inputs_2d.T, delta)
        self.d_biases = np.sum(delta, axis=0, keepdims=True)
        self.d_input = np.dot(delta, self.weights.T)
        return self.d_input
utils.py:
import numpy as np
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, element-wise for arrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def deriv_sigmoid(x):
    """Return ``x * (1 - x)`` element-wise.

    This equals the sigmoid's derivative only when *x* is already a sigmoid
    output ``s = sigmoid(z)``, since ``d sigmoid / dz = s * (1 - s)``.
    """
    complement = 1 - x
    return np.multiply(x, complement)
由于你有三个类别,输出层应该使用 softmax 激活;隐藏层则建议使用 ReLU/Leaky ReLU 激活。你还需要为它们分别提供对应的导数函数。
Sigmoid 只适用于输出层仅有 1 个神经元的二分类问题,中间层不应使用 Sigmoid 激活函数。
为了更清楚地说明这一点:你的输出层有 3 个神经元,因此你会得到来自这 3 个神经元的信号,并据此决定每条记录的预测类别。这个信号以 logits(未归一化的得分)的形式出现。当 logits 通过 softmax 激活后,它们会被转换为每个类别对应的概率值(总和为 1),例如 [0.1, 0.6, 0.3]。由于索引 1 处的概率最高,因此预测的类别是第 2 类。
现在来解决您在尝试实现softmax激活时将面临的问题。softmax的导数是包含偏导数的雅可比矩阵。
对于手头的问题,完整实现这些偏导数有点小题大做。你可以放心地把预测值与独热(one-hot)编码目标之间的差值,直接当作使用 softmax 的输出层的梯度(在使用交叉熵损失时,这正是损失对 logits 的梯度)。
祝你学习ML一切顺利。