Neural network on the Iris dataset converges very quickly



My task is to write an ANN for the Iris dataset using only NumPy (no TensorFlow, PyTorch, etc.). I run it for 2000 epochs, and by around epoch 40 the network's accuracy seems to stay stuck at 0.66. Also, when I debug, the parameters are either very high or very low (for example, for self.layers[0], the self.output values at epoch 400 are [-59.2447737, -79.13719157, -57.27055739, 117.26796309, 127.71775426]).

My network has 4 input nodes, a single hidden layer with 5 nodes, and an output layer with 3 nodes, one for each of the 3 iris species.

I don't understand why this happens. The learning rate is low (0.01), the weight and bias vectors are initialized with small values, and the input data is normalized.

Any help would be greatly appreciated. My code:

main.py:

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from network import NeuralNetwork
from layer import Layer

if __name__ == "__main__":
    iris = load_iris()
    data, target, target_names = iris.data, iris.target, iris.target_names
    scaler = StandardScaler()

    # One-hot encode the target array to match the 3-neuron output structure
    one_hot_targets = []
    for i in range(len(target)):
        vec = np.zeros(len(target_names))
        vec[target[i]] = 1
        one_hot_targets.append(vec)
    one_hot_targets = np.array(one_hot_targets)

    X_train, X_test, Y_train, Y_test = train_test_split(data, one_hot_targets, test_size=0.33, shuffle=True)
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    learning_rate = 0.01

    # Init a network and add its layers. The input layer is represented by the input itself, not by an actual layer
    network = NeuralNetwork(learning_rate)
    network.add_layer(Layer(4, 5))  # hidden layer 1
    network.add_layer(Layer(5, 3))  # output layer

    # Train the network for a number of epochs
    network.train(X_train_scaled, Y_train, epochs=2000)

    # Test on the test data separated earlier
    output, accuracy = network.test(X_test_scaled, Y_test)

    # Print testing output
    for i in range(len(output)):
        prediction = target_names[np.argmax(output[i])]
        answer = target_names[np.argmax(Y_test[i])]
        print(f"For testing row: {X_test[i]}, the prediction was {prediction} and the answer was {answer}")
    print(f"Network test accuracy: {accuracy:.4f}")

network.py:

import numpy as np

np.random.seed(10)

class NeuralNetwork:
    def __init__(self, learning_rate=0.1):
        self.layers = []
        self.learning_rate = learning_rate

    def add_layer(self, layer):
        # Layers must be added in order
        self.layers.append(layer)

    def forward_propagate(self, input):
        output = input
        for layer in self.layers:
            output = layer.forward_propagate(output)
        return output

    def back_propagate(self, error):
        for layer in reversed(self.layers):
            error = layer.back_propagate(error)

    def train_iteration(self, input, target):
        output = self.forward_propagate(input)
        # Calculate the error between the output and the target value
        error = output - target
        # Backpropagate the error through the network
        self.back_propagate(error)
        # Update the weights and biases of the layers
        for layer in self.layers:
            layer.weights -= self.learning_rate * layer.d_weights
            layer.biases -= self.learning_rate * layer.d_biases

    def train_epoch(self, inputs, targets):
        for i in range(len(inputs)):
            x = inputs[i]
            y = targets[i]
            self.train_iteration(x, y)

    def train(self, inputs, targets, epochs=4000):
        for epoch in range(epochs):
            self.train_epoch(inputs, targets)
            if epoch % (epochs / 100) == 0:
                _, accuracy = self.test(inputs, targets)
                print(f"Epoch {epoch} --> Training Accuracy: {accuracy}")

    def predict(self, input):
        output = self.forward_propagate(input)
        return output

    def test(self, inputs, targets):
        output, correct = [], 0
        for i in range(len(inputs)):
            x, y = inputs[i], targets[i]
            guess = self.predict(x)
            is_correct = y[guess.argmax()] == 1
            correct += is_correct
            output.append(guess)
        return output, (correct / len(inputs))

layer.py:

import numpy as np
from utils import sigmoid, deriv_sigmoid

np.random.seed(10)

class Layer:
    def __init__(self, num_inputs, num_neurons, activation_function=sigmoid, derivative_activation_function=deriv_sigmoid):
        self.weights = np.random.randn(num_inputs, num_neurons) * 0.01
        self.biases = np.zeros((1, num_neurons))
        self.activation_function = activation_function
        self.derivative_activation_function = derivative_activation_function

    def forward_propagate(self, input):
        self.input = input
        self.output = np.dot(input, self.weights) + self.biases
        self.activated_output = self.activation_function(self.output)
        return self.activated_output

    def back_propagate(self, error):
        error = self.derivative_activation_function(error)
        reshaped_input = self.input.T.reshape((np.max(self.input.shape), 1))  # ensures the dot product always works
        self.d_weights = np.dot(reshaped_input, error)
        self.d_biases = np.sum(error, axis=0, keepdims=True)
        self.d_input = np.dot(error, self.weights.T)
        return self.d_input

utils.py:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def deriv_sigmoid(x):
    return np.multiply(x, 1 - x)

Your final layer should use a softmax activation, since you have three classes. Your first layer should use a relu/leaky relu activation. You also need to provide the derivative functions for each of them.

Sigmoid is only appropriate for binary classification with a single neuron in the final layer. The intermediate layers should not use a sigmoid activation function either.
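
To make that concrete, here is a minimal sketch of a hidden-layer activation and its derivative, written in the same style as the existing utils.py (the names relu, deriv_relu, leaky_relu and deriv_leaky_relu are my own additions, not functions from the original code):

import numpy as np

def relu(x):
    # Pass positive values through unchanged, clamp negatives to zero
    return np.maximum(0, x)

def deriv_relu(x):
    # 1 where the input is positive, 0 elsewhere; works whether x is the
    # pre-activation or the relu output, since both have the same sign
    return (x > 0).astype(float)

def leaky_relu(x, alpha=0.01):
    # Like relu, but lets a small gradient (alpha) through for negative inputs
    return np.where(x > 0, x, alpha * x)

def deriv_leaky_relu(x, alpha=0.01):
    return np.where(x > 0, 1.0, alpha)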

To make this clearer: you have 3 neurons in the output layer, so you expect a signal from those 3 neurons, and based on it you decide the predicted class for a record. That signal comes out in the form of logits. When the logits are passed through a softmax activation they are converted into probability values, one per class, for example [0.1, 0.6, 0.3]. Based on this, since index 1 has the highest probability, the predicted class is the second one.
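
A small sketch of softmax itself and of how its probabilities map to a predicted class (the softmax helper below is mine, not part of the asker's utils.py):

import numpy as np

def softmax(x):
    # Shift by the max for numerical stability, then normalise to probabilities
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

logits = np.array([1.2, 3.1, 0.4])  # raw outputs of the 3 output neurons
probs = softmax(logits)             # roughly [0.12, 0.82, 0.06], sums to 1
predicted_class = np.argmax(probs)  # index 1, i.e. the second iris species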

Now for the problem you will face when you try to implement the softmax activation: the derivative of softmax is a Jacobian matrix of partial derivatives.

For the problem at hand, implementing those partial derivatives is overkill. You can safely use the difference between the prediction and the one-hot encoded target as the gradient at the softmax output layer; that difference is exactly the gradient of the cross-entropy loss with respect to the logits.
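
In code, the shortcut could look roughly like the sketch below, which mirrors the asker's Layer.back_propagate but adds a hypothetical is_output flag (not present in the original class) to mark the softmax layer:

# Sketch of Layer.back_propagate inside layer.py (numpy is already imported there as np)
def back_propagate(self, error):
    if self.is_output:
        # Softmax output layer with cross-entropy loss: the gradient w.r.t. the
        # logits is simply (prediction - one_hot_target), which is the "error"
        # already computed in train_iteration, so it is used as-is.
        delta = error
    else:
        # Hidden layers still apply the chain rule: multiply the incoming error
        # by the activation's derivative at this layer's pre-activation
        # (matching the relu sketch above).
        delta = error * self.derivative_activation_function(self.output)

    reshaped_input = self.input.reshape(-1, 1)       # column vector of this layer's input
    self.d_weights = np.dot(reshaped_input, delta)   # gradient w.r.t. the weights
    self.d_biases = np.sum(delta, axis=0, keepdims=True)
    self.d_input = np.dot(delta, self.weights.T)     # error passed back to the previous layer
    return self.d_input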

All the best with learning ML.
