MLP with vectors only, and a backpropagation problem



I am interested in artificial intelligence and have started learning about it. I tried to implement an MLP class based only on std::vector, but it does not work properly.

The feed-forward function seems fine, but I clearly lack some understanding of the backpropagation algorithm. The training function is a dummy one for testing the XOR case: the network always returns the same result (~0.625915 in my case), with the error set to 0.264518 when the inputs are 1 and 0 (expecting 1 as output), and to 0.442609 when the inputs are 1 and 1 (expecting 0 as output).

I would like to know what I am doing wrong in the backpropagation and gradient descent parts. Below is the full code of the class and the main function. Thanks for your help and your insights!

#include <iostream>
#include <vector>
#include <cassert>
#include <cmath>
#include <functional>
#include <stdlib.h>

using namespace std;
typedef function<double(double, bool)> func;
typedef vector < vector < vector<double> > > Matrix3d;

class Net {
public:
Net(const vector<unsigned> &topology, vector<func> &fns) {
learning_rate = 0.1;
alpha = 0.5;
global_error = 1.0;
activationFns = fns;
nbLayers = topology.size();
lastLayerId = nbLayers - 1;
gradients.resize(nbLayers);
neuron_errors.resize(nbLayers);
layers.resize(nbLayers);
weights.resize(nbLayers);
wdeltas.resize(nbLayers);
for (unsigned layerNum = 0; layerNum < nbLayers; layerNum++) {
bool isLastLayer = layerNum == lastLayerId;
unsigned nbNeuronsInLayer = isLastLayer ? topology[layerNum] : topology[layerNum] + 1;
unsigned nbWeights = isLastLayer ? 0 : topology[layerNum + 1] + 1;
gradients[layerNum].resize(nbNeuronsInLayer, 0.0);
layers[layerNum].resize(nbNeuronsInLayer);
weights[layerNum].resize(nbNeuronsInLayer);
wdeltas[layerNum].resize(nbNeuronsInLayer);
neuron_errors[layerNum].resize(nbNeuronsInLayer, 0.0);
if (! isLastLayer) {
layers[layerNum][nbNeuronsInLayer-1] = 1.0; // bias initialisation
}
for (unsigned n = 0; n < weights[layerNum].size(); n++) {
weights[layerNum][n].resize(nbWeights); // the number of neurons in the next layer gives the number of weights of this neuron
wdeltas[layerNum][n].resize(nbWeights, 0.0);
InitialiseWeights(weights[layerNum][n]); // randomise the weights of this layer's neurons
}
}
};
~Net() {
gradients.clear();
layers.clear();
weights.clear();
wdeltas.clear();
neuron_errors.clear();
};

// propagate through the network
// during the feed forward, output = activationFn(sum of the neurons' inputs * their weights)
// for each neuron of the previous layer:
// we take its output, prevLayer[n], and multiply it by the weight towards neuron i of the current layer
void FeedForward(const vector<double> &inputs) {
assert(inputs.size() == layers[0].size() - 1);
// assign the inputs to the outputs of the INPUT layer's neurons
for (unsigned i = 0; i < inputs.size(); i++) {
layers[0][i] = inputs[i];
}
for (unsigned layerNum = 1; layerNum < nbLayers; layerNum++) {
vector<double> &prevLayer = layers[layerNum - 1];    

const bool isLastLayer = layerNum == lastLayerId;
const unsigned forcap = isLastLayer ? layers[layerNum].size() : layers[layerNum].size() - 1;

for (unsigned i = 0; i < forcap; i++) {
const double bias = prevLayer[prevLayer.size()-1] * weights[layerNum-1][weights[layerNum-1].size()-1][i];
double output = 0.0; 
for (unsigned n = 0; n < prevLayer.size() - 1; n++) {
output += prevLayer[n] * weights[layerNum - 1][n][i];
}
output += bias;
layers[layerNum][i] = activationFns[layerNum - 1](output, false);
}
}
//Print();
};

void BackPropagate(const vector<double> &targets) {
vector<double> &guessed = layers[lastLayerId];
func &outputActivationFn = activationFns[lastLayerId];
assert(targets.size() == guessed.size());
global_error = 0.0;
// Compute the OUTPUT layer errors //
for (unsigned t = 0; t < targets.size(); t++) {
double diff_ = targets[t] - guessed[t];
global_error += (diff_ * diff_); 
neuron_errors[lastLayerId][t] = targets[t] - guessed[t]; // the output neuron's error
gradients[lastLayerId][t] = diff_ * outputActivationFn(guessed[t], true);
}
if (guessed.size() > 1)
global_error /= guessed.size()-1;
else
global_error *= 0.5;
global_error = sqrt(global_error);
// Compute the errors of the neurons in the other layers
for (unsigned l = nbLayers - 2; l < nbLayers; --l) {
// get the weights linking the hidden layer to the output layer
for (unsigned n = 0; n < layers[l].size(); n++) { // for each neuron in this layer
neuron_errors[l][n] = 0.0;
for (unsigned m = 0; m < layers[l+1].size(); m++) { // target neuron m of the layer above
double &weight = weights[l][n][m];
// here we can compute the error of neuron n
neuron_errors[l][n] += weight * gradients[l+1][m];
}
gradients[l][n] = neuron_errors[l][n] * activationFns[l](layers[l][n], true); // ?
}
}
// Weight update (?)
for (unsigned l = nbLayers - 2; l < nbLayers; --l) {
for (unsigned n = 0; n < layers[l].size(); n++) {
for (unsigned m = 0; m < layers[l + 1].size(); m++) {
weights[l][n][m] -= (learning_rate * gradients[l][n] * layers[l][n]) + (wdeltas[l][n][m] * alpha);
wdeltas[l][n][m] = (learning_rate * gradients[l][n] * layers[l][n]) + (wdeltas[l][n][m] * alpha);
}
}
}
};

void GetResults(vector<double> &results) {
results.clear();
for (unsigned i = 0; i < layers[lastLayerId].size(); i++) {
results.push_back(layers[lastLayerId][i]); // copy the output layer's values
}
};

void Train() {
vector < vector<double> > ins = {
{ 1.0, 0.0 },
{ 0.0, 1.0 },
{ 0.0, 0.0 },
{ 1.0, 1.0 }
};
vector < vector<double> > outs = {
{ 1.0 },
{ 1.0 },
{ 0.0 },
{ 0.0 }
};
for (unsigned i = 0; i < 1000; i++) {
unsigned r = rand() % ins.size();
vector<double> k = ins[r];
vector<double> o = outs[r];
FeedForward(k);
BackPropagate(o);
cout << "[" << i << "] " << k[0] << " & " << k[1] << " -> " << o[0] << "tresult : " << layers[lastLayerId][0] << "terror = " << global_error << endl;
}

cout << endl << "Test: [ 1 , 0 ]" << endl;
FeedForward({ 1.0, 0.0 });
BackPropagate({ 1.0 });
cout << "Result : " << layers[lastLayerId][0] << "t(error = " << global_error << endl;
cout << "Test: [ 1 , 1 ]" << endl;
FeedForward({ 0.85, 0.99 });
BackPropagate({ 0.0 });
cout << "Result : " << layers[lastLayerId][0] << "t(error = " << global_error << endl;
};

double Getglobal_error(void) const {
return global_error;
};
void Print(void) {
for (unsigned l = 0; l < nbLayers; l++) {
cout << "Layer " << l << " : " << endl;
for (unsigned n = 0; n < layers[l].size(); n++) {
cout << "t" << "Neuron " << l << "-" << n << " : ";
cout << "(" << layers[l][n] << ")" << endl;
for (unsigned w = 0; w < weights[l][n].size(); w++) {
cout << "tt" << l << "-" << n << " -> " << (l+1) << "-" << w << " | weight=" << weights[l][n][w] << endl;
}
}
}
}
private:
void InitialiseWeights(vector<double> &weights_) {
for (unsigned w = 0; w < weights_.size(); w++) {
weights_[w] = ((double) rand() / (RAND_MAX));
}
}
double global_error;
double learning_rate;
double alpha;
unsigned nbLayers;
unsigned lastLayerId;
vector<func> activationFns;
vector< vector<double> > gradients; // [layerNum][neuronNum] error gradients of the neurons
vector< vector<double> > layers; // [layerNum][neuronNum]
vector< vector<double> > neuron_errors; // [layerNum][neuronNum] neuron errors
Matrix3d weights; // [layer][neuron][outputWeight]
Matrix3d wdeltas; // [layer][neuron][outputWeight]
};


double transfer_tanh(double x, bool isDerivative) {
if (isDerivative) {
return 1.0 - (tanh(x) * tanh(x));
}
return tanh(x);
}
double transfer_sigmoid(double x, bool isDerivative) {
if (isDerivative) {
return x * (1.0 - x);
}
return 1.0 / (1.0 + exp(-x));
}
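
// NB on conventions: transfer_tanh computes its derivative from the raw pre-activation x
// (1 - tanh(x)^2), while transfer_sigmoid computes it from the already-activated output
// (x * (1 - x)). In BackPropagate I always pass the neuron's activated output, which
// matches the sigmoid form (only the sigmoid is used in main below).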

int main () {
vector<unsigned> topo = { 2, 2, 1 };
vector<func> funcs = { transfer_sigmoid, transfer_sigmoid, transfer_sigmoid };
Net mynet(topo, funcs);

/*
mynet.FeedForward({ 1.0, 0.0 });
mynet.BackPropagate({ 1.0 });
mynet.Print();
mynet.FeedForward({ 1.0, 0.0 });
mynet.BackPropagate({ 1.0 });
mynet.Print();
*/
mynet.Train();
}
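
In case it helps narrow things down, here is a minimal standalone sketch (separate from the class above, same sigmoid convention) that checks the derivative convention I use: the derivative is evaluated on the already-activated output y, i.e. y * (1 - y), and compared against a centred finite difference of the sigmoid itself.

#include <cmath>
#include <iostream>

// Same convention as in the class: when isDerivative is true, the argument is the
// neuron's *activated* output y = sigmoid(x), so the derivative is y * (1 - y).
double transfer_sigmoid(double v, bool isDerivative) {
    if (isDerivative) {
        return v * (1.0 - v);
    }
    return 1.0 / (1.0 + std::exp(-v));
}

int main() {
    const double x = 0.3;   // arbitrary pre-activation value
    const double h = 1e-6;  // finite-difference step
    const double y = transfer_sigmoid(x, false);
    const double analytic = transfer_sigmoid(y, true); // note: y is passed, not x
    const double numeric = (transfer_sigmoid(x + h, false) - transfer_sigmoid(x - h, false)) / (2.0 * h);
    std::cout << "analytic = " << analytic << ", numeric = " << numeric << std::endl; // the two should agree closely
}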

Latest update: my understanding of the maths behind backprop was lacking. Thanks to this resource: https://pabloinsente.github.io/the-multilayer-perceptron, I came up with this backpropagation method:

void BackPropagate(const vector<double> &targets) {
assert(targets.size() == layers[lastLayerId].size());
global_error = 0.0;
for (unsigned l = lastLayerId; l < nbLayers; --l) {
for (unsigned n = 0; n < layers[l].size(); n++) {
neuron_errors[l][n] = 0.0;
if (l == lastLayerId) { // output layer
global_error += (targets[n] - layers[lastLayerId][n]) * (targets[n] - layers[lastLayerId][n]);
neuron_errors[lastLayerId][n] = (targets[n] - layers[lastLayerId][n]) * activationFns[lastLayerId](layers[lastLayerId][n], true);
continue;
}
for (unsigned m = 0; m < layers[l + 1].size(); m++) {
double neuron_output = (l == 0) ? inputs[n] : layers[l][n];
double delta = learning_rate * (neuron_errors[l + 1][m] * neuron_output);
neuron_errors[l][n] += (neuron_errors[l + 1][m] * weights[l][n][m]) 
* activationFns[l](layers[l][n], true);
weights[l][n][m] += delta + (wdeltas[l][n][m] * alpha);
wdeltas[l][n][m] = delta;
}
}
}
}
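
For reference, the update rule I am trying to follow from that page is, as far as I understand it, the standard delta rule with momentum ($\eta$ is the learning rate, $\alpha$ the momentum factor, $y_i$ the output of neuron $i$, and $f'$ the derivative of the activation):

$$\delta_k = (t_k - y_k)\, f'(net_k) \qquad \text{(output layer)}$$
$$\delta_j = f'(net_j) \sum_k \delta_k\, w_{jk} \qquad \text{(hidden layers)}$$
$$\Delta w_{ij} = \eta\, \delta_j\, y_i + \alpha\, \Delta w_{ij}^{prev}, \qquad w_{ij} \leftarrow w_{ij} + \Delta w_{ij}$$

If I read this correctly, all the deltas should be computed first and the weights only updated afterwards, which may be where my code goes wrong.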
