Neural network backpropagation not working



I coded a neural network in JavaScript and implemented the backpropagation algorithm described here. Here is the code (TypeScript):

/**
 * Net
 */

export class Net {
    private layers: Layer[] = [];
    private inputLayer: Layer;
    private outputLayer: Layer;
    public error: number = Infinity;
    private eta: number = 0.15;
    private alpha: number = 0.5;
    constructor(...topology: number[]) {
        topology.forEach((topologyLayer, iTL) => {
            var nextLayerNeuronNumber = topology[iTL + 1] || 0;
            this.layers.push(new Layer(topologyLayer, nextLayerNeuronNumber));
        });
        this.inputLayer = this.layers[0];
        this.outputLayer = this.layers[this.layers.length - 1];
    }
    public loadWeights(weights) {
        /*
        [
            [Layer
                [Node weights, ..., ...]
            ]
        ]
        */
        for (var iL = 0; iL < weights.length; iL++) {
            var neuronWeights = weights[iL];
            var layer = this.layers[iL];
            for (var iN = 0; iN < neuronWeights.length; iN++) {
                // Neuron
                var connections = neuronWeights[iN];
                for (var iC = 0; iC < connections.length; iC++) {
                    var connection = connections[iC];
                    this.layer(iL).neuron(iN).setWeights(iC, connection);
                }
            }
        }
    }

    public train(data: number[][], iterations = 2000) {
        var inputs = this.inputLayer.neurons.length - 1;
        for (var ite = 0; ite < iterations; ite++) {
            data.forEach(node => {
                var inputData = [];
                var outputData = [];
                for (var i = 0; i < node.length; i++) {
                    if (i < inputs) {
                        inputData.push(node[i])
                    } else {
                        outputData.push(node[i])
                    }
                }
                this.feedForward(...inputData);
                this.backProb(...outputData);

            });

        }

        return this.calcDataError(data);
    }
    private calcDataError(data){
        var overallDataErrorSum = 0;
        var inputs = this.inputLayer.neurons.length - 1;
        data.forEach(node => {
            var outputData = node.splice(inputs);
            var inputData = node;
            this.feedForward(...inputData);
            overallDataErrorSum += this.getNetError(outputData);
        });
        overallDataErrorSum /= data.length;
        return overallDataErrorSum;
    }
    public saveWeights() {
        // Ignore output layer
        var ret = []
        for (var iL = 0; iL < this.layers.length - 1; iL++) {
            var layer = this.layers[iL];
            var layer_ret = [];
            layer.neurons.forEach(neuron => {
                layer_ret.push(neuron.connections.map(c => c.weight));
            });
            ret.push(layer_ret);
        }
        return ret;
    }
    feedForward(...inputs: number[]) {
        if (inputs.length != this.inputLayer.neurons.length - 1) return false;
        this.inputLayer.neurons.forEach((neuron, i) => {
            if (!neuron.isBias) {
                neuron.output(inputs[i]);
            }
        });
        this.layers.forEach((layer, i) => {
            // Skip Input Layer
            if (i > 0) {
                var prevLayer = this.layers[i - 1]
                layer.neurons.forEach(neuron => {
                    neuron.calcOutput(prevLayer);
                });
            }
        });
    }
    public getNetError(targetVals) {
        // Calc delta error of outputs
        var deltas = [];
        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
                deltas.push(neuron.delta);
            }
        });
        deltas = deltas.map(d => Math.pow(d, 2));

        var sum = 0;
        deltas.forEach(d => sum += d);
        return sum / deltas.length;

    }
    backProb(...targetVals: number[]) {

        // Calc delta error of outputs
        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
            }
        });
        // Backprop delta error through hidden layers
        for (var iL = this.layers.length - 2; iL > 0; iL--) {
            var layer = this.layers[iL];
            var nextLayer = this.layers[iL + 1]
            layer.neurons.forEach(neuron => {
                neuron.calcHiddenDelta(nextLayer);
            });
        }
        // Update weights 
        for (var iL = 1; iL < this.layers.length; iL++) {
            var layer = this.layers[iL];
            var prevLayer = this.layers[iL - 1];
            layer.neurons.forEach(neuron => {
                if (!neuron.isBias) {
                    neuron.updateWeights(prevLayer, this.eta);
                }
            });
        }
        this.error = this.getNetError(targetVals);
        return this.error;
    }
    getOutputs(...inputs: number[]) {
        var ret = [];
        this.outputLayer.neurons.forEach(neuron => {
            if (!neuron.isBias) {
                ret.push(neuron.output())
            }
        });
        return ret;
    }
    getResults(...inputs: number[]) {
        this.feedForward(...inputs)
        return this.getOutputs();
    }
    layer(i) {
        return this.layers[i];
    }
}
/**
 * Layer
 */
class Layer {
    public neurons: Neuron[] = [];
    constructor(neuronNumber: number, nextLayerNeuronNumber: number) {
        for (var iN = 0; iN < neuronNumber + 1; iN++) {
            // +1 for bias neuron, which is last
            if (iN < neuronNumber) {
                // Create normal neuron
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, false));
            } else {
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, true));
            }
        }
    }
    neuron(i) {
        return this.neurons[i];
    }
    bias() {
        return this.neurons[this.neurons.length - 1];
    }
}
/**
 * Neuron
 */
class Neuron {
    public connections: Connection[] = [];
    private outputVal: number;
    public delta: number;
    constructor(outputsTo: number, private index, public isBias = false) {
        // Creates connections
        for (var c = 0; c < outputsTo; c++) {
            this.connections.push(new Connection());
        }
        this.outputVal = isBias ? 1 : 0;
    }
    calcOutput(prevLayer: Layer) {
        // Only calcOutput when neuron is not a bias neuron
        if (!this.isBias) {
            var sum = 0;
            prevLayer.neurons.forEach(prevLayerNeuron => {
                sum += prevLayerNeuron.output() * prevLayerNeuron.getWeights(this.index).weight;
            });
            this.output(this.activationFunction(sum));
        }
    }
    private activationFunction(x) {
        //return Math.tanh(x);
        return 1 / (1 + Math.exp(-x))
        //return x;
    };
    private activationFunctionDerivative(x) {
        // Small approximation of tanh derivative
        //return 1 - x * x
        // Sigmoid
        var s = this.activationFunction(x);
        return s * (1 - s);
        // With general derivative formula where h = 1e-10
        /*var h = 0.0001;
        var dx = ((this.activationFunction(x + h) - this.activationFunction(x))/h)
        return dx;*/
        //return 1
    };
    // Backprop // Todo // Understand

    public calcOutputDelta(targetVal) {
        // Bias output neurons do not have delta error
        if (!this.isBias) {
            this.delta = targetVal - this.output();
        }
    }
    public calcHiddenDelta(nextLayer: Layer) {
        var sum = 0;
        // Go through all neurons of next layer excluding bias
        nextLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                sum += neuron.delta * this.getWeights(iN).weight;
            }
        });
        this.delta = sum;
    }
    public updateWeights(prevLayer: Layer, eta: number) {
        prevLayer.neurons.forEach((neuron, iN) => {
            var weight = neuron.getWeights(this.index).weight;
            var newWeight =
                weight + // old weight
                eta *   // learning rate
                this.delta * // delta error
                this.activationFunctionDerivative(neuron.output())
            neuron.getWeights(this.index).weight = newWeight;
        });

    }

    // Backprop end
    output(s?) {
        if (s && !this.isBias) {
            this.outputVal = s;
            return this.outputVal;
        } else {
            return this.outputVal;
        }
    }
    getWeights(i) {
        return this.connections[i];
    }
    setWeights(i, s) {
        return this.connections[i].weight = s;
    }
}
/**
 * Connection
 */
class Connection {
    public weight: number;
    public deltaWeight: number;
    constructor() {
        this.weight = Math.random();
        this.deltaWeight = 0;
    }
}

When training it on just a single set of data, it works fine. (Example from here)

import {Net} from './ml';
var myNet = new Net(2, 2, 2);

var weights = [
    [
        [0.15, 0.25],
        [0.20, 0.30],
        [0.35, 0.35]
    ],
    [
        [0.40, 0.50],
        [0.45, 0.55],
        [0.60, 0.60]
    ]
];
// Just loads the weights given in the example
myNet.loadWeights(weights)
var error = myNet.train([[0.05, 0.10, 0.01, 0.99]]);
console.log('Error: ', error);
console.log(myNet.getResults(0.05, 0.10));

The console prints:

Error:  0.0000020735174706210714
[ 0.011556397089327321, 0.9886867357304885 ]

Basically, that looks good, right?

Then I wanted to teach the network the XOR problem:

import {Net} from './ml';
var myNet = new Net(2, 3, 1);

var trainingData = [
    [0, 0, 0],
    [1, 0, 1],
    [0, 1, 1],
    [1, 1, 0]
]
var error = myNet.train(trainingData)
console.log('Error: ', error);
console.log('Input: 0, 0: ', myNet.getResults(0, 0));
console.log('Input: 1, 0: ', myNet.getResults(1, 0));

And here the network fails:

Error:  0.2500007370167383
Input: 0, 0:  [ 0.5008584967899313 ]
Input: 1, 0:  [ 0.5008584967899313 ]

What am I doing wrong?

First, run a gradient check on the whole batch (i.e., on the function that computes the gradient over the batch), if you haven't done so already. That will make sure you know exactly where the problem lies.
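
A minimal sketch of such a finite-difference check in TypeScript (the loss and analyticGrad callbacks are hypothetical and would need to be wired up to your Net, e.g. by flattening all connection weights into one array):

// Finite-difference gradient check: compares the analytic gradient that
// backprop produces against a numerical estimate of dLoss/dw per weight.
function gradientCheck(
    loss: (w: number[]) => number,           // evaluates the batch cost at w
    analyticGrad: (w: number[]) => number[], // gradient from the backward pass
    weights: number[],
    eps = 1e-5
): number {
    const analytic = analyticGrad(weights.slice());
    let maxRelError = 0;
    for (let i = 0; i < weights.length; i++) {
        const orig = weights[i];
        weights[i] = orig + eps;
        const lossPlus = loss(weights);
        weights[i] = orig - eps;
        const lossMinus = loss(weights);
        weights[i] = orig; // restore the weight
        const numeric = (lossPlus - lossMinus) / (2 * eps);
        const denom = Math.max(Math.abs(numeric) + Math.abs(analytic[i]), 1e-12);
        maxRelError = Math.max(maxRelError, Math.abs(numeric - analytic[i]) / denom);
    }
    // For a correct backward pass this is typically around 1e-7 or smaller.
    return maxRelError;
}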

If the gradient is computed incorrectly, then, given that your implementation works on a single data set, you are most likely mixing up some values in the backward pass.
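
For comparison, the textbook error terms for a logistic (sigmoid) network look like this, written here as standalone helper functions rather than in your class structure; note that both deltas are multiplied by the activation derivative:

// Standard backprop error terms for a network with the logistic activation.
// For the logistic function the derivative can be written in terms of the
// neuron's output o: f'(net) = o * (1 - o).
const sigmoidDerivativeFromOutput = (o: number) => o * (1 - o);

// Output neuron: delta_j = f'(net_j) * (target_j - output_j)
function outputDelta(output: number, target: number): number {
    return sigmoidDerivativeFromOutput(output) * (target - output);
}

// Hidden neuron: delta_j = f'(net_j) * sum_k(delta_k * w_jk), where k runs
// over the non-bias neurons of the next layer.
function hiddenDelta(output: number, nextDeltas: number[], weightsToNext: number[]): number {
    let sum = 0;
    for (let k = 0; k < nextDeltas.length; k++) {
        sum += nextDeltas[k] * weightsToNext[k];
    }
    return sigmoidDerivativeFromOutput(output) * sum;
}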

If the gradient is computed correctly, then there is a bug in your update function.
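
For reference, the plain stochastic-gradient-descent (delta-rule) update for one connection is sketched below; out_i is the output of the upstream neuron, and delta_j is assumed to already contain the activation derivative, as in the error terms above:

// Delta-rule weight update for the connection from neuron i in the previous
// layer to neuron j in the current layer: w_ij += eta * delta_j * out_i.
// With momentum (the `alpha` field that Net declares but never uses), the
// step would be eta * delta_j * out_i + alpha * previousDeltaWeight.
function updatedWeight(wij: number, eta: number, deltaJ: number, outI: number): number {
    return wij + eta * deltaJ * outI;
}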

A working implementation of neural network backpropagation in JavaScript can be found here.

Here is a snippet of the trainStep function that uses backpropagation:

function trainStepBatch(details){
    // We compute the forward pass for each training sample
    // in the batch and store the results in the batch array.
    var batch=[];
    var ks=[];
    for(var a=0;a<details.data.in.length;a++){
        var results=[];
        var k=1;
        results[0]={output:details.data.in[a]};
        for(var i=1;i<this.layers.length;i++){
            results[i]=layers[this.layers[i].type].evalForGrad(this.layers[i],results[i-1].output);
            k++;
        }
        batch[a]=results;
        ks[a]=k;
    }
    // We compute the backward pass, starting with the
    // derivative of the cost function given the output.
    var grad=[];
    for(i in batch)grad[i]={grad:costs[details.cost].df(batch[i][ks[i]-1].output,details.data.out[i])};
    // For each layer we compute the backward pass
    // on the results of all forward passes at that layer.
    for(var i=this.layers.length-1;i>0;i--){
        var grads=[];
        var test=true;
        for(a in batch){
            grads[a]=layers[this.layers[i].type].grad(this.layers[i],batch[a][i],batch[a][i-1],grad[a]);
            if(grads[a]==null)test=false;
            else grads[a].layer=i;
        }
        // We perform the update.
        if(test)stepBatch(this.layers[i].par,grads,details.stepSize);
    }
}

And for the stepBatch function:

function stepBatch(params,grads,stepSize){
    for(i in params.w){
        for(j in params.w[i]){
            for(a in grads){
                params.w[i][j]-=stepSize*grads[a].dw[i][j];
            }
        }
    }
    for(i in params.b){
        for(a in grads){
            params.b[i]-=stepSize*grads[a].db[i];
        }
    }
}
