Julia神经网络代码与PyPy速度相同



我用Julia重写了一些原先用Python编写的神经网络代码。纯Python代码运行大约需要7秒,而Julia和PyPy代码都只需大约0.75秒

# Logistic sigmoid: 1 / (1 + e^(-z)).
sigmoid(z::Float64) = 1/(1 + exp(-z))

# Derivative of the logistic sigmoid at z, computed as s * (1 - s) with
# s = sigmoid(z). The sigmoid is evaluated once and reused, so exp() runs a
# single time per call instead of twice.
function sigmoidPrime(z::Float64)
    s = sigmoid(z)
    return s * (1 - s)
end
### Types ###
# Common supertype of every node kind (Node, InputNode, BiasNode). Edge stores
# its endpoints as AbstractNode so that it can connect any of them.
abstract AbstractNode
# Directed, weighted connection between two nodes.
#   source, target - the nodes the edge leaves from / arrives at
#   weight         - connection weight, initialised with a standard normal draw
#   derivative     - gradient accumulated by propagateDerivatives; reset to 0.0
#                    by updateWeights after the weight has been adjusted
#   augmented      - "already visited" flag used by the recursive traversals
# NOTE(review): source/target are declared with the abstract type AbstractNode,
# so accesses such as d.source.activation are resolved at run time. This is
# presumably a hot spot; confirm with the profiler before restructuring.
type Edge
    source::AbstractNode
    target::AbstractNode
    weight::Float64
    derivative::Float64
    augmented::Bool
    # randn() yields a single Float64 without allocating a temporary array.
    Edge(source::AbstractNode, target::AbstractNode) = new(source, target, randn(), 0.0, false)
end
# Sigmoid unit; Network uses it for both the hidden and the output layer.
#   incomingEdges / outgoingEdges - edges ending at / starting from this node
#   activation, activationPrime   - cached values; -1.0 marks "not computed"
type Node <: AbstractNode
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64
    activationPrime::Float64
    # Edge[] creates correctly typed empty vectors directly instead of relying
    # on an untyped [] being converted to Vector{Edge} on every construction.
    Node() = new(Edge[], Edge[], -1.0, -1.0)
end
# Input unit: its activation is read from position `index` of the input vector.
#   index                         - position of this node's value in the input
#   incomingEdges / outgoingEdges - edge lists; Network only fills outgoingEdges
#   activation                    - last value loaded by evaluate; starts at -1.0
type InputNode <: AbstractNode
    index::Int
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64
    # Edge[] creates correctly typed empty vectors directly instead of relying
    # on an untyped [] being converted to Vector{Edge} on every construction.
    InputNode(index::Int) = new(index, Edge[], Edge[], -1.0)
end
# Bias unit: a node whose activation is the constant 1.0.
#   incomingEdges / outgoingEdges - edge lists; Network only fills outgoingEdges
#   activation                    - always 1.0
type BiasNode <: AbstractNode
    incomingEdges::Vector{Edge}
    outgoingEdges::Vector{Edge}
    activation::Float64
    # Edge[] creates correctly typed empty vectors directly instead of relying
    # on an untyped [] being converted to Vector{Edge} on every construction.
    BiasNode() = new(Edge[], Edge[], 1.0)
end
# Fully connected three-layer feed-forward network.
#   sizes - node counts: sizes[1] inputs, sizes[2] hidden, sizes[3] outputs
#   bias  - when true, a single BiasNode is wired into every hidden node
type Network
    inputNodes::Vector{InputNode}
    hiddenNodes::Vector{Node}
    outputNodes::Vector{Node}
    function Network(sizes::Array, bias::Bool=true)
        inputs = [InputNode(k) for k in 1:sizes[1]]
        hidden = [Node() for k in 1:sizes[2]]
        outputs = [Node() for k in 1:sizes[3]]

        # Input layer -> hidden layer. Every edge is registered on both of its
        # endpoints; inputs vary slowest, so edges are created in the same
        # order as a nested loop would create them.
        for src in inputs, dst in hidden
            link = Edge(src, dst)
            push!(src.outgoingEdges, link)
            push!(dst.incomingEdges, link)
        end

        # Hidden layer -> output layer.
        for src in hidden, dst in outputs
            link = Edge(src, dst)
            push!(src.outgoingEdges, link)
            push!(dst.incomingEdges, link)
        end

        # Optional bias unit; it feeds the hidden layer only.
        if bias
            unit = BiasNode()
            for dst in hidden
                link = Edge(unit, dst)
                push!(unit.outgoingEdges, link)
                push!(dst.incomingEdges, link)
            end
        end

        return new(inputs, hidden, outputs)
    end
end

### Methods ###
# Return the activation of `obj` for `inputVector`.
# The value is computed recursively from the sources of the incoming edges on
# first use and cached on the node together with its derivative. An activation
# above -0.5 means a cached value is present (clear() stores the sentinel -1.0).
function evaluate(obj::Node, inputVector::Array)
    if obj.activation > -0.5
        return obj.activation
    end
    # Accumulate in a plain loop so that no temporary array is allocated per
    # call; a node without incoming edges simply gets a weighted sum of 0.0.
    weightedSum = 0.0
    for d in obj.incomingEdges
        weightedSum += d.weight * evaluate(d.source, inputVector)
    end
    obj.activation = sigmoid(weightedSum)
    obj.activationPrime = sigmoidPrime(weightedSum)
    return obj.activation
end
# Load this node's component of `inputVector` (selected by obj.index) into
# obj.activation and return it.
function evaluate(obj::InputNode, inputVector::Array)
    component = inputVector[obj.index]
    obj.activation = component
    # Return the stored field, not `component`: the field holds the value
    # after conversion to Float64.
    return obj.activation
end
# A bias node always evaluates to 1.0; `inputVector` is not read.
evaluate(obj::BiasNode, inputVector::Array) = (obj.activation = 1.0; obj.activation)
# Apply one gradient-descent step to every edge reachable backwards from `obj`.
# Each edge not yet marked as visited is marked, has its weight moved against
# its accumulated derivative (scaled by `learningRate`), and has the derivative
# reset to 0.0 once the walk through its source node has returned.
function updateWeights(obj::AbstractNode, learningRate::Float64)
    for edge in obj.incomingEdges
        if edge.augmented
            continue  # already handled during this pass
        end
        edge.augmented = true
        step = learningRate * edge.derivative
        edge.weight -= step
        updateWeights(edge.source, learningRate)
        edge.derivative = 0.0
    end
end
# Run a forward pass: evaluate every output node for `inputVector`, then clear
# the cached state reachable from the output nodes. Returns the activations of
# the output nodes, in the order of obj.outputNodes.
function compute(obj::Network, inputVector::Array)
    outputs = obj.outputNodes
    result = zeros(length(outputs))
    for (i, node) in enumerate(outputs)
        result[i] = evaluate(node, inputVector)
    end
    for node in outputs
        clear(node)
    end
    return result
end
# Reset the cached state reachable backwards from `obj`: the node's activation
# and activationPrime go back to the sentinel -1.0 and every incoming edge
# loses its visited flag, recursively through the edge sources.
function clear(obj::AbstractNode)
    # Nodes without incoming edges are left untouched. In this file those are
    # InputNode and BiasNode, which have no activationPrime field to reset.
    if isempty(obj.incomingEdges)
        return
    end
    # Reset the node once, rather than once per incoming edge.
    obj.activation = -1.0
    obj.activationPrime = -1.0
    for d in obj.incomingEdges
        d.augmented = false
        clear(d.source)
    end
end
# Walk backwards from `obj`, adding each unvisited incoming edge's share of
# `error` to that edge's derivative, then recurse into the edge's source with
# the error scaled by the edge weight and this node's activationPrime.
# NOTE(review): edges are skipped once marked, so a node reached a second time
# (e.g. a hidden node shared by two output nodes) does not pass the second
# error on to its own incoming edges. Confirm this is intended for networks
# with more than one output.
function propagateDerivatives(obj::AbstractNode, error::Float64)
    for edge in obj.incomingEdges
        if edge.augmented
            continue  # already visited during this backward pass
        end
        edge.augmented = true
        # Read inside the loop on purpose: nodes without incoming edges never
        # get here, and some of them have no activationPrime field.
        slope = obj.activationPrime
        edge.derivative += error * slope * edge.source.activation
        propagateDerivatives(edge.source, error * edge.weight * slope)
    end
end
# Accumulate the derivatives for one labeled example.
#   example[1] - input vector, example[2] - expected outputs
# Forward-evaluates all output nodes, propagates (output - expected) back from
# each output node, then clears the cached activations and visited flags.
# Weights are not changed here; see updateWeights.
function backpropagation(obj::Network, example::Array)
    outputs = obj.outputNodes
    predicted = zeros(length(outputs))
    for (i, node) in enumerate(outputs)
        predicted[i] = evaluate(node, example[1])
    end
    residuals = predicted - example[2]
    for (node, residual) in zip(outputs, residuals)
        propagateDerivatives(node, residual)
    end
    for node in outputs
        clear(node)
    end
end
# Batch gradient descent.
#   labeledExamples - collection of examples, each passed to backpropagation
#   learningRate    - step size handed to updateWeights
#   iterations      - number of passes over labeledExamples
# Each pass accumulates derivatives over all examples, applies one weight
# update, and then clears the visited flags set by the update.
function train(obj::Network, labeledExamples::Array, learningRate::Float64=0.7, iterations::Int=10000)
    epoch = 1
    while epoch <= iterations
        for sample in labeledExamples
            backpropagation(obj, sample)
        end
        # Update all output nodes first, and only then clear: the two passes
        # are kept separate on purpose.
        for out in obj.outputNodes
            updateWeights(out, learningRate)
        end
        for out in obj.outputNodes
            clear(out)
        end
        epoch += 1
    end
end

# Benchmark driver: eight labeled examples, each an (input bits, expected
# output) pair, used to train a 3-4-1 network.
labeledExamples = Array[Array[[0,0,0], [0]],
                        Array[[0,0,1], [1]],
                        Array[[0,1,0], [0]],
                        Array[[0,1,1], [1]],
                        Array[[1,0,0], [0]],
                        Array[[1,0,1], [1]],
                        Array[[1,1,0], [1]],
                        Array[[1,1,1], [0]]];
neuralnetwork = Network([3,4,1])
# Warm-up: one iteration on a throwaway network, so that JIT compilation of
# train() and everything it calls happens before the timed run. The throwaway
# network is created after `neuralnetwork`, so the latter's initial weights
# come from the same random draws as before.
train(Network([3,4,1]), labeledExamples, 0.7, 1)
@time train(neuralnetwork, labeledExamples)

我没有提供Python代码,因为我不确定它是否必要(然而,如果你真的想要的话,我会的),我当然不希望一个人花很多时间来完全理解这段代码,我只是在寻找与适当的Julia实现相关的明显/系统的低效率(而不是算法本身)。

我这样做的动机是,用这种方式设计一个神经网络比向量化算法和使用Numpy要自然得多,但当然,在Python中,所有围绕类结构的循环和跳跃都很慢。

因此,把它移植到Julia似乎是很自然的选择,看看能否获得明显的速度提升。虽然比纯Python快一个数量级已经很不错,但我真正希望的是比PyPy也快一个数量级(我在网上找到的一些基准测试似乎表明这是一个合理的期望)。

注意:这必须在Julia 0.3中运行才能工作

这看起来更像是一次代码审查请求而不是一个问题(没有任何问号),但我还是尝试回答一下。唯一明显的潜在性能问题是,您在evaluate、compute和backpropagation中通过推导式分配了数组。把evaluate中的加权和计算改写成for循环会高效得多。对于另外两个方法,您可能希望使用预分配的数组而不是推导式。您可以使用Julia的内置分析器来查看代码在哪里花费了大部分时间——这可能会揭示一些不明显的热点,以便您进一步优化。

关于与PyPy的比较,很有可能Julia和PyPy在这段代码上都做得很好——达到或接近C的性能——在这种情况下,您不应期望Julia比PyPy快得多,因为两者都已接近最优。与C实现的性能进行比较会非常有参考价值,因为它能显示Julia和PyPy还有多少性能没有发挥出来。幸运的是,这段代码看起来很容易移植到C语言。

最新更新