Batch normalization does not save its moving mean and moving variance
When I train, I fit the training data perfectly (as expected). With batch normalization, training is also faster, which is also expected. However, when I run the same model on the same data immediately after a training step, but with is_training = False, it gives much worse results. On top of that, every time I look at moving_mean and moving_variance, they are still at their default values. They are never updated.
(u'main/y/y/moving_mean:0', array([ 0., 0.], dtype=float32))
(u'main/y/y/moving_variance:0', array([ 1., 1.], dtype=float32))
(u'main/y/y/moving_mean:0', array([ 0., 0.], dtype=float32))
(u'main/y/y/moving_variance:0', array([ 1., 1.], dtype=float32))
700 with generated means (training = true} 1.0 with saved means {training = false} 0.4911
I already have the update_ops code in place (shown below), but it does not seem to solve the problem. Setting updates_collections=None makes it work (a sketch of that variant follows the snippet below), but I have been told that is a suboptimal solution for performance reasons.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
if update_ops:
    updates = tf.group(*update_ops)
    cost = with_dependencies([updates], cost)
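For reference, this is the updates_collections=None variant that does work for me; I pass it through normalizer_params of the fully_connected call in ffnn_model below (my understanding is that batch_norm then applies the moving-average updates in place instead of deferring them to the UPDATE_OPS collection):

from tensorflow.contrib.layers import fully_connected, batch_norm

# Same call as in ffnn_model below, with updates_collections=None added.
output = fully_connected(inputs,
                         num_classes * 2,
                         activation_fn=None,
                         normalizer_fn=batch_norm,
                         normalizer_params={'is_training': is_training,
                                            'updates_collections': None,
                                            'reuse': reuse,
                                            'scope': 'y'},
                         reuse=reuse,
                         scope='y')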
My code is below.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected, softmax, batch_norm
from tensorflow.python.ops.control_flow_ops import with_dependencies
from tensorflow.python.training.adam import AdamOptimizer
batch_size = 100
input_size = 10
noise_strength = 4
class Data(object):
    def __init__(self, obs, gold):
        self.obs = obs
        self.gold = gold

def generate_data(batch_size, input_size, noise_strength):
    input = np.random.rand(batch_size, input_size) * noise_strength
    gold = np.random.randint(0, 2, (input_size, 1))
    input = input + gold
    return Data(input, gold)
def ffnn_model(inputs, num_classes, batch_size, is_training, reuse=False):
    output = fully_connected(inputs,
                             num_classes * 2,
                             activation_fn=None,
                             normalizer_fn=batch_norm,
                             normalizer_params={'is_training': is_training, 'reuse': reuse, 'scope': 'y'},
                             reuse=reuse,
                             scope='y')
    y = softmax(tf.reshape(output, [batch_size, num_classes, 2]))
    return y
#objective function
def objective_function(y, gold):
    indices = tf.stack([tf.range(tf.size(gold)), tf.reshape(gold, [-1])], axis=1)
    scores = tf.gather_nd(tf.reshape(y, [-1, 2]), indices=indices)
    # return tf.cast(indices,tf.float32),-tf.reduce_mean(tf.log(scores+1e-6))
    return -tf.reduce_mean(tf.log(scores + 1e-6))
def train_op(y, gold):
    cost = objective_function(y, gold)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        print "yes to update_ops"
        print update_ops
        updates = tf.group(*update_ops)
        cost = with_dependencies([updates], cost)
    train_step = AdamOptimizer().minimize(cost)
    return train_step
def predictions_op(y):
    return tf.cast(tf.argmax(y, axis=len(y.get_shape()) - 1), dtype=tf.int32)

def accuracy_op(y, gold):
    return tf.reduce_mean(tf.cast(tf.equal(predictions_op(y), gold), tf.float32))
def model(batch_size, num_classes, input_size, scope, reuse):
    with tf.variable_scope(scope) as m:
        if reuse:
            m.reuse_variables()
        is_training = tf.placeholder(tf.bool)
        x = tf.placeholder(tf.float32, shape=[batch_size, input_size])
        y = ffnn_model(x, num_classes=1, batch_size=batch_size, is_training=is_training, reuse=reuse)
        g = tf.placeholder(tf.int32, shape=[batch_size, num_classes])
        return g, x, y, is_training
def train(batch_size=100, input_size=100):
    scope = "main"
    g, x, y, is_training = model(batch_size, 1, input_size, scope, reuse=None)
    with tf.Session() as sess:
        train_step, accuracy, predictions = train_op(y, g), accuracy_op(y, g), predictions_op(y)
        cost_op = objective_function(y, g)
        init_op = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())
        sess.run(init_op)
        accs = []
        accs2 = []
        costs = []
        for i in range(10000):
            data = generate_data(batch_size, input_size, noise_strength)
            _, acc, cost = sess.run([train_step, accuracy, cost_op], feed_dict={x: data.obs, g: data.gold, is_training: True})
            acc2 = sess.run(accuracy, feed_dict={x: data.obs, g: data.gold, is_training: False})
            accs.append(acc)
            accs2.append(acc2)
            costs.append(cost)
            if i % 100 == 0:
                # print scurrs
                print i, "with generated means (training = true}", np.mean(accs[-100:]), "with saved means {training = false}", np.mean(accs2[-100:])
                # print sess.run(predictions, feed_dict={x: data.obs, g: data.gold, is_training: False})
                vars = [var for var in tf.global_variables() if 'moving' in var.name]
                rv = sess.run(vars, {is_training: False})
                rt = sess.run(vars, {is_training: True})
                print "\t".join([str((v.name, a)) for a, v in zip(rv, vars)]), \
                    "\n", \
                    "\t".join([str((v.name, a)) for a, v in zip(rt, vars)])

if __name__ == "__main__":
    train()
Batch normalization creates operations that must be run in order to update the moving values. Namely, it also adds them to a specific collection, and if you use the tf.contrib.layers.optimize_loss function, it collects these ops for you and runs them whenever the training op is run.
To fix this, replace:
train_step = AdamOptimizer().minimize(cost)
with:
train_step = optimize_loss(loss, step, learning_rate, optimizer='ADAM')
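For completeness, here is a minimal sketch of how that could look in the question's train_op; the global_step variable, the learning rate value, and passing the optimizer class instead of a name string are my own choices for illustration, not taken from the question:

from tensorflow.contrib.layers import optimize_loss

def train_op(y, gold):
    cost = objective_function(y, gold)
    # optimize_loss expects a global step variable that it increments on each update.
    global_step = tf.Variable(0, trainable=False, name='global_step')
    # Besides applying the gradients, optimize_loss also picks up the ops in
    # tf.GraphKeys.UPDATE_OPS (the batch_norm moving-average updates) and runs them.
    return optimize_loss(cost,
                         global_step,
                         learning_rate=0.001,  # example value
                         optimizer=tf.train.AdamOptimizer)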