Batch normalization does not save its moving mean and moving variance
When I train, I fit the training data perfectly (as expected). With batch normalization, training is also faster, which is also expected. However, when I run the same model on the same data immediately after a training step, but with is_training = False, it gives much worse results. On top of that, every time I look at moving_mean and moving_variance, they are still at their default values. They are never updated.
(u'main/y/y/moving_mean:0', array([ 0., 0.], dtype=float32))
(u'main/y/y/moving_variance:0', array([ 1., 1.], dtype=float32))
(u'main/y/y/moving_mean:0', array([ 0., 0.], dtype=float32))
(u'main/y/y/moving_variance:0', array([ 1., 1.], dtype=float32))
700 with generated means (training = true} 1.0 with saved means {training = false} 0.4911
I already have the update_ops code in place (shown below), but it does not seem to solve the problem. Setting updates_collections=None makes it work (a sketch of that variant follows the snippet below), but I have been told that is a suboptimal solution for performance reasons.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
if update_ops:
    updates = tf.group(*update_ops)
    cost = with_dependencies([updates], cost)
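For reference, this is the updates_collections=None variant that does work for me; I pass it through normalizer_params of the fully_connected call in ffnn_model below (my understanding is that batch_norm then applies the moving-average updates in place instead of deferring them to the UPDATE_OPS collection):

from tensorflow.contrib.layers import fully_connected, batch_norm

# Same call as in ffnn_model below, with updates_collections=None added.
output = fully_connected(inputs,
                         num_classes * 2,
                         activation_fn=None,
                         normalizer_fn=batch_norm,
                         normalizer_params={'is_training': is_training,
                                            'updates_collections': None,
                                            'reuse': reuse,
                                            'scope': 'y'},
                         reuse=reuse,
                         scope='y')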
My code is below.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected, softmax, batch_norm
from tensorflow.python.ops.control_flow_ops import with_dependencies
from tensorflow.python.training.adam import AdamOptimizer
batch_size = 100
input_size = 10
noise_strength = 4
class Data(object):
    def __init__(self, obs, gold):
        self.obs = obs
        self.gold = gold

def generate_data(batch_size, input_size, noise_strength):
    input = np.random.rand(batch_size, input_size) * noise_strength
    gold = np.random.randint(0, 2, (input_size, 1))
    input = input + gold
    return Data(input, gold)
def ffnn_model(inputs, num_classes, batch_size, is_training, reuse=False):
    output = fully_connected(inputs,
                             num_classes * 2,
                             activation_fn=None,
                             normalizer_fn=batch_norm,
                             normalizer_params={'is_training': is_training, 'reuse': reuse, 'scope': 'y'},
                             reuse=reuse,
                             scope='y')
    y = softmax(tf.reshape(output, [batch_size, num_classes, 2]))
    return y
#objective function
def objective_function(y, gold):
    indices = tf.stack([tf.range(tf.size(gold)), tf.reshape(gold, [-1])], axis=1)
    scores = tf.gather_nd(tf.reshape(y, [-1, 2]), indices=indices)
    # return tf.cast(indices,tf.float32),-tf.reduce_mean(tf.log(scores+1e-6))
    return -tf.reduce_mean(tf.log(scores + 1e-6))
def train_op(y, gold):
    cost = objective_function(y, gold)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
        print "yes to update_ops"
        print update_ops
        updates = tf.group(*update_ops)
        cost = with_dependencies([updates], cost)
    train_step = AdamOptimizer().minimize(cost)
    return train_step
def predictions_op(y):
    return tf.cast(tf.argmax(y, axis=len(y.get_shape()) - 1), dtype=tf.int32)

def accuracy_op(y, gold):
    return tf.reduce_mean(tf.cast(tf.equal(predictions_op(y), gold), tf.float32))
def model(batch_size, num_classes, input_size, scope, reuse):
    with tf.variable_scope(scope) as m:
        if reuse:
            m.reuse_variables()
        is_training = tf.placeholder(tf.bool)
        x = tf.placeholder(tf.float32, shape=[batch_size, input_size])
        y = ffnn_model(x, num_classes=1, batch_size=batch_size, is_training=is_training, reuse=reuse)
        g = tf.placeholder(tf.int32, shape=[batch_size, num_classes])
        return g, x, y, is_training
def train(batch_size=100, input_size=100):
    scope = "main"
    g, x, y, is_training = model(batch_size, 1, input_size, scope, reuse=None)
    with tf.Session() as sess:
        train_step, accuracy, predictions = train_op(y, g), accuracy_op(y, g), predictions_op(y)
        cost_op = objective_function(y, g)
        init_op = tf.group(tf.local_variables_initializer(), tf.global_variables_initializer())
        sess.run(init_op)
        accs = []
        accs2 = []
        costs = []
        for i in range(10000):
            data = generate_data(batch_size, input_size, noise_strength)
            _, acc, cost = sess.run([train_step, accuracy, cost_op], feed_dict={x: data.obs, g: data.gold, is_training: True})
            acc2 = sess.run(accuracy, feed_dict={x: data.obs, g: data.gold, is_training: False})
            accs.append(acc)
            accs2.append(acc2)
            costs.append(cost)
            if i % 100 == 0:
                # print scurrs
                print i, "with generated means (training = true}", np.mean(accs[-100:]), "with saved means {training = false}", np.mean(accs2[-100:])
                # print sess.run(predictions, feed_dict={x: data.obs, g: data.gold, is_training: False})
                vars = [var for var in tf.global_variables() if 'moving' in var.name]
                rv = sess.run(vars, {is_training: False})
                rt = sess.run(vars, {is_training: True})
                print "\t".join([str((v.name, a)) for a, v in zip(rv, vars)]), \
                    "\n", \
                    "\t".join([str((v.name, a)) for a, v in zip(rt, vars)])

if __name__ == "__main__":
    train()
Batch normalization creates operations that must be run in order to update the moving values. Namely, it also adds them to a specific collection, and if you use the tf.contrib.layers.optimize_loss function, it collects these ops for you and runs them whenever the training op is run.
To fix this, replace:
train_step = AdamOptimizer().minimize(cost)
with:
train_step = optimize_loss(loss, step, learning_rate, optimizer='ADAM')
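For completeness, here is a minimal sketch of how that could look in the question's train_op; the global_step variable, the learning rate value, and passing the optimizer class instead of a name string are my own choices for illustration, not taken from the question:

from tensorflow.contrib.layers import optimize_loss

def train_op(y, gold):
    cost = objective_function(y, gold)
    # optimize_loss expects a global step variable that it increments on each update.
    global_step = tf.Variable(0, trainable=False, name='global_step')
    # Besides applying the gradients, optimize_loss also picks up the ops in
    # tf.GraphKeys.UPDATE_OPS (the batch_norm moving-average updates) and runs them.
    return optimize_loss(cost,
                         global_step,
                         learning_rate=0.001,  # example value
                         optimizer=tf.train.AdamOptimizer)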