我正在尝试自己实现批归一化(batch normalization)的代码,为此用下面的代码做了测试。为了跟踪在线(滑动)平均的均值和方差,我把它们作为参数传入 getsta() 函数。但是,我发现 "avg_mean" 和 "avg_variance" 的名称发生了变化。虽然我可以事后手动强制改回它们的名称,但 TensorFlow 似乎把每个名称都当作一个单独的变量。
def getsta(x, avg_mean, avg_variance):
    """Compute the moments of ``x`` and decayed running statistics.

    Args:
        x: Tensor whose mean and variance are taken over axis 0.
        avg_mean: Current running mean (a variable or a tensor).
        avg_variance: Current running variance (a variable or a tensor).

    Returns:
        The tuple ``(x, mean, variance, avg_mean, avg_variance)``. The last
        two entries are the results of the arithmetic below, i.e. the
        objects produced by the ``-=`` expressions rather than the objects
        that were passed in.
    """
    print('getsta start...')
    decay = 0.9
    step = 1.0 - decay

    # These two variables are registered in the enclosing variable scope
    # (they show up as '<scope>/mean' and '<scope>/howvariance'), but the
    # Python names are rebound by tf.nn.moments right below, so they never
    # take part in the computation.
    mean = tf.get_variable(
        name='mean',
        shape=[1],
        dtype=tf.float32,
        initializer=tf.constant_initializer(value=0.0, dtype=tf.float32),
    )
    variance = tf.get_variable(
        name='howvariance',
        shape=[1],
        dtype=tf.float32,
        initializer=tf.constant_initializer(value=1.0, dtype=tf.float32),
    )

    # Disabled earlier attempt (originally kept inside a bare string literal):
    #
    # if (avg_mean == 0.0):
    #     avg_mean = tf.get_variable(
    #         'avg_mean', [1], tf.float32,
    #         initializer=tf.constant_initializer(0.0, tf.float32))
    # if (avg_variance == 0.0):
    #     avg_variance = tf.get_variable(
    #         'avg_variance', [1], tf.float32,
    #         initializer=tf.constant_initializer(0.0, tf.float32))

    mean, variance = tf.nn.moments(x, axes=[0], name='moments')

    # NOTE: the statement order and the ``-=`` operator are kept as they were
    # because the auto-generated op names (sub, mul, sub_1, ...) depend on
    # them. The result is a new graph tensor; the variable that was passed in
    # is not assigned to.
    avg_mean -= step * (avg_mean - mean)
    avg_variance -= step * (avg_variance - variance)

    return x, mean, variance, avg_mean, avg_variance
def train():
    """Build the running-statistics graph in scopes AAA and BBB, run and save it.

    Each scope creates two zero-initialised variables ('test1', 'test2'),
    feeds them through ``getsta`` twice (the second time with variable reuse
    switched on), and finally wraps the results in ``tf.identity`` ops named
    'avg_mean' / 'avg_variance'. The names are printed after every step, the
    tensors are evaluated in an interactive session, all global variables
    are printed, and a checkpoint is written.
    """

    def _zero_var(var_name):
        # One-element float32 variable, initialised to 0.0, created in
        # whatever variable scope is active at the call site.
        return tf.get_variable(
            name=var_name,
            shape=[1],
            dtype=tf.float32,
            initializer=tf.constant_initializer(value=0.0, dtype=tf.float32),
        )

    x1 = tf.constant([1, 2, 3, 4], dtype=tf.float32)
    x2 = tf.constant([5, 6, 7, 8], dtype=tf.float32)
    x3 = tf.constant([1, 3, 5, 7], dtype=tf.float32)
    x4 = tf.constant([4, 8, 12, 16], dtype=tf.float32)

    with tf.variable_scope("AAA") as scopeA:
        avg_mean1 = _zero_var('test1')
        avg_variance1 = _zero_var('test2')
        print([avg_mean1.name, avg_variance1.name])

        # From here on avg_mean1 / avg_variance1 are rebound to whatever
        # getsta returns, so they no longer refer to the variables above.
        y1, mean1, variance1, avg_mean1, avg_variance1 = getsta(
            x1, avg_mean1, avg_variance1)
        print([avg_mean1.name, avg_variance1.name])

        # Allow the second getsta call to reuse the variables it created.
        scopeA.reuse_variables()
        print([avg_mean1.name, avg_variance1.name])

        y1, mean1, variance1, avg_mean1, avg_variance1 = getsta(
            x2, avg_mean1, avg_variance1)
        print([avg_mean1.name, avg_variance1.name])

        # Give the final tensors fixed names via identity ops.
        avg_mean1 = tf.identity(avg_mean1, name='avg_mean')
        avg_variance1 = tf.identity(avg_variance1, name='avg_variance')
        print([avg_mean1.name, avg_variance1.name])

    with tf.variable_scope("BBB") as scopeB:
        avg_mean2 = _zero_var('test1')
        avg_variance2 = _zero_var('test2')
        print([avg_mean2.name, avg_variance2.name])

        y2, mean2, variance2, avg_mean2, avg_variance2 = getsta(
            x3, avg_mean2, avg_variance2)
        print([avg_mean2.name, avg_variance2.name])

        scopeB.reuse_variables()
        print([avg_mean2.name, avg_variance2.name])

        y2, mean2, variance2, avg_mean2, avg_variance2 = getsta(
            x4, avg_mean2, avg_variance2)
        print([avg_mean2.name, avg_variance2.name])

        avg_mean2 = tf.identity(avg_mean2, name='avg_mean')
        avg_variance2 = tf.identity(avg_variance2, name='avg_variance')
        print([avg_mean2.name, avg_variance2.name])

    saver = tf.train.Saver()
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    print(sess.run([y1, mean1, variance1, avg_mean1, avg_variance1]))
    print(sess.run([y2, mean2, variance2, avg_mean2, avg_variance2]))

    # Dump every global variable together with its current value.
    all_vars = tf.global_variables()
    var_values = sess.run(all_vars)
    for var, val in zip(all_vars, var_values):
        print(var.name, val)

    saver.save(sess, "/tmp/test_EMA/var_save.ckpt")
运行上面的 train() 函数后,我得到的输出为
['AAA/test1:0', 'AAA/test2:0']
getsta start...
['AAA/sub_1:0', 'AAA/sub_3:0']
['AAA/sub_1:0', 'AAA/sub_3:0']
getsta start...
['AAA/sub_5:0', 'AAA/sub_7:0']
['AAA/avg_mean:0', 'AAA/avg_variance:0']
['BBB/test1:0', 'BBB/test2:0']
getsta start...
['BBB/sub_1:0', 'BBB/sub_3:0']
['BBB/sub_1:0', 'BBB/sub_3:0']
getsta start...
['BBB/sub_5:0', 'BBB/sub_7:0']
['BBB/avg_mean:0', 'BBB/avg_variance:0']
[array([ 5., 6., 7., 8.], dtype=float32), 6.5, 1.25, array([ 0.875], dtype=float32), array([ 0.23750001], dtype=float32)]
[array([ 4., 8., 12., 16.], dtype=float32), 10.0, 20.0, array([ 1.36000001], dtype=float32), array([ 2.45000005], dtype=float32)]
AAA/test1:0 [ 0.]
AAA/test2:0 [ 0.]
AAA/mean:0 [ 0.]
AAA/howvariance:0 [ 1.]
BBB/test1:0 [ 0.]
BBB/test2:0 [ 0.]
BBB/mean:0 [ 0.]
BBB/howvariance:0 [ 1.]
我有几个问题:
- 有没有办法在不更改名称的情况下记录"avg_mean"?我想在加载模型时加载最终更新的值。
- 是否可以在 getsta() 函数中定义"avg_mean",同时仍然将其保留为静态变量,即它的初始值来自上次的调用,但并不总是从 0 开始。
- 当我打印出所有变量时,为什么输出的最后八行中,名称和值似乎都不正确?我希望的输出是:
AAA/avg_mean:0 [0.875]
AAA/avg_variance:0 [0.23750001]
BBB/avg_mean:0 [1.36000001]
BBB/avg_variance:0 [2.45000005]
提前感谢您的帮助!
--------------------更新--------------------
我修改了我的代码如下
def train():
    """Updated variant: keep 'avg_mean' / 'avg_variance' as named variables.

    Each scope (AAA, BBB) creates zero-initialised 'avg_mean' and
    'avg_variance' variables and passes them to ``getsta`` twice. Only three
    values are unpacked from ``getsta`` here, so the Python names keep
    pointing at the variables. The tensors and variables are then evaluated,
    every global variable is printed, and a checkpoint is written.
    """

    def _zero_var(var_name):
        # One-element float32 variable, initialised to 0.0, created in
        # whatever variable scope is active at the call site.
        return tf.get_variable(
            name=var_name,
            shape=[1],
            dtype=tf.float32,
            initializer=tf.constant_initializer(value=0.0, dtype=tf.float32),
        )

    x1 = tf.constant([1, 2, 3, 4], dtype=tf.float32)
    x2 = tf.constant([5, 6, 7, 8], dtype=tf.float32)
    x3 = tf.constant([1, 3, 5, 7], dtype=tf.float32)
    x4 = tf.constant([4, 8, 12, 16], dtype=tf.float32)

    with tf.variable_scope("AAA") as scopeA:
        avg_mean1 = _zero_var('avg_mean')
        avg_variance1 = _zero_var('avg_variance')

        y1, mean1, variance1 = getsta(x1, avg_mean1, avg_variance1)
        # Allow the second getsta call to reuse the variables it created.
        scopeA.reuse_variables()
        y1, mean1, variance1 = getsta(x2, avg_mean1, avg_variance1)
        print([avg_mean1.name, avg_variance1.name])

    with tf.variable_scope("BBB") as scopeB:
        avg_mean2 = _zero_var('avg_mean')
        avg_variance2 = _zero_var('avg_variance')

        y2, mean2, variance2 = getsta(x3, avg_mean2, avg_variance2)
        scopeB.reuse_variables()
        y2, mean2, variance2 = getsta(x4, avg_mean2, avg_variance2)
        print([avg_mean2.name, avg_variance2.name])

    saver = tf.train.Saver()
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    # avg_mean*/avg_variance* are fetched directly as variables here; nothing
    # in this function makes their evaluation depend on the getsta outputs.
    print(sess.run([y1, mean1, variance1, avg_mean1, avg_variance1]))
    print(sess.run([y2, mean2, variance2, avg_mean2, avg_variance2]))

    # Dump every global variable together with its current value.
    all_vars = tf.global_variables()
    var_values = sess.run(all_vars)
    for var, val in zip(all_vars, var_values):
        print(var.name, val)

    saver.save(sess, "/tmp/test_EMA/var_save.ckpt")
现在变量的名称是固定的。但是,输出(变量的值)似乎不正确。输出是
[array([ 5., 6., 7., 8.], dtype=float32), 6.5, 1.25, array([ 0.], dtype=float32), array([ 0.], dtype=float32)]
[array([ 4., 8., 12., 16.], dtype=float32), 10.0, 20.0, array([ 0.], dtype=float32), array([ 0.], dtype=float32)]
AAA/avg_mean:0 [ 0.]
AAA/avg_variance:0 [ 0.]
AAA/mean:0 [ 0.]
AAA/variance:0 [ 1.]
BBB/avg_mean:0 [ 0.]
BBB/avg_variance:0 [ 0.]
BBB/mean:0 [ 0.]
我应该如何修改我的代码以获得正确的结果?谢谢。
以下是代码中需要修复的问题:
1. 在下面这一行中,您用 getsta 返回的操作(operation)覆盖了 avg_variance1。绝对不要这样做。当您创建变量 avg_variance1 并将其传递给 getsta 时,TensorFlow 会负责在计算图中所有用到该变量的位置对它进行更新,您无需显式地重新赋值。只要您始终通过变量(variable)的名称(name)来引用它,就不会有问题。TensorFlow 变量与普通的 Python 变量不同。
y1, mean1, variance1, avg_mean1, avg_variance1 = getsta(x1,avg_mean1,avg_variance1)
(您需要为avg_mean1、avg_mean2等修复此问题)
2. avg_mean1、avg_variance1、avg_mean2 和 avg_variance2 是 Tensor 而不是 Variable,因此它们不在您打印的变量(variables)列表中。