我尝试在不使用 contrib 的情况下,用 TensorFlow 创建一个自动编码器。这是原始代码:
https://github.com/Machinelearninguru/Deep_Learning/blob/master/TensorFlow/neural_networks/autoencoder/simple_autoencoder.py
这是我修改的程序:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Convolutional autoencoder built from raw tf.nn ops (no tf.contrib).
# Encoder: 32x32x1 -> 16x16x32 -> 8x8x16 -> 2x2x8
# Decoder: 2x2x8 -> 8x8x16 -> 16x16x32 -> 32x32x1
ae_inputs = tf.placeholder(tf.float32, (None, 32, 32, 1))  # input to the network (MNIST images)

# --- encoder ---------------------------------------------------------------
# conv2d filters are laid out as [height, width, in_channels, out_channels].
# strides is [batch, height, width, channels]; the batch and channel entries
# must both be 1 -- only the two spatial entries control down-sampling.
xi = tf.nn.conv2d(ae_inputs,
                  filter=tf.Variable(tf.random_normal([5, 5, 1, 32])),
                  strides=[1, 2, 2, 1],
                  padding='SAME')
print("xi {0}".format(xi))
xi = tf.nn.conv2d(xi,
                  filter=tf.Variable(tf.random_normal([5, 5, 32, 16])),
                  strides=[1, 2, 2, 1],
                  padding='SAME')
print("xi {0}".format(xi))
xi = tf.nn.conv2d(xi,
                  filter=tf.Variable(tf.random_normal([5, 5, 16, 8])),
                  strides=[1, 4, 4, 1],
                  padding='SAME')
print("xi {0}".format(xi))

# --- decoder ---------------------------------------------------------------
# conv2d_transpose filters are laid out as
# [height, width, out_channels, in_channels] (note the swapped channel order).
# NOTE: output_shape hard-codes a batch size of 1, so this decoder only accepts
# single-image batches; use tf.shape(...)[0] to support arbitrary batch sizes.
xo = tf.nn.conv2d_transpose(xi,
                            filter=tf.Variable(tf.random_normal([5, 5, 16, 8])),
                            output_shape=[1, 8, 8, 16],
                            strides=[1, 4, 4, 1],
                            padding='SAME')
print("xo {0}".format(xo))
xo = tf.nn.conv2d_transpose(xo,
                            filter=tf.Variable(tf.random_normal([5, 5, 32, 16])),
                            output_shape=[1, 16, 16, 32],
                            strides=[1, 2, 2, 1],
                            padding='SAME')
print("xo {0}".format(xo))
xo = tf.nn.conv2d_transpose(xo,
                            filter=tf.Variable(tf.random_normal([5, 5, 1, 32])),
                            output_shape=[1, 32, 32, 1],
                            strides=[1, 2, 2, 1],
                            padding='SAME')
print("xo {0}".format(xo))
打印的结果是:
xi Tensor("Conv2D:0", shape=(?, 16, 16, 32), dtype=float32)
xi Tensor("Conv2D_1:0", shape=(?, 8, 8, 16), dtype=float32)
xi Tensor("Conv2D_2:0", shape=(?, 2, 2, 8), dtype=float32)
xo Tensor("conv2d_transpose:0", shape=(1, 8, 8, 16), dtype=float32)
xo Tensor("conv2d_transpose_1:0", shape=(1, 16, 16, 32), dtype=float32)
xo Tensor("conv2d_transpose_2:0", shape=(1, 32, 32, 1), dtype=float32)
看起来输出的形状是对的,但我真的不确定 conv2d 和 conv2d_transpose 中的所有参数是否正确。
如果需要,有人能帮我纠正代码吗?
编辑:@刘:我按照你说的添加了 relu 激活函数,但我不知道应该在哪里添加偏置(bias):
xi = tf.nn.conv2d(ae_inputs,
filter=tf.Variable(tf.random_normal([5,5,1,32])),
strides=[1,2,2,1],
padding='SAME')
xi = tf.nn.relu(xi)
# xi = max_pool(xi,2)
print("xi {0}".format(xi))
xi = tf.nn.conv2d(xi,
filter=tf.Variable(tf.random_normal([5,5,32,16])),
strides=[1,2,2,1],
padding='SAME')
xi = tf.nn.relu(xi)
# xi = max_pool(xi,2)
print("xi {0}".format(xi))
xi = tf.nn.conv2d(xi,
filter=tf.Variable(tf.random_normal([5,5,16,8])),
strides=[1,4,4,1],
padding='SAME')
xi = tf.nn.relu(xi)
# xi = max_pool(xi,4)
print("xi {0}".format(xi))
xo = tf.nn.conv2d_transpose(xi,
filter=tf.Variable(tf.random_normal([5,5,16,8])),
output_shape=[tf.shape(xi)[0], 8, 8, 16],
strides=[1,4,4,1],
padding='SAME')
xo = tf.nn.relu(xo)
print("xo {0}".format(xo))
xo = tf.nn.conv2d_transpose(xo,
filter=tf.Variable(tf.random_normal([5,5,32,16])),
output_shape=[tf.shape(xo)[0], 16, 16, 32],
strides=[1,2,2,1],
padding='SAME')
xo = tf.nn.relu(xo)
print("xo {0}".format(xo))
xo = tf.nn.conv2d_transpose(xo,
filter=tf.Variable(tf.random_normal([5,5,1,32])),
output_shape=[tf.shape(xo)[0], 32, 32, 1],
strides=[1,2,2,1],
padding='SAME')
xo = tf.nn.tanh(xo)
print("xo {0}".format(xo))
return xo
我不明白与原始代码有什么区别:
# Reference network from the original script, built with the `lays` layer
# helpers (presumably tf.contrib.layers -- the import is not shown here).
# Each call takes (input, number of output channels, kernel size) plus the
# stride and padding, and the tensor after every layer is printed.
# NOTE(review): layers without an explicit activation_fn rely on the defaults
# of the `lays` API (bias/activation) -- confirm against its documentation.
# encoder
# 32 x 32 x 1 -> 16 x 16 x 32
# 16 x 16 x 32 -> 8 x 8 x 16
# 8 x 8 x 16 -> 2 x 2 x 8
print('inputs {0}'.format(inputs))
net = lays.conv2d(inputs, 32, [5, 5], stride=2, padding='SAME')
print('net {0}'.format(net))
net = lays.conv2d(net, 16, [5, 5], stride=2, padding='SAME')
print('net {0}'.format(net))
# Stride 4 collapses the 8 x 8 map to the 2 x 2 x 8 bottleneck.
net = lays.conv2d(net, 8, [5, 5], stride=4, padding='SAME')
print('net {0}'.format(net))
# decoder
# 2 x 2 x 8 -> 8 x 8 x 16
# 8 x 8 x 16 -> 16 x 16 x 32
# 16 x 16 x 32 -> 32 x 32 x 1
net = lays.conv2d_transpose(net, 16, [5, 5], stride=4, padding='SAME')
print('net {0}'.format(net))
net = lays.conv2d_transpose(net, 32, [5, 5], stride=2, padding='SAME')
print('net {0}'.format(net))
# The final layer overrides the activation with tanh.
net = lays.conv2d_transpose(net, 1, [5, 5], stride=2, padding='SAME', activation_fn=tf.nn.tanh)
print('net {0}'.format(net))
return net
第2版:
@刘 我按照您的修改做了一个新版本的自动编码器:
# Version 2: stride-1 convolutions + bias, with max-pooling doing the
# down-sampling in the encoder and strided transposed convolutions doing the
# up-sampling in the decoder. Each layer lives in its own name scope.
# NOTE: tf.truncated_normal yields a random tensor, not a tf.Variable, so the
# kernels below are not trainable parameters.
mean = 0
stdvev = 0.1

# Encoder entries: (scope, filter [h, w, in_ch, out_ch], bias size, pool factor).
encoder_layers = (
    ('L0', [5, 5, 1, 32], [32], 2),
    ('L1', [5, 5, 32, 16], [16], 2),
    ('L2', [5, 5, 16, 8], [8], 4),
)
xi = ae_inputs
for scope, kernel_shape, bias_shape, pool in encoder_layers:
    with tf.name_scope(scope):
        kernel = tf.truncated_normal(kernel_shape, mean=mean, stddev=stdvev)
        xi = tf.nn.conv2d(xi,
                          filter=kernel,
                          strides=[1, 1, 1, 1],
                          padding='SAME')
        xi = tf.nn.bias_add(xi, bias_variable(bias_shape))
        xi = max_pool(xi, pool)
        print("xi {0}".format(xi))

# Decoder entries: (scope, filter [h, w, out_ch, in_ch], bias size,
# output [height, width, channels], stride, optional activation).
decoder_layers = (
    ('L3', [5, 5, 16, 8], [16], [8, 8, 16], 4, None),
    ('L4', [5, 5, 32, 16], [32], [16, 16, 32], 2, None),
    ('L5', [5, 5, 1, 32], [1], [32, 32, 1], 2, tf.nn.tanh),
)
xo = xi
for scope, kernel_shape, bias_shape, out_dims, step, activation in decoder_layers:
    with tf.name_scope(scope):
        kernel = tf.truncated_normal(kernel_shape, mean=mean, stddev=stdvev)
        # Batch size is taken from the incoming tensor at run time.
        batch = tf.shape(xo)[0]
        xo = tf.nn.conv2d_transpose(xo,
                                    filter=kernel,
                                    output_shape=[batch] + out_dims,
                                    strides=[1, step, step, 1],
                                    padding='SAME')
        xo = tf.nn.bias_add(xo, bias_variable(bias_shape))
        # Only the output layer applies a non-linearity (tanh).
        if activation is not None:
            xo = activation(xo)
        print("xo {0}".format(xo))
但是结果还是一样:解码得到的值与原始代码的结果不一致。
第3版:
我更改了过滤器的定义,从
filter=tf.truncated_normal([5,5,16,8], mean = mean, stddev=stdvev),
至
filter= tf.get_variable('filter2',[5,5,16,8]),
结果似乎收敛得更好了,但仍然收敛到不同的值:原始代码为 0.006,而我的版本为 0.015。我认为这来自过滤器和偏置的初始值。我该怎么办?
您忘记了偏置和激活函数,所以你的网络比 PCA 还弱。我建议您改用 tf.layers。如果要使用 tf.nn,请使用 tf.get_variable 创建变量。此外,您必须添加 tf.nn.bias_add 和 tf.nn.relu(或任何其他激活函数)。
如果你想知道代码是否能正常运行,只需用下面的代码进行测试即可:
# Smoke test: initialise all variables, push one random 32x32x1 image through
# the graph and print the reconstruction to confirm the network runs end to end.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
test_output = sess.run(xo, feed_dict={ae_inputs: np.random.random((1, 32, 32, 1))})
print(test_output)
编辑:好的,您发布的原始代码基本上使用的是 tf.layers 风格的 API,其中已经包含了偏置和激活函数。tf.nn API 更底层,只执行卷积,不带激活函数和偏置。
根据您的编辑,我认为您希望用 tf.nn API 实现 CAE(卷积自动编码器)。一个典型的编码器层是这样的:
# Typical encoder layer: convolution -> bias -> ReLU.
# All names (input_tensor, height, width, ...) are placeholders to fill in.
# The filter is laid out as [height, width, in_channels, out_channels].
conv = tf.nn.conv2d(
    input=input_tensor,
    filter=tf.get_variable("conv_weight_name",
                           shape=[height,
                                  width,
                                  number_input_feature_maps,
                                  number_output_feature_maps]),
    strides=[1, 1, 1, 1],
    padding="SAME")
# One bias value per output feature map.
bias = tf.nn.bias_add(conv, tf.get_variable("name_bias",
                                            [number_output_feature_maps]))
layer_out = tf.nn.relu(bias)
这里是一个典型的转置卷积层。
# Typical decoder layer: transposed convolution -> bias -> ReLU.
# All names (input_tensor, batch_size, height_output, ...) are placeholders.
# Unlike conv2d, the filter is laid out as
# [height, width, out_channels, in_channels], and the full output shape
# (including the batch size) must be given explicitly.
# Variable names passed to tf.get_variable are placeholders too; give every
# layer its own unique names.
conv_transpose = tf.nn.conv2d_transpose(
    value=input_tensor,
    filter=tf.get_variable("deconnv_weight_name",
                           shape=[height,
                                  width,
                                  number_output_feature_maps,
                                  number_input_feature_maps]),
    output_shape=[batch_size, height_output, width_output, number_output_feature_maps],
    strides=[1, 1, 1, 1])
# One bias value per output feature map.
bias = tf.nn.bias_add(conv_transpose, tf.get_variable("name_bias", shape=[number_output_feature_maps]))
layer_out = tf.nn.relu(bias)
如果你对这些名称有疑问,在评论里问就行了。