我正在用 TensorFlow 训练一个自动编码器,但损失(cost)变成了 NaN。我修改过学习率和优化器,都没有效果。我搜索到的一些结果显示降低学习率可能会有帮助,但我把学习率改成 0.00001 之后仍然不起作用。这是我的参数和代码:
# Training hyperparameters.
learning_rate = 1e-05
training_epochs = 2
batch_size = 900
display_step = 1  # log the cost every `display_step` epochs
examples_to_show = 10
nextbatch = 0  # presumably a cursor used by the batch loader -- TODO confirm

# Network parameters: width of the input (and reconstructed output) layer.
n_input = 500

# tf Graph input: a float placeholder with n_input columns; the leading
# (batch) dimension is left unspecified.
X = tf.placeholder("float", [None, n_input])

# Hidden layer widths, from the 1st to the 4th encoder layer.
n_hidden_1, n_hidden_2, n_hidden_3, n_hidden_4 = 400, 300, 200, 100
# Weight matrices of the eight dense layers, keyed by layer name.  Each one
# is a trainable variable drawn from tf.random_normal with shape
# [fan_in, fan_out]; the decoder shapes mirror the encoder shapes.
weights = {
    key: tf.Variable(tf.random_normal([fan_in, fan_out]))
    for key, fan_in, fan_out in (
        ('encoder_h1', n_input, n_hidden_1),     # 500 * 400
        ('encoder_h2', n_hidden_1, n_hidden_2),  # 400 * 300
        ('encoder_h3', n_hidden_2, n_hidden_3),  # 300 * 200
        ('encoder_h4', n_hidden_3, n_hidden_4),  # 200 * 100
        ('decoder_h1', n_hidden_4, n_hidden_3),  # 100 * 200
        ('decoder_h2', n_hidden_3, n_hidden_2),  # 200 * 300
        ('decoder_h3', n_hidden_2, n_hidden_1),  # 300 * 400
        ('decoder_h4', n_hidden_1, n_input),     # 400 * 500
    )
}
# Bias vectors of the eight dense layers, keyed by layer name.  Each one is
# a trainable variable drawn from tf.random_normal whose length equals the
# output width of its layer.
biases = {
    key: tf.Variable(tf.random_normal([width]))
    for key, width in (
        ('encoder_b1', n_hidden_1),  # 400
        ('encoder_b2', n_hidden_2),  # 300
        ('encoder_b3', n_hidden_3),  # 200
        ('encoder_b4', n_hidden_4),  # 100
        ('decoder_b1', n_hidden_3),  # 200
        ('decoder_b2', n_hidden_2),  # 300
        ('decoder_b3', n_hidden_1),  # 400
        ('decoder_b4', n_input),     # 500
    )
}
# Building the encoder
def encoder(x):
    """Run the input through the four sigmoid encoder layers.

    Every layer computes ``sigmoid(matmul(activation, W) + b)`` with the
    matching entries of the module-level ``weights`` and ``biases`` dicts.

    Args:
        x: Tensor fed into the first encoder layer.

    Returns:
        The output tensor of the fourth (last) encoder layer.
    """
    print("i am encoder")
    activation = x
    layer_keys = (
        ('encoder_h1', 'encoder_b1'),
        ('encoder_h2', 'encoder_b2'),
        ('encoder_h3', 'encoder_b3'),
        ('encoder_h4', 'encoder_b4'),
    )
    for weight_key, bias_key in layer_keys:
        projected = tf.matmul(activation, weights[weight_key])
        activation = tf.nn.sigmoid(tf.add(projected, biases[bias_key]))
    return activation
# Building the decoder
def decoder(x):
    """Run the encoded tensor through the four sigmoid decoder layers.

    Every layer computes ``sigmoid(matmul(activation, W) + b)`` with the
    matching entries of the module-level ``weights`` and ``biases`` dicts,
    and the resulting tensor object is printed right after it is built.

    Args:
        x: Tensor fed into the first decoder layer.

    Returns:
        The output tensor of the fourth (last) decoder layer.
    """
    activation = x
    layer_specs = (
        ('decoder_h1', 'decoder_b1', "layer1:"),
        ('decoder_h2', 'decoder_b2', "layer2:"),
        ('decoder_h3', 'decoder_b3', "layer3:"),
        ('decoder_h4', 'decoder_b4', "layer4:"),
    )
    for weight_key, bias_key, label in layer_specs:
        projected = tf.matmul(activation, weights[weight_key])
        activation = tf.nn.sigmoid(tf.add(projected, biases[bias_key]))
        print(label, activation)
    return activation
def normalize(x):
    """Min-max scale *x* into the range [0, 1].

    The global minimum and maximum over all elements are used, so the
    smallest element maps to 0 and the largest to 1.

    Args:
        x: A NumPy array (or array-like) of numbers.

    Returns:
        A floating-point array with the same shape as *x*.  When every
        element of *x* is equal, the result is all zeros rather than NaN.
        An empty input is returned as an empty float array.  NaN values
        already present in *x* still propagate to the result.
    """
    import numpy as np

    x = np.asarray(x)
    if x.size == 0:
        # min()/max() are undefined on empty arrays; nothing to scale.
        return x.astype(float)

    amin, amax = x.min(), x.max()
    span = amax - amin
    if span == 0:
        # Constant input: (x - amin) is already all zeros, so divide by 1
        # instead of 0 to avoid filling the result with NaN.
        span = 1
    return (x - amin) / span
def main():
    """Build the autoencoder graph, train it, and log the cost per epoch.

    The reconstruction target is the input placeholder ``X`` itself; the
    loss is the mean squared difference between ``X`` and the decoder
    output, minimised with Adagrad at the module-level ``learning_rate``.

    Raises:
        ValueError: If the training set holds fewer than ``batch_size``
            samples, so that not even one full batch can be run.
    """
    # Construct model
    encoder_op = encoder(X)
    print("encoder_op:", encoder_op)
    decoder_op = decoder(encoder_op)
    print("decoder_op:", decoder_op)

    # Prediction is the reconstruction; targets (labels) are the input data.
    y_pred = decoder_op
    y_true = X

    # Define loss and optimizer: minimize the mean squared error.
    cost = tf.reduce_mean(tf.squared_difference(y_true, y_pred))
    optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(cost)

    # Launch the graph
    with tf.Session() as sess:
        # TensorFlow releases before 0.12 only provide the older
        # initialize_all_variables() initializer.
        major, minor = (int(part) for part in tf.__version__.split('.')[:2])
        if major < 1 and minor < 12:
            init = tf.initialize_all_variables()
        else:
            init = tf.global_variables_initializer()
        sess.run(init)

        lenx_train = len(loadvector("x", "train"))
        # Only full batches are run; a trailing partial batch is skipped.
        total_batch = lenx_train // batch_size
        if total_batch == 0:
            # Without this guard the batch loop never runs and the logging
            # below fails on an unbound cost value.
            raise ValueError(
                "training set has %d samples, fewer than batch_size=%d; "
                "no batch can be run" % (lenx_train, batch_size)
            )

        # Training cycle
        for epoch in range(training_epochs):
            # Loop over all batches
            for _ in range(total_batch):
                # NOTE(review): the original author notes the batch values
                # are expected to lie in [0, 1] -- confirm in get_next_batch.
                batch_xs, _batch_ys = get_next_batch(batch_size)
                # Run optimization op (backprop) and cost op (to get loss value)
                op, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
            # Display logs per epoch step (values come from the last batch).
            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch+1), "cost=", c)
                # `optimizer` is a training operation, so its fetched value
                # is None; the line is kept so the log format is unchanged.
                print("op:", '%04d' % (epoch+1), "op=", op)
        print("Optimization Finished!")
if __name__ == "__main__":
    # Load the training inputs and min-max scale them, load the training
    # targets, then build and train the model.
    x_train = normalize(loadvector("x", "train"))
    y_train = loadvector("y", "train")
    main()
这是我运行它时的输出结果:
Epoch: 0001 cost= 0.373359
op: 0001 op= None
Epoch: 0002 cost= nan
op: 0002 op= None
Optimization Finished!
没有数据很难确定,但一个可能的原因是你对 normalize() 的实现:如果某个数组中的所有值恰好相等,那么 amax - amin 等于 0,normalize() 就会因为除以零而返回一堆 NaN。
一般来说,你可以看看《如何在 TensorFlow 中调试 NaN 值?》这个问题下的第二个答案,它建议使用 tf.add_check_numerics_ops()。在大多数情况下,这应该可以很快捕获数值问题。另外,你也可以看看 tfdbg。