TensorFlow cost is NaN when training an autoencoder

I am training an autoencoder with TensorFlow, but the cost comes out as nan. I have changed the learning rate and the optimizer, and it still doesn't work. Search results suggested that lowering the learning rate might help, but even after changing it to 0.00001 the cost is still nan. Here is my code, starting with the parameters:

import tensorflow as tf

learning_rate = 0.00001
training_epochs = 2
batch_size = 900
display_step = 1
examples_to_show = 10
nextbatch = 0
# network parameters
n_input = 500
# tf Graph input
X = tf.placeholder("float", [None, n_input])
# hidden layer setting
n_hidden_1 = 400  # 1st layer num features
n_hidden_2 = 300  # 2nd layer num features
n_hidden_3 = 200  # 3rd layer num features
n_hidden_4 = 100  # 4th layer num features
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),    # 500 * 400
    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])), # 400 * 300
    'encoder_h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])), # 300 * 200
    'encoder_h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4])), # 200 * 100
    'decoder_h1': tf.Variable(tf.random_normal([n_hidden_4, n_hidden_3])), # 100 * 200
    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_2])), # 200 * 300 
    'decoder_h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])), # 300 * 400
    'decoder_h4': tf.Variable(tf.random_normal([n_hidden_1, n_input])),    # 400 * 500 
}
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),  # 400
    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),  # 300
    'encoder_b3': tf.Variable(tf.random_normal([n_hidden_3])),  # 200
    'encoder_b4': tf.Variable(tf.random_normal([n_hidden_4])),  # 100
    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_3])),  # 200
    'decoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),  # 300
    'decoder_b3': tf.Variable(tf.random_normal([n_hidden_1])),  # 400
    'decoder_b4': tf.Variable(tf.random_normal([n_input])),     # 500
}
# Building the encoder
def encoder(x):
    print("i am encoder")
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']), biases['encoder_b1']))
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']), biases['encoder_b2']))
    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['encoder_h3']), biases['encoder_b3']))
    layer_4 = tf.nn.sigmoid(tf.add(tf.matmul(layer_3, weights['encoder_h4']), biases['encoder_b4']))
    return layer_4
# Building the decoder
def decoder(x):
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']), biases['decoder_b1']))
    print("layer1:", layer_1)
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']), biases['decoder_b2']))
    print("layer2:", layer_2)
    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['decoder_h3']), biases['decoder_b3']))
    print("layer3:", layer_3)
    layer_4 = tf.nn.sigmoid(tf.add(tf.matmul(layer_3, weights['decoder_h4']), biases['decoder_b4']))
    print("layer4:", layer_4)
    return layer_4
def normalize(x):
    amin,amax = x.min(),x.max()
    x = (x-amin)/(amax - amin)
    return x
def main():
    # Construct model
    encoder_op = encoder(X)
    print("encoder_op:", encoder_op)
    decoder_op = decoder(encoder_op)
    print("decoder_op:", decoder_op)
    # Prediction
    y_pred = decoder_op
    # Targets (Labels) are the input data.
    y_true = X
    # Define loss and optimizer, minimize the squared error
    #cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
    #cost = tf.reduce_sum(tf.pow((y_true - y_pred), 2))
    cost = tf.reduce_mean(tf.squared_difference(y_true, y_pred))
    #optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
    optimizer = tf.train.AdagradOptimizer(learning_rate).minimize(cost)
    # Launch the graph
    with tf.Session() as sess:
        if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
            init = tf.initialize_all_variables()
        else:
            init = tf.global_variables_initializer()
        sess.run(init)
        lenx_train = len(loadvector("x", "train"))
        total_batch = int(lenx_train / batch_size)
        # Training cycle
        for epoch in range(training_epochs):
            # Loop over all batches
            for i in range(total_batch):
                batch_xs, batch_ys = get_next_batch(batch_size)  # max(x) = 1, min(x) = 0
                # Run optimization op (backprop) and cost op (to get loss value)
                op, c = sess.run([optimizer, cost], feed_dict={X: batch_xs})
            # Display logs per epoch step
            if epoch % display_step == 0:
                print("Epoch:", '%04d' % (epoch + 1), "cost=", c)
                print("op:", '%04d' % (epoch + 1), "op=", op)
    print("Optimization Finished!")
if __name__ == "__main__":
    x_train= loadvector("x","train")
    #x_train = scale(x_train)
    x_train = normalize(x_train)
    y_train = loadvector("y", "train")
    main()

This is the output when I run it:

Epoch: 0001 cost= 0.373359
op: 0001 op= None
Epoch: 0002 cost= nan
op: 0002 op= None
Optimization Finished!

It's hard to say for certain without the data, but one likely culprit is your normalize() implementation. If you happen to get a tensor whose values are all equal, amax - amin is zero, the division is 0/0, and normalize() returns a batch of nan values that then poisons the whole cost.
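As a quick guard, here is a minimal sketch of a safer normalize(); the eps constant and the NumPy import are my additions, not part of the original code:

import numpy as np

def normalize(x, eps=1e-8):
    # Min-max scale x to [0, 1]; eps (an assumed small constant)
    # keeps the denominator nonzero when all values are equal.
    amin, amax = x.min(), x.max()
    return (x - amin) / max(amax - amin, eps)

print(normalize(np.full(4, 3.0)))  # [0. 0. 0. 0.] instead of nan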

More generally, take a look at the second answer to "How does one debug NaN values in TensorFlow?", which suggests tf.add_check_numerics_ops(). In most cases that catches numeric problems quickly. You can also look at tfdbg.
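As a sketch of that first suggestion, under the TF 1.x graph API used in the question (check_op is a name I introduce): build the check op after the whole graph is defined and run it alongside the training step, and TensorFlow raises an InvalidArgumentError naming the first op whose output contains inf or nan:

# Build after the rest of the graph (encoder, decoder, cost, optimizer)
# is defined; it validates every floating-point tensor in the graph.
check_op = tf.add_check_numerics_ops()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Fails fast with the name of the first op producing inf/nan.
    _, c, _ = sess.run([optimizer, cost, check_op], feed_dict={X: batch_xs})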
