How to set the learning rate of the first layer to 0.00001 and the learning rate of the last layer to 0.001



In the program below, how can I set the learning rate of the first layer to 0.00001 and the learning rate of the last layer to 0.001?

import tensorflow as tf

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def convolutional_neural_network(x):
    weights = {'W_conv1': tf.Variable(tf.random_normal([5, 5, 1, 20])),
               'W_conv2': tf.Variable(tf.random_normal([5, 5, 20, 40])),
               'W_fc': tf.Variable(tf.random_normal([7 * 7 * 40, 1024])),
               'out': tf.Variable(tf.random_normal([1024, n_classes]))}
    biases = {'b_conv1': tf.Variable(tf.random_normal([20])),
              'b_conv2': tf.Variable(tf.random_normal([40])),
              'b_fc': tf.Variable(tf.random_normal([1024])),
              'out': tf.Variable(tf.random_normal([n_classes]))}
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
    conv1 = maxpool2d(conv1)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = maxpool2d(conv2)
    fc = tf.reshape(conv2, [-1, 7 * 7 * 40])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    fc = tf.nn.dropout(fc, keep_rate)
    output = tf.matmul(fc, weights['out']) + biases['out']
    return output

There are a couple of ways to do this. Probably the simplest is to create a separate optimizer for each of the different learning rates (although you will need to restructure things to pass the variables to each optimizer and to return the training op):

def convolutional_neural_network(x):
    weights = {'W_conv1': tf.Variable(tf.random_normal([5, 5, 1, 20])),
               'W_conv2': tf.Variable(tf.random_normal([5, 5, 20, 40])),
               'W_fc': tf.Variable(tf.random_normal([7 * 7 * 40, 1024])),
               'out': tf.Variable(tf.random_normal([1024, n_classes]))}
    biases = {'b_conv1': tf.Variable(tf.random_normal([20])),
              'b_conv2': tf.Variable(tf.random_normal([40])),
              'b_fc': tf.Variable(tf.random_normal([1024])),
              'out': tf.Variable(tf.random_normal([n_classes]))}
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
    conv1 = maxpool2d(conv1)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = maxpool2d(conv2)
    fc = tf.reshape(conv2, [-1, 7 * 7 * 40])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    fc = tf.nn.dropout(fc, keep_rate)
    output = tf.matmul(fc, weights['out']) + biases['out']
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y))
    # Assuming that the convolutional layers are considered the "first layer".
    opt1 = tf.train.AdamOptimizer(0.00001)
    train1_op = opt1.minimize(cost, var_list=[
        weights['W_conv1'], weights['W_conv2'], biases['b_conv1'], biases['b_conv2']])
    # Assuming that the fully connected and softmax layers are considered the
    # "last layer".
    opt2 = tf.train.AdamOptimizer(0.001)
    train2_op = opt2.minimize(cost, var_list=[
        weights['W_fc'], weights['out'], biases['b_fc'], biases['out']])
    train_op = tf.group(train1_op, train2_op)
    return output, train_op
prediction, train_op = convolutional_neural_network(x)
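
For reference, a minimal sketch of how the grouped training op might be run, assuming x and y are the usual placeholders and next_batch() is a hypothetical stand-in for your input pipeline:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        # next_batch() is a hypothetical helper that yields one training batch.
        batch_x, batch_y = next_batch()
        # One call runs both Adam updates, each with its own learning rate.
        sess.run(train_op, feed_dict={x: batch_x, y: batch_y})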

Another approach is to use tf.train.Optimizer.compute_gradients() to compute the gradients for all of the variables, multiply each gradient by the appropriate factor, and then pass the result to tf.train.Optimizer.apply_gradients().
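
A minimal sketch of that approach, assuming it replaces the two-optimizer block inside convolutional_neural_network above and reuses the cost, weights, and biases defined there. Plain gradient descent is used here because, for an adaptive optimizer like Adam, scaling a gradient by a constant is not equivalent to scaling its learning rate; with gradient descent, a base rate of 0.001 and a factor of 0.01 give the first layer an effective rate of 0.00001:

    # Variables that should be updated with the smaller (0.00001) learning rate.
    slow_vars = [weights['W_conv1'], weights['W_conv2'],
                 biases['b_conv1'], biases['b_conv2']]
    opt = tf.train.GradientDescentOptimizer(0.001)
    # Compute gradients for every trainable variable with respect to the cost.
    grads_and_vars = opt.compute_gradients(cost)
    scaled_grads_and_vars = []
    for grad, var in grads_and_vars:
        if var in slow_vars:
            # 0.001 * 0.01 == 0.00001, the rate wanted for the first layer.
            grad = 0.01 * grad
        scaled_grads_and_vars.append((grad, var))
    train_op = opt.apply_gradients(scaled_grads_and_vars)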
