TensorFlow 2 中的 L2 正则化器



以下模型是在 TF1 中定义的，我正在尝试在不使用 compat API 的情况下将其迁移到 TF2。

# Define the tensorflow neural network (TF1 graph mode).
# NOTE(review): this fragment assigns to `self` and reads
# board_height/board_width from an enclosing scope -- it appears to be the
# body of a method whose definition is outside this snippet.
# 1. Input: batch of board states, layout NCHW (4 feature planes).
self.input_states = tf.placeholder(
tf.float32, shape=[None, 4, board_height, board_width])
# Transpose NCHW -> NHWC to match data_format="channels_last" below.
self.input_state = tf.transpose(self.input_states, [0, 2, 3, 1])
# 2. Common Networks Layers
self.conv1 = tf.layers.conv2d(inputs=self.input_state,
filters=32, kernel_size=[3, 3],
padding="same", data_format="channels_last",
activation=tf.nn.relu)
self.conv2 = tf.layers.conv2d(inputs=self.conv1, filters=64,
kernel_size=[3, 3], padding="same",
data_format="channels_last",
activation=tf.nn.relu)
self.conv3 = tf.layers.conv2d(inputs=self.conv2, filters=128,
kernel_size=[3, 3], padding="same",
data_format="channels_last",
activation=tf.nn.relu)
# 3-1 Action Networks (policy head).
self.action_conv = tf.layers.conv2d(inputs=self.conv3, filters=4,
kernel_size=[1, 1], padding="same",
data_format="channels_last",
activation=tf.nn.relu)
# Flatten the tensor to (batch, 4 * H * W).
self.action_conv_flat = tf.reshape(
self.action_conv, [-1, 4 * board_height * board_width])
# 3-2 Full connected layer, the output is the log probability of moves
# on each slot on the board
self.action_fc = tf.layers.dense(inputs=self.action_conv_flat,
units=board_height * board_width,
activation=tf.nn.log_softmax)
# 4 Evaluation Networks (value head).
self.evaluation_conv = tf.layers.conv2d(inputs=self.conv3, filters=2,
kernel_size=[1, 1],
padding="same",
data_format="channels_last",
activation=tf.nn.relu)
self.evaluation_conv_flat = tf.reshape(
self.evaluation_conv, [-1, 2 * board_height * board_width])
self.evaluation_fc1 = tf.layers.dense(inputs=self.evaluation_conv_flat,
units=64, activation=tf.nn.relu)
# output the score of evaluation on current state, squashed to [-1, 1]
self.evaluation_fc2 = tf.layers.dense(inputs=self.evaluation_fc1,
units=1, activation=tf.nn.tanh)
# Define the Loss function
# 1. Label: the array containing if the game wins or not for each state
self.labels = tf.placeholder(tf.float32, shape=[None, 1])
# 2. Predictions: the array containing the evaluation score of each state
# which is self.evaluation_fc2
# 3-1. Value Loss function
self.value_loss = tf.losses.mean_squared_error(self.labels,
self.evaluation_fc2)
# 3-2. Policy Loss function: -mean_b sum_a pi(a) * log p(a), i.e.
# cross-entropy between MCTS visit probabilities and the policy head's
# log-probabilities (action_fc already applies log_softmax).
self.mcts_probs = tf.placeholder(
tf.float32, shape=[None, board_height * board_width])
self.policy_loss = tf.negative(tf.reduce_mean(
tf.reduce_sum(tf.multiply(self.mcts_probs, self.action_fc), 1)))
# 3-3. L2 penalty (regularization): tf.nn.l2_loss(v) is sum(v**2)/2,
# summed over every trainable variable whose name does not contain
# "bias" (so only kernels/weights are penalized).
l2_penalty_beta = 1e-4
vars = tf.trainable_variables()
l2_penalty = l2_penalty_beta * tf.add_n(
[tf.nn.l2_loss(v) for v in vars if 'bias' not in v.name.lower()])
# 3-4 Add up to be the Loss function (value + policy + L2).
self.loss = self.value_loss + self.policy_loss + l2_penalty
# Define the optimizer we use for training; learning rate is fed at
# run time so the caller can anneal it.
self.learning_rate = tf.placeholder(tf.float32)
self.optimizer = tf.train.AdamOptimizer(
learning_rate=self.learning_rate).minimize(self.loss)

这是我的TF2代码

l2_penalty_beta = 1e-4
# Define the neural network with the tf.keras functional API.
# NOTE(review): this fragment assigns to `self` and reads
# board_height/board_width from an enclosing scope -- it is the body of a
# method whose definition is outside this snippet.
# 1. Input: NCHW batch of board states; transpose to NHWC for the convs.
self.inputs = tf.keras.Input(shape=(4, board_height, board_width),
                             dtype=tf.dtypes.float32)
self.transposed_inputs = tf.keras.layers.Lambda(
    lambda x: tf.transpose(x, [0, 2, 3, 1]))(self.inputs)
# 2. Common network layers. Each kernel_regularizer adds
# l2_penalty_beta * sum(w**2) for that layer's kernel (not its bias) to
# model.losses, and Keras folds model.losses into the training loss
# automatically during fit() -- so the custom loss below must NOT add the
# penalty again. (tf.nn.l2_loss in the TF1 code was sum(w**2)/2, so for
# the same beta the TF1 penalty was half of this one.)
self.conv1 = tf.keras.layers.Conv2D(
    name="conv1", filters=32, kernel_size=(3, 3), padding="same",
    data_format="channels_last", activation=tf.keras.activations.relu,
    kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.transposed_inputs)
self.conv2 = tf.keras.layers.Conv2D(
    name="conv2", filters=64, kernel_size=(3, 3), padding="same",
    data_format="channels_last", activation=tf.keras.activations.relu,
    kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.conv1)
self.conv3 = tf.keras.layers.Conv2D(
    name="conv3", filters=128, kernel_size=(3, 3), padding="same",
    data_format="channels_last", activation=tf.keras.activations.relu,
    kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.conv2)
# 3-1 Action (policy) head.
self.action_conv = tf.keras.layers.Conv2D(
    name="action_conv", filters=4, kernel_size=(1, 1), padding="same",
    data_format="channels_last", activation=tf.keras.activations.relu,
    kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.conv3)
# BUG FIX: Reshape's target_shape EXCLUDES the batch axis, so the original
# Reshape((-1, 4*H*W)) produced (None, 1, 4*H*W) -- the spurious middle
# axis visible in the model summary, which in turn forced the axis=2
# split in the loss. Flatten yields the intended (None, 4*H*W).
self.action_conv_flat = tf.keras.layers.Flatten(
    name="action_conv_flat")(self.action_conv)
# 3-2 Fully connected layer; output is the log-probability of each move
# on each slot on the board.
self.action_fc = tf.keras.layers.Dense(
    board_height * board_width,
    activation=tf.nn.log_softmax,
    name="action_fc",
    kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.action_conv_flat)
# 4 Evaluation (value) head.
self.evaluation_conv = tf.keras.layers.Conv2D(
    name="evaluation_conv", filters=2, kernel_size=(1, 1), padding="same",
    data_format="channels_last", activation=tf.keras.activations.relu,
    kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.conv3)
# Same batch-axis fix as action_conv_flat above.
self.evaluation_conv_flat = tf.keras.layers.Flatten(
    name="evaluation_conv_flat")(self.evaluation_conv)
self.evaluation_fc1 = tf.keras.layers.Dense(
    64, activation=tf.keras.activations.relu,
    name="evaluation_fc1",
    kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.evaluation_conv_flat)
# Scalar evaluation of the current state, squashed to [-1, 1].
self.evaluation_fc2 = tf.keras.layers.Dense(
    1, activation=tf.keras.activations.tanh,
    name="evaluation_fc2",
    kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta))(self.evaluation_fc1)
# Single output tensor (None, H*W + 1) so one loss covers both heads.
self.outputs = tf.keras.layers.Concatenate()(
    [self.action_fc, self.evaluation_fc2])
self.model = tf.keras.Model(inputs=self.inputs, outputs=self.outputs,
                            name="policy_value_model")
self.model.summary()

# Captured as a plain Python int so custom_loss stays stateless and can
# be traced into a graph for interpreter-free execution.
num_moves = board_height * board_width

def custom_loss(labels, predictions):
    """Value MSE + policy cross-entropy for the concatenated output.

    labels/predictions are (batch, num_moves + 1): the MCTS move
    probabilities (resp. log-probabilities from action_fc) followed by
    the game outcome (resp. predicted value from evaluation_fc2).
    The L2 penalties from the kernel_regularizers are added by Keras on
    top of this value during training; do not add them here.
    """
    # With the Flatten fix the tensors are rank 2, so split on the last
    # axis (the original split on axis=2 only because of the extra axis).
    expected_act_probs, expected_value = tf.split(
        labels, [num_moves, 1], axis=-1)
    pred_act_probs, pred_value = tf.split(
        predictions, [num_moves, 1], axis=-1)
    # BUG FIX: the original used expected_value[0] / pred_value[0], which
    # computed the MSE of the FIRST sample only; average the whole batch.
    value_loss = tf.reduce_mean(tf.square(expected_value - pred_value))
    # -mean_b sum_a pi(a) * log p(a)  (pred_act_probs are already logs).
    policy_loss = tf.negative(tf.reduce_mean(
        tf.reduce_sum(expected_act_probs * pred_act_probs, axis=-1)))
    return value_loss + policy_loss

# tf.function wrapping is unnecessary: Keras traces the loss itself when
# building the training step.
# NOTE(review): 'accuracy' is not meaningful for this concatenated
# log-prob + value output; kept only to preserve the existing logs.
self.model.compile(optimizer=tf.keras.optimizers.Adam(),
                   loss=custom_loss,
                   metrics=['accuracy'])

以下是该模型的摘要。

__________________________________________________________________________________________________
Layer (type)                   Output Shape         Param #     Connected to                     
==================================================================================================
input_1 (InputLayer)           [(None, 4, 15, 15)]  0           []                               
                            
lambda (Lambda)                (None, 15, 15, 4)    0           ['input_1[0][0]']                
                            
conv1 (Conv2D)                 (None, 15, 15, 32)   1184        ['lambda[0][0]']                 
                            
conv2 (Conv2D)                 (None, 15, 15, 64)   18496       ['conv1[0][0]']                  
                            
conv3 (Conv2D)                 (None, 15, 15, 128)  73856       ['conv2[0][0]']                  
                            
evaluation_conv (Conv2D)       (None, 15, 15, 2)    258         ['conv3[0][0]']                  
                            
action_conv (Conv2D)           (None, 15, 15, 4)    516         ['conv3[0][0]']                  
                            
evaluation_conv_flat (Reshape)  (None, 1, 450)      0           ['evaluation_conv[0][0]']        
                            
action_conv_flat (Reshape)     (None, 1, 900)       0           ['action_conv[0][0]']            
                            
evaluation_fc1 (Dense)         (None, 1, 64)        28864       ['evaluation_conv_flat[0][0]']   
                            
action_fc (Dense)              (None, 1, 225)       202725      ['action_conv_flat[0][0]']       
                            
evaluation_fc2 (Dense)         (None, 1, 1)         65          ['evaluation_fc1[0][0]']         
                            
concatenate (Concatenate)      (None, 1, 226)       0           ['action_fc[0][0]',              
'evaluation_fc2[0][0]']         
                            
==================================================================================================

正如您所注意到的，TF1 模型输出两个张量（action_fc 和 evaluation_fc2）。在 TF2 模型中，我在最后添加了一个 Concatenate 层，把它们组合成一个张量，这样就可以在它们上面定义一个损失函数。

TF1模型中的损失函数由三部分组成。l2_penalty是模型中所有权重的L2损失。

l2_penalty_beta = 1e-4
vars = tf.trainable_variables()
l2_penalty = l2_penalty_beta * tf.add_n(
[tf.nn.l2_loss(v) for v in vars if 'bias' not in v.name.lower()])
self.loss = self.value_loss + self.policy_loss + l2_penalty

在 TF2 模型中，所有可训练层都添加了 kernel_regularizer=tf.keras.regularizers.L2(l2_penalty_beta)。这和 TF1 模型等价吗？还是我犯了什么错误？

TF2模型将在没有Python解释器的环境中执行,也就是说,模型将被编译成图。我想我的loss函数必须是无状态的——它不能依赖于函数范围之外的任何变量。

更新:

以下是全新（fresh）TF2 模型的训练日志。loss 看起来不正常：一开始它太小了，而 TF1 模型中 loss 大于 4.0。L2 正则化项会自动计入 TF2 的损失吗？

batch i:2, episode_len:113
1/1 - 0s - loss: 0.7342 - accuracy: 0.0000e+00 - 18ms/epoch - 18ms/step
1/1 - 0s - loss: 0.4714 - accuracy: 0.0000e+00 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.1785 - accuracy: 0.0000e+00 - 14ms/epoch - 14ms/step
1/1 - 0s - loss: 0.0829 - accuracy: 0.0000e+00 - 30ms/epoch - 30ms/step
1/1 - 0s - loss: 0.0743 - accuracy: 0.0000e+00 - 13ms/epoch - 13ms/step
kl:0.00178,lr_multiplier:2.250,loss:[0.07430928945541382]
batch i:3, episode_len:92
1/1 - 0s - loss: 0.0764 - accuracy: 1.0000 - 19ms/epoch - 19ms/step
1/1 - 0s - loss: 0.0718 - accuracy: 1.0000 - 28ms/epoch - 28ms/step
1/1 - 0s - loss: 0.0705 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0693 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0681 - accuracy: 1.0000 - 34ms/epoch - 34ms/step
kl:0.00068,lr_multiplier:3.375,loss:[0.06813239306211472]
batch i:4, episode_len:118
1/1 - 0s - loss: 0.0676 - accuracy: 1.0000 - 16ms/epoch - 16ms/step
1/1 - 0s - loss: 0.0665 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0654 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0643 - accuracy: 1.0000 - 15ms/epoch - 15ms/step
1/1 - 0s - loss: 0.0631 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
kl:0.00425,lr_multiplier:5.062,loss:[0.06307009607553482]
batch i:5, episode_len:84
1/1 - 0s - loss: 4.0628 - accuracy: 0.0000e+00 - 17ms/epoch - 17ms/step
1/1 - 0s - loss: 4.0618 - accuracy: 0.0000e+00 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 4.0606 - accuracy: 0.0000e+00 - 16ms/epoch - 16ms/step
1/1 - 0s - loss: 4.0592 - accuracy: 0.0000e+00 - 11ms/epoch - 11ms/step
1/1 - 0s - loss: 4.0577 - accuracy: 0.0000e+00 - 13ms/epoch - 13ms/step
kl:0.07804,lr_multiplier:3.375,loss:[4.057666778564453]
batch i:6, episode_len:96
1/1 - 0s - loss: 0.0599 - accuracy: 1.0000 - 17ms/epoch - 17ms/step
1/1 - 0s - loss: 0.0589 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
1/1 - 0s - loss: 0.0579 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0568 - accuracy: 1.0000 - 31ms/epoch - 31ms/step
1/1 - 0s - loss: 0.0556 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
kl:0.06898,lr_multiplier:2.250,loss:[0.055556993931531906]
batch i:7, episode_len:62
1/1 - 0s - loss: 0.0577 - accuracy: 1.0000 - 28ms/epoch - 28ms/step
1/1 - 0s - loss: 0.0569 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
1/1 - 0s - loss: 0.0559 - accuracy: 1.0000 - 14ms/epoch - 14ms/step
1/1 - 0s - loss: 0.0549 - accuracy: 1.0000 - 16ms/epoch - 16ms/step
1/1 - 0s - loss: 0.0538 - accuracy: 1.0000 - 19ms/epoch - 19ms/step
kl:0.03346,lr_multiplier:2.250,loss:[0.05379907414317131]
batch i:8, episode_len:118
1/1 - 0s - loss: 0.0552 - accuracy: 1.0000 - 28ms/epoch - 28ms/step
1/1 - 0s - loss: 0.0543 - accuracy: 1.0000 - 48ms/epoch - 48ms/step
1/1 - 0s - loss: 0.0532 - accuracy: 1.0000 - 12ms/epoch - 12ms/step
1/1 - 0s - loss: 0.0521 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
1/1 - 0s - loss: 0.0510 - accuracy: 1.0000 - 11ms/epoch - 11ms/step
kl:0.04336,lr_multiplier:1.500,loss:[0.051018256694078445]

是的，我认为这是在 TF2 中把 L2 正则化添加到权重（但不包括偏差）上的正确方法。

我唯一的建议是：在 TF2 中您其实不需要连接（Concatenate）输出张量，可以直接定义

self.outputs = [self.action_fc, self.evaluation_fc2]

如果那样更容易的话。（然后把您的损失也相应地指定为长度为 2 的列表，等等。）

最新更新