Coming from a TensorFlow background, I am trying to convert a custom layer's code snippet from Keras to PyTorch.
The custom layer in Keras looks like this:
import tensorflow as tf
from tensorflow.keras import backend as K

class Attention_module(tf.keras.layers.Layer):
    def __init__(self, class_num):
        super(Attention_module, self).__init__()
        self.class_num = class_num
        self.Ws = None

    def build(self, input_shape):
        embedding_length = int(input_shape[2])
        # one (class_num x embedding_length) weight matrix, Glorot-uniform initialized
        self.Ws = self.add_weight(shape=(self.class_num, embedding_length),
                                  initializer=tf.keras.initializers.get('glorot_uniform'),
                                  trainable=True)
        super(Attention_module, self).build(input_shape)

    def call(self, inputs):
        sentence_trans = tf.transpose(inputs, [0, 2, 1])
        at = tf.matmul(self.Ws, sentence_trans)
        at = tf.math.tanh(at)
        at = K.exp(at - K.max(at, axis=-1, keepdims=True))  # numerically stable softmax
        at = at / K.sum(at, axis=-1, keepdims=True)
        v = K.batch_dot(at, inputs)
        return v
I want to implement the same thing in PyTorch. I have already written the forward pass block, but I am confused about how to do the embedding and the weight initialization in PyTorch the same way as in the Keras layer above.
import torch

class Attention_module(torch.nn.Module):
    def __init__(self, class_num):
        super().__init__()
        # how to initialize the weight the same way as in the Keras layer above?

    def forward(self, inputs):
        sentence_trans = inputs.permute(0, 2, 1)
        at = torch.matmul(self.Ws, sentence_trans)
        at = torch.tanh(at)
        at = torch.exp(at - torch.max(at, dim=-1, keepdim=True).values)
        at = at / torch.sum(at, dim=-1, keepdim=True)
        v = torch.einsum('ijk,ikl->ijl', at, inputs)
        return v
Thanks!
class Attention_module(torch.nn.Module):
    def __init__(self, class_num, input_shape):
        super().__init__()
        self.class_num = class_num
        embedding_length = int(input_shape[2])
        self.Ws = torch.nn.Embedding(num_embeddings=class_num,
                                     embedding_dim=embedding_length)  # Embedding layer
        torch.nn.init.xavier_uniform_(self.Ws.weight)  # Glorot initialization
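Note that with this approach self.Ws is an Embedding module rather than a raw tensor, so the forward pass should use its .weight. A minimal sketch of the forward, assuming the same logic as your attempt (torch.softmax replaces the explicit exp/sum normalization, which computes the same thing):

    def forward(self, inputs):
        # inputs: (batch, seq_len, embedding_length)
        sentence_trans = inputs.permute(0, 2, 1)           # (batch, emb, seq)
        at = torch.matmul(self.Ws.weight, sentence_trans)  # (batch, class_num, seq)
        at = torch.tanh(at)
        at = torch.softmax(at, dim=-1)                     # attention weights over the sequence
        v = torch.einsum('ijk,ikl->ijl', at, inputs)       # (batch, class_num, emb)
        return v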
For reference, see the layer initialization methods in torch.nn.init. Xavier init is another name for Glorot init. The trailing _ in torch.nn.init.xavier_uniform_ is the PyTorch convention indicating an in-place operation.
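A tiny sketch of what "in-place" means here (xavier_uniform_ fills the tensor you pass in and also returns it):

import torch

w = torch.empty(3, 5)             # an uninitialized weight tensor
torch.nn.init.xavier_uniform_(w)  # fills w in place with Glorot/Xavier-uniform values
print(w)                          # w itself now holds the initialized values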
You can also call torch.nn.init at runtime; it does not have to be in __init__(). For example:
att = Attention_module(class_num, input_shape)
torch.nn.init.xavier_uniform_(att.Ws.weight)
Or:
for param in att.parameters():
    torch.nn.init.xavier_uniform_(param)
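Putting it together, a quick check with dummy shapes (the numbers below are made up for illustration, and the forward is the one sketched above) confirms the output shape matches the Keras layer's (batch, class_num, embedding_length):

batch, seq_len, emb, class_num = 4, 12, 32, 5
inputs = torch.randn(batch, seq_len, emb)  # dummy batch of token embeddings

att = Attention_module(class_num, input_shape=(batch, seq_len, emb))
v = att(inputs)
print(v.shape)  # expected: torch.Size([4, 5, 32])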