这里的期望是:对形状为 (4, 5, 20, 64) 的张量,沿第 2 个轴(即长度为 20 的那一维)应用注意力。我正尝试使用以下代码应用自注意力(该代码可以复现问题):
import numpy as np
import tensorflow as tf
from keras import layers as tfl
class Encoder(tfl.Layer):
    """Embed integer token ids, then self-attend along axis 2 of the result.

    NOTE: this is the reproduction case. For a (4, 5, 20) input the call to
    the attention layer fails with
    ``Incompatible shapes: [4,5,2,20,20] vs. [4,5,1,5,20]``: the attention
    scores have shape (4, 5, 2, 20, 20) while the mask handed to the softmax
    has shape (4, 5, 1, 5, 20). The failure goes away when the embedding is
    created with ``mask_zero=False``.
    """

    def __init__(self):
        super().__init__()
        # mask_zero=True marks token id 0 as padding, so the embedding output
        # carries a mask that downstream layers pick up.
        self.embed_layer = tfl.Embedding(4500, 64, mask_zero=True)
        self.attn_layer = tfl.MultiHeadAttention(
            num_heads=2,
            attention_axes=2,
            key_dim=16,
        )

    def call(self, x):
        """Run embedding followed by self-attention.

        Args:
            x: integer token ids; the example input has shape (4, 5, 20)
                with batch size 4.

        Returns:
            The attention output; expected shape (4, 5, 20, 64).
        """
        x = self.embed_layer(x)  # Output: (4, 5, 20, 64)
        # Expected output: (4, 5, 20, 64) -- this is the call that raises.
        x = self.attn_layer(query=x, key=x, value=x)
        return x
# Random token ids in [0, 150) with shape (4, 5, 20); id 0 can occur and is
# treated as padding by an embedding created with mask_zero=True.
token_ids = np.random.randint(0, 150, (4, 5, 20))
eg_input = tf.constant(token_ids)

enc = Encoder()
enc(eg_input)
但是,上面定义的层抛出以下错误。有人能解释一下为什么会发生这种情况吗?如何解决这个问题?
{{function_node __wrapped__AddV2_device_/job:localhost/replica:0/task:0/device:CPU:0}} Incompatible shapes: [4,5,2,20,20] vs. [4,5,1,5,20] [Op:AddV2]
Call arguments received by layer 'softmax_2' (type Softmax):
• inputs=tf.Tensor(shape=(4, 5, 2, 20, 20), dtype=float32)
• mask=tf.Tensor(shape=(4, 5, 1, 5, 20), dtype=bool)
PS:如果在定义嵌入层时设置 `mask_zero=False`,代码可以正常运行,没有任何问题。
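解决办法:在嵌入层之后,先用 `tf.concat(x, axis=0)` 处理其输出,再送入注意力层。对单个张量而言,这一步不会改变取值和形状;它之所以有效,应该是因为得到的新张量不再携带嵌入层自动生成的掩码(注意:这也意味着填充位置不会再被注意力屏蔽)。完整代码如下: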
import numpy as np
import tensorflow as tf
from keras import layers as tfl
class Encoder(tfl.Layer):
    """Embed integer token ids, then self-attend along axis 2 of the result.

    Workaround variant: the embedded tensor is passed through
    ``tf.concat(x, axis=0)`` before attention, which avoids the
    "Incompatible shapes" error seen with ``mask_zero=True``.
    """

    def __init__(self):
        super().__init__()
        # mask_zero=True marks token id 0 as padding.
        self.embed_layer = tfl.Embedding(4500, 64, mask_zero=True)
        self.attn_layer = tfl.MultiHeadAttention(
            num_heads=2,
            key_dim=16,
            attention_axes=2,
        )

    def call(self, x):
        """Run embedding followed by self-attention.

        Args:
            x: integer token ids; the example input has shape (4, 5, 20).

        Returns:
            A tuple ``(output, attention_scores)``. For the example input the
            shapes are (4, 5, 20, 64) and (4, 5, 2, 20, 20) respectively.
        """
        x = self.embed_layer(x)  # Output: (4, 5, 20, 64)
        # Concatenating a single tensor leaves values and shape unchanged.
        # NOTE(review): presumably this works because the new tensor no longer
        # carries the mask attached by the embedding layer -- confirm. If so,
        # padded positions are NOT masked out in the attention below.
        x = tf.concat(x, axis=0)
        # Outputs: (4, 5, 20, 64) and scores (4, 5, 2, 20, 20)
        x, attention_scores = self.attn_layer(
            query=x,
            key=x,
            value=x,
            return_attention_scores=True,
        )
        return x, attention_scores
# Random token ids in [0, 150) with shape (4, 5, 20).
token_ids = np.random.randint(0, 150, (4, 5, 20))
eg_input = tf.constant(token_ids)

enc = Encoder()
# The layer returns (attention output, attention scores), in that order, so
# `scores` holds the attention output and `attentions` the attention scores.
scores, attentions = enc(eg_input)
scores.shape, attentions.shape
# (TensorShape([4, 5, 20, 64]), TensorShape([4, 5, 2, 20, 20]))