...
print('Build model...')
model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(size, return_sequences=True, dropout_W=0.2, dropout_U=0.2))
model.add(GlobalAveragePooling1D())
model.add(Dense(1))
model.add(Activation('sigmoid'))
....
I need to be able to take the mean or max over all timesteps of a sample after the LSTM layer, and then feed that mean or max vector to a Dense layer in Keras. I think TimeDistributedMerge could do this, but it was deprecated. With return_sequences=True I can get the vectors for all timesteps of a sample after the LSTM layer. However, GlobalAveragePooling1D() is not compatible with masking: it considers all timesteps, whereas I only want the non-masked ones. I have seen posts recommending a Lambda layer, but those do not take the mask into account either.
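For context, here is a minimal sketch of the intended setup (my reconstruction, not the poster's full code; the vocabulary size and layer widths are illustrative): the mask has to originate somewhere, typically from Embedding(mask_zero=True), and return_sequences=True keeps one vector per timestep, leaving a hole where the mask-aware pooling should go.

from keras.models import Sequential
from keras.layers import Embedding, LSTM

max_features = 20000  # assumed vocabulary size (illustrative)

model = Sequential()
# mask_zero=True: timesteps equal to 0 are treated as padding, and the
# mask propagates through layers that support masking (e.g. LSTM)
model.add(Embedding(max_features, 128, mask_zero=True))
# one output vector per timestep, so a pooling step is still needed
model.add(LSTM(64, return_sequences=True))
# a mask-aware mean over the time axis should go here; the stock
# GlobalAveragePooling1D would also average the masked (padded) steps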
Jacoxu's answer is right. But if you are using the TensorFlow backend for Keras, the Tensor type does not support the dimshuffle function; try the following instead.
import tensorflow as tf
from keras import backend as K

# drop-in replacement for the call() method of the MeanPool layer below
def call(self, x, mask=None):
    if mask is not None:
        # mask: (batch, time)
        mask = K.cast(mask, K.floatx())
        # mask: (batch, x_dim, time)
        mask = K.repeat(mask, x.shape[-1])
        # mask: (batch, time, x_dim)
        mask = tf.transpose(mask, [0, 2, 1])
        # zero out the masked timesteps before summing
        x = x * mask
        return K.sum(x, axis=1) / K.sum(mask, axis=1)
    return K.mean(x, axis=1)  # no mask: plain mean over time
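As a side note (my suggestion, not part of the answers): K.expand_dims exists in both the Theano and TensorFlow backends, so the same broadcast can be written without repeat/transpose or dimshuffle.

from keras import backend as K

def call(self, x, mask=None):
    if mask is not None:
        mask = K.cast(mask, K.floatx())  # (batch, time)
        mask = K.expand_dims(mask, -1)   # (batch, time, 1); broadcasts over features
        x = x * mask                     # zero out masked timesteps
        return K.sum(x, axis=1) / K.sum(mask, axis=1)
    return K.mean(x, axis=1)             # no mask: plain mean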
Since average pooling is only computing a mean over one axis, you just need to correct the number of elements in the denominator; loss masking is handled at the end, not here. You can do that with something like this:
from keras import backend as K
from keras.layers import GlobalAveragePooling1D

class GlobalAveragePooling1DMasked(GlobalAveragePooling1D):
    supports_masking = True  # allow the mask to reach call()

    def call(self, x, mask=None):
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            # divide by the count of unmasked timesteps only
            return K.sum(x, axis=1) / K.expand_dims(K.sum(mask, axis=1), -1)
        return super().call(x)
To make the masked values in x equal to zero as well, you can do this:
from keras import backend as K
from keras.engine.topology import Layer

class MeanPool(Layer):
    def __init__(self, **kwargs):
        self.supports_masking = True
        super(MeanPool, self).__init__(**kwargs)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        if mask is not None:
            # mask: (batch, time)
            mask = K.cast(mask, K.floatx())
            # mask: (batch, time, 'x'); dimshuffle is Theano-only --
            # see the TensorFlow variant earlier in the thread
            mask = mask.dimshuffle(0, 1, 'x')
            # make the masked values in x equal to zero
            x = x * mask
            return K.sum(x, axis=1) / K.sum(mask, axis=1)
        return K.mean(x, axis=1)  # no mask: plain mean

    def get_output_shape_for(self, input_shape):
        # remove the temporal dimension (Keras 1 API name)
        return input_shape[0], input_shape[2]
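A quick NumPy sanity check of the masked-mean arithmetic these layers implement (my addition, not from the original answer; backend-independent):

import numpy as np

x = np.array([[[1., 2.], [3., 4.], [0., 0.]]])  # (batch=1, time=3, dim=2); last step is padding
mask = np.array([[1., 1., 0.]])                 # (batch=1, time=3)

x = x * mask[:, :, None]                        # zero out the padded timestep
mean = x.sum(axis=1) / mask.sum(axis=1, keepdims=True)
print(mean)  # [[2. 3.]] -- averaged over the 2 real timesteps, not 3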
This is how I did it on Keras 2 (borrowing from all the answers, and fixing the dimensions):
import tensorflow as tf
from keras import backend as K
from keras.engine.topology import Layer

class MeanPool(Layer):
    def __init__(self, **kwargs):
        self.supports_masking = True
        super(MeanPool, self).__init__(**kwargs)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        if mask is not None:
            # mask: (batch, time)
            mask = K.cast(mask, K.floatx())
            # mask: (batch, x_dim, time)
            mask = K.repeat(mask, x.shape[-1])
            # mask: (batch, time, x_dim)
            mask = tf.transpose(mask, [0, 2, 1])
            # zero out the masked timesteps, then average over the rest
            x = x * mask
            return K.sum(x, axis=1) / K.sum(mask, axis=1)
        return K.mean(x, axis=1)  # no mask: plain mean over time

    def compute_output_shape(self, input_shape):
        # remove the temporal dimension
        return (input_shape[0], input_shape[2])
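To close the loop with the question's model, a sketch of how this Keras 2 layer slots in (my wiring, assuming mask_zero=True on the Embedding supplies the mask; the hyperparameter values are illustrative):

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

max_features = 20000  # assumed vocabulary size

model = Sequential()
model.add(Embedding(max_features, 128, mask_zero=True))
model.add(LSTM(64, return_sequences=True, dropout=0.2, recurrent_dropout=0.2))
model.add(MeanPool())  # masked mean over time; compute_mask stops the mask here
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')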