PyTorch: DecoderRNN: RuntimeError: input must have 3 dimensions, got 2（运行时错误：输入必须有 3 个维度，实际得到 2 个）

我正在使用PyTorch构建一个DecoderRNN(这是一个图像标题解码器)：

class DecoderRNN(nn.Module):
    """GRU-based image-caption decoder, reproduced as posted.

    Only the indentation (lost when the snippet was pasted) has been
    restored; every statement is unchanged, so ``forward`` still fails in
    the way the accompanying traceback shows. Known problems are marked
    with ``NOTE(review)`` comments rather than silently fixed.
    """

    def __init__(self, embed_size, hidden_size, vocab_size):
        """Create the GRU and the log-softmax layer.

        Args:
            embed_size: Passed to ``nn.GRU`` as its ``input_size``.
            hidden_size: Passed to ``nn.GRU`` as its ``hidden_size`` and
                stored on the instance.
            vocab_size: Accepted but never used in this snippet.
        """
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        # NOTE(review): the third positional parameter of nn.GRU is
        # ``num_layers``, so this stacks ``hidden_size`` GRU layers --
        # presumably not what was intended.
        self.gru = nn.GRU(embed_size, hidden_size, hidden_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, features, captions):
        """Run the GRU on ``features`` using ``captions`` as initial hidden state.

        Args:
            features: Encoder output; reported shape is ``[10, 200]``
                (batch, feature size).
            captions: Caption token ids; reported shape is ``[10, 12]``
                (batch, caption length).

        Returns:
            A ``(output, hidden)`` tuple -- never reached with the reported
            inputs, see the notes below.
        """
        # Debug output of the incoming shapes.
        print (features.shape)
        print (captions.shape)
        # NOTE(review): this is the failing call. ``features`` is 2-D, while
        # the GRU expects a 3-D ``(seq_len, batch, input_size)`` input; the
        # PyTorch version in the traceback raises
        # "input must have 3 dimensions, got 2" here. ``captions`` is also
        # being passed in the ``h_0`` (initial hidden state) position.
        output, hidden = self.gru(features, captions)
        # NOTE(review): ``self.out`` is never defined in ``__init__``, so this
        # line would raise AttributeError once the call above succeeds.
        output = self.softmax(self.out(output[0]))
        return output, hidden

数据具有以下形状：

torch.Size([10, 200])  <- features.shape (10 for batch size)
torch.Size([10, 12])   <- captions.shape (10 for batch size)

然后我得到了下面的错误。有人知道我遗漏了什么吗？谢谢！

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-2-76e05ba08b1d> in <module>()
44         # Pass the inputs through the CNN-RNN model.
45         features = encoder(images)
---> 46         outputs = decoder(features, captions)
47 
48         # Calculate the batch loss.
/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
323         for hook in self._forward_pre_hooks.values():
324             hook(self, input)
--> 325         result = self.forward(*input, **kwargs)
326         for hook in self._forward_hooks.values():
327             hook_result = hook(self, input, result)
/home/workspace/model.py in forward(self, features, captions)
37         print (captions.shape)
38         # features = features.unsqueeze(1)
---> 39         output, hidden = self.gru(features, captions)
40         output = self.softmax(self.out(output[0]))
41         return output, hidden
/opt/conda/lib/python3.6/site-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
323         for hook in self._forward_pre_hooks.values():
324             hook(self, input)
--> 325         result = self.forward(*input, **kwargs)
326         for hook in self._forward_hooks.values():
327             hook_result = hook(self, input, result)
/opt/conda/lib/python3.6/site-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
167             flat_weight=flat_weight
168         )
--> 169         output, hidden = func(input, self.all_weights, hx)
170         if is_packed:
171             output = PackedSequence(output, batch_sizes)
/opt/conda/lib/python3.6/site-packages/torch/nn/_functions/rnn.py in forward(input, *fargs, **fkwargs)
383             return hack_onnx_rnn((input,) + fargs, output, args, kwargs)
384         else:
--> 385             return func(input, *fargs, **fkwargs)
386 
387     return forward
/opt/conda/lib/python3.6/site-packages/torch/autograd/function.py in _do_forward(self, *input)
326         self._nested_input = input
327         flat_input = tuple(_iter_variables(input))
--> 328         flat_output = super(NestedIOFunction, self)._do_forward(*flat_input)
329         nested_output = self._nested_output
330         nested_variables = _unflatten(flat_output, self._nested_output)
/opt/conda/lib/python3.6/site-packages/torch/autograd/function.py in forward(self, *args)
348     def forward(self, *args):
349         nested_tensors = _map_variable_tensor(self._nested_input)
--> 350         result = self.forward_extended(*nested_tensors)
351         del self._nested_input
352         self._nested_output = result
/opt/conda/lib/python3.6/site-packages/torch/nn/_functions/rnn.py in forward_extended(self, input, weight, hx)
292             hy = tuple(h.new() for h in hx)
293 
--> 294         cudnn.rnn.forward(self, input, hx, weight, output, hy)
295 
296         self.save_for_backward(input, hx, weight, output)
/opt/conda/lib/python3.6/site-packages/torch/backends/cudnn/rnn.py in forward(fn, input, hx, weight, output, hy)
206         if (not is_input_packed and input.dim() != 3) or (is_input_packed and input.dim() != 2):
207             raise RuntimeError(
--> 208                 'input must have 3 dimensions, got {}'.format(input.dim()))
209         if fn.input_size != input.size(-1):
210             raise RuntimeError('input.size(-1) must be equal to input_size. Expected {}, got {}'.format(
RuntimeError: input must have 3 dimensions, got 2

您的 GRU 输入必须是三维的：

input，形状为 (seq_len, batch, input_size)：包含输入序列特征的张量。

此外，您需要提供隐藏状态(在本例中为最后一个编码器隐藏状态)作为第二个参数：

self.gru(input, h_0)

其中 input 是您的实际输入，h_0 是初始隐藏状态，它同样必须是三维的：

h_0，形状为 (num_layers * num_directions, batch, hidden_size)：包含批次中每个元素初始隐藏状态的张量。如果未提供，则默认为零。

https://pytorch.org/docs/master/nn.html#torch.nn.GRU

@MBT 的答案是正确的，但我想也许你最初想用的是 GRUCell 而不是 GRU（GRUCell 每次只处理一个时间步的输入，而 GRU 一次处理整个时间序列）。

相关内容

最新更新

热门标签：