Ragged tensors of non-text data as input to an LSTM



I am learning about ragged tensors and their application in particle tracking. The minimal example below reproduces the error I keep running into.

import tensorflow as tf
from tensorflow.keras.layers import Dense, LSTM, Input, TimeDistributed
from tensorflow.keras.models import Sequential 
n = 10
data_n = 32
batch_size = 8
window_length = 8
splits = [n] * data_n
#### Create a ragged tensor with shape (32, None, 8)
t0 = tf.zeros([data_n * n, window_length])        
t1 = tf.RaggedTensor.from_row_lengths(t0, splits) 
max_seq = t1.bounding_shape()[-1]
#### Define Model
def create_model(batch_size, window_length, max_seq):
    lstm_model = Sequential([
        Input(
            batch_shape=[batch_size, None, window_length],
            batch_size=batch_size,
            dtype=tf.float32,
            ragged=True
        ),
        LSTM(
            max_seq,
            return_sequences=True,
            input_shape=(window_length, None)
        ),
        TimeDistributed(Dense(units=1))
    ])
    return lstm_model
lstm_model = create_model(batch_size=batch_size, window_length=window_length, max_seq=max_seq)
lstm_model(t1[0:8])

When I execute the code above, I get the following error:

---------------------------------------------------------------------------
_FallbackException                        Traceback (most recent call last)
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py in cudnn_rnnv3(input, input_h, input_c, params, sequence_lengths, rnn_mode, input_mode, direction, dropout, seed, seed2, num_proj, is_training, time_major, name)
1889     try:
-> 1890       _result = pywrap_tfe.TFE_Py_FastPathExecute(
1891         _ctx._context_handle, tld.device_name, "CudnnRNNV3", name,
_FallbackException: Expecting float value for attr dropout, got int
During handling of the above exception, another exception occurred:
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-19-7609e2877e20> in <module>
1 lstm_model = create_model(batch_size=batch_size, window_length=window_length, max_seq=max_seq)
----> 2 lstm_model(t1[0:8])
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
966           with base_layer_utils.autocast_context_manager(
967               self._compute_dtype):
--> 968             outputs = self.call(cast_inputs, *args, **kwargs)
969           self._handle_activity_regularization(inputs, outputs)
970           self._set_mask_metadata(inputs, outputs, input_masks)
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/sequential.py in call(self, inputs, training, mask)
275       if not self.built:
276         self._init_graph_network(self.inputs, self.outputs, name=self.name)
--> 277       return super(Sequential, self).call(inputs, training=training, mask=mask)
278 
279     outputs = inputs  # handle the corner case where self.layers is empty
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py in call(self, inputs, training, mask)
715                                 ' implement a `call` method.')
716 
--> 717     return self._run_internal_graph(
718         inputs, training=training, mask=mask,
719         convert_kwargs_to_constants=base_layer_utils.call_context().saving)
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/network.py in _run_internal_graph(self, inputs, training, mask, convert_kwargs_to_constants)
886 
887           # Compute outputs.
--> 888           output_tensors = layer(computed_tensors, **kwargs)
889 
890           # Update tensor_dict.
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/layers/recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
652 
653     if initial_state is None and constants is None:
--> 654       return super(RNN, self).__call__(inputs, **kwargs)
655 
656     # If any of `initial_state` or `constants` are specified and are Keras
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
966           with base_layer_utils.autocast_context_manager(
967               self._compute_dtype):
--> 968             outputs = self.call(cast_inputs, *args, **kwargs)
969           self._handle_activity_regularization(inputs, outputs)
970           self._set_mask_metadata(inputs, outputs, input_masks)
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/layers/recurrent_v2.py in call(self, inputs, mask, training, initial_state)
1178         # GPU implementation when GPU is available.
1179         if can_use_gpu:
-> 1180           last_output, outputs, new_h, new_c, runtime = gpu_lstm(
1181               **gpu_lstm_kwargs)
1182         else:
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/layers/recurrent_v2.py in gpu_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask, time_major, go_backwards, sequence_lengths)
1404       inputs = array_ops.reverse_sequence_v2(
1405           inputs, sequence_lengths, seq_axis=seq_axis, batch_axis=batch_axis)
-> 1406     outputs, h, c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(
1407         inputs,
1408         input_h=init_h,
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py in cudnn_rnnv3(input, input_h, input_c, params, sequence_lengths, rnn_mode, input_mode, direction, dropout, seed, seed2, num_proj, is_training, time_major, name)
1899     except _core._FallbackException:
1900       try:
-> 1901         return cudnn_rnnv3_eager_fallback(
1902             input, input_h, input_c, params, sequence_lengths,
1903             rnn_mode=rnn_mode, input_mode=input_mode, direction=direction,
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py in cudnn_rnnv3_eager_fallback(input, input_h, input_c, params, sequence_lengths, rnn_mode, input_mode, direction, dropout, seed, seed2, num_proj, is_training, time_major, name, ctx)
1999   "direction", direction, "dropout", dropout, "seed", seed, "seed2", seed2,
2000   "num_proj", num_proj, "is_training", is_training, "time_major", time_major)
-> 2001   _result = _execute.execute(b"CudnnRNNV3", 5, inputs=_inputs_flat,
2002                              attrs=_attrs, ctx=ctx, name=name)
2003   if _execute.must_record_gradient():
~/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
57   try:
58     ctx.ensure_initialized()
---> 59     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
60                                         inputs, attrs, num_outputs)
61   except core._NotOkStatusException as e:
InvalidArgumentError: Invalid input_h shape: [1,8,8] [10,8,8] [Op:CudnnRNNV3]

The 10 refers to the number of units in the LSTM layer, which equals the bounding shape of t1. The two 8s are batch_size and window_length. I thought the 1 referred to the output shape, but that is not the case: when I add more units to the Dense layer, the number does not change.
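To make the numbers concrete, the shapes involved can be inspected directly from the variables above:

#### Inspect the shapes involved in the mismatch
print(t1.shape)             # (32, None, 8): 32 sequences, ragged length, window of 8
print(t1.bounding_shape())  # [32 10 8]: the smallest dense shape that contains t1
print(t1[0:8].shape)        # (8, None, 8): the batch passed to the model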

When using a tf.RaggedTensor with variable-size sequences, you need to set batch_size = 1 and make sure that the sequence_length passed to the LSTM is None.

This is because, even though tf.RaggedTensor is a good way to store variable-size sequences as numpy arrays, an LSTM still expects every sequence within a single batch to have the same length. Across batches, however, the sequence length may vary.

Making these changes should resolve the problem you are facing.
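A minimal sketch of that change, reusing the layers from the question but with batch_size = 1 in the Input's batch_shape and no fixed sequence length on the LSTM (create_model_v2 is just an illustrative name):

def create_model_v2(window_length, max_seq):
    lstm_model = Sequential([
        Input(
            batch_shape=[1, None, window_length],  # one variable-length sequence per batch
            dtype=tf.float32,
            ragged=True
        ),
        LSTM(max_seq, return_sequences=True),  # no input_shape; sequence length stays None
        TimeDistributed(Dense(units=1))
    ])
    return lstm_model

lstm_model = create_model_v2(window_length=window_length, max_seq=max_seq)
lstm_model(t1[0:1])  # feed one sequence at a time

With one sequence per batch, each batch is internally rectangular, so the same-length restriction within a batch is trivially satisfied.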

This is an error I ran into as well. It is specific to the combination of:

- using the GPU
- an RNN/LSTM fed with a RaggedTensor
- the TensorFlow implementation of Keras

The source of the error is code that compares the shape of the hidden state passed into the RNN cell (input_h) with the shape derived from the ragged input.

The ragged dimension shows up as the 1 in the error message. I was able to reproduce the same error with your code on TensorFlow 2.2.0.

I was able to get the code to work by removing the ragged tensor, or by running it on the CPU or on a different TensorFlow version.
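For example, pinning execution to the CPU makes Keras select the generic (non-cuDNN) LSTM kernel; this is a workaround sketch, not a fix for the underlying bug:

#### Workaround: avoid the CudnnRNNV3 code path by running on the CPU
with tf.device('/CPU:0'):
    lstm_model = create_model(batch_size=batch_size, window_length=window_length, max_seq=max_seq)
    outputs = lstm_model(t1[0:8])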
