我正在尝试使用双向RNN并将输出通过CNN进行文本分类。但是,我在双向RNN中遇到了各种形状错误。虽然,如果我在第二层使用两个带有反向操作的动态 rnn,它似乎工作正常:
这是对我不起作用的双向 RNN 代码:
# Bidirectional LSTM layer
with tf.name_scope("bidirectional-lstm"):
lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0)
lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0)
self.lstm_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
lstm_fw_cell,
lstm_bw_cell,
self.embedded_chars,
sequence_length=self.seqlen,
dtype=tf.float32)
self.lstm_outputs = tf.concat(self.lstm_outputs, axis=2)
这是对我有用的两层动态rnn:
# Bidirectional LSTM layer
with tf.name_scope("bidirectional-lstm"):
lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0)
lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0)
with tf.variable_scope("lstm-output-fw"):
self.lstm_outputs_fw, _ = tf.nn.dynamic_rnn(
lstm_fw_cell,
self.embedded_chars,
sequence_length=self.seqlen,
dtype=tf.float32)
with tf.variable_scope("lstm-output-bw"):
self.embedded_chars_rev = array_ops.reverse_sequence(self.embedded_chars, seq_lengths=self.seqlen, seq_dim=1)
tmp, _ = tf.nn.dynamic_rnn(
lstm_bw_cell,
self.embedded_chars_rev,
sequence_length=self.seqlen,
dtype=tf.float32)
self.lstm_outputs_bw = array_ops.reverse_sequence(tmp, seq_lengths=self.seqlen, seq_dim=1)
Concatenate outputs
self.lstm_outputs = tf.add(self.lstm_outputs_fw, self.lstm_outputs_bw, name="lstm_outputs")
我在双向RNN上做错了什么?
我正在将其输出传递给 CNN,并且在计算
以下是代码的其余部分:
# Convolution + maxpool layer for each filter size
pooled_outputs = []
for i, filter_size in enumerate(filter_sizes):
with tf.name_scope("conv-maxpool-%s" % filter_size):
# Convolution Layer
filter_shape = [filter_size, hidden_size, 1, num_filters]
W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
conv = tf.nn.conv2d(
self.lstm_outputs_expanded,
W,
strides=[1, 1, 1, 1],
padding="VALID",
name="conv")
# Apply nonlinearity
h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
# Maxpooling over the outputs
pooled = tf.nn.max_pool(
h,
ksize=[1, sequence_length - filter_size + 1, 1, 1],
strides=[1, 1, 1, 1],
padding='VALID',
name="pool")
pooled_outputs.append(pooled)
# Combine all the pooled features
num_filters_total = num_filters * len(filter_sizes)
self.h_pool = tf.concat(axis=3, values=pooled_outputs)
self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])
# Dropout layer
with tf.name_scope("dropout"):
self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)
# Final (unnormalized) scores and predictions
with tf.name_scope("output"):
# Standard output weights initialization
W = tf.get_variable(
"W",
shape=[num_filters_total, num_classes],
initializer=tf.contrib.layers.xavier_initializer())
b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
# # Initialized output weights to 0.0, might improve accuracy
# W = tf.Variable(tf.constant(0.0, shape=[num_filters_total, num_classes]), name="W")
# b = tf.Variable(tf.constant(0.0, shape=[num_classes]), name="b")
l2_loss += tf.nn.l2_loss(W)
l2_loss += tf.nn.l2_loss(b)
self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
self.predictions = tf.argmax(self.scores, 1, name="predictions")
# Calculate mean cross-entropy loss
with tf.name_scope("loss"):
losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss
# Accuracy
with tf.name_scope("accuracy"):
correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
这是错误消息:
Traceback (most recent call last):
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1323, in _do_call
return fn(*args)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1302, in _run_fn
status, run_metadata)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 473, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: logits and labels must be same size: logits_size=[7550,2] labels_size=[50,2]
[[Node: loss/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](loss/Reshape, loss/Reshape_1)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "train_upgraded.py", line 209, in <module>
train_step(x_batch, seqlen_batch, y_batch)
File "train_upgraded.py", line 177, in train_step
feed_dict)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 889, in run
run_metadata_ptr)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
options, run_metadata)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: logits and labels must be same size: logits_size=[7550,2] labels_size=[50,2]
[[Node: loss/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](loss/Reshape, loss/Reshape_1)]]
Caused by op 'loss/SoftmaxCrossEntropyWithLogits', defined at:
File "train_upgraded.py", line 87, in <module>
l2_reg_lambda=FLAGS.l2_reg_lambda)
File "/media/hemant/MVV/MyValueVest-local/learning/Initial Embeddings/STEP 2 lstm-context-embeddings-master/model_upgraded.py", line 138, in __init__
losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 1783, in softmax_cross_entropy_with_logits
precise_logits, labels, name=name)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 4364, in _softmax_cross_entropy_with_logits
name=name)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[7550,2] labels_size=[50,2]
[[Node: loss/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](loss/Reshape, loss/Reshape_1)]]
我所要做的就是将隐藏大小乘以 2,因为 birrectional RNN 的输出大小是 rnn 的两倍。
filter_shape = [filter_size, hidden_size*2, 1, num_filters]3
问题解决了。