我不久前才开始学习 TensorFlow。我正在研究 seq2seq 模型,并且设法让教程跑通了,但我不知道如何获取每个句子的状态。
据我了解,seq2seq 模型采用输入序列并通过 RNN 为序列生成隐藏状态。稍后,模型使用序列的隐藏状态生成新的数据序列。
我的问题是,如果我想直接使用输入序列的隐藏状态,该怎么办? 例如,如果我有一个经过训练的模型,我应该如何获得输入序列 [token1, token2,....,token N] 的最终隐藏状态?
我已经被这个问题卡住 2 天了,尝试了许多不同的方法,但没有一种有效。
在 seq2seq 模型中,编码器始终是一个 RNN,它是通过调用 rnn.rnn 构建的。
rnn.rnn 的调用会返回输出和状态,因此要获取状态,可以这样写:
_, encoder_state = rnn.rnn(encoder_cell, encoder_inputs, dtype=dtype)
seq2seq 模块中也是以同样的方式实现的,参见 https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py#L103。
好吧,我想我的问题在于我并不真正了解如何按 TensorFlow 的风格写代码,所以我用了比较粗暴的办法直接修改源码来解决。
(行首的 * 标记被修改的行,它本身不是代码的一部分)
在 python/ops/seq2seq 中,修改 model_with_buckets()
outputs = []
*states = []
with ops.op_scope(all_inputs, name, "model_with_buckets"):
for j in xrange(len(buckets)):
if j > 0:
vs.get_variable_scope().reuse_variables()
bucket_encoder_inputs = [encoder_inputs[i]
for i in xrange(buckets[j][0])]
bucket_decoder_inputs = [decoder_inputs[i]
for i in xrange(buckets[j][1])]
*bucket_outputs, _ ,bucket_states= seq2seq(bucket_encoder_inputs,
bucket_decoder_inputs)
outputs.append(bucket_outputs)
states.append(bucket_states)
bucket_targets = [targets[i] for i in xrange(buckets[j][1])]
bucket_weights = [weights[i] for i in xrange(buckets[j][1])]
losses.append(sequence_loss(
outputs[-1], bucket_targets, bucket_weights, num_decoder_symbols,
softmax_loss_function=softmax_loss_function))
return outputs, losses,*states
在 python/ops/seq2seq 中,修改 embedding_attention_seq2seq()
if isinstance(feed_previous, bool):
* outputs, states = embedding_attention_decoder(
decoder_inputs, encoder_states[-1], attention_states, cell,
num_decoder_symbols, num_heads, output_size, output_projection,
feed_previous)
* return outputs, states, tf.constant(encoder_states[-1])
else: # If feed_previous is a Tensor, we construct 2 graphs and use cond.
outputs1, states1 = embedding_attention_decoder(
decoder_inputs, encoder_states[-1], attention_states, cell,
num_decoder_symbols, num_heads, output_size, output_projection, True)
vs.get_variable_scope().reuse_variables()
outputs2, states2 = embedding_attention_decoder(
decoder_inputs, encoder_states[-1], attention_states, cell,
num_decoder_symbols, num_heads, output_size, output_projection, False)
outputs = control_flow_ops.cond(feed_previous,
lambda: outputs1, lambda: outputs2)
states = control_flow_ops.cond(feed_previous,
lambda: states1, lambda: states2)
*return outputs, states, tf.constant(encoder_states[-1])
在 model/rnn/translate/seq2seq_model.py 中,修改 __init__()
if forward_only:
* self.outputs, self.losses, self.states = seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets, self.target_vocab_size,
lambda x, y: seq2seq_f(x, y, True),
softmax_loss_function=softmax_loss_function)
# If we use output projection, we need to project outputs for decoding.
if output_projection is not None:
for b in xrange(len(buckets)):
self.outputs[b] = [tf.nn.xw_plus_b(output, output_projection[0],
output_projection[1])
for output in self.outputs[b]]
else:
* self.outputs, self.losses,_ = seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets, self.target_vocab_size,
lambda x, y: seq2seq_f(x, y, False),
softmax_loss_function=softmax_loss_function)
在 model/rnn/translate/seq2seq_model.py 中,修改 step()
if not forward_only:
return outputs[1], outputs[2], None # Gradient norm, loss, no outputs.
else:
*return None, outputs[0], outputs[1:-1], outputs[-1]
完成上述所有修改后,可以通过如下调用获取编码器状态:
_, _, _,states = model.step(all_other_arguements, forward_only = True)
Bearsteak 上面的答案很棒,但它基于 TensorFlow 0.6,已经过时了。所以我针对 TensorFlow 0.8 更新了他的答案,这一做法与最新版本中的做法也很相似。
(行首的 * 标记被修改的行,它本身不是代码的一部分)
在 python/ops/seq2seq 中,修改 model_with_buckets()
losses = []
outputs = []
*states = []
with ops.op_scope(all_inputs, name, "model_with_buckets"):
for j, bucket in enumerate(buckets):
with variable_scope.variable_scope(variable_scope.get_variable_scope(),
reuse=True if j > 0 else None):
*bucket_outputs, _ ,bucket_states= seq2seq(encoder_inputs[:bucket[0]],
decoder_inputs[:bucket[1]])
outputs.append(bucket_outputs)
if per_example_loss:
losses.append(sequence_loss_by_example(
outputs[-1], targets[:bucket[1]], weights[:bucket[1]],
softmax_loss_function=softmax_loss_function))
else:
losses.append(sequence_loss(
outputs[-1], targets[:bucket[1]], weights[:bucket[1]],
softmax_loss_function=softmax_loss_function))
return outputs, losses, *states
在 python/ops/seq2seq 中,修改 embedding_attention_seq2seq()
if isinstance(feed_previous, bool):
*outputs, states = embedding_attention_decoder(
decoder_inputs, encoder_state, attention_states, cell,
num_decoder_symbols, embedding_size, num_heads=num_heads,
output_size=output_size, output_projection=output_projection,
feed_previous=feed_previous,
initial_state_attention=initial_state_attention)
*return outputs, states, encoder_state
# If feed_previous is a Tensor, we construct 2 graphs and use cond.
def decoder(feed_previous_bool):
reuse = None if feed_previous_bool else True
with variable_scope.variable_scope(variable_scope.get_variable_scope(),reuse=reuse):
outputs, state = embedding_attention_decoder(
decoder_inputs, encoder_state, attention_states, cell,
num_decoder_symbols, embedding_size, num_heads=num_heads,
output_size=output_size, output_projection=output_projection,
feed_previous=feed_previous_bool,
update_embedding_for_previous=False,
initial_state_attention=initial_state_attention)
return outputs + [state]
outputs_and_state = control_flow_ops.cond(feed_previous, lambda: decoder(True), lambda: decoder(False))
*return outputs_and_state[:-1], outputs_and_state[-1], encoder_state
在 model/rnn/translate/seq2seq_model.py 中,修改 __init__()
if forward_only:
*self.outputs, self.losses, self.states= tf.nn.seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
softmax_loss_function=softmax_loss_function)
# If we use output projection, we need to project outputs for decoding.
if output_projection is not None:
for b in xrange(len(buckets)):
self.outputs[b] = [
tf.matmul(output, output_projection[0]) + output_projection[1]
for output in self.outputs[b]
]
else:
*self.outputs, self.losses, _ = tf.nn.seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets,
lambda x, y: seq2seq_f(x, y, False),
softmax_loss_function=softmax_loss_function)
在 model/rnn/translate/seq2seq_model.py 中,修改 step()
if not forward_only:
return outputs[1], outputs[2], None # Gradient norm, loss, no outputs.
else:
*return None, outputs[0], outputs[1:], outputs[-1] # No gradient norm, loss, outputs.
完成上述所有修改后,可以通过如下调用获取编码器状态:
_, _, output_logits, states = model.step(sess, encoder_inputs, decoder_inputs,
target_weights, bucket_id, True)
print (states)
(以上调用写在 translate.py 中。)