如何解决"ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef"



I am working through the TensorFlow Federated tutorial custom_federated_algorithms_2. When I simply copy and run the tutorial code, everything works fine. So I tried modifying the code myself to get more familiar with TFF, and that's when the bug appeared.

My runtime environment:

python: 3.8.12

tensorflow: 2.5.0

tensorflow_federated: 0.19.0

The code below is the original model definition from the tutorial:

MODEL_SPEC = collections.OrderedDict(
    weights=tf.TensorSpec(shape=[784, 10], dtype=tf.float32),
    bias=tf.TensorSpec(shape=[10], dtype=tf.float32))
MODEL_TYPE = tff.to_type(MODEL_SPEC)
print(MODEL_TYPE)  # <weights=float32[784,10],bias=float32[10]>

BATCH_SPEC = collections.OrderedDict(
    x=tf.TensorSpec(shape=[None, 784], dtype=tf.float32),
    y=tf.TensorSpec(shape=[None], dtype=tf.int32))
BATCH_TYPE = tff.to_type(BATCH_SPEC)
print(BATCH_TYPE)  # <x=float32[?,784],y=int32[?]>

I changed MODEL_TYPE to:

MODEL_SPEC = collections.OrderedDict(
    fc1=tf.TensorSpec(shape=[784, 256], dtype=tf.float32),
    b1=tf.TensorSpec(shape=[256], dtype=tf.float32),
    fc2=tf.TensorSpec(shape=[256, 128], dtype=tf.float32),
    b2=tf.TensorSpec(shape=[128], dtype=tf.float32),
    fc3=tf.TensorSpec(shape=[128, 10], dtype=tf.float32),
    b3=tf.TensorSpec(shape=[10], dtype=tf.float32))
MODEL_TYPE = tff.to_type(MODEL_SPEC)
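
For reference, printing the new type should show the six-element struct, following the same formatting convention as the prints above:

print(MODEL_TYPE)  # <fc1=float32[784,256],b1=float32[256],fc2=float32[256,128],b2=float32[128],fc3=float32[128,10],b3=float32[10]>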

Because the model structure changed, the forward pass had to change as well:

# original
@tf.function
def forward_pass(model, batch):
  predicted_y = tf.nn.softmax(
      tf.matmul(batch['x'], model['weights']) + model['bias'])
  return -tf.reduce_mean(
      tf.reduce_sum(
          tf.one_hot(batch['y'], 10) * tf.math.log(predicted_y), axis=[1]))

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
  return forward_pass(model, batch)

# new
@tf.function
def forward(model, batch):
  logits = batch['x'] @ model['fc1'] + model['b1']
  logits = logits @ model['fc2'] + model['b2']
  logits = logits @ model['fc3'] + model['b3']
  probs = tf.nn.softmax(logits, axis=-1)

  one_hot_y = tf.one_hot(batch['y'], depth=10)
  return -tf.reduce_mean(
      tf.reduce_sum(tf.math.log(probs) * one_hot_y, axis=[1]))

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE)
def batch_loss(model, batch):
  return forward(model, batch)
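
To confirm that the new forward pass traces and evaluates, a quick eager sanity check helps. This is only a sketch: sample_batch and zero_model are hypothetical names, and the all-zeros inputs are assumptions for illustration. With an all-zero model the softmax is uniform over 10 classes, so the loss should come out to -log(1/10) ≈ 2.3026.

import numpy as np

# Hypothetical all-zeros batch matching BATCH_TYPE (32 examples).
sample_batch = collections.OrderedDict(
    x=np.zeros([32, 784], dtype=np.float32),
    y=np.zeros([32], dtype=np.int32))

# Hypothetical all-zeros model matching the new MODEL_TYPE.
zero_model = collections.OrderedDict(
    fc1=tf.zeros([784, 256]), b1=tf.zeros([256]),
    fc2=tf.zeros([256, 128]), b2=tf.zeros([128]),
    fc3=tf.zeros([128, 10]), b3=tf.zeros([10]))

print(batch_loss(zero_model, sample_batch))  # ≈ 2.3026, i.e. -log(1/10)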

I didn't change the batch_train() code, other than having the loss come from the new forward() (the original forward_pass() expects the old weights/bias keys):

@tff.tf_computation(MODEL_TYPE, BATCH_TYPE, tf.float32)
def batch_train(initial_model, batch, learning_rate):
  # Define a group of model variables and set them to `initial_model`. Must
  # be defined outside the @tf.function.
  model_vars = collections.OrderedDict([
      (name, tf.Variable(name=name, initial_value=value))
      for name, value in initial_model.items()
  ])
  optimizer = tf.keras.optimizers.SGD(learning_rate)

  @tf.function
  def _train_on_batch(model_vars, batch):
    # Perform one step of gradient descent using the loss from the new
    # `forward` (with the new MODEL_TYPE, the original `forward_pass`
    # would fail on the missing `weights`/`bias` keys).
    with tf.GradientTape() as tape:
      loss = forward(model_vars, batch)
    grads = tape.gradient(loss, model_vars)
    optimizer.apply_gradients(
        zip(tf.nest.flatten(grads), tf.nest.flatten(model_vars)))
    return model_vars

  return _train_on_batch(model_vars, batch)
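
As a usage sketch (again reusing the hypothetical zero_model and sample_batch from above), a few repeated gradient steps on the same batch should drive the loss down:

model = zero_model
losses = []
for _ in range(5):
  model = batch_train(model, sample_batch, 0.1)
  losses.append(batch_loss(model, sample_batch))
print(losses)  # the loss should shrink from ~2.3026 step by step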

Up to this point everything worked well. But when I implemented the local_train() part, the error showed up even though I was just using the original code:

initial_model = collections.OrderedDict(
    fc1=tf.zeros([784, 256]),
    b1=tf.zeros([256]),
    fc2=tf.zeros([256, 128]),
    b2=tf.zeros([128]),
    fc3=tf.zeros([128, 10]),
    b3=tf.zeros([10]))

LOCAL_DATA_TYPE = tff.SequenceType(BATCH_TYPE)

@tff.federated_computation(MODEL_TYPE, tf.float32, LOCAL_DATA_TYPE)
def local_train(initial_model, learning_rate, all_batches):

  @tff.tf_computation(LOCAL_DATA_TYPE, tf.float32)
  def _insert_learning_rate_to_sequence(dataset, learning_rate):
    return dataset.map(lambda x: (x, learning_rate))

  batches_with_learning_rate = _insert_learning_rate_to_sequence(
      all_batches, learning_rate)

  # Mapping function to apply to each batch.
  @tff.federated_computation(MODEL_TYPE,
                             batches_with_learning_rate.type_signature.element)
  def batch_fn(model, batch_with_lr):
    batch, lr = batch_with_lr
    return batch_train(model, batch, lr)

  return tff.sequence_reduce(batches_with_learning_rate, initial_model, batch_fn)

locally_trained_model = local_train(initial_model, 1e-1, mnist_train_dataset[5])
# ValueError: Unable to unpack value [] as a tf.compat.v1.GraphDef

One problem I noticed on a quick skim (without sifting through all of the pasted code) is this line:

return batch_train(model, batch, lr)

To invoke a tff.tf_computation in the context of a tff.federated_computation, you need to use the tff.federated_map operator. So it might look like:

return tff.federated_map(batch_train, (model, batch, lr))

Latest update: in the end, it turned out I had made a silly mistake. I was writing this code in my own Jupyter notebook and had forgotten to include the following key snippet from the beginning of the tutorial:

executor_factory = tff.framework.local_executor_factory(
    support_sequence_ops=True)
execution_context = tff.framework.ExecutionContext(
    executor_fn=executor_factory)
tff.framework.set_default_context(execution_context)
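
With this sequence-supporting context installed before the computations are defined, the call that previously raised the ValueError goes through:

locally_trained_model = local_train(initial_model, 1e-1, mnist_train_dataset[5])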
