I recently upgraded my rig and added a 1070 Ti next to my 1080 Ti to speed up training. For regular models this works and training is noticeably faster. However, I'm now trying to train a GAN that works fine on a single GPU, and I can't get it to run on the multi-GPU setup. I subclass tf.keras.Model and use a custom train_step along with a few other methods. For the life of me, I can't get this to run without hitting the following error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: 3 root error(s) found.
(0) Invalid argument: Incompatible shapes: [8] vs. [16]
[[node add (defined at Users\<User>\OneDrive\Documenten\HKU\Year 4\PDP_and_SN\Supportive Narrative\Research\Alpha_2\lib\NN.py:120) ]]
[[replica_1/sequential_1/batch_normalization_10/Greater/_96]]
(1) Invalid argument: Incompatible shapes: [8] vs. [16]
[[node add (defined at Users\<User>\OneDrive\Documenten\HKU\Year 4\PDP_and_SN\Supportive Narrative\Research\Alpha_2\lib\NN.py:120) ]]
[[Adam_1/AddN/_140]]
(2) Invalid argument: Incompatible shapes: [8] vs. [16]
[[node add (defined at Users\<User>\OneDrive\Documenten\HKU\Year 4\PDP_and_SN\Supportive Narrative\Research\Alpha_2\lib\NN.py:120) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_18178]
I create my model with the following code:
class GAN_Model(tf.keras.Model):
    def __init__(self, generator, discriminator, latent_dim, batch_size):
        super(GAN_Model, self).__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim
        self.batch_size = batch_size

    def compile(self, discriminator_optimizer, generator_optimizer, loss_function):
        super(GAN_Model, self).compile()
        self.discriminator_optimizer = discriminator_optimizer
        self.generator_optimizer = generator_optimizer
        self.loss_function = loss_function

    def generator_loss(self, cross_entropy, fake_output):
        return cross_entropy(tf.ones_like(fake_output), fake_output)

    def discriminator_loss(self, cross_entropy, real_output, fake_output):
        real_loss = cross_entropy(tf.ones_like(real_output), real_output)
        fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
        total_loss = real_loss + fake_loss
        return total_loss

    def train_step(self, real_audio):
        random_latent_vectors = tf.random.normal(shape=(self.batch_size, self.latent_dim))
        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            generated_images = self.generator(random_latent_vectors, training=True)
            real_output = self.discriminator(real_audio[0], training=True)
            fake_output = self.discriminator(generated_images, training=True)
            g_loss = self.generator_loss(self.loss_function, fake_output)
            d_loss = self.discriminator_loss(self.loss_function, real_output, fake_output)
        gradients_of_generator = gen_tape.gradient(g_loss, self.generator.trainable_variables)
        gradients_of_discriminator = disc_tape.gradient(d_loss, self.discriminator.trainable_variables)
        self.generator_optimizer.apply_gradients(zip(gradients_of_generator, self.generator.trainable_variables))
        self.discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, self.discriminator.trainable_variables))
        return {"d_loss": d_loss, "g_loss": g_loss, "prediction": generated_images}
mirrored_strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"], cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())
with mirrored_strategy.scope():
    generator = make_generator(latent_dim)
    discriminator = make_discriminator(spectral_size)
    g_opt = tf.keras.optimizers.Adam(0.0001, beta_1=0.5)
    d_opt = tf.keras.optimizers.Adam(0.00012, beta_1=0.5)
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
    gan = GAN_Model(generator, discriminator, latent_dim, batch_size)
    gan.compile(
        d_opt,
        g_opt,
        loss_fn,
    )

ckpt = tf.train.Checkpoint(generator_optimizer=g_opt,
                           discriminator_optimizer=d_opt,
                           generator=generator,
                           discriminator=discriminator)
manager = tf.train.CheckpointManager(ckpt, ".\\data\\checkpoints\\" + str(model_name), max_to_keep=15)
if restore_model:
    ckpt.restore(manager.latest_checkpoint)

dataset = tf.data.experimental.load(dataset_dir, (tf.TensorSpec(shape=(spectral_size[0], spectral_size[1], spectral_size[2]), dtype=tf.double), tf.TensorSpec(shape=(2), dtype=tf.double)), compression="GZIP").batch(batch_size)
print(dataset)

history = gan.fit(dataset, epochs=epochs, callbacks=[generate_and_save_audio(manager, model_name)])
The full code is more extensive than this, but the essence of the problem should be somewhere in here. Thanks!
Figured it out! MirroredStrategy divides the batch size over the GPUs (so in my case, with two GPUs, it divides the batch size by 2). For the model I have to divide my input batch_size by 2, but for creating the dataset I can keep batch_size at its original size.
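A minimal sketch of that change, reusing the names from the code above (make_generator, make_discriminator, d_opt, g_opt, loss_fn, latent_dim, batch_size); dataset_unbatched below is just a placeholder for the dataset as loaded in the question, before .batch() is applied:

# Sketch: give the model the per-replica batch size, keep the global one for the dataset.
mirrored_strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"])

global_batch_size = batch_size  # what .batch() is called with
per_replica_batch_size = global_batch_size // mirrored_strategy.num_replicas_in_sync  # e.g. 16 // 2 = 8

with mirrored_strategy.scope():
    generator = make_generator(latent_dim)
    discriminator = make_discriminator(spectral_size)
    # The model draws its own latent vectors, so it has to use the per-replica
    # size; otherwise fake_output has 16 rows while the per-replica real batch
    # only has 8, which is exactly the "[8] vs. [16]" error above.
    gan = GAN_Model(generator, discriminator, latent_dim, per_replica_batch_size)
    gan.compile(d_opt, g_opt, loss_fn)

# The input pipeline keeps the global batch size; MirroredStrategy splits each
# batch of 16 into two sub-batches of 8, one per GPU.
dataset = dataset_unbatched.batch(global_batch_size)
history = gan.fit(dataset, epochs=epochs)

An alternative is to not store batch_size on the model at all and instead derive it inside train_step from the incoming batch, e.g. tf.shape(real_audio[0])[0], so the same code works with any number of replicas.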