Dreambooth-RuntimeError:预期所有张量都在同一个设备上,但发现至少有两个设备,cpu和cuda:0



我正试图通过Dreambooth训练一个模型,但遇到了这个问题。我一直在寻找解决方案,但似乎都不起作用。我读到在变量后面添加 ".to(device)" 会有帮助,但我不确定应该在哪里添加。我想知道是否有人能根据下面的错误信息告诉我应该在哪里添加,或者是否有人对如何修复它有建议。我是一个初学者,所以请多多包涵。

Traceback (most recent call last):
  File "main.py", line 835, in <module>
    trainer.fit(model, data)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 741, in fit
    self._call_and_handle_interrupt(
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 686, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 778, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1200, in _run
    self._dispatch()
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1280, in _dispatch
    self.training_type_plugin.start_training(self)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\plugins\training_type\training_type_plugin.py", line 202, in start_training
    self._results = trainer.run_stage()
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1290, in run_stage
    return self._run_train()
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1312, in _run_train
    self._run_sanity_check(self.lightning_module)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1376, in _run_sanity_check
    self._evaluation_loop.run()
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\loops\base.py", line 145, in run
    self.advance(*args, **kwargs)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\loops\dataloader\evaluation_loop.py", line 110, in advance
    dl_outputs = self.epoch_loop.run(dataloader, dataloader_idx, dl_max_batches, self.num_dataloaders)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\loops\base.py", line 145, in run
    self.advance(*args, **kwargs)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\loops\epoch\evaluation_epoch_loop.py", line 122, in advance
    output = self._evaluation_step(batch, batch_idx, dataloader_idx)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\loops\epoch\evaluation_epoch_loop.py", line 217, in _evaluation_step
    output = self.trainer.accelerator.validation_step(step_kwargs)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\accelerators\accelerator.py", line 236, in validation_step
    return self.training_type_plugin.validation_step(*step_kwargs.values())
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\pytorch_lightning\plugins\training_type\training_type_plugin.py", line 219, in validation_step
    return self.model.validation_step(*args, **kwargs)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\torch\autograd\grad_mode.py", line 28, in decorate_context
    return func(*args, **kwargs)
  File "C:\Dreambooth-SD-optimized\ldm\models\diffusion\ddpm.py", line 368, in validation_step
    _, loss_dict_no_ema = self.shared_step(batch)
  File "C:\Dreambooth-SD-optimized\ldm\models\diffusion\ddpm.py", line 908, in shared_step
    loss = self(x, c)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Dreambooth-SD-optimized\ldm\models\diffusion\ddpm.py", line 937, in forward
    c = self.get_learned_conditioning(c)
  File "C:\Dreambooth-SD-optimized\ldm\models\diffusion\ddpm.py", line 595, in get_learned_conditioning
    c = self.cond_stage_model.encode(c, embedding_manager=self.embedding_manager)
  File "C:\Dreambooth-SD-optimized\ldm\modules\encoders\modules.py", line 324, in encode
    return self(text, **kwargs)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Dreambooth-SD-optimized\ldm\modules\encoders\modules.py", line 319, in forward
    z = self.transformer(input_ids=tokens, **kwargs)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Dreambooth-SD-optimized\ldm\modules\encoders\modules.py", line 297, in transformer_forward
    return self.text_model(
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Dreambooth-SD-optimized\ldm\modules\encoders\modules.py", line 258, in text_encoder_forward
    hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids, embedding_manager=embedding_manager)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Dreambooth-SD-optimized\ldm\modules\encoders\modules.py", line 180, in embedding_forward
    inputs_embeds = self.token_embedding(input_ids)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\torch\nn\modules\module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\torch\nn\modules\sparse.py", line 158, in forward
    return F.embedding(
  File "C:\Users\User\miniconda3\envs\ldm\lib\site-packages\torch\nn\functional.py", line 2044, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper__index_select)

任何帮助都将不胜感激!

如果使用 --lowvram/--medvram 以及其他几个命令行参数运行,SD中有一大堆功能将无法工作。张量分布在多个设备上,意味着由于你的设置,至少有一部分计算是在cpu上进行的。如果你不确定是哪个设置导致的,不要浪费时间用dreambooth来排查,改用文本反转(textual inversion)训练器来测试,它会因为同样的原因引发同样的崩溃。我自己必须禁用 --medvram 才能创建嵌入(embedding),所以可以用这种方式来排查问题:尝试创建一个嵌入,更改设置,如此重复,直到找出所有会触发cpu/cuda错误的选项。

相关内容

最新更新