RuntimeError:试图为Adam实例捕获step()的CUDA图形,但此实例是用capturable=False构造的



当我使用Python和Pytorch库在Windows中的Google Colab Pro中运行此代码!python train.py(KiU-Net 3D)时,我得到一个错误,如下:

File "train.py", line 86, in <module>
opt.step()
File "/usr/local/lib/python3.8/dist-packages/torch/optim/lr_scheduler.py", line 68, in wrapper
return wrapped(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/optim/optimizer.py", line 140, in wrapper
out = func(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/optim/optimizer.py", line 23, in _use_grad
ret = func(self, *args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/optim/adam.py", line 178, in step
self._cuda_graph_capture_health_check()
File "/usr/local/lib/python3.8/dist-packages/torch/optim/optimizer.py", line 103, in _cuda_graph_capture_health_check
raise RuntimeError("Attempting CUDA graph capture of step() for an instance of " +
RuntimeError: Attempting CUDA graph capture of step() for an instance of Adam but this instance was constructed with capturable=False.

这是train.py文件,我在opt.step()处得到错误:
import os
from time import time
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader
from visdom import Visdom
from dataset.dataset import Dataset
from loss.Dice import DiceLoss
from loss.ELDice import ELDiceLoss
from loss.WBCE import WCELoss
from loss.Jaccard import JaccardLoss
from loss.SS import SSLoss
from loss.Tversky import TverskyLoss
from loss.Hybrid import HybridLoss
from loss.BCE import BCELoss
from net.models import net
import parameter as para
# ---- visualisation -------------------------------------------------------
# Live loss curve; requires a Visdom server listening on this port.
viz = Visdom(port=666)
step_list = [0]
win = viz.line(X=np.array([0]), Y=np.array([1.0]), opts=dict(title='loss'))

# ---- environment ---------------------------------------------------------
os.environ['CUDA_VISIBLE_DEVICES'] = para.gpu
cudnn.benchmark = para.cudnn_benchmark

# ---- model / data --------------------------------------------------------
net = torch.nn.DataParallel(net).cuda()
net.train()
print(para.training_set_path)
train_ds = Dataset(os.path.join(para.train_ct_path, 'ct'), os.path.join(para.train_seg_path, 'seg'))

train_dl = DataLoader(train_ds, para.batch_size, True, num_workers=para.num_workers, pin_memory=para.pin_memory)

loss_func_list = [DiceLoss(), ELDiceLoss(), WCELoss(), JaccardLoss(), SSLoss(), TverskyLoss(), HybridLoss(), BCELoss()]
loss_func = loss_func_list[5]  # TverskyLoss

opt = torch.optim.Adam(net.parameters(), lr=para.learning_rate)

# NOTE(review): on torch 1.12.0 Adam.step() can raise
# "Attempting CUDA graph capture of step() ... capturable=False".
# Upgrade to torch>=1.12.1, or as a workaround enable the line below
# (https://github.com/pytorch/pytorch/issues/80809):
# opt.param_groups[0]['capturable'] = True

lr_decay = torch.optim.lr_scheduler.MultiStepLR(opt, para.learning_rate_decay)

alpha = para.alpha  # weight of the auxiliary (deep-supervision) losses
start = time()
for epoch in range(para.Epoch):
    lr_decay.step()
    mean_loss = []
    for step, (ct, seg) in enumerate(train_dl):
        ct = ct.cuda()
        seg = seg.cuda()
        outputs = net(ct)
        print(len(outputs))

        # Deep supervision: one loss per decoder scale; the coarse maps are
        # down-weighted by alpha, loss4 is the full-resolution output.
        loss1 = loss_func(outputs[0], seg)
        loss2 = loss_func(outputs[1], seg)
        loss3 = loss_func(outputs[2], seg)
        loss4 = loss_func(outputs[3], seg)
        loss = (loss1 + loss2 + loss3) * alpha + loss4
        mean_loss.append(loss4.item())

        opt.zero_grad()
        loss.backward()
        opt.step()

        # Was `step % 5 is 0`: `is` compares identity, not value, and only
        # works for small ints by CPython accident -- use `==`.
        if step % 5 == 0:
            step_list.append(step_list[-1] + 1)
            viz.line(X=np.array([step_list[-1]]), Y=np.array([loss4.item()]), win=win, update='append')

            print('epoch:{}, step:{}, loss1:{:.3f}, loss2:{:.3f}, loss3:{:.3f}, loss4:{:.3f}, time:{:.3f} min'
                  .format(epoch, step, loss1.item(), loss2.item(), loss3.item(), loss4.item(), (time() - start) / 60))

    mean_loss = sum(mean_loss) / len(mean_loss)

    # Periodic checkpoint (was `epoch % 50 is 0`).
    if epoch % 50 == 0:
        torch.save(net.state_dict(), '/content/drive/MyDrive/CS/Models/KiU-Net/LiTS/saved_networks/net{}-{:.3f}-{:.3f}.pth'.format(epoch, loss, mean_loss))

    # Decay the auxiliary-loss weight (was `is 0` / `is not 0`).
    if epoch % 40 == 0 and epoch != 0:
        alpha *= 0.8

我希望神经网络能够正常训练。以下是KiU-Net 3D的架构,以防其中存在错误(models.py):

import os
import sys
sys.path.append(os.path.split(sys.path[0])[0])
import torch
import torch.nn as nn
import torch.nn.functional as F
import parameter as para
class segnet(nn.Module):
    """3D encoder/decoder segmentation network with deep supervision.

    Five Conv3d + 2x max-pool encoder stages (1 -> 512 channels) followed by
    five Conv3d + 2x trilinear-upsample decoder stages.  Four ``map*`` heads
    project intermediate decoder features to 3-channel sigmoid probability
    maps upsampled back toward input resolution.

    forward() returns all four maps (coarsest first) when ``training`` is
    True, otherwise only the full-resolution map.

    NOTE(review): assumes a single-channel input whose spatial size survives
    five rounds of /2 pooling; the F.pad in forward() compensates for an odd
    depth at the 1/16 scale -- confirm against the dataset's patch size.
    """

    def __init__(self, training):
        super(segnet, self).__init__()
        # Deliberately shadows nn.Module's built-in ``training`` flag so the
        # same attribute also selects the deep-supervision return mode.
        self.training = training

        # Encoder convolutions (pooling happens in forward()).
        self.encoder1 = nn.Conv3d(1, 32, 3, stride=1, padding=1)
        self.encoder2 = nn.Conv3d(32, 64, 3, stride=1, padding=1)
        self.encoder3 = nn.Conv3d(64, 128, 3, stride=1, padding=1)
        self.encoder4 = nn.Conv3d(128, 256, 3, stride=1, padding=1)
        self.encoder5 = nn.Conv3d(256, 512, 3, stride=1, padding=1)

        # Decoder convolutions (upsampling happens in forward()).
        self.decoder1 = nn.Conv3d(512, 256, 3, stride=1, padding=1)
        self.decoder2 = nn.Conv3d(256, 128, 3, stride=1, padding=1)
        self.decoder3 = nn.Conv3d(128, 64, 3, stride=1, padding=1)
        self.decoder4 = nn.Conv3d(64, 32, 3, stride=1, padding=1)
        self.decoder5 = nn.Conv3d(32, 3, 3, stride=1, padding=1)  # 2-class variant: nn.Conv3d(32, 2, 3, stride=1, padding=1)

        # Full-resolution head (scale_factor (1,1,1) upsample is a no-op here).
        self.map4 = nn.Sequential(
            nn.Conv3d(3, 3, 1, 1),  # 2-class variant: nn.Conv3d(2, 1, 1, 1)
            nn.Upsample(scale_factor=(1, 1, 1), mode='trilinear'),  # variant: scale_factor=(1, 2, 2)
            nn.Sigmoid()
        )
        # Mapping at the 128*128 scale (1/4-resolution decoder features).
        self.map3 = nn.Sequential(
            nn.Conv3d(64, 3, 1, 1),  # 2-class variant: nn.Conv3d(64, 1, 1, 1)
            nn.Upsample(scale_factor=(4, 4, 4), mode='trilinear'),  # variant: scale_factor=(4, 8, 8)
            nn.Sigmoid()
        )
        # Mapping at the 64*64 scale (1/8-resolution decoder features).
        self.map2 = nn.Sequential(
            nn.Conv3d(128, 3, 1, 1),  # 2-class variant: nn.Conv3d(128, 1, 1, 1)
            nn.Upsample(scale_factor=(8, 8, 8), mode='trilinear'),  # variant: scale_factor=(8, 16, 16)
            nn.Sigmoid()
        )
        # Mapping at the 32*32 scale (1/16-resolution decoder features).
        self.map1 = nn.Sequential(
            nn.Conv3d(256, 3, 1, 1),  # 2-class variant: nn.Conv3d(256, 1, 1, 1)
            nn.Upsample(scale_factor=(16, 16, 16), mode='trilinear'),  # variant: scale_factor=(16, 32, 32)
            nn.Sigmoid()
        )
        self.soft = nn.Softmax(dim=1)  # NOTE(review): defined but never used in forward()

    def forward(self, x):
        # ---- encoder: conv -> 2x max-pool -> ReLU at each stage ----------
        out = F.relu(F.max_pool3d(self.encoder1(x), 2, 2))
        t1 = out  # 1/2-resolution skip feature (its add below is disabled)
        out = F.relu(F.max_pool3d(self.encoder2(out), 2, 2))
        t2 = out  # 1/4-resolution skip feature (disabled)
        out = F.relu(F.max_pool3d(self.encoder3(out), 2, 2))
        t3 = out  # 1/8-resolution skip feature (disabled)
        out = F.relu(F.max_pool3d(self.encoder4(out), 2, 2))
        t4 = out  # 1/16-resolution skip feature (the only active one)
        out = F.relu(F.max_pool3d(self.encoder5(out), 2, 2))

        # t2 = out
        # ---- decoder: conv -> 2x trilinear upsample -> ReLU --------------
        out = F.relu(F.interpolate(self.decoder1(out), scale_factor=(2, 2, 2), mode='trilinear'))
        # print(out.shape,t4.shape)
        # Pad the depth dimension by one at its end so the upsampled map
        # matches t4 when the depth at the 1/16 scale is odd.
        out = torch.add(F.pad(out, [0, 0, 0, 0, 0, 1]), t4)
        output1 = self.map1(out)
        out = F.relu(F.interpolate(self.decoder2(out), scale_factor=(2, 2, 2), mode='trilinear'))
        # out = torch.add(out,t3)
        output2 = self.map2(out)
        out = F.relu(F.interpolate(self.decoder3(out), scale_factor=(2, 2, 2), mode='trilinear'))
        # out = torch.add(out,t2)
        output3 = self.map3(out)
        out = F.relu(F.interpolate(self.decoder4(out), scale_factor=(2, 2, 2), mode='trilinear'))
        # out = torch.add(out,t1)

        out = F.relu(F.interpolate(self.decoder5(out), scale_factor=(2, 2, 2), mode='trilinear'))
        output4 = self.map4(out)
        # print(out.shape)
        # print(output1.shape,output2.shape,output3.shape,output4.shape)
        if self.training is True:
            # Deep supervision: all four scales, coarsest first.
            return output1, output2, output3, output4
        else:
            return output4
def init(module):
    """Kaiming-initialise 3D (transposed) convolutions; zero their biases.

    Intended for use with ``net.apply(init)``; any other module type is
    left untouched.
    """
    if isinstance(module, (nn.Conv3d, nn.ConvTranspose3d)):
        nn.init.kaiming_normal_(module.weight.data, 0.25)
        nn.init.constant_(module.bias.data, 0)

# Build the model with deep supervision enabled and Kaiming-initialise it.
net = segnet(training=True)
net.apply(init)
# Report the total parameter count.
print('net total parameters:', sum(param.numel() for param in net.parameters()))

这就是我如何计算损失函数(Tversky.py):

"""
Tversky loss
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
class TverskyLoss(nn.Module):
    """Differentiable soft Dice-style loss over the two foreground classes.

    NOTE(review): despite the name this computes a per-voxel-averaged Dice
    loss, not a true Tversky loss (no separate FP/FN weights) -- kept as-is
    for interface compatibility with the other loss classes.

    Bug fixed: the original ran ``torch.argmax`` on the prediction, which
    detaches the result from the autograd graph, so gradients to the network
    were always zero; ``dice.requires_grad = True`` merely hid the problem.
    This version uses the soft class probabilities directly, so the loss is
    genuinely differentiable.
    """

    def __init__(self):
        super().__init__()

    def forward(self, pred, target):
        """Compute the loss.

        pred:   (1, C, D, H, W) per-class probabilities (C inferred; class 0
                is treated as background and excluded).
        target: integer class labels, any shape that flattens to one label
                per voxel.
        Returns a scalar tensor in [0, 1] attached to the autograd graph.
        """
        smooth = 1e-7
        pred = pred.squeeze()      # (C, D, H, W)
        target = target.squeeze()
        num_classes = pred.shape[0]
        # (voxels, C-1): soft foreground probabilities, background dropped.
        pred_f = pred.reshape(num_classes, -1).transpose(0, 1)[:, 1:]
        target_f = F.one_hot(target.flatten().to(torch.int64),
                             num_classes=num_classes)[..., 1:].to(pred_f.dtype)
        # Per-voxel Dice terms over the foreground classes.
        intersection = torch.sum(pred_f * target_f, dim=1)
        denominator = torch.sum(pred_f + target_f, dim=1)
        dice = 1 - torch.mean(2. * intersection / (denominator + smooth))
        return dice

为什么

PyTorch优化器在检测到当前CUDA流(在GPU上运行的操作队列)正在积极记录("捕获")操作时,会抛出Attempting CUDA graph capture of step()...错误。

因为你在opt.step()期间看到这个错误,在opt.step()之前运行的一些代码可能是为当前(默认)流启动CUDA图形/流捕获,但从未关闭捕获模式。

可以在opt.step()之前用print("Capturing:", torch.cuda.is_current_stream_capturing())验证这个假设。根据错误信息,这将打印Capturing: True,但您需要Capturing: False

您可以使用print("Capturing:", torch.cuda.is_current_stream_capturing())来缩小代码的哪一部分是初始化图形/流捕获。

如何修复?

如果不知道代码的哪一部分正在启动图形/流捕获,就很难提出一个优雅的修复方法。

如果流捕获发生在训练循环之前,您可以通过将训练循环包装在专用流(不会处于捕获模式)中来解决这个问题:

training_loop_stream = torch.cuda.Stream()
torch.cuda.synchronize() # make sure model is on device
with torch.cuda.stream(training_loop_stream):
# training loop, now using this dedicated stream...

最新更新