torch.mul causes param.grad to be NoneType


import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(torch.nn.Module):
    def __init__(self, D_u, D_i, D_t, D_m):
        super(Net, self).__init__()
        self.lin_u = nn.Linear(D_u, 1)
        self.lin_i = nn.Linear(D_i, 1)
        self.lin_t = nn.Linear(D_t, 1)
        self.lin_m = nn.Linear(D_m, 1)

        self.output = nn.Linear(4, 1)

    def forward(self, args):
        (u, i, t, m) = args
        u = F.relu(self.lin_u(u))
        i = F.relu(self.lin_i(i))
        t = F.relu(self.lin_t(t))
        m = F.relu(self.lin_m(m))
        out = torch.mul(u, i)
        out = torch.mul(out, t)
        out = torch.mul(out, m)
        return out

I have a simple model class with four inputs, each with its own linear layer. I want the output to be the product of the four nodes, but for some reason, no matter how I multiply them (with torch.mul or *), grad is always NoneType:

model = Net(N, 3, T, 1)
u_block, i_block, t_block, m_block, y_block = get_data_new(data)
loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-4
for t in range(5000):
    y_pred = model((u_block, i_block, t_block, m_block))
    loss = loss_fn(y_pred, y_block)
    if t % 100 == 99:
        print(t, loss.item())
    model.zero_grad()
    loss.backward()
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
TypeError                                 
--->   param -= learning_rate * param.grad
TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'

I have already set the inputs to requires_grad=True. I think the problem is that out is not a leaf and therefore has no gradient, but I don't know how to fix this.

Edit:

The data u_block, i_block, t_block, m_block, and y_block look like this. u_block, i_block, and t_block are one-hot vectors.

u_block:  tensor([[1., 0., 0.,  ..., 0., 0., 0.],
[1., 0., 0.,  ..., 0., 0., 0.],
[1., 0., 0.,  ..., 0., 0., 0.],
...,
[0., 0., 0.,  ..., 0., 0., 1.],
[0., 0., 0.,  ..., 0., 0., 1.],
[0., 0., 0.,  ..., 0., 0., 1.]], requires_grad=True)
i_block:  tensor([[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.],
...,
[0., 1., 0.],
[0., 1., 0.],
[0., 1., 0.]], requires_grad=True)
t_block:  tensor([[1., 0., 0.,  ..., 0., 0., 0.],
[0., 1., 0.,  ..., 0., 0., 0.],
[0., 0., 1.,  ..., 0., 0., 0.],
...,
[0., 0., 0.,  ..., 1., 0., 0.],
[0., 0., 0.,  ..., 0., 1., 0.],
[0., 0., 0.,  ..., 0., 0., 1.]], requires_grad=True)
m_block:  tensor([[ 0.0335],
[ 0.0000],
[ 0.0000],
...,
[ 0.1515],
[-0.2261],
[-0.0402]], requires_grad=True)
y_block:  tensor([[ 0.0000],
[ 0.0000],
[ 0.0000],
...,
[-0.2261],
[-0.0402],
[-0.1318]], requires_grad=True)

Make the following change. You are not using self.output, so I have commented it out. Because it is never used in the forward pass, its parameters keep grad equal to None (the layer still has requires_grad=True by default), which is why the update param -= learning_rate * param.grad fails.

class Net(torch.nn.Module):
    def __init__(self, D_u, D_i, D_t, D_m):
        super(Net, self).__init__()
        self.lin_u = nn.Linear(D_u, 1)
        self.lin_i = nn.Linear(D_i, 1)
        self.lin_t = nn.Linear(D_t, 1)
        self.lin_m = nn.Linear(D_m, 1)

        # self.output = nn.Linear(4, 1)

    def forward(self, args):
        (u, i, t, m) = args
        u = F.relu(self.lin_u(u))
        i = F.relu(self.lin_i(i))
        t = F.relu(self.lin_t(t))
        m = F.relu(self.lin_m(m))
        out = torch.mul(u, i)
        out = torch.mul(out, t)
        out = torch.mul(out, m)
        return out
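
If it is not obvious which parameter is causing the error, a quick check (my own addition, not part of the original answer) is to run one backward pass on the original model and print every parameter whose grad is still None:

# Sanity check: list parameters that received no gradient after backward().
model = Net(N, 3, T, 1)
y_pred = model((u_block, i_block, t_block, m_block))
loss = torch.nn.MSELoss(reduction='sum')(y_pred, y_block)
loss.backward()

for name, param in model.named_parameters():
    if param.grad is None:
        print("no gradient for:", name)  # with the original class this prints output.weight and output.bias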

I hope this solves your problem.

I also have a couple of suggestions:

  1. Rename args to something else, or, if you want to keep that pattern, use it properly by changing the signature to *args.
  2. Do not set requires_grad=True on the input tensors: that asks autograd to compute d_loss/d_input, which you only need if that is actually your intention (see the sketch after this list).
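
As a rough sketch of point 2, assuming you only need gradients for the model's parameters and not for the data itself:

# Inputs as plain tensors: autograd only needs to track the model's
# parameters, which already have requires_grad=True by default.
u_block = u_block.detach()
i_block = i_block.detach()
t_block = t_block.detach()
m_block = m_block.detach()
y_block = y_block.detach()

learning_rate = 1e-4
for step in range(5000):
    y_pred = model((u_block, i_block, t_block, m_block))
    loss = loss_fn(y_pred, y_block)
    model.zero_grad()
    loss.backward()
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad  # every parameter now has a grad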
