Pytorch LSTM-VAE Sentence Generator: RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation



I am trying to build an LSTM VAE as a learning exercise for future work with PyTorch. I managed to get it running on some small test data, but now that I want to run it on my actual data I keep getting this error:

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [10, 40]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

Any help on how to resolve this error, or just on making my code more efficient in general, would be great. Here is the relevant part of the traceback:

File "<ipython-input-16-7fe0e9e30e5d>", line 190, in <module>
rec_loss, kl_loss = train_batch(x,G_inp,epoch,train=True)
File "<ipython-input-16-7fe0e9e30e5d>", line 166, in train_batch
reconstruction, hidden, kld = model(x, G_inp, None, None)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "<ipython-input-16-7fe0e9e30e5d>", line 93, in forward
mu, logvar, z = self.encoder(x)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "<ipython-input-16-7fe0e9e30e5d>", line 37, in forward
out1, self.hidden  = self.lstm(x, self.hidden)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py", line 727, in _call_impl
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py", line 582, in forward
self.dropout, self.training, self.bidirectional, self.batch_first)
(Triggered internally at  /pytorch/torch/csrc/autograd/python_anomaly_mode.cpp:104.)
allow_unreachable=True)  # allow_unreachable flag
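
For reference, a minimal example (unrelated to my model, just to illustrate the class of error) that raises the same message is:

import torch

a = torch.ones(3, requires_grad=True)
b = torch.exp(a)      # backward of exp() needs its output b
b += 1                # in-place op changes b and bumps its version counter
b.sum().backward()    # raises: one of the variables needed for gradient computation
                      # has been modified by an inplace operation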

Here is the code (forgive all the .clone() calls; I read that cloning might be a solution, so I have been testing it everywhere, without any luck):

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
from keras.preprocessing.text import Tokenizer
from keras import preprocessing
import torch.nn.functional as F
import math
import random


#encoder
class Encoder(nn.Module):
    def __init__(self, embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size):
        super(Encoder, self).__init__()

        self.n_layers_E = n_layers_E
        self.n_hidden_E = n_hidden_E
        self.batch_size = batch_size
        self.dim_z = dim_z

        self.lstm = nn.LSTM(input_size = embedding_dim, hidden_size = n_hidden_E, num_layers = n_layers_E, batch_first = True, bidirectional = True)
        self.hidden_to_mu = nn.Linear(2*n_hidden_E, dim_z)
        self.hidden_to_logvar = nn.Linear(2*n_hidden_G, dim_z)

        self.hidden = (torch.zeros(2*n_layers_E, batch_size, n_hidden_E),
                       torch.zeros(2*n_layers_E, batch_size, n_hidden_E))

    def forward(self, x):
        batch_size, n_seq, n_embed = x.size()
        #batch_size, n_seq = x.size()
        out1, self.hidden = self.lstm(x, self.hidden)
        e_hidden = self.hidden[0].view(batch_size, 2 * self.n_hidden_E).clone()
        #e_hidden = torch.cat(list(hidden), dim = 0)
        mu = self.hidden_to_mu(e_hidden)
        logvar = self.hidden_to_logvar(e_hidden)
        epsilon = torch.randn([batch_size, self.dim_z])
        z = mu + torch.exp(logvar*0.5)*epsilon

        return mu, logvar, z

class Generator(nn.Module):
    def __init__(self, n_hidden_G, n_layers_G, embedding_dim, dim_z, vocab_size, batch_size):
        super(Generator, self).__init__()

        self.n_hidden_G = n_hidden_G
        self.n_layers_G = n_layers_G
        self.n_z = dim_z
        self.batch_size = batch_size
        self.LSTM = nn.LSTM(input_size = embedding_dim + dim_z, hidden_size = n_hidden_G, num_layers = n_layers_G, batch_first = True)
        self.fc = nn.Linear(n_hidden_G, vocab_size)

        self.hidden = (torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G),
                       torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G))

    def forward(self, x, z, g_hidden = None):
        batch_size, n_seq, n_embed = x.size()
        #batch_size, n_seq = x.size()
        z = torch.cat([z]*n_seq, 1).view(batch_size, n_seq, self.n_z)
        x = torch.cat([x, z], dim = 2)

        if g_hidden is None:                                        #if we are validating
            self.hidden = (torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G),
                           torch.zeros(self.n_layers_G, batch_size, self.n_hidden_G))
        else:                                                       #if we are training
            self.hidden = g_hidden

        output, self.hidden = self.LSTM(x, self.hidden)
        output = self.fc(output)

        return output, self.hidden

class VAE(nn.Module):
    def __init__(self, embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size, n_layers_G):
        super(VAE, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.encoder = Encoder(embedding_dim, vocab_size, n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size)
        self.generator = Generator(n_hidden_G, n_layers_G, embedding_dim, dim_z, vocab_size, batch_size)
        self.n_z = dim_z

    def forward(self, x, G_inp, z, G_hidden):
        if z is None:
            batch_size, n_seq = x.size()
            x = self.embedding(x)
            mu, logvar, z = self.encoder(x)

            kld = -0.5*torch.sum(logvar - mu.pow(2) - logvar.exp() + 1).mean()
        else:
            kld = None
        G_inp = self.embedding(G_inp)

        logit, G_hidden = self.generator(G_inp, z, G_hidden)
        return logit, G_hidden, kld

train_df = pd.read_csv("train.csv", header =None)[0:500]
test_df = pd.read_csv("test.csv",header =None)[0:500]
train = train_df.iloc[:,0]
max_words = 2000
max_len = 25

tok = Tokenizer(num_words = max_words)
tok.fit_on_texts(train)
sequences = tok.texts_to_sequences(train)
sequences_matrix = preprocessing.sequence.pad_sequences(sequences, maxlen = max_len)
#tok.sequences_to_texts(sequences)

n_hidden_E = 10
n_layers_E = 1
embedding_dim = 10
vocab_size = max_words
n_hidden_G = 10
n_layers_G = 2
dim_z = 10 
train_size = len(train)
batch_size = 100
rec_coef = 7
lr = 0.01
epochs = 100
def create_generator_input(x, train):
    G_inp = x[:, 0:max_len-1].clone()                       #input for generator should exclude last word of sequence
    # if train == False:
    #     return G_inp
    # r = np.random.rand(G_inp.size(0), G_inp.size(1))
    #                                                       #Perform word_dropout according to random values (r) generated for each word
    # for i in range(len(G_inp)):
    #     for j in range(1, G_inp.size(1)):
    #         if r[i, j] < opt.word_dropout and G_inp[i, j] not in [vocab.stoi[opt.pad_token], vocab.stoi[opt.end_token]]:
    #             G_inp[i, j] = vocab.stoi[opt.unk_token]
    return G_inp

def producebatches(x, batch_size):
    k = math.floor(x.shape[0]/batch_size)
    total = k*batch_size
    flatten = x[0:total].flatten()
    batches = flatten.reshape((k, batch_size, x.shape[1]))
    return batches
batches = producebatches(sequences_matrix, batch_size)      
model = VAE(embedding_dim, vocab_size,  n_layers_E, n_hidden_E, dim_z, n_hidden_G, batch_size,n_layers_G)
optimizer = torch.optim.Adam(model.parameters(), lr = lr)
criterion = nn.BCELoss(reduction = 'sum')
model.train()
def train_batch(x, G_inp, step, train = True):
    reconstruction, hidden, kld = model(x, G_inp, None, None)
    reconstruction2 = reconstruction.view(-1, vocab_size).clone()     #converting into shape (batch_size*(n_seq-1), n_vocab) to facilitate performing F.cross_entropy()
    #y = x[:, 1:x.size(1)].clone()                                    #target for generator should exclude first word of sequence
    #y = y.contiguous().view(-1)
    G_inp2 = G_inp.contiguous().view(-1)                              #converting into shape (batch_size*(n_seq-1),1) to facilitate performing F.cross_entropy()
    rec_loss = F.cross_entropy(reconstruction2, G_inp2)
    kld_coef = (math.tanh((step - 15000)/1000) + 1) / 2
    #kld_coef = min(1, step/200000.0)
    loss = rec_coef*rec_loss + kld_coef*kld
    if train == True:                                                 #skip the steps below if we are performing validation
        torch.autograd.set_detect_anomaly(True)
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
    return rec_loss.item(), kld.item()

for epoch in range(epochs):
    train_rec_loss = []
    train_kl_loss = []
    for i in range(batches.shape[0]):
        x = torch.tensor(batches[i], dtype = torch.long)
        G_inp = create_generator_input(x, train = True)
        rec_loss, kl_loss = train_batch(x, G_inp, epoch, train = True)
        train_rec_loss.append(rec_loss)
        train_kl_loss.append(kl_loss)

    train_rec_loss = np.mean(train_rec_loss)
    train_kl_loss = np.mean(train_kl_loss)
    print("No.", epoch, "T_rec:", '%.2f'%train_rec_loss, "T_kld:", '%.2f'%train_kl_loss)

def generate_sentences(n):
    model.eval()
    sentences = []

    for i in range(n):
        z = torch.randn([1, dim_z])
        hidden = (torch.zeros(n_layers_G, 1, n_hidden_G),
                  torch.zeros(n_layers_G, 1, n_hidden_G))
        G_inp = torch.LongTensor(1, 1).fill_(1)
        str_ind = []
        while len(str_ind) < 49:
            with torch.autograd.no_grad():
                logit, G_hidden, _ = model(None, G_inp, z, hidden)

            probs = F.softmax(logit[0], dim = 1)
            G_inp = torch.multinomial(probs, 1)
            str_ind.append(G_inp[0][0].item())
        sentences.append(str_ind)
    return sentences

t = generate_sentences(1)

First, you can re-initialize the hidden state after each epoch. This should get you past the error you are facing without any major changes:


for epoch in range(epochs):
    train_rec_loss = []
    train_kl_loss = []
    for i in range(batches.shape[0]):
        x = torch.tensor(batches[i], dtype = torch.long)
        G_inp = create_generator_input(x, train = True)
        rec_loss, kl_loss = train_batch(x, G_inp, epoch, train = True)
        train_rec_loss.append(rec_loss)
        train_kl_loss.append(kl_loss)

    model.generator.hidden = (torch.zeros(n_layers_G, batch_size, n_hidden_G),
                              torch.zeros(n_layers_G, batch_size, n_hidden_G))

    train_rec_loss = np.mean(train_rec_loss)
    train_kl_loss = np.mean(train_kl_loss)
    print("No.", epoch, "T_rec:", '%.2f'%train_rec_loss, "T_kld:", '%.2f'%train_kl_loss)

Additionally, you can avoid in-place operations on activation functions and dropout (inplace = False), although I don't think that is the issue here.
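
For example, with modules such as ReLU or Dropout (not used in the model above, just an illustration), the out-of-place variants look like this:

import torch.nn as nn

act = nn.ReLU(inplace=False)             # default; leaves the input tensor untouched
drop = nn.Dropout(p=0.5, inplace=False)  # same for dropout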
