How to apply CUDA to a custom model in PyTorch



The input is a dictionary of tensors, so during training I move the tensors to the CUDA device to use the GPU. My custom model is shown below, and I also move the model itself to CUDA.

class EmbeddingLayer(nn.Module):
    def __init__(self):
        super(EmbeddingLayer, self).__init__()
        # other features
        self.other_features_embedding = []
        for feature_name in OTHER_FEATURES:
            vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]
            embedding_dims = int(math.sqrt(len(vocabulary)))
            embedding = nn.Embedding(len(vocabulary)+1, embedding_dims)
            self.other_features_embedding.append(embedding)

        # transformer features
        item_vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY['item']
        self.item_embedding_dims = int(math.sqrt(len(item_vocabulary)))
        self.item_embedding = nn.Embedding(len(item_vocabulary)+1, self.item_embedding_dims)

    def forward(self, inputs):
        # other features
        encoded_other_features = []
        for i, feature_name in enumerate(OTHER_FEATURES):
            embedding = self.other_features_embedding[i](inputs[feature_name])
            encoded_other_features.append(embedding)
        encoded_other_features = torch.cat(encoded_other_features, -1)

        # transformer features
        encoded_sequence_item = self.item_embedding(inputs['sequence_item'])
        encoded_target_item = self.item_embedding(inputs['target_item'])
        positions = inputs['target_timestamp'].repeat(sequence_length-1, 1).transpose(0, 1) - inputs['sequence_timestamp']
        encoded_positions = positions.repeat(1, self.item_embedding_dims).reshape(-1, self.item_embedding_dims, sequence_length-1).transpose(1, 2)
        encoded_sequence_item_with_position = encoded_sequence_item + encoded_positions
        encoded_transformer_features = torch.cat((encoded_sequence_item_with_position, encoded_target_item.reshape(-1, 1, self.item_embedding_dims)), 1)
        return encoded_other_features, encoded_transformer_features

class BST(nn.Module):
    def __init__(self, hidden_units, dropout, num_heads):
        super(BST, self).__init__()
        ...
        self.embedding_layer = EmbeddingLayer()
        ...

    def forward(self, inputs):
        other_features, transformer_features = self.embedding_layer(inputs)
        ...
        return self.output(features)

model = BST([256, 128], 0.3, 1)
model.to(device)
def train(model, optimizer, dataloader):
    model.train()
    for inputs in tqdm(dataloader, total=len(dataloader)):
        for k, v in inputs.items():
            inputs[k] = v.to(device)
        model.zero_grad()
        pred = model(inputs)
        ...

But I get the following error:

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper__index_select)

I think the error occurs in the EmbeddingLayer's embedding lookups. How can I fix this error so that training runs on the GPU?

A plain Python list of nn.Module objects does not register the embedding layers as submodules, so model.to(device) never moves their weights to the GPU. To register a list of modules properly, you should use nn.ModuleList. So in __init__, build the embeddings in a temporary list and wrap it, i.e. replace the loop with the following:

embeddings = []
for feature_name in OTHER_FEATURES:
    vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]
    embedding_dims = int(math.sqrt(len(vocabulary)))
    embedding = nn.Embedding(len(vocabulary)+1, embedding_dims)
    embeddings.append(embedding)
self.other_features_embedding = nn.ModuleList(embeddings)
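
Once the embeddings are held in an nn.ModuleList, they appear in model.parameters() and are moved by model.to(device) together with everything else. Below is a minimal, self-contained sketch to sanity-check that behaviour; the feature names and vocabulary sizes are made up for illustration and are not from the original model:

import math
import torch
import torch.nn as nn

# Toy stand-ins for the real feature configuration (illustrative only)
OTHER_FEATURES = ['user_age', 'user_gender']
CATEGORICAL_FEATURES_WITH_VOCABULARY = {'user_age': list(range(10)), 'user_gender': [0, 1]}

class ToyEmbeddingLayer(nn.Module):
    def __init__(self):
        super().__init__()
        embeddings = []
        for feature_name in OTHER_FEATURES:
            vocabulary = CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]
            embedding_dims = int(math.sqrt(len(vocabulary)))
            embeddings.append(nn.Embedding(len(vocabulary) + 1, embedding_dims))
        # nn.ModuleList registers each embedding as a submodule
        self.other_features_embedding = nn.ModuleList(embeddings)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ToyEmbeddingLayer().to(device)

# Every registered parameter, including the ModuleList embeddings, now lives on `device`
print({name: p.device for name, p in model.named_parameters()})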

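If you would rather look the embeddings up by feature name instead of by index in forward, nn.ModuleDict registers the modules in the same way; this is only an alternative sketch, not something the fix above requires:

# In __init__: key the embeddings by feature name
self.other_features_embedding = nn.ModuleDict({
    feature_name: nn.Embedding(
        len(CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]) + 1,
        int(math.sqrt(len(CATEGORICAL_FEATURES_WITH_VOCABULARY[feature_name]))),
    )
    for feature_name in OTHER_FEATURES
})

# In forward: index by name rather than position
encoded_other_features = [
    self.other_features_embedding[feature_name](inputs[feature_name])
    for feature_name in OTHER_FEATURES
]
encoded_other_features = torch.cat(encoded_other_features, -1)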