我的代码中有以下导入语句:
import torch, csv, transformers, random
import torch.nn as nn
from torch.utils.data import Dataset
import torch.optim as optim
import pandas as pd
from transformers import GPT2Tokenizer, GPT2LMHeadModel, tokenize, pad_squences
运行时出现了以下错误:
ImportError Traceback (most recent call last)
<ipython-input-35-e04c63220105> in <module>
4 import torch.optim as optim
5 import pandas as pd
----> 6 from transformers import GPT2Tokenizer, GPT2LMHeadModel, tokenize, pad_squences
ImportError: cannot import name 'tokenize' from 'transformers' (/usr/local/lib/python3.8/dist-packages/transformers/__init__.py)
下面是我使用 tokenize() 和 pad_sequences() 函数的方式:
class RephraseDataset(Dataset):
    """Dataset of (query, rephrases) pairs encoded as fixed-length id lists.

    Each element of ``data`` is unpacked as ``(query, rephrases)`` where
    ``rephrases`` is an iterable of strings.  Every string is encoded with
    ``tokenizer.encode`` and then truncated / right-padded to ``max_length``
    so that all returned sequences have the same length.

    Args:
        data: Indexable collection of ``(query, rephrases)`` pairs.
        tokenizer: Object exposing ``encode(text, add_special_tokens=...)``
            and a ``pad_token_id`` attribute (optionally ``eos_token_id``).
        max_length: Target length of every returned id list. Defaults to
            128, the value the original snippet used for the rephrases.
    """

    def __init__(self, data, tokenizer, max_length=128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of (query, rephrases) pairs."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(padded_query, padded_rephrases)`` for the given index.

        ``padded_query`` is a list of ``max_length`` token ids and
        ``padded_rephrases`` is a list of such lists, one per rephrase.
        """
        query, rephrases = self.data[index]
        # Query and rephrases go through the same helper so they are encoded
        # and padded identically; no ``tokenize`` / ``pad_sequences`` imports
        # are required.
        padded_query = self._encode_and_pad(query)
        padded_rephrases = [self._encode_and_pad(r) for r in rephrases]
        return padded_query, padded_rephrases

    def _encode_and_pad(self, text):
        """Encode ``text`` and truncate / right-pad it to ``self.max_length``.

        Raises:
            ValueError: If the tokenizer provides neither a pad token id nor
                an EOS token id to pad with.
        """
        token_ids = list(self.tokenizer.encode(text, add_special_tokens=True))
        # Truncate first: a negative repeat count below would silently add no
        # padding and leave the sequence longer than max_length.
        token_ids = token_ids[: self.max_length]

        pad_id = self.tokenizer.pad_token_id
        if pad_id is None:
            # NOTE(review): GPT-2 tokenizers reportedly ship without a pad
            # token; falling back to EOS is the common workaround - confirm
            # this matches how the model's attention mask / loss is set up.
            pad_id = getattr(self.tokenizer, "eos_token_id", None)
        if pad_id is None:
            raise ValueError(
                "tokenizer has neither pad_token_id nor eos_token_id; "
                "set tokenizer.pad_token before building the dataset"
            )

        return token_ids + [pad_id] * (self.max_length - len(token_ids))
# Wrap the raw (query, rephrases) pairs together with the tokenizer.
dataset = RephraseDataset(data, tokenizer)
# Serve the dataset in shuffled mini-batches of 32 items.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
如何解决这个问题?我在文档里什么都没找到。我应该把 transformers 回滚到哪个版本?
[EDIT]
这是因为 transformers 版本太旧了。请运行 `pip install -U transformers` 进行升级。