我当前遇到此错误。我不知道这个错误是由什么引起的,因为我已经在代码中声明了位置参数path2
和path3
,但错误表明缺少这两个参数。
错误消息:TypeError: __init__() missing 2 required positional arguments: 'path2' and 'path3'
这是我的代码:
import os
from tqdm import tqdm
from utils import SOS, EOS, UNK, process
class Corpus(object):
def __init__(self, path, path2, path3, order, lower=False, max_lines=-1):
self.order = order
self.lower = lower
self.max_lines = max_lines
self.vocab = set()
self.train = self.tokenize(os.path.join(path), training_set=True)
self.valid = self.tokenize(os.path.join(path2))
self.test = self.tokenize(os.path.join(path3))
def tokenize(self, path, training_set=False):
"""Tokenizes a text file."""
#assert os.path.exists(path)
with open(path, path2, path3) as fin:
num_lines = sum(1 for _ in fin.readlines())
with open(path, path2, path3, 'r', encoding="utf8") as f:
words = []
for i, line in enumerate(tqdm(f, total=num_lines)):
if self.max_lines > 0 and i > self.max_lines:
break
line = line.strip()
if not line:
continue # Skip empty lines.
elif line.startswith('='):
continue # Skip headers.
else:
sentence = (self.order - 1) * [SOS] +
[process(word, self.lower) for word in line.split()] + [EOS]
if training_set:
words.extend(sentence)
self.vocab.update(sentence)
else:
sentence = [word if word in self.vocab else UNK for word in sentence]
words.extend(sentence)
return words
if __name__ == '__main__':
path = 'C://Users//supre//Documents//Python Programme//kenlm//wikitext-2//wiki.train.tokens'
path2 = 'C://Users//supre//Documents//Python Programme//kenlm//wikitext-2//wiki.valid.tokens'
path3 = 'C://Users//supre//Documents//Python Programme//kenlm//wikitext-2//wiki.test.tokens'
corpus = Corpus(path, order=3)
print(len(corpus.test))
print(corpus.test[:100])
感谢您提前提供的每一个帮助和建议:(
在调用类Corpus
的对象时,需要传递这些参数corpus = Corpus(path, path2, path3, order=3)
构造函数中参数的名称与您传递给它的变量的名称无关,因此您需要将它们全部传递,没有采用相同名称变量的机制
class Corpus(object):
def __init__(self, path, path2, path3, order, lower=False, max_lines=-1):
self.order = order
self.lower = lower
self.max_lines = max_lines
self.vocab = set()
self.train = self.tokenize(path, training_set=True)
self.valid = self.tokenize(path2)
self.test = self.tokenize(path3)
if __name__ == '__main__':
pa = 'C://Users//...//kenlm//wikitext-2//wiki.train.tokens'
pa2 = 'C://Users//...//kenlm//wikitext-2//wiki.valid.tokens'
pa3 = 'C://Users//...//kenlm//wikitext-2//wiki.test.tokens'
corpus = Corpus(pa, pa2, pa3, order=3)
此外,具有1个变量的os.path.join
没有特殊的