我想做一个工作,在一个文本文件中,然后计算每个单词中的音节数,然后最终返回前10个单词与最多的音节。我相信我有它的大部分,但我得到一个错误:
File "top_10_syllable_count.py", line 84, in get_syllable_count_pair return (syllables(word), word, ) TypeError: 'module' object is not callable.
import re
from sys import stderr
from mrjob.job import MRJob
from mrjob.step import MRStep
WORD_RE = re.compile(r"[w']+")
import syllables
class MRMostUsedWordSyllables(MRJob):
def steps(self):
return [
MRStep(mapper=self.word_splitter_mapper,
reducer=self.sorting_word_syllables),
MRStep(mapper=self.get_syllable_count_pair),
MRStep(reducer=self.get_top_10_reducer)
]
def word_splitter_mapper(self, _, line):
#for word in line.split():
for word in WORD_RE.findall(line):
yield(word.lower(), None)
def sorting_word_syllables(self, word, count):
count = 0
vowels = 'aeiouy'
word = word.lower().strip()
if word in vowels:
count +=1
for index in range(1,len(word)):
if word[index] in vowels and word[index-1] not in vowels:
count +=1
if word.endswith('e'):
count -= 1
if word.endswith('le'):
count+=1
if count == 0:
count +=1
yield None, (int(count), word)
def get_syllable_count_pair(self, _, word):
return (syllables(word), word, )
def get_top_10_reducer(self, count, word):
assert count == None # added for a guard
with_counts = [get_syllable_count_pair(w) for w in word]
# Sort the words by the syllable count
sorted_counts = sorted(syllables_counts, reverse=True, key=lambda x: x[0])
# Slice off the first ten
for t in sorted_counts[:10]:
yield t
if __name__ == '__main__':
import time
start = time.time()
MRMostUsedWordSyllables.run()
end = time.time()
print(end - start)
我相信我的问题与在get_音节le_count_pair函数中调用音节有关,但不确定如何纠正它。
根据文档,syllables
包有一个功能。你可以这样称呼它
syllables.estimate(word)
你的代码应该是这样的:
return (syllables.estimate(word), word, )