在代码中有一个名为clean_up的辅助函数,下面是我的代码。我想知道我需要修复、添加或删除什么才能使其工作。
def clean_up(s):
    """ (str) -> str

    Return a new string based on s in which all letters have been
    converted to lowercase and punctuation characters have been stripped
    from both ends. Inner punctuation is left untouched. Newline, tab and
    carriage-return characters at either end are stripped as well; spaces
    are not.

    >>> clean_up('Happy Birthday!!!')
    'happy birthday'
    >>> clean_up("-> It's on your left-hand side.")
    " it's on your left-hand side"
    >>> clean_up('Train')
    'train'
    """
    # The last three characters are the escape sequences for newline, tab
    # and carriage return. Written without backslashes they would be the
    # plain letters n, t and r, and str.strip would then eat those letters
    # from the ends of ordinary words (e.g. 'train' -> 'ai').
    punctuation = """!"',;:.-?)([]<>*#\n\t\r"""
    # str.strip treats its argument as a set of characters, not a substring.
    return s.lower().strip(punctuation)
########## Complete the following functions. ############
def type_token_ratio(text):
    r""" (list of str) -> float

    Precondition: text is non-empty. Each str in text ends with \n and
    text contains at least one word.

    Return the Type Token Ratio (TTR) for this text. TTR is the number of
    different words divided by the total number of words. Words are the
    whitespace-separated tokens of each line after normalisation with
    clean_up (lowercased, punctuation stripped from both ends).

    >>> text = ['James Fennimore Cooper\n', 'Peter, Paul, and Mary\n',
    ...         'James Gosling\n']
    >>> type_token_ratio(text)
    0.8888888888888888
    """
    distinct_words = set()
    total_words = 0
    # text is already a list of lines, so iterate it directly; a list has
    # no splitlines() method.
    for line in text:
        for token in line.split():
            # Clean each token on its own so that punctuation attached to
            # words in the middle of a line (e.g. 'Peter,') is removed too.
            word = clean_up(token)
            # A token made only of punctuation cleans to '' and is not a word.
            if word:
                distinct_words.add(word)
                total_words += 1
    return len(distinct_words) / total_words
您的代码无法运行:`for line in text.splitlines()` 试图对列表调用 `splitlines()`,而列表没有这个方法。
传入的 text 本身已经是一个字符串列表,直接遍历它即可;统计每个单词出现的次数时,可以使用
collections.defaultdict:
def type_token_ratio(text):
    """ (list of str) -> float

    Return the Type Token Ratio (TTR) for text: the number of different
    words divided by the total number of words.

    Each string in text is split on whitespace and every token is
    normalised with clean_up before it is counted. Tokens that are empty
    after cleaning are ignored.

    Precondition: text contains at least one word (otherwise the division
    below has a zero denominator).
    """
    from collections import defaultdict

    word_counts = defaultdict(int)
    for line in text:  # each string in the list
        for token in line.split():  # split into individual tokens
            # Clean token by token: clean_up only strips the two ends of
            # its argument, so cleaning a whole line would leave 'paul,'
            # and 'paul' counted as different words.
            word = clean_up(token)
            if word:  # skip tokens that were punctuation only
                word_counts[word] += 1
    # len() gives the number of distinct words; the summed counts give the
    # total number of words.
    return len(word_counts) / sum(word_counts.values())