从在线资源中阅读特定单词 - Python - Reading specific words from an online source

这是文本文件 abc.txt 的内容：

This is before the start and should be ignored.
So should this
and this

*** START OF SYNTHETIC TEST CASE ***
a ba bac
*** END OF SYNTHETIC TEST CASE ***
This is after the end and should be ignored too.
Have a nice day.

我需要编写一个函数 get_words_from_file(filename)，它返回一个小写单词列表，如下面的示例所示。该函数应仅处理开始和结束标记行之间的行，并使用下面提供的单词定义。

我得到了以下描述所需内容的正则表达式。我不需要了解正则表达式的工作原理，我只需要了解下面给出的对 findall 的调用将返回给定行字符串中相关单词的列表。

words_on_line = re.findall("[a-z]+[-'][a-z]+|[a-z]+[']?|[a-z]+", line)
- Include all lower-case character sequences, including those that contain a - or ' character and those that end with a ' character.
- Words that end with a - MUST NOT be included.
- The words should be in the same order as they occur in the file.
- There must be no more than 9 CONSTANTS declared.
- Functions must be no longer than 20 statements.
- Functions must not have more than 3 parameters.

测试代码：

# Test driver: load the words from abc.txt and print them one per line.
filename = "abc.txt"
words2 = get_words_from_file(filename)
print(filename, "loaded ok.")
print("{} valid words found.".format(len(words2)))
print("Valid word list:")
# Join with a real newline so each word lands on its own line.
print("\n".join(words2))

预期输出：

abc.txt loaded ok.
3 valid words found.
Valid word list:
a
ba
bac

我的代码如下：

def stripped_lines(lines):
    '''
    Yields each line from `lines` with any trailing newline characters
    removed.
    '''
    for line in lines:
        # Strip the newline escape, not the letter 'n'.
        stripped_line = line.rstrip('\n')
        yield stripped_line
def lines_from_file(fname):
    '''
    Yields the lines of the text file `fname` (opened as UTF-8), one at
    a time, after passing them through stripped_lines().
    '''
    with open(fname, 'rt', encoding='utf8') as flines:
        for line in stripped_lines(flines):
            yield line
def is_marker_line(line, start='***', end='***'):
    '''
    Marker lines start and end with the given strings, which may not
    overlap.  (A line containing just '***' is not a valid marker line.)

    Returns True when `line` is a marker line, False otherwise.
    '''
    # The line must be long enough to hold both delimiters side by side,
    # otherwise a single '***' would count as both start and end.
    min_len = len(start) + len(end)
    if len(line) < min_len:
        return False
    return line.startswith(start) and line.endswith(end)
def advance_past_next_marker(lines):
    '''
    Advances the given iterator through the first encountered marker
    line, if any.  If there is no marker line the iterator is consumed
    completely.
    '''
    for line in lines:
        if is_marker_line(line):
            break
def lines_before_next_marker(lines):
    '''
    Yields all lines up to but not including the next marker line.  If
    no marker line is found, yields no lines.
    '''
    # Lines are buffered first because we only know whether they are
    # valid once the closing marker has (or has not) been seen.
    valid_lines = []
    for line in lines:
        if is_marker_line(line):
            break
        valid_lines.append(line)
    else:
        # `for` loop did not break, meaning there was no marker line.
        valid_lines = []
    for content_line in valid_lines:
        yield content_line
def lines_between_markers(lines):
    '''
    Yields the lines between the first two marker lines.
    '''
    # Must use the iterator --- if it's merely an iterable (like a list
    # of strings), the call to lines_before_next_marker will restart
    # from the beginning.
    it = iter(lines)
    advance_past_next_marker(it)
    for line in lines_before_next_marker(it):
        yield line
def words(lines):
    # Joins the lines with newlines, lower-cases the text and splits it
    # on whitespace.
    text = '\n'.join(lines).lower().split()
    # Same as before...
    # NOTE(review): as posted, nothing is returned here, so the function
    # yields None to its caller; iterating over that result raises
    # "TypeError: 'NoneType' object is not iterable".
def get_words_from_file(fname):
    # Chains the helpers: read the file, keep the lines between the
    # markers, then split them into words.
    for word in words(lines_between_markers(lines_from_file(fname))):
        # NOTE(review): as posted, this returns on the first iteration,
        # so at most a single word (not a list) is handed back.
        return word
# Test driver: load the words from abc.txt and print them one per line.
filename = "abc.txt"
words2 = get_words_from_file(filename)
print(filename, "loaded ok.")
print("{} valid words found.".format(len(words2)))
print("Valid word list:")
# Join with a real newline so each word lands on its own line.
print("\n".join(words2))

我的蹩脚输出

Traceback (most recent call last):
File "C:/Users/Jill/SQ4.1(2).py", line 67, in <module>
words2 = get_words_from_file(filename)
File "C:/Users/Jill/SQ4.1(2).py", line 63, in get_words_from_file
for word in words(lines_between_markers(lines_from_file(fname))):
builtins.TypeError: 'NoneType' object is not iterable

你能帮我纠正我的代码吗？我完全不知所措。

我已经稍微更改了原始代码，请尝试以下操作。

def stripped_lines(lines):
    '''
    Yields every line of `lines` without its trailing newline
    character(s).
    '''
    for line in lines:
        # The argument is the newline escape; a bare 'n' would strip the
        # letter n from the end of words instead.
        stripped_line = line.rstrip('\n')
        yield stripped_line

def lines_from_file(fname):
    '''
    Opens `fname` in text mode and yields its lines one by one, after
    passing them through stripped_lines().
    '''
    with open(fname, 'rt') as flines:
        for line in stripped_lines(flines):
            yield line

def is_marker_line(line, start='***', end='***'):
    '''
    Marker lines start and end with the given strings, which may not
    overlap.  (A line containing just '***' is not a valid marker line.)

    Returns True if `line` qualifies as a marker line, else False.
    '''
    # Reject lines too short to contain `start` and `end` without the
    # two overlapping.
    min_len = len(start) + len(end)
    if len(line) < min_len:
        return False
    return line.startswith(start) and line.endswith(end)

def advance_past_next_marker(lines):
    '''
    Advances the given iterator through the first encountered marker
    line, if any.  Without a marker line, the iterator is exhausted.
    '''
    for line in lines:
        if is_marker_line(line):
            break

def lines_before_next_marker(lines):
    '''
    Yields all lines up to but not including the next marker line.  If
    no marker line is found, yields no lines.
    '''
    # Collect first, yield later: the lines only count if a closing
    # marker actually turns up.
    valid_lines = []
    for line in lines:
        if is_marker_line(line):
            break
        valid_lines.append(line)
    else:
        # `for` loop did not break, meaning there was no marker line.
        valid_lines = []
    for content_line in valid_lines:
        yield content_line

def lines_between_markers(lines):
    '''
    Yields the lines between the first two marker lines.
    '''
    # Must use the iterator --- if it's merely an iterable (like a list
    # of strings), the call to lines_before_next_marker will restart
    # from the beginning.
    it = iter(lines)
    advance_past_next_marker(it)
    for line in lines_before_next_marker(it):
        yield line

def words(lines):
    '''
    Returns the lower-cased, whitespace-separated words found in
    `lines`, in their original order.
    '''
    # Join with a real newline so the last word of one line is not glued
    # to the first word of the next.
    # NOTE(review): the task statement asks for re.findall with the
    # supplied pattern; a plain whitespace split gives the same result
    # only when the text consists of lower-case letter words.
    text = '\n'.join(lines).lower().split()
    return text
def get_words_from_file(fname):
    '''
    Returns the list of lower-case words found between the first two
    marker lines of the file `fname`.
    '''
    return words(lines_between_markers(lines_from_file(fname)))
# Test driver: load the words from abc.txt and print them one per line.
filename = "abc.txt"
all_words = get_words_from_file(filename)
print(filename, "loaded ok.")
print("{} valid words found.".format(len(all_words)))
print("Valid word list:")
# Join with a real newline so each word lands on its own line.
print("\n".join(all_words))

输出如下：

('abc.txt', 'loaded ok.')
3 valid words found.
Valid word list:
a
ba
bac

从在线资源中阅读特定单词 - Python

相关内容

最新更新

热门标签：