如何用 Python 程序从这个文本文件中读取正确的行,然后用从 .txt 文件中提取的数据填充并生成一个 .py 文件?



要读取的文本文件(实际文件包含更多数字),名为 number_info.txt:

veinti tres
23
veinti dos
22
veinti uno
21
veinte
20
tres
3
dos
2
uno
1

这是代码(我需要帮助):

import re
def auto_coding_text_to_number():
    """Generate ``number_to_text.py`` from the pairs in ``number_info.txt``.

    The input file is read as alternating entries: a number spelled out in
    words, followed by the same number written in digits. Blank lines are
    ignored, so the pairs may be separated by empty lines or not.

    The generated module defines ``number_to_text_func(input_text)``, which
    applies one ``re.sub`` per pair in file order, then reads a line from
    standard input and prints the converted text.
    """
    with open('number_info.txt', 'r', encoding='utf-8') as f:
        entries = [line.strip() for line in f if line.strip()]

    # Even positions hold the spelled-out numbers, odd positions the digits.
    coloquial_numbers = entries[0::2]
    symbolic_numbers = entries[1::2]

    regex_line = '    input_text = re.sub(r"{}", "{}", input_text)\n'
    plain_line = '    input_text = re.sub("{}", "{}", input_text)\n'

    chunks = ['import re\n\ndef number_to_text_func(input_text):\n']
    for words, digits in zip(coloquial_numbers, symbolic_numbers):
        if ' ' in words:
            # Allow the words to be written together, or separated by
            # whitespace or hyphens. The set "[\s|-|]" would not do this:
            # "|-|" is a range containing only "|", so "-" never matches.
            pattern = r'[\s-]*'.join(words.split())
            chunks.append(regex_line.format(pattern, digits))
        else:
            chunks.append(plain_line.format(words, digits))
    chunks.append(
        '    return(input_text)\n'
        '\n'
        'input_text = str(input())\n'
        'print(number_to_text_func(input_text))\n'
    )

    with open('number_to_text.py', 'w', encoding='utf-8') as f:
        f.writelines(chunks)
# Invoke the generator at module level.
auto_coding_text_to_number()

这是正确的文件,名为 number_to_text.py,它应该由另一个脚本生成:

import re
def number_to_text_func(input_text):
    """Replace Spanish number words in *input_text* with their digits.

    Compound numbers are handled before the single words they contain, so
    that "veinti tres" becomes "23" rather than "veinti 3". The two parts of
    a compound may be written together or separated by whitespace or hyphens.

    Returns the text with every recognised number word substituted.
    """
    # "[\s-]*" is used instead of "[\s|-|]*": inside a set, "|-|" is a range
    # containing only "|", so the latter never matches a hyphen.
    input_text = re.sub(r"veinti[\s-]*tres", "23", input_text)
    input_text = re.sub(r"veinti[\s-]*dos", "22", input_text)
    input_text = re.sub(r"veinti[\s-]*uno", "21", input_text)
    input_text = re.sub("tres", "3", input_text)
    input_text = re.sub("dos", "2", input_text)
    input_text = re.sub("uno", "1", input_text)
    return input_text
# Read one line from standard input and print it with number words replaced.
input_text = input()
print(number_to_text_func(input_text))

编辑:

.txt文件中的行的结构类似于

"veinti tres"  <---- line 0
"23"           <---- line 1
"veinti dos"   <---- line 2
"22"           <---- line 3
"veinti uno"   <---- line 4
"21"           <---- line 5
"veinte"       <---- line 6
"20"           <---- line 7
"tres"         <---- line 8
"3"            <---- line 9

然后我建议将它们分成两组,并将它们存储在两个列表中

#lines 0, 3, 6, 9, 12, 15, 18, ...
coloquial_numbers = ["veinti tres", "veinti dos", "veinti uno", "veinte", "tres"]
#lines 0+1, 3+1, 6+1, 9+1, 12+1, 15+1, 18+1, ...
symbolic_numbers = ["23", "22", "21", "20", "3"]

# One generated statement per number: first slot is the regex pattern,
# second slot is the replacement digits.
body_template = """    input_text = re.sub(r"{}", "{}", input_text)\n"""

然后函数体中每一行的结构应该类似于:

# Turn each space of the spelled-out number into "optional whitespace or
# hyphens" and substitute the matching digits. A raw string keeps "\s" intact;
# "[\s-]" is used because "|-|" inside a set is a range that only holds "|".
input_text = re.sub(coloquial_numbers[n].replace(' ', r'[\s-]*'), symbolic_numbers[n], input_text)

以便在输出文件的函数体中得到类似下面的内容:

def number_to_text(input_text):
    """Replace Spanish number words in *input_text* with their digits.

    Compound numbers come first so their last word is not replaced on its
    own. Their two parts may be joined directly or separated by whitespace
    or hyphens.
    """
    # "[\s-]*" rather than "[\s|-|]*": the latter cannot match a hyphen,
    # because "|-|" inside a set is a range containing only "|".
    input_text = re.sub(r"veinti[\s-]*tres", "23", input_text)
    input_text = re.sub(r"veinti[\s-]*dos", "22", input_text)
    input_text = re.sub(r"veinti[\s-]*uno", "21", input_text)
    input_text = re.sub("tres", "3", input_text)
    return input_text

为了简单起见,我省略了读/写步骤。没有给出指定元函数主体的规则,所以我做了一个猜测。

import re 
# body-component of the meta-code: one substitution statement per number
body_template = """    input_text = re.sub(r"{}", "{}", input_text)\n"""

# other components of the meta-code
header = """import re
def number_to_text_func(input_text):
"""
tail = """\n    return(input_text)
input_text = str(input())
print(number_to_text_func(input_text))"""


def build_meta_code(text):
    """Return the source of ``number_to_text.py`` for the given file content.

    *text* is scanned for a run of lowercase words followed, on the next
    line, by a run of digits. Each such pair becomes one ``re.sub`` line in
    the generated function, in the order the pairs appear.
    """
    body = ''.join(
        # Words of a multi-word number are joined with "[\s-]" so that they
        # match when separated by whitespace or a hyphen. "[\s|-|]" would not
        # match a hyphen: "|-|" inside a set is a range containing only "|".
        body_template.format(r'[\s-]'.join(n_text.split()), n)
        for n_text, n in re.findall(r'\n*([a-z\s]+)\n(\d+)', text)
    )
    # merge together texts to be saved to file
    return header + body + tail


def main():
    """Read ``number_info.txt`` and print the generated source."""
    # read from file
    with open('number_info.txt', 'r') as fd:
        text = fd.read()
    print(build_meta_code(text))


if __name__ == '__main__':
    main()

输出(number_to_text.py 的内容):

import re
def number_to_text_func(input_text):
    """Replace Spanish number words in *input_text* with their digits.

    Substitutions run from the longest compound down to the single words, so
    a compound is converted before any word it contains. Words of a compound
    must be separated by exactly one whitespace character or hyphen.
    """
    # "[\s-]" rather than "[\s|-|]": inside a set, "|-|" is a range that
    # contains only "|", so the latter never matches a hyphen.
    input_text = re.sub(r"treinta[\s-]y[\s-]uno", "31", input_text) # <-
    input_text = re.sub(r"veinti[\s-]tres", "23", input_text)
    input_text = re.sub(r"veinti[\s-]dos", "22", input_text)
    input_text = re.sub(r"veinti[\s-]uno", "21", input_text)
    input_text = re.sub(r"veinte", "20", input_text)
    input_text = re.sub(r"tres", "3", input_text)
    input_text = re.sub(r"dos", "2", input_text)
    input_text = re.sub(r"uno", "1", input_text)
    return input_text
# Read one line from standard input and print it with number words replaced.
input_text = input()
print(number_to_text_func(input_text))

来自评论:

逐行读取文件,不使用正则表达式:

# Read the file as a list of lines. splitlines() avoids the empty trailing
# element that split('\n') produces when the file ends with a newline.
with open('number_info.txt', 'r') as fd:
    lines = fd.read().splitlines()

# The layout repeats every three lines: words, digits, and a third line that
# is skipped (presumably a blank separator - confirm against the real file).
symbolic_numbers, coloquial_numbers = [], []
for i, line in enumerate(lines):
    if i % 3 == 0:
        coloquial_numbers.append(line)
    elif i % 3 == 1:
        symbolic_numbers.append(line)

或者用切片读取文件:

# Read the file as a list of lines. splitlines() avoids the empty trailing
# element that split('\n') produces when the file ends with a newline.
with open('number_info.txt', 'r') as fd:
    lines = fd.read().splitlines()

# Every third line starting at 0 holds the words; every third line starting
# at 1 holds the digits. The remaining lines are not used.
coloquial_numbers = lines[::3]
symbolic_numbers = lines[1::3]

最新更新