如何在使用 Python 正则表达式执行多个文本替换时使用循环


import re
# Manual approach: five chained substitutions, one per numbered suffix.
# Read the whole input up front; the context manager closes the file even
# if reading fails.
with open('INPUT_FILE.txt', 'r') as fo:
    fo1 = re.sub(r'OLD_TEXT_01', "NEW_TEXT_01", fo.read(), flags=re.M)
# NOTE: ``flags`` has to be passed by keyword. The fourth positional
# parameter of re.sub() is ``count``, so a positional re.M (== 8) would
# silently cap each call at eight replacements instead of setting a flag.
fo2 = re.sub(r'OLD_TEXT_02', "NEW_TEXT_02", fo1, flags=re.M)
fo3 = re.sub(r'OLD_TEXT_03', "NEW_TEXT_03", fo2, flags=re.M)
fo4 = re.sub(r'OLD_TEXT_04', "NEW_TEXT_04", fo3, flags=re.M)
fo5 = re.sub(r'OLD_TEXT_05', "NEW_TEXT_05", fo4, flags=re.M)
replacement = fo5
# Write the fully substituted text to the output file.
with open('OUTPUT_FILE.txt', 'w') as fo:
    fo.write(replacement)

如何借助循环来改写上面的代码?输入文件中的 OLD_TEXT 以数字为后缀,必须替换为带有相同数字后缀的 NEW_TEXT。上面这种手动方法可以正常工作,但不是自动化的方法。

如果搜索和替换文本确实遵循示例中的模式,则只需要一个re.sub()调用:

import re

def main():
    """Rewrite OLD_TEXT_01..05 in INPUT_FILE.txt as NEW_TEXT_01..05.

    Reads ``INPUT_FILE.txt`` from the current directory, performs all five
    replacements with a single ``re.sub`` call, and writes the result to
    ``OUTPUT_FILE.txt``.
    """
    with open('INPUT_FILE.txt', 'r') as in_file:
        content = in_file.read()
    # The digit is captured by the group and re-inserted through the ``\1``
    # backreference, so each OLD_TEXT_0n keeps its own number instead of
    # every match collapsing to NEW_TEXT_01.
    new_content = re.sub(
        r'OLD_TEXT_0([1-5])', r'NEW_TEXT_0\1', content, flags=re.MULTILINE
    )
    with open('OUTPUT_FILE.txt', 'w') as out_file:
        out_file.write(new_content)


if __name__ == '__main__':
    main()

编号中变化的那一位数字被捕获到一个分组中,供替换文本引用。另外,请将 flags 参数的写法与您的代码进行比较:它必须是第五个位置参数,或者以关键字参数的形式给出(第四个位置参数是 count)。

如果我没有理解错这个问题……下面的代码应该会有帮助 :)

import re
# Each entry is (regex pattern, replacement text); entries are applied in order.
patterns = [
    (r'OLD_TEXT_01', 'NEW_TEXT_01'),
    (r'OLD_TEXT_02', 'NEW_TEXT_02'),
    (r'OLD_TEXT_03', 'NEW_TEXT_03'),
    (r'OLD_TEXT_04', 'NEW_TEXT_04'),
    (r'OLD_TEXT_05', 'NEW_TEXT_05'),
]


def apply_patterns(text):
    """Return ``text`` after applying every pair in ``patterns``, in order."""
    for pattern, replacement in patterns:
        # ``flags`` must be a keyword argument: the fourth positional
        # parameter of re.sub() is ``count``, not ``flags``.
        text = re.sub(pattern, replacement, text, flags=re.M)
    return text


# OPTION 1: Loop to read input file line by line
def convert_line_by_line(in_path, out_path):
    """Copy ``in_path`` to ``out_path``, substituting one line at a time."""
    with open(in_path, 'r') as in_file, open(out_path, 'w') as out_file:
        for line in in_file:
            out_file.write(apply_patterns(line))


# OPTION 2: Alternative that reads in the whole input file (not line by line)
# NOT ADVISABLE FOR LARGE FILES (takes lots of space in memory)
def convert_whole_file(in_path, out_path):
    """Copy ``in_path`` to ``out_path``, substituting on the full contents."""
    with open(in_path, 'r') as in_file:
        out_put = in_file.read()
    with open(out_path, 'w') as out_file:
        out_file.write(apply_patterns(out_put))


if __name__ == '__main__':
    # Run exactly one option. Running both on the same open file would leave
    # the second with nothing to read, because the first consumes the input.
    convert_line_by_line('INPUT_FILE.txt', 'OUTPUT_FILE.txt')

最新更新