使用正则表达式将文本中的关键字替换为HTML标记



我正在做一个简单的编程项目,用来解析在文本编辑器中编写的Java程序。文本编辑器中的程序由一个使用tkinter库的Python程序处理。

下面是一个java程序示例

public class TestMax {
public static void main(String args[]){
System.out.println("Hello World");
}
}

下面是实际输出

public class TestMax {
public static <span class="keyword">void</span> main(String args[]){
System.out.println("Hello World");
}
}

这是预期的输出

<span class="keyword">public</span> <span class="keyword">class</span> TestMax {
<span class="keyword">public</span> <span class="keyword">static</span> <span class="keyword">void</span> main(String args[]){
System.out.println("Hello World");
}
}

python程序

import tkinter as tk
from tkinter.filedialog import askopenfilename, asksaveasfilename
import re
# Java keywords that print_html wraps in <span class="keyword"> tags.
keywords = [
    "public",
    "class",
    "static",
    "void",
    "int",
    "if",
    "else",
    "return",
]
def open_file():
    """Open a file chosen by the user and load it into the editor.

    Shows the "open file" dialog. If no path comes back, nothing is
    changed. Otherwise the editor contents are replaced with the text of
    the selected file and the window title is updated to show its path.
    """
    filepath = askopenfilename(
        filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")]
    )
    if not filepath:
        return
    # Read the file before clearing the widget, so that a failed read
    # (unreadable file, decoding error) does not wipe the current buffer.
    with open(filepath, "r") as input_file:
        text = input_file.read()
    txt_edit.delete("1.0", tk.END)
    txt_edit.insert(tk.END, text)
    window.title(f"Simple Text Editor - {filepath}")
def save_file():
    """Save the current editor contents to a file chosen by the user.

    Shows the "save as" dialog. If no path comes back, nothing is
    written. Otherwise the editor text is written to the selected path
    and the window title is updated to show it.
    """
    filepath = asksaveasfilename(
        defaultextension="txt",
        filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")],
    )
    if not filepath:
        return
    # The Text widget always keeps one extra newline at the very end.
    # "end-1c" stops one character earlier, so that newline is not written
    # and repeated open/save cycles do not keep appending blank lines.
    text = txt_edit.get("1.0", "end-1c")
    with open(filepath, "w") as output_file:
        output_file.write(text)
    window.title(f"Simple Text Editor - {filepath}")
def print_html():
    """Print the editor text with keywords wrapped in HTML span tags."""
    text = txt_edit.get(1.0, tk.END)
    for line in text.split("\n"):
        for word in line.split(" "):
            if word in keywords:
                # NOTE: every match recomputes text_after from the unmodified
                # `text`, so only the substitution for the last keyword found
                # survives. This is the behaviour the question is about.
                text_after = re.sub(word, '<span class="keyword">'+word+'</span>', text)
    print(text_after)
# Build the main window. Row 0 and column 1 (the cell that holds the text
# area) get a minimum size of 800 and weight 1, so they take up extra space
# when the window is resized.
window = tk.Tk()
window.title("Simple Text Editor")
window.rowconfigure(0, minsize=800, weight=1)
window.columnconfigure(1, minsize=800, weight=1)
# The text area plus a raised frame holding the three command buttons.
txt_edit = tk.Text(window)
fr_buttons = tk.Frame(window, relief=tk.RAISED, bd=2)
btn_open = tk.Button(fr_buttons, text="Open", command=open_file)
btn_save = tk.Button(fr_buttons, text="Save As...", command=save_file)
btn_print_html = tk.Button(fr_buttons, text="Print as html", command=print_html)
# Stack the buttons vertically inside the frame, each stretched horizontally.
btn_open.grid(row=0, column=0, sticky="ew", padx=5, pady=5)
btn_save.grid(row=1, column=0, sticky="ew", padx=5)
btn_print_html.grid(row=2, column=0, sticky="ew", padx=5)
# Button frame goes in column 0, the text area fills column 1.
fr_buttons.grid(row=0, column=0, sticky="ns")
txt_edit.grid(row=0, column=1, sticky="nsew")
# Start the Tk event loop.
window.mainloop()

问题在这个方法中:

def print_html():
    """Print the editor text with keywords wrapped in HTML span tags."""
    text = txt_edit.get(1.0, tk.END)
    for line in text.split("\n"):
        for word in line.split(" "):
            if word in keywords:
                # NOTE: every match recomputes text_after from the unmodified
                # `text`, so only the substitution for the last keyword found
                # survives. This is the behaviour the question is about.
                text_after = re.sub(word, '<span class="keyword">'+word+'</span>', text)
    print(text_after)

问题是关键字只被替换一次。理想情况下,我会用<span class="keyword">word</span>替换所有的关键字。对此的任何帮助都将不胜感激。谢谢!

您没有保存每次替换的结果,而是在循环中不断覆盖text_after。

在这里不适合逐个关键字地使用re.sub,因为这会产生您绝对不想要的连锁替换(您添加的<span class=...>中的class本身就是关键字之一),所以最好重新构建一份全新的文本

def print_html():
    """Print the editor text with each keyword wrapped in a span tag.

    The output is rebuilt word by word rather than rewritten with
    ``re.sub``, so markup that has already been inserted (which itself
    contains the keyword ``class``) is never scanned again.

    Only whole space-separated words are compared against ``keywords``;
    a keyword that touches punctuation (for example ``if(``) is left as is.
    """
    text = txt_edit.get(1.0, tk.END)

    new_text = []
    for line in text.split("\n"):
        line_text = []
        for word in line.split(" "):
            if word in keywords:
                line_text.append('<span class="keyword">' + word + '</span>')
            else:
                line_text.append(word)
        # Re-join with the same separator used for splitting, so the
        # original spacing of the line is preserved.
        new_text.append(' '.join(line_text))
    new_text = '\n'.join(new_text)

    print(new_text)

最新更新