使用 python-docx 转录 .docx 文件以修改字体和字号:需要在目标文件中重建段落



目的是转录 .docx 文件以修改字体和字号,同时保留粗体、下划线、斜体等 run(文本片段)属性。然后,我将在新创建的 target.docx 文件中添加一些标题和图形。

如何由各个 run 重建段落?目前,每个 run 都单独占了一行!

from docx import Document
from docx.shared import Pt
def main(filename):
    """Transcribe the runs of ``filename`` into a new document and save it.

    The target document's 'Normal' style is set to 11 pt 'Times'. Every run
    of every source paragraph is printed and copied to the target, which is
    then saved to '../Output/the_target.docx'.

    An IOError raised anywhere in the body is caught and reported with a
    printed message instead of propagating.
    """
    try:
        src_doc = Document(filename)
        trg_doc = Document()
        # Set the font on the target document's 'Normal' style.
        style = trg_doc.styles['Normal']
        font = style.font
        font.name = 'Times'
        font.size = Pt(11)
        for p_cnt in range(len(src_doc.paragraphs)):
            for r_cnt in range(len(src_doc.paragraphs[p_cnt].runs)):
                curr_run = src_doc.paragraphs[p_cnt].runs[r_cnt]
                print('Run: ', curr_run.text)
                # NOTE: a new target paragraph is created for every source
                # run, so each run ends up in a paragraph of its own.
                paragraph = trg_doc.add_paragraph()
                # NOTE: because this is an if/elif chain, at most one of
                # bold / italic / underline is carried over per run.
                if curr_run.bold:
                    paragraph.add_run(curr_run.text).bold = True
                elif curr_run.italic:
                    paragraph.add_run(curr_run.text).italic = True
                elif curr_run.underline:
                    paragraph.add_run(curr_run.text).underline = True
                else:
                    paragraph.add_run(curr_run.text)
        trg_doc.save('../Output/the_target.docx')
    except IOError:
        print('There was an error opening the file')


if __name__ == '__main__':
    main("../Input/Current_File.docx")

输入:

1.0 PURPOSE The purpose of this procedure is to ensure all feedback is logged, documented and any resulting complaints are received, evaluated, and reviewed in accordance with 21 CFR Part 820 and ISO 13485

输出:

PURPOSE The purpose of this procedure is to ensure
all feedback is logged,
documented and any resulting complaints are received,
evaluated, and reviewed
in accordance with 21 CFR P art 820
and ISO 13485 .

您现在是在为每个 run 添加一个新段落。您的核心循环应该更像这样:

# One target paragraph per source paragraph; its runs are appended to that
# same paragraph instead of each starting a new one.
for src_paragraph in src_doc.paragraphs:
    tgt_paragraph = tgt_doc.add_paragraph()
    for src_run in src_paragraph.runs:
        print('Run: ', src_run.text)
        tgt_run = tgt_paragraph.add_run(src_run.text)
        # Independent checks (not elif) so a run can carry several
        # attributes at once.
        if src_run.bold:
            tgt_run.bold = True
        if src_run.italic:
            tgt_run.italic = True
        if src_run.underline:
            tgt_run.underline = True

将下面这几行替换掉:

# Index-based traversal of every run in every source paragraph.
for p_cnt in range(len(src_doc.paragraphs)):
    for r_cnt in range(len(src_doc.paragraphs[p_cnt].runs)):
        curr_run = src_doc.paragraphs[p_cnt].runs[r_cnt]

在构造 run 的地方,我使用了类似于 Scanny 建议的写法。这样,每个 run 就不会各自变成一个段落。

src_doc = docx.Document(path)
trgt_doc = docx.api.Document()
# Get Target section(s) for Headers/Footers.  These lookups do not depend on
# the paragraph being copied, so they are done once, before the loop.
sections = trgt_doc.sections
section = sections[0]
sectPr = section._sectPr
footer = section.footer
paragraph = footer.paragraphs[0]
# Generate new Target file from Source File
for src_paragraph in src_doc.paragraphs:
    src_paragraph_format = src_paragraph.paragraph_format
    trgt_paragraph = trgt_doc.add_paragraph()
    trgt_paragraph_format = trgt_paragraph.paragraph_format
    # NOTE(review): this assigns to the *name* of the target paragraph's
    # style object rather than assigning a style to the paragraph; it
    # presumably renames that style in the target document -- confirm this
    # is intended.
    trgt_paragraph.style.name = src_paragraph.style.name
    # Indents are taken from the source; spacing is fixed at 2 pt.
    trgt_paragraph_format.left_indent = src_paragraph_format.left_indent
    trgt_paragraph_format.right_indent = src_paragraph_format.right_indent
    trgt_paragraph_format.space_before = Pt(2)
    trgt_paragraph_format.space_after = Pt(2)
    # The font is set on the paragraph's style, not on individual runs.
    font = trgt_paragraph.style.font
    font.name = 'Times'
    font.size = Pt(11)
    # Transcribe source file runs
    for src_run in src_paragraph.runs:
        trgt_run = trgt_paragraph.add_run(src_run.text)
        # Only bright-green highlighting is carried over.
        if src_run.font.highlight_color == WD_COLOR_INDEX.BRIGHT_GREEN:
            trgt_run.font.highlight_color = WD_COLOR_INDEX.BRIGHT_GREEN
        if src_run.bold:
            trgt_run.bold = True
        if src_run.italic:
            trgt_run.italic = True
        if src_run.underline:
            trgt_run.underline = True

最新更新