如何创建一个为pdf编号的脚本



我想创建一个脚本(最好是python),从文件夹中按顺序获得一些pdf文档,并在每个文档的第一页放置一个大数字(文件夹的第一个文档应该编号为1,下一个为2,依此类推)。

有什么库可以帮助我实现这个吗?

我见过一些 Python 库可以给单个文档的各页编号,但不能为不同的文档分别编号(效果有点像水印)。

您可以使用 Python 库 PyPDF2,这是一个纯 Python 的 PDF 工具包,可用于操作和修改 PDF 文档。下面的示例使用 PyPDF2,在每个 PDF 文档的每一页上以水印方式添加该文档的序号(注意是文档序号,而不是页码):

import os
import PyPDF2
# Stamp every PDF in a folder with its 1-based position in the sorted file
# list, using the legacy PyPDF2 1.x API (PdfFileReader / PdfFileWriter).
#
# PyPDF2 has no text-drawing helper, so the stamp is built by hand: a blank
# overlay page whose content stream shows the number in Helvetica, merged
# onto each page of the document.
#
# NOTE(review): encrypted PDFs and rotated pages are not handled here.

# Folder containing the PDF documents to number.
pdf_folder_path = '/path/to/pdf/folder'
# Font size, in points, of the stamped number.
stamp_font_size = 36
# PDF files in the folder, sorted by filename; this order defines the numbering.
pdf_files = sorted([f for f in os.listdir(pdf_folder_path) if f.endswith('.pdf')])

# Font dictionary for the standard Helvetica Type1 font, referenced as /F1
# by the overlay content stream below.
stamp_font = PyPDF2.generic.DictionaryObject({
    PyPDF2.generic.NameObject('/Type'): PyPDF2.generic.NameObject('/Font'),
    PyPDF2.generic.NameObject('/Subtype'): PyPDF2.generic.NameObject('/Type1'),
    PyPDF2.generic.NameObject('/BaseFont'): PyPDF2.generic.NameObject('/Helvetica'),
})

for doc_number, pdf_file in enumerate(pdf_files, start=1):
    source_path = os.path.join(pdf_folder_path, pdf_file)
    # The reader pulls objects from the source lazily, so the result is
    # written to a separate file and swapped in once both files are closed.
    temp_path = source_path + '.tmp'
    with open(source_path, 'rb') as pdf:
        pdf_reader = PyPDF2.PdfFileReader(pdf)
        pdf_writer = PyPDF2.PdfFileWriter()
        for page_num in range(pdf_reader.getNumPages()):
            page = pdf_reader.getPage(page_num)
            box = page.mediaBox
            # Same placement as before: 100 pt left of the right edge and
            # 20 pt above the bottom edge of the media box.
            x = float(box.getUpperRight_x()) - 100
            y = float(box.getLowerLeft_y()) + 20
            # Blank overlay the size of the page, carrying only the number.
            watermark = PyPDF2.pdf.PageObject.createBlankPage(
                None, box.getWidth(), box.getHeight())
            watermark[PyPDF2.generic.NameObject('/Resources')] = (
                PyPDF2.generic.DictionaryObject({
                    PyPDF2.generic.NameObject('/Font'):
                        PyPDF2.generic.DictionaryObject({
                            PyPDF2.generic.NameObject('/F1'): stamp_font,
                        }),
                }))
            # PDF text operators: begin text, select font, move, show, end.
            stamp_ops = 'BT /F1 {size} Tf {x:.2f} {y:.2f} Td ({text}) Tj ET'.format(
                size=stamp_font_size, x=x, y=y, text=doc_number)
            content = PyPDF2.generic.DecodedStreamObject()
            content.setData(stamp_ops.encode('ascii'))
            watermark[PyPDF2.generic.NameObject('/Contents')] = content
            # Draw the overlay on top of the existing page content.
            page.mergePage(watermark)
            pdf_writer.addPage(page)
        with open(temp_path, 'wb') as output:
            pdf_writer.write(output)
    # Replace the original document with the stamped copy.
    os.replace(temp_path, source_path)

下面是使用PyMuPDF的解决方案。

import os
import fitz  # import PyMuPDF
# Number every PDF in a folder by inserting its 1-based position in the
# sorted file list into the first page, then saving the file incrementally.
#
# NOTE(review): running the script twice inserts a second number on top of
# the first; nothing here detects an existing stamp.

# Folder containing the PDF documents to number.
pdf_folder_path = '/path/to/pdf/folder'
# PDF files in the folder, sorted by filename; this order defines the numbering.
pdf_files = sorted([f for f in os.listdir(pdf_folder_path) if f.endswith('.pdf')])
for i, f in enumerate(pdf_files, start=1):
    filename = os.path.join(pdf_folder_path, f)
    # The context manager closes the document even if stamping fails.
    with fitz.open(filename) as doc:
        page = doc[0]  # first page of the document
        page.wrap_contents()  # guard against coordinate sloppiness
        prect = page.rect  # page rectangle
        # Text insertion point, offset 50 and 36 units from the rectangle's
        # width and height respectively.
        point = fitz.Point(prect.width - 50, prect.height - 36)
        # insert_text takes stroke_opacity / fill_opacity rather than a plain
        # `opacity` keyword; fill_opacity is the one that affects filled text.
        page.insert_text(point, str(i), fill_opacity=0.3, fontsize=25)
        doc.saveIncr()  # incremental save back to the same file

最新更新