环境
- Python:Python 3.8.5
- pdfminer:20191125
异常
{
"trace":[
{
"filename":"/project/test.py",
"name":"foo",
"lineno":49
},
{
"filename":"/project/.local/lib/python3.8/site-packages/pdfminer/pdfpage.py",
"name":"get_pages",
"lineno":122
},
{
"filename":"/project/.local/lib/python3.8/site-packages/pdfminer/pdfdocument.py",
"name":"__init__",
"lineno":575
},
{
"filename":"/project/.local/lib/python3.8/site-packages/pdfminer/pdfdocument.py",
"name":"_initialize_password",
"lineno":599
},
{
"filename":"/project/.local/lib/python3.8/site-packages/pdfminer/pdfdocument.py",
"name":"__init__",
"lineno":300
},
{
"filename":"/project/.local/lib/python3.8/site-packages/pdfminer/pdfdocument.py",
"name":"init",
"lineno":307
},
{
"filename":"/project/.local/lib/python3.8/site-packages/pdfminer/pdfdocument.py",
"name":"init_key",
"lineno":320
},
{
"filename":"/project/.local/lib/python3.8/site-packages/pdfminer/pdfdocument.py",
"name":"authenticate",
"lineno":368
},
{
"filename":"/project/.local/lib/python3.8/site-packages/pdfminer/pdfdocument.py",
"name":"authenticate_user_password",
"lineno":374
},
{
"filename":"/project/.local/lib/python3.8/site-packages/pdfminer/pdfdocument.py",
"name":"compute_encryption_key",
"lineno":351
}
]
}
问题说明
我使用的是 pdfminer，这是一个可以在 Python 3 下使用、用于从 PDF 文档中提取信息的工具。问题是它几乎没有像样的文档，也找不到说明如何使用该工具的源代码示例。
我已经尝试了StackOverflow中的一些代码,但没有成功。下面是我的代码。
代码段
import io

from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager
from pdfminer.pdfinterp import PDFPageInterpreter
from pdfminer.converter import TextConverter
def foo(path, password, bankname):
    """Extract the text of the first pages of a PDF using pdfminer.

    Args:
        path: Filesystem path of the PDF file; it is opened in binary mode.
        password: Password forwarded unchanged to ``PDFPage.get_pages``.
        bankname: Not used by this function; kept so existing callers
            keep working.

    Returns:
        The text written by the ``TextConverter`` for the processed pages
        (at most ``maxpages`` pages).
    """
    # NOTE(review): the reported trace ends inside pdfminer's
    # compute_encryption_key, reached through the get_pages call below.
    # Presumably the type/value of ``password`` matters there (str vs.
    # bytes vs. None) -- confirm against the installed pdfminer version.
    rsrcmgr = PDFResourceManager()
    retstr = io.StringIO()
    laparams = LAParams()
    device = TextConverter(rsrcmgr, retstr, laparams=laparams)
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    maxpages = 2
    caching = True
    pagenos = set()
    try:
        # The context manager closes the file even when pdfminer raises.
        with open(path, 'rb') as fp:
            pages = PDFPage.get_pages(
                fp,
                pagenos,
                maxpages=maxpages,
                password=password,
                caching=caching,
                check_extractable=False,
            )
            for page in pages:  # this loop is "line 49" in the trace
                interpreter.process_page(page)
        text = retstr.getvalue()
    finally:
        # Release the converter device on both success and failure.
        device.close()
    return text
我也尝试过显式传入 password=""（空密码），结果仍然出现上面的异常。