无法从eml文件下载所有文档



我有一个.eml文件,里面有3个附件。我可以下载其中一个附件,但无法下载所有附件。

import os
import email
import base64
# Script: for every .eml file in the current directory, locate the base64
# payload that follows a `.docx"` marker and write the decoded bytes to
# output/<eml name>.docx.

# Get list of all files
files = [f for f in os.listdir('.') if os.path.isfile(f)]
# Create output directory
if os.path.exists("output"):
    pass
else:
    os.makedirs("output")
for eml_file in files:
    if eml_file.endswith(".eml"):
        with open(eml_file) as f:
            # Named raw_message so the imported `email` module is not shadowed
            raw_message = f.read()
        ext = ".docx"
        if ext != "":
            # Extract the base64 encoding part of the eml file.
            # NOTE: [-1] keeps only the text after the LAST `.docx"` marker,
            # so at most one payload per .eml file is ever decoded.
            encoding = raw_message.split(ext + '"')[-1]
            if encoding:
                # Remove all whitespaces
                encoding = "".join(encoding.strip().split())
                # Drop everything from the first "=" on (padding and trailer)
                encoding = encoding.split("=", 1)[0]
                # Convert base64 to string: pad to a multiple of 4 first
                encoding += "=" * (-len(encoding) % 4)
                try:
                    decoded = base64.b64decode(encoding)
                except ValueError:
                    # binascii.Error is a ValueError subclass. Dump the bad
                    # payload and skip this file, so that no undefined or
                    # stale `decoded` value gets written below.
                    print(encoding)
                    for i in range(100):
                        print('\n')
                    continue
                # Save it as docx
                path = os.path.splitext(eml_file)[0]
                if path:
                    path = os.path.join("output", path + ext)
                    try:
                        os.remove(path)
                    except OSError:
                        pass
                    with open(path, "wb") as f:
                        f.write(decoded)
        else:
            print("File not done: " + eml_file)

如何下载所有附件?编辑:我已经初始化了eml_file,但仍然无法下载所有文件。

您已经导入了email模块。那么,为什么要忽略它,并尝试自己编写一个电子邮件解析器呢?此外:

  1. 您可以使用glob列出具有给定扩展名的所有文件
  2. 应该在条件中使用not运算符(if not os.path.exists("output"): os.makedirs("output")),但即使这样也没有必要,因为makedirs有exist_ok参数
import os
import glob
import email
from email import policy
def save_attachments(eml_file, outdir):
    """Write every attachment of one .eml file into a directory.

    Args:
        eml_file: Path of the .eml message to parse.
        outdir: Existing directory that receives the attachment files.

    Returns:
        List of the file paths that were written, in attachment order.
    """
    # This will not work in Python 2 (the policy argument is Python 3 only).
    # Binary mode leaves charset handling to the email parser, and the
    # with-block closes the file once parsing is done.
    with open(eml_file, 'rb') as fp:
        msg = email.message_from_binary_file(fp, policy=policy.default)
    saved = []
    for index, att in enumerate(msg.iter_attachments(), start=1):
        # Tabs may be added for indentation and not stripped automatically.
        # get_filename() returns None when the part carries no name.
        filename = (att.get_filename() or '').replace('\t', '')
        # The name comes from the message itself, so keep only its last path
        # component to stop it from pointing outside outdir.
        filename = os.path.basename(filename)
        if not filename:
            filename = 'attachment_%d' % index
        # Here we suppose for simplicity sake that each attachment has a valid
        # unique filename, which, generally speaking, is not true.
        payload = att.get_payload(decode=True)
        if payload is None:
            # decode=True yields None for multipart parts (e.g. an attached
            # message); fall back to the serialized part.
            payload = att.as_bytes()
        target = os.path.join(outdir, filename)
        with open(target, 'wb') as f:
            f.write(payload)
        saved.append(target)
    return saved


indir = '.'
outdir = os.path.join(indir, 'output')
os.makedirs(outdir, exist_ok=True)
files = glob.glob(os.path.join(indir, '*.eml'))
for eml_file in files:
    save_attachments(eml_file, outdir)

最新更新