如何提取并处理压缩文件(zip)中的所有文件?
import re
import zipfile
import pathlib
import pandas as pd
# Download mHealth dataset
# Intended to extract every member of ``zip_file`` and rename each extracted
# file so that the "mHealth_" substring is removed from its stem.
#
# NOTE: as written this function fails with
# ``TypeError: 'NoneType' object is not iterable`` on the ``for`` line,
# because ``ZipFile.extractall()`` returns None instead of the extracted paths.
# NOTE(review): the snippet's indentation was lost; the nesting below is
# reconstructed from the statements' data dependencies -- confirm.
def parse(zip_file):
    # Extract all the files in output directory
    with zipfile.ZipFile(zip_file, "r") as zfile:
        # extractall() extracts as a side effect and returns None, so there is
        # nothing to iterate over here.
        for file in zfile.extractall():
            # The loop body treats ``file`` as a pathlib.Path
            # (.is_file/.stem/.suffix/.parent/.rename).
            if file.is_file():
                old_name = file.stem
                extension = file.suffix
                directory = file.parent
                # Drop the "mHealth_" substring from the stem, keep the suffix.
                new_name = re.sub("mHealth_", "", old_name) + extension
                file = file.rename(pathlib.Path(directory, new_name))
        # Redundant: the ``with`` statement already closes the archive.
        zfile.close()
    # Returns whatever ``file`` was bound to by the last loop iteration.
    return file
错误回溯(Traceback):
Traceback (most recent call last):
  File "C:\Users\User\PycharmProjects\algorithms\project_kmeans.py", line 47, in <module>
    df_ = parse(zip_file_)
  File "C:\Users\User\PycharmProjects\algorithms\project_kmeans.py", line 12, in parse
    for file in zfile.extractall():
TypeError: 'NoneType' object is not iterable

Process finished with exit code 1
要在 for 循环中遍历压缩包里的文件,您需要使用 infolist() 或 namelist(),而不是 extractall()。
extractall() 会把文件从 zip 中解压出来,但它返回 None,并不提供文件名,因此不能用于 for 循环。
infolist() 和 namelist() 可以提供文件名,但这又带来另一个问题:它们返回的是 ZipInfo 对象或字符串(string),而不是 Path,所以没有 .is_file()、.stem 等方法和属性。您必须先把它们转换为 Path。
import zipfile
import pathlib
import pandas as pd
# Download mHealth dataset
def parse(zip_file, output_dir=None):
    """Extract a zip archive and strip the ``mHealth_`` prefix from file names.

    Every member of the archive is extracted, then each extracted regular
    file is renamed so that the substring ``"mHealth_"`` is removed from its
    stem (the extension and the containing directory are kept).

    Args:
        zip_file: Path (or file-like object) of the zip archive to read.
        output_dir: Directory to extract into. ``None`` (the default) extracts
            into the current working directory, as ``ZipFile.extractall``
            does.

    Returns:
        A ``pandas.DataFrame`` with one row per extracted file and two
        columns: ``'old'`` (the member name as stored in the archive) and
        ``'new'`` (the file name after renaming, without directory).
    """
    base_dir = pathlib.Path() if output_dir is None else pathlib.Path(output_dir)
    results = []

    with zipfile.ZipFile(zip_file, "r") as zfile:
        # extractall() only extracts (it returns None); the member names come
        # from infolist() below.
        zfile.extractall(output_dir)

        for fileinfo in zfile.infolist():
            filename = fileinfo.filename
            path = base_dir / filename
            # Directory entries (and anything that was not extracted to this
            # exact location) are skipped.
            if not path.is_file():
                continue

            new_name = path.stem.replace("mHealth_", "") + path.suffix
            if new_name != path.name:
                # Path.replace() overwrites an existing target on every
                # platform, whereas Path.rename() raises FileExistsError on
                # Windows -- which made a second run over the same archive
                # fail.
                path = path.replace(path.parent / new_name)

            print('path:', path)
            results.append([filename, new_name])

    return pd.DataFrame(results, columns=['old', 'new'])
# Example run: process 'test.zip' (a path relative to the current working
# directory) and print the value returned by parse().
df = parse('test.zip')
print(df)
文档:ZipFile.infolist() 和 ZipFile.extractall()