使用Openpyxl迭代时出现MemoryLoadError



当我遍历一系列.xlsm.xlsx文件时,我正试图将第一张工作表复制到存储工作簿中。然而,在大约30-40个文件之后,我开始专门从一些非常小的.xlsx文件中获得MemoryError。我觉得单独打开excel文件应该不会太费力。

我相信我正在保存并关闭必要的文件。但很明显,我在某个地方让记忆过载了。有什么想法吗?

import logging
import os
import openpyxl as xl
from copy import copy
from zipfile import BadZipFile
# Create and configure logger
LOG_FORMAT = "%(levelname)s - %(message)s"
logging.basicConfig(filename="errors.log",
level=logging.INFO,
format = LOG_FORMAT,
filemode ='w')
logger = logging.getLogger()

def copy_paste_sheets(sheet_giv, sheet_rec, filename_giv='', filename_rec='', start_row=1, start_col=1, end_row=None, end_col=None, offset_row=0, offset_col=0):
if end_row == None:
end_row = sheet_giv.max_row
if end_col == None:
end_col = sheet_giv.max_column

print(f"Copying and Pasting {filename_giv} {sheet_giv} to {filename_rec}{sheet_rec}...")
for i in range(start_row, end_row + 1):
for j in range(start_col, end_col + 1):
giv_cell = sheet_giv.cell(row=i, column=j)            
rec_cell = sheet_rec.cell(row=i+offset_row, column=j+offset_col, value=giv_cell.value)
if giv_cell.has_style:
rec_cell.font = copy(giv_cell.font)
rec_cell.border = copy(giv_cell.border)
rec_cell.fill = copy(giv_cell.fill)
rec_cell.number_format = copy(giv_cell.number_format)
rec_cell.protection = copy(giv_cell.protection)
rec_cell.alignment = copy(giv_cell.alignment)

def main():
directory = r'C:path'
rec_wb = xl.Workbook()
for n, filename in enumerate(os.listdir(directory)):
if filename.endswith(".xlsx") or filename.endswith(".xlsm"):
try:
giv_wb = xl.load_workbook(filename=os.path.join(directory, filename))
giv_ws = giv_wb.worksheets[0]
rec_ws = rec_wb.create_sheet(str(n), -1)
title_cell = rec_ws.cell(row=1, column=1)
title_cell.value = filename
copy_paste_sheets(giv_ws, rec_ws, filename_giv=filename, end_row=50, end_col=15, offset_row=1)

except (BadZipFile, MemoryError) as e:
if type(e) == MemoryError:
e = 'MemoryError'
print(f'No: {n} - {filename} = {e}')
logger.error(f'No: {n} - {filename} = {e}')

giv_wb.close()
elif not filename.endswith(".xlsx") or not filename.endswith(".xlsm"):
logger.info(f'No: {n} - {filename} = Not xlsx or xlsm file')
rec_wb.save("output.xlsx")
rec_wb.close()
rec_wb =  xl.load_workbook(filename="output.xlsx")
if __name__ == "__main__":
main()

我能够通过在循环的末尾添加gc.collect()来解决问题

最新更新