我在编写一个比较两个 zip 文件的函数时遇到了问题(要比较的是内容是否相同,而不仅仅是文件名)。这是我的代码示例:
def validate_zip_files(self):
    """Download ``test`` over FTP and compare it with the local reference zip.

    The remote file is saved as ``test.zip`` in the current working
    directory and then compared byte-for-byte with
    ``/home/user/file/text.zip``.

    Returns:
        bool: True when both files have identical contents, False otherwise.
    """
    host = '192.168.0.1'
    port = 2323
    username = '123'
    password = '123'
    downloaded_path = 'test.zip'
    reference_path = '/home/user/file/text.zip'

    ftp = FTP()
    try:
        ftp.connect(host, port)
        ftp.login(username, password)
        ftp.cwd('test')
        print(ftp.pwd())
        # Close the download before comparing so every byte is flushed to disk.
        with open(downloaded_path, 'wb') as download:
            ftp.retrbinary('RETR test', download.write)
    finally:
        # Release the connection even when the transfer fails.
        ftp.close()

    # filecmp.cmp takes path names, not file objects, and re-opening either
    # file in 'wb' mode would truncate it.  shallow=False forces a real
    # content comparison instead of trusting the os.stat() signature.
    return filecmp.cmp(downloaded_path, reference_path, shallow=False)
其中一个问题是第二个 zip 位于不同的位置('/home/user/file/text.zip'),而下载的 zip 文件保存在我的 python 脚本所在的目录中。我也不能 100% 确定 filecmp.cmp 是否适用于 .zip 文件。
任何想法都会很棒:)谢谢。
与其直接比较文件,不如比较文件的哈希值。这样就不必依赖 filecmp——正如您所说,它可能不适用于压缩文件。
import hashlib
def compare_files(a, b):
    """Return True when the files at paths *a* and *b* have identical contents.

    The comparison is done on the SHA-256 digest of each file's raw bytes.

    Args:
        a: Path of the first file.
        b: Path of the second file.

    Returns:
        bool: True if both digests match, False otherwise.
    """
    def _sha256_of(path):
        # Hash in fixed-size chunks so large archives are never held in
        # memory at once, and close the handle deterministically.
        digest = hashlib.sha256()
        with open(path, 'rb') as stream:
            for chunk in iter(lambda: stream.read(65536), b''):
                digest.update(chunk)
        return digest.digest()

    return _sha256_of(a) == _sha256_of(b)
请参阅我的 gist,它按内容比较两个 zip 文件,并生成从一个 zip 到另一个 zip 的补丁文件。例如,如果两个 zip 文件共享一个条目但内容不同,我的 gist 能够找出它;如果它们包含不同的条目,gist 同样可以处理。该 gist 会忽略修改时间的差异。话虽如此,如果您只关心浅层比较,那么 hashlib 可能是更好的选择。
供您参考,以下是该 gist 中的代码:
import os
import argparse
import collections
import tempfile
import zipfile
import filecmp
import shutil
import shlex
# Result of comparing the entry names of an old and a new zip archive.
# As filled in by zipcmp():
#   to_rm  - names present only in the old archive
#   to_cmp - names present in both archives
#   to_add - names present only in the new archive
# compare_files() later adds shared entries whose contents differ to to_add.
ZipCmpResult = collections.namedtuple('ZipCmpResult',
['to_rm', 'to_cmp', 'to_add'])
def make_parser():
    """Build the command-line parser for the patch generator.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--oldfile``, ``--newfile``
        and ``--toname``, each defaulting to a path under ``share``.
    """
    parser = argparse.ArgumentParser(
        description='Make patch zip file from two similar zip files.')
    parser.add_argument(
        '--oldfile',
        default=os.path.join('share', 'old.zip'),
        help='default: %(default)s')
    parser.add_argument(
        '--newfile',
        default=os.path.join('share', 'new.zip'),
        help='default: %(default)s')
    parser.add_argument(
        '--toname',
        default=os.path.join('share', 'patch'),
        help='default: %(default)s')
    return parser
def zipcmp(old, new):
    """Classify the entry names of two zip archives.

    Args:
        old: Path (or file object) of the old archive.
        new: Path (or file object) of the new archive.

    Returns:
        ZipCmpResult: ``to_rm`` holds names found only in *old*, ``to_cmp``
        names found in both, and ``to_add`` names found only in *new*.
        Only names are compared here, not contents.
    """
    with zipfile.ZipFile(old) as zinfile:
        old_names = set(zinfile.namelist())
    with zipfile.ZipFile(new) as zinfile:
        new_names = set(zinfile.namelist())
    to_rm = old_names - new_names
    to_cmp = old_names & new_names
    to_add = new_names - old_names
    return ZipCmpResult(to_rm, to_cmp, to_add)
def compare_files(old, new, cmpresult):
    """Add every shared entry whose contents differ to ``cmpresult.to_add``.

    Each name in ``cmpresult.to_cmp`` is extracted from both archives into a
    temporary directory and the two extracted files are compared byte by
    byte.  The temporary directory is removed when the function returns.

    Args:
        old: Path (or file object) of the old archive.
        new: Path (or file object) of the new archive.
        cmpresult: Object with an iterable ``to_cmp`` and a mutable set
            ``to_add``; ``to_add`` is updated in place.
    """
    with tempfile.TemporaryDirectory() as tmpdir, \
            zipfile.ZipFile(old) as zinfile_old, \
            zipfile.ZipFile(new) as zinfile_new:
        old_dest = os.path.join(tmpdir, 'old')
        new_dest = os.path.join(tmpdir, 'new')
        os.mkdir(old_dest)
        os.mkdir(new_dest)
        for filename in cmpresult.to_cmp:
            # extract() returns the path it really created; member names can
            # be sanitised on extraction, so joining the raw name onto the
            # destination directory may point at a file that does not exist.
            old_path = zinfile_old.extract(filename, path=old_dest)
            new_path = zinfile_new.extract(filename, path=new_dest)
            if not filecmp.cmp(old_path, new_path, shallow=False):
                cmpresult.to_add.add(filename)
def mkpatch(new, cmpresult, to_name):
    """Write the patch archive and the removal script.

    Two files are produced:

    * ``to_name + '.zip'`` contains every entry listed in
      ``cmpresult.to_add``, copied from the *new* archive.
    * ``to_name + '.sh'`` is a shell script with one ``rm`` line per entry
      listed in ``cmpresult.to_rm``.

    Args:
        new: Path (or file object) of the new archive.
        cmpresult: Object exposing ``to_add`` and ``to_rm`` name collections.
        to_name: Output path without extension.
    """
    with zipfile.ZipFile(new) as zinfile, \
            zipfile.ZipFile(to_name + '.zip', 'w') as zoutfile:
        # Sorted so that the same inputs always yield the same entry order.
        for filename in sorted(cmpresult.to_add):
            with zinfile.open(filename) as infile, \
                    zoutfile.open(filename, 'w') as outfile:
                shutil.copyfileobj(infile, outfile)
    with open(to_name + '.sh', 'w', encoding='utf-8') as outfile:
        # Every line must end with a real newline; without it the shebang
        # and all rm commands run together on a single line.
        outfile.write('#!/bin/sh\n')
        for filename in sorted(cmpresult.to_rm):
            # shlex.quote keeps names with spaces or shell metacharacters
            # from being split or interpreted by the shell.
            outfile.write('rm {}\n'.format(shlex.quote(filename)))
def main():
    """Parse the command line, compare the two archives and write the patch."""
    args = make_parser().parse_args()
    # Name-level classification first, then content comparison of the
    # shared entries (which extends cmpresult.to_add in place).
    cmpresult = zipcmp(args.oldfile, args.newfile)
    compare_files(args.oldfile, args.newfile, cmpresult)
    mkpatch(args.newfile, cmpresult, args.toname)


if __name__ == '__main__':
    main()