[Python] 比较两个zip文件的函数,一个位于FTP目录中,另一个位于我的本地计算机上



我在创建比较两个zip文件的函数时遇到问题(要比较它们的内容是否相同,而不仅仅是名称)。这是我的代码示例:

def validate_zip_files(self):
    """Download ``test`` from the FTP server and compare it with a local zip.

    The remote file is saved as ``test.zip`` in the current working
    directory and then compared byte-for-byte with
    ``/home/user/file/text.zip``.

    Returns:
        True if the two files have identical contents, False otherwise.
    """
    host = '192.168.0.1'
    port = 2323
    username = '123'
    password = '123'
    downloaded_path = 'test.zip'
    local_path = '/home/user/file/text.zip'

    ftp = FTP()
    ftp.connect(host, port)
    try:
        ftp.login(username, password)
        ftp.cwd('test')
        print(ftp.pwd())
        # Close the download handle before comparing so that every byte has
        # been flushed to disk.
        with open(downloaded_path, 'wb') as download:
            ftp.retrbinary('RETR test', download.write)
    finally:
        ftp.close()

    # filecmp.cmp takes file *paths*. Re-opening the files with 'wb' would
    # truncate them, and shallow=True would only compare os.stat() data, so
    # pass the paths and force a content comparison instead.
    return filecmp.cmp(downloaded_path, local_path, shallow=False)

其中一个问题是第二个zip位于不同的位置('/home/user/file/text.zip'),而我是把zip文件下载到python脚本所在的目录中。我也不能100%确定filecmp.cmp是否适用于.zip文件。

任何想法都会很棒:)谢谢。

与其直接比较文件,不如比较文件的哈希值。这样就不必依赖 filecmp,正如您所说,它可能不适用于压缩文件。

import hashlib
def compare_files(a, b):
    """Return True if the files at paths *a* and *b* have the same contents.

    The comparison is done on SHA-256 digests of the raw bytes, so it works
    for any file type, including zip archives.

    Args:
        a: Path of the first file.
        b: Path of the second file.

    Returns:
        True when both digests match, False otherwise.

    Raises:
        OSError: If either file cannot be opened or read.
    """
    def _sha256_of(path):
        # Hash in fixed-size chunks so large archives are never held in
        # memory at once; the ``with`` block guarantees the handle is closed.
        digest = hashlib.sha256()
        with open(path, 'rb') as stream:
            for chunk in iter(lambda: stream.read(65536), b''):
                digest.update(chunk)
        return digest.digest()

    return _sha256_of(a) == _sha256_of(b)

请参阅我的 gist,它按内容比较两个zip文件,并生成从一个zip到另一个zip的补丁文件。例如,如果两个zip文件含有同名条目但内容不同,该 gist 能够发现;如果它们包含的条目不同,该 gist 同样能够发现。该 gist 会忽略修改时间的差异。不过,如果您只关心浅层比较,那么hashlib可能是更好的选择。

供您参考,以下是该 gist 中的代码:

import os
import argparse
import collections
import tempfile
import zipfile
import filecmp
import shutil
import shlex
# Result record for a zip-to-zip comparison. It carries three collections of
# entry names, in this order: to_rm, to_cmp, to_add.
ZipCmpResult = collections.namedtuple(
    'ZipCmpResult', 'to_rm to_cmp to_add')

def make_parser():
    """Build the command-line parser for the zip patch tool.

    Returns:
        An ``argparse.ArgumentParser`` that accepts ``--oldfile``,
        ``--newfile`` and ``--toname``; each option defaults to a path
        inside the ``share`` directory.
    """
    # (option flag, default file name under 'share')
    option_table = (
        ('--oldfile', 'old.zip'),
        ('--newfile', 'new.zip'),
        ('--toname', 'patch'),
    )
    cli = argparse.ArgumentParser(
        description='Make patch zip file from two similar zip files.')
    for flag, basename in option_table:
        cli.add_argument(
            flag,
            default=os.path.join('share', basename),
            help='default: %(default)s')
    return cli

def zipcmp(old, new):
    """Classify the entry names of two zip archives.

    Args:
        old: Path (or file object) of the old archive.
        new: Path (or file object) of the new archive.

    Returns:
        A ``ZipCmpResult`` whose ``to_rm`` holds names found only in *old*,
        ``to_cmp`` holds names found in both archives, and ``to_add`` holds
        names found only in *new*. Only names are examined here, not
        contents.
    """
    def entry_names(archive_path):
        # The archive is closed again as soon as its name list is read.
        with zipfile.ZipFile(archive_path) as archive:
            return set(archive.namelist())

    before = entry_names(old)
    after = entry_names(new)
    return ZipCmpResult(
        to_rm=before.difference(after),
        to_cmp=before.intersection(after),
        to_add=after.difference(before))

def compare_files(old, new, cmpresult):
    """Find entries present in both archives whose contents differ.

    Every name in ``cmpresult.to_cmp`` is extracted from both archives into
    a temporary directory and compared byte-for-byte. Names whose contents
    differ are added to ``cmpresult.to_add`` in place.

    Args:
        old: Path of the old zip archive.
        new: Path of the new zip archive.
        cmpresult: Object with a ``to_cmp`` iterable of entry names and a
            mutable ``to_add`` set (e.g. a ``ZipCmpResult``).

    Returns:
        None. The result is reported by mutating ``cmpresult.to_add``.
    """
    with tempfile.TemporaryDirectory() as tmpdir, \
         zipfile.ZipFile(old) as zinfile_old, \
         zipfile.ZipFile(new) as zinfile_new:
        old_dest = os.path.join(tmpdir, 'old')
        new_dest = os.path.join(tmpdir, 'new')
        os.mkdir(old_dest)
        os.mkdir(new_dest)
        for filename in cmpresult.to_cmp:
            # extract() sanitizes member names (absolute paths, '..') and
            # returns the path it really wrote; use that instead of
            # re-joining the raw name, which could point somewhere else.
            old_path = zinfile_old.extract(filename, path=old_dest)
            new_path = zinfile_new.extract(filename, path=new_dest)
            # shallow=False forces a content comparison; the extracted
            # copies never share meaningful stat data.
            if not filecmp.cmp(old_path, new_path, shallow=False):
                cmpresult.to_add.add(filename)

def mkpatch(new, cmpresult, to_name):
    """Write a patch archive and a removal script for a comparison result.

    Two files are produced:

    * ``to_name + '.zip'`` containing every entry of *new* that is listed in
      ``cmpresult.to_add``.
    * ``to_name + '.sh'``, a shell script with one ``rm`` line for every name
      in ``cmpresult.to_rm``.

    Args:
        new: Path of the new zip archive to copy entries from.
        cmpresult: Object with ``to_add`` and ``to_rm`` collections of entry
            names (e.g. a ``ZipCmpResult``).
        to_name: Output path without extension.
    """
    with zipfile.ZipFile(new) as zinfile, \
         zipfile.ZipFile(to_name + '.zip', 'w') as zoutfile:
        # Sorted so the patch archive has a reproducible entry order.
        for filename in sorted(cmpresult.to_add):
            with zinfile.open(filename) as infile, \
                 zoutfile.open(filename, 'w') as outfile:
                shutil.copyfileobj(infile, outfile)
    # newline='\n' keeps LF line endings on every platform, which a shell
    # script needs.
    with open(to_name + '.sh', 'w', encoding='utf-8', newline='\n') as outfile:
        outfile.write('#!/bin/sh\n')
        for filename in sorted(cmpresult.to_rm):
            # shlex.quote protects names containing spaces or shell
            # metacharacters.
            outfile.write('rm {}\n'.format(shlex.quote(filename)))

def main():
    """Parse the command line, diff the two archives and write the patch.

    The name-level comparison comes first, then the content comparison of
    shared entries, and finally the patch output is written.
    """
    options = make_parser().parse_args()
    old_zip, new_zip = options.oldfile, options.newfile
    diff = zipcmp(old_zip, new_zip)
    compare_files(old_zip, new_zip, diff)
    mkpatch(new_zip, diff, options.toname)


# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

相关内容

最新更新