Swift计算大型文件的MD5校验和



我正在为大型视频文件创建MD5校验和。我目前使用的代码:

extension NSData {
    /// Computes the MD5 digest of the receiver's bytes in a single pass.
    ///
    /// The whole buffer is handed to `CC_MD5` at once, so the data must already be
    /// resident in memory.
    ///
    /// - Returns: The digest as a lowercase hexadecimal string
    ///   (two characters per digest byte).
    func MD5() -> NSString {
        // Use an array instead of a manually allocated pointer: the previous
        // `UnsafeMutablePointer.allocate` buffer was never deallocated and leaked
        // on every call.
        var digest = [UInt8](repeating: 0, count: Int(CC_MD5_DIGEST_LENGTH))

        // `CC_LONG(length)` is a checked integer conversion, so it traps if
        // `length` does not fit in `CC_LONG`.
        _ = CC_MD5(bytes, CC_LONG(length), &digest)

        // Build the hex string directly rather than feeding the result back
        // through `NSString(format:)` as if it were a format string.
        let hex = digest.map { String(format: "%02x", $0) }.joined()
        return hex as NSString
    }
}

但这会创建一个内存缓冲区,对于大型视频文件来说并不理想。Swift中有没有一种方法可以计算读取文件流的MD5校验和,从而将内存占用降至最低?

您可以分块计算MD5校验和,例如问题"是否存在不需要一次性输入全部内容的MD5库?"中所演示的那样。

以下是使用Swift(现已针对Swift 5更新)的可能实现

import CommonCrypto
/// Computes the MD5 digest of a file by streaming it in fixed-size chunks.
///
/// - Parameter url: Location of the file to hash.
/// - Returns: The raw MD5 digest bytes, or `nil` if the file could not be
///   opened for reading (the error is printed).
func md5File(url: URL) -> Data? {
    let chunkSize = 1024 * 1024

    // Opening the file is the only step that can throw; handle it up front.
    let handle: FileHandle
    do {
        handle = try FileHandle(forReadingFrom: url)
    } catch {
        print("Cannot open file:", error.localizedDescription)
        return nil
    }
    defer { handle.closeFile() }

    // Set up the incremental MD5 state.
    var md5Context = CC_MD5_CTX()
    CC_MD5_Init(&md5Context)

    // Feed the file to the hash chunk by chunk. Each read runs inside its own
    // autorelease pool so the chunk's memory is released before the next read.
    var reachedEOF = false
    while !reachedEOF {
        autoreleasepool {
            let chunk = handle.readData(ofLength: chunkSize)
            guard chunk.count > 0 else {
                reachedEOF = true
                return
            }
            chunk.withUnsafeBytes { rawBuffer in
                _ = CC_MD5_Update(&md5Context, rawBuffer.baseAddress, numericCast(chunk.count))
            }
        }
    }

    // Finish the computation and hand back the digest bytes.
    var digestBytes = [UInt8](repeating: 0, count: Int(CC_MD5_DIGEST_LENGTH))
    _ = CC_MD5_Final(&digestBytes, &md5Context)
    return Data(digestBytes)
}

需要使用自动释放池来释放file.readData()返回的内存;如果没有它,整个(可能非常巨大的)文件都会被加载到内存中。感谢Abhi Beckert注意到这一点并提供了一种实现方式。

如果您需要十六进制编码字符串形式的摘要,请将返回类型改为String?,并将

return Data(digest)

替换为

// Format each digest byte with "%02hhx" and concatenate the pieces into one string.
let hexDigest = digest.map { String(format: "%02hhx", $0) }.joined()
return hexDigest

自iOS13 以来

'CC_MD5_Init'在iOS 13.0 中已被弃用

您可以改用CryptoKit来实现:

import Foundation
import CryptoKit
extension URL {
    /// Computes the MD5 digest of the file at this URL by streaming it in chunks.
    ///
    /// - Returns: The MD5 digest encoded as a Base64 string, or `nil` if the
    ///   file could not be opened for reading (the error is passed to `error_log`).
    func checksumInBase64() -> String? {
        let chunkSize = 16*1024

        // Opening the file is the only step that can throw; handle it up front.
        let handle: FileHandle
        do {
            handle = try FileHandle(forReadingFrom: self)
        } catch {
            error_log(error)
            return nil
        }
        defer { handle.closeFile() }

        // Incremental MD5 hasher.
        var hasher = CryptoKit.Insecure.MD5()

        // Feed the file to the hasher chunk by chunk. Each read runs inside its
        // own autorelease pool so the chunk is released before the next read.
        var reachedEOF = false
        while !reachedEOF {
            autoreleasepool {
                let chunk = handle.readData(ofLength: chunkSize)
                guard chunk.count > 0 else {
                    reachedEOF = true
                    return
                }
                hasher.update(data: chunk)
            }
        }

        // Finish the digest and encode it.
        let digestData = Data(hasher.finalize())
        return digestData.base64EncodedString()
    }
}

SHA256哈希的解决方案(基于Martin R的答案):

/// Computes the SHA-256 digest of a file by streaming it in fixed-size chunks.
///
/// - Parameter url: Location of the file to hash.
/// - Returns: The raw SHA-256 digest bytes, or `nil` if the file could not be
///   opened for reading (the error is printed).
func sha256(url: URL) -> Data? {
    do {
        let bufferSize = 1024 * 1024

        // Open file for reading:
        let file = try FileHandle(forReadingFrom: url)
        defer {
            file.closeFile()
        }

        // Create and initialize SHA256 context:
        var context = CC_SHA256_CTX()
        CC_SHA256_Init(&context)

        // Read up to `bufferSize` bytes at a time until EOF is reached, updating
        // the SHA256 context with each chunk. Each read runs inside its own
        // autorelease pool so the chunk is released before the next read.
        while autoreleasepool(invoking: { () -> Bool in
            let data = file.readData(ofLength: bufferSize)
            guard !data.isEmpty else {
                return false // End of file
            }
            // Use the raw-buffer form of `withUnsafeBytes` and pass `baseAddress`
            // explicitly; the typed-pointer overload is deprecated in Swift 5.
            data.withUnsafeBytes {
                _ = CC_SHA256_Update(&context, $0.baseAddress, numericCast(data.count))
            }
            return true // Continue
        }) { }

        // Compute the SHA256 digest into a byte array, avoiding the deprecated
        // typed-pointer `Data.withUnsafeMutableBytes` overload.
        var digest = [UInt8](repeating: 0, count: Int(CC_SHA256_DIGEST_LENGTH))
        _ = CC_SHA256_Final(&digest, &context)
        return Data(digest)
    } catch {
        print(error)
        return nil
    }
}

配合先前创建的、名为fileURL的URL类型实例使用:

// Hash the file and log the digest as lowercase hex; skipped when `sha256` returns nil.
if let sha256Digest = sha256(url: fileURL) {
    let hexPairs = sha256Digest.map { byte in String(format: "%02hhx", byte) }
    let calculatedHash = hexPairs.joined()
    DDLogDebug(calculatedHash)
}

最新更新