如何从百分比的Windowscp1251中解码字符串?
替换percentescapes(使用:string.encoding.windowscp1251)现在是过时的
removingPercentEncoding使用UTF8
我找到了解决方案。这个对我有用。欢迎来重构我的例子。
extension String {
func removingPercentEncoding(using encoding: String.Encoding) -> String {
let firstChar = self.first
let percentCharacter = Character("%")
var encodedPrefix: String.SubSequence? = nil
var encodedSuffix = self
if firstChar != percentCharacter {
if let indexOfFirstPercentChar = index(of: percentCharacter) {
encodedPrefix = self[..<indexOfFirstPercentChar]
encodedSuffix = String(self[indexOfFirstPercentChar...])
} else {
//no % char at all. Nothing encoded
return self
}
}
let substrings = encodedSuffix.components(separatedBy: "%")
let arr = substrings.map{ substring -> (String) in
switch substring.count {
case let count where count < 2:
return substring
case let count where count == 2:
let decodedArr = substring.hexa2Bytes
let data = Data(decodedArr)
if let decodedStr = String(data: data, encoding: encoding) {
return decodedStr
}
return substring
default: //>2
let thirdSymbolIndex = index(startIndex, offsetBy: 2)
let firstTwo = substring[..<thirdSymbolIndex]
let furhter = substring[thirdSymbolIndex...]
let decodedArr = String(firstTwo).hexa2Bytes
let data = Data(decodedArr)
if let decodedStr = String(data: data, encoding: encoding) {
return decodedStr + furhter
}
return substring
}
}
let result = arr.joined()
return String(encodedPrefix ?? "") + result
}
var hexa2Bytes: [UInt8] {
let hexa = Array(characters)
return stride(from: 0, to: characters.count, by: 2).flatMap { UInt8(String(hexa[$0..<$0.advanced(by: 2)]), radix: 16) }
}
}
一个小示例,可以与多字节字符串编码一起使用。
extension UInt8 {
//returns 0...15 when '0'...'9', 'A'...'F', 'a'...'f', otherwise returns nil
var hexValue: UInt8? {
if UInt8(ascii: "0") <= self && self <= UInt8(ascii: "9") {
return self - UInt8(ascii: "0")
} else if UInt8(ascii: "A") <= self && self <= UInt8(ascii: "F") {
return self - UInt8(ascii: "A") + 10
} else if UInt8(ascii: "a") <= self && self <= UInt8(ascii: "f") {
return self - UInt8(ascii: "a") + 10
} else {
return nil
}
}
}
extension String {
func removingPercentEncoding(using encoding: String.Encoding) -> String? {
guard let percentEncodedData = self.data(using: encoding) else {return nil}
var byteIterator = percentEncodedData.makeIterator()
var percentDecodedData = Data()
while let b0 = byteIterator.next() {
guard b0 == UInt8(ascii: "%"), let b1 = byteIterator.next() else {
//Non percent character
percentDecodedData.append(b0)
continue
}
guard let h1 = b1.hexValue, let b2 = byteIterator.next() else {
//Keep it as is, when invalid hex-sequece appeared
percentDecodedData.append(b0)
percentDecodedData.append(b1)
continue
}
guard let h2 = b2.hexValue else {
//Keep it as is, when invalid hex-sequece appeared
percentDecodedData.append(b0)
percentDecodedData.append(b1)
percentDecodedData.append(b2)
continue
}
percentDecodedData.append((h1<<4) + h2)
}
return String(data: percentDecodedData, encoding: encoding)
}
}
我认为,您应该认为 non-utf8%编码现在是过时的,并且应该修复使用CP1251的百分比编码字符串的零件。