在我的应用程序中,TCP客户端正在处理来自远程TCP服务器的数据流。当收到的字符都是1字节字符时,一切正常。但当TCP服务器发送特殊字符(十六进制 C3B5,即2字节的UTF-8字符)时,我就开始遇到问题。
下面是一行Swift 3代码,每当收到的数据包含超过1个字节的UTF-8字符时,它返回的字符串就是nil:
let convertedString = String(bytes: data, encoding: String.Encoding.utf8)
我该如何解决这个问题?基本上,传入的数据流可能包含以UTF-8编码的1字节或2字节字符,我需要把数据流正确无误地转换为字符串。
这是我遇到问题的整个代码部分:
/// Reads the next chunk from `task` and, once `self.partialMessage` contains a
/// complete framed message, validates it and dispatches on its content.
///
/// The frame layout checked below is: payload, then a 32-character MD5 hex digest,
/// then `Constants.END_OF_MESSAGE_DELIMITER`, optionally followed by one trailing
/// character that is stripped first. After every read that reports neither EOF nor
/// an error, the function calls itself again to keep reading.
///
/// - Parameter task: The stream task to read from; it is also handed to
///   `self.stop(task:)` whenever a message has been handled or EOF is reached.
func startRead(for task: URLSessionStreamTask) {
// Ask for 1...65535 bytes with a 300 second timeout.
// NOTE(review): this handler touches refreshControl/tableView directly — confirm which queue it is delivered on.
task.readData(ofMinLength: 1, maxLength: 65535, timeout: 300) { (data, eof, error) in
if let data = data {
NSLog("stream task read %@", data as NSData)
// NOTE(review): convertedString1 is never read anywhere in this block.
let convertedString1 = String(data: data, encoding: String.Encoding(rawValue: String.Encoding.utf8.rawValue))
// NOTE(review): each chunk is decoded on its own. If a read ends (or starts) in the
// middle of a multi-byte UTF-8 sequence, this initializer returns nil and the whole
// chunk is skipped: nothing is appended to partialMessage for it.
if let convertedString = String(bytes: data, encoding: String.Encoding.utf8) {
self.partialMessage = self.partialMessage + convertedString
// NOTE(review): the received text is used as the NSLog format string, so any '%'
// in the data would be interpreted as a format specifier.
NSLog(convertedString)
// Assign lengths (delimiter, MD5 digest, minimum expected length, message length)
// NOTE(review): delimiterLength is a UTF-8 byte count, but it is used further down as a
// Character offset; the two only agree if the delimiter is ASCII-only — confirm.
let delimiterLength = Constants.END_OF_MESSAGE_DELIMITER.lengthOfBytes(using: String.Encoding.utf8)
let MD5Length = 32 // 32 characters -> hex representation of 16 bytes
// 3 = CR+LF+1 char at least
let minimumExpectedMessageLength = MD5Length + delimiterLength + 3
// Length in UTF-8 bytes (not Characters) of everything accumulated so far.
let messageLength = self.partialMessage.lengthOfBytes(using: String.Encoding.utf8)
// Check for delimiter and minimum expected message length (2 char msg + MD5 digest + delimiter)
if (self.partialMessage.contains(Constants.END_OF_MESSAGE_DELIMITER)) &&
(messageLength >= minimumExpectedMessageLength) {
// Work on a copy; partialMessage itself is not modified or cleared in this block.
// NOTE(review): presumably it is reset elsewhere (e.g. in stop(task:)) — confirm.
var message = self.partialMessage
// Get rid of optional CR+LF
// The range below covers exactly the last Character of the message.
var lowBound = message.index(message.endIndex, offsetBy: -1)
var hiBound = message.index(message.endIndex, offsetBy: 0)
var midRange = lowBound ..< hiBound
let optionalCRLF = message.substring(with: midRange)
// NOTE(review): optionalCRLF holds one Character, so it can never equal the
// two-character literal "rn". The literal looks like "\r\n" (a single Character
// in Swift) with its backslashes lost — confirm against the original source.
if (optionalCRLF == "rn") || (optionalCRLF == " ") { // Remove CR+LF if present
lowBound = message.index(message.endIndex, offsetBy: -1)
hiBound = message.index(message.endIndex, offsetBy: 0)
midRange = lowBound ..< hiBound
message.removeSubrange(midRange)
}
// Check for delimiter proper position (has to be at the end)
// NOTE(review): the length guard above counts UTF-8 bytes while these offsets count
// Characters, so it does not by itself guarantee the offsets stay inside the string
// when the message contains multi-byte characters.
lowBound = message.index(message.endIndex, offsetBy: -delimiterLength)
hiBound = message.index(message.endIndex, offsetBy: 0)
midRange = lowBound ..< hiBound
let delimiter = message.substring(with: midRange)
if (delimiter == Constants.END_OF_MESSAGE_DELIMITER) // Delimiter in proper position?
{
// Acquire the MD digest
// The digest is the MD5Length characters immediately before the delimiter.
lowBound = message.index(message.endIndex, offsetBy: -(MD5Length+delimiterLength))
hiBound = message.index(message.endIndex, offsetBy: -(delimiterLength))
midRange = lowBound ..< hiBound
let receivedMD5 = message.substring(with: midRange)
// Acquire the deframed message (normalized message)
// Everything from the start of the message up to the digest.
lowBound = message.index(message.startIndex, offsetBy: 0)
hiBound = message.index(message.endIndex, offsetBy: -(MD5Length+delimiterLength))
midRange = lowBound ..< hiBound
let normalizedMessage = message.substring(with: midRange)
// Calculate the MD5 digest on the normalized message
let calculatedMD5Digest = normalizedMessage.md5()
// Debug
print(delimiter)
print(normalizedMessage)
print(receivedMD5)
// NOTE(review): force-unwrap — this traps if md5() returns nil, even though the
// comparison below treats the digest as optional.
print(calculatedMD5Digest!)
// Check for the integrity of the data
// Case-insensitive digest comparison; self.noMD5Check bypasses the check entirely.
if (receivedMD5.lowercased() == calculatedMD5Digest?.lowercased()) || self.noMD5Check // TEMPORARY
{
// Every branch below ends the refresh control and stops the task; the first five
// show an alert for a fixed status string, the last one treats the payload as CSV.
if (normalizedMessage == "Unauthorized Access")
{
// Update the authorization status
self.authorized = false
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("Unauthorized Access", comment: "Unauthorized Access Title"), message: NSLocalizedString("Please login with the proper Username and Password before to send any command!", comment: "Unauthorized Access Message"))
}
else if (normalizedMessage == "System Busy")
{
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("System Busy", comment: "System Busy Title"), message: NSLocalizedString("The system is busy at the moment. Only one connection at a time is allowed!", comment: "System Busy Message"))
}
else if (normalizedMessage == "Error")
{
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("Error", comment: "Error Title"), message: NSLocalizedString("An error occurred during the execution of the command!", comment: "Command Error Message"))
}
else if (normalizedMessage == "ErrorMachineRunning")
{
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
// NOTE(review): the "!rnn " / ".rnn " separators look like newline escape sequences
// whose backslashes were lost — confirm against the original source.
self.showAlert(title: NSLocalizedString("Error", comment: "Error Title"), message: NSLocalizedString("The command cannot be executed while the machine is running", comment: "Machine Running Message 1")+"!rnn "+NSLocalizedString("Trying to execute any command in this state could be dangerous for both people and machinery", comment: "Machine Running Message 2")+".rnn "+NSLocalizedString("Please stop the machine and leave the automatic or semi-automatic modes before to provide any command", comment: "Machine Running Message 3")+".")
}
else if (normalizedMessage == "Command Not Recognized")
{
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("Error", comment: "Error Title"), message: NSLocalizedString("Command not recognized!", comment: "Command Unrecognized Message"))
}
else
{
// Any other payload is handed to processCsvData as CSV content.
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
//let testMessage = "testrnf3ea0b9bff4a2c79e60acf6873f4a1ce</EOM>rn"
//normalizedMessage = testMessage
// Process the received csv file
self.processCsvData(file: normalizedMessage)
}
}
else
{
// Reached when the received digest does not match the calculated one (and noMD5Check is false).
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("Data Error", comment: "Data Error Title"), message: NSLocalizedString("The received data cannot be read since it's corrupted or incomplete!", comment: "Data Error Message"))
}
}
else
{
// Reached when the delimiter is somewhere in the message but not at its very end.
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Stop the stream
NSLog("stream task stop")
self.stop(task: task)
// Shows an alert
self.showAlert(title: NSLocalizedString("Data Error", comment: "Data Error Title"), message: NSLocalizedString("The received data cannot be read since it's corrupted or incomplete!", comment: "Data Error Message"))
}
}
}
}
// Decide what to do next: finish on EOF, read again if there was no error, otherwise just log.
if eof {
// Stop the refresh control
if let refreshControl = self.refreshControl {
if refreshControl.isRefreshing {
refreshControl.endRefreshing()
}
}
// Refresh the tableview content
self.tableView.reloadData()
// Stop the stream
NSLog("stream task end")
self.stop(task: task)
} else if error == nil {
// NOTE(review): this also runs after a message was handled and stop(task:) was called above — confirm that is intended.
self.startRead(for: task)
} else {
// We ignore the error because we'll see it again in `didCompleteWithError`.
NSLog("stream task read error")
}
}
}
至关重要的一点是,`data` 必须代表整个字符串的数据,而不仅仅是其中的一部分。如果您尝试把整个字符串的部分数据转换成子字符串,在很多情况下都会失败。
这种做法对1字节字符是可行的,因为无论您在何处切分数据流,得到的部分数据仍然是一个有效的字符串。但是,一旦开始处理多字节字符,切分后的部分数据流就很容易出现这样的情况:数据的第一个或最后一个字节只是某个多字节字符的一部分。这会导致数据无法被正确解析。
因此,在尝试将数据转换为字符串之前,必须确保 `data` 对象是由该字符串的全部字节构建而成的。
通常,您应该在数据的开头放置一个字节计数。假设前4个字节表示一个按双方约定的字节序("endianness")编码的32位整数。您先读取这4个字节以获得长度,然后继续读取数据,直到再收到这么多个字节为止。这时您就知道已经到达消息的结尾。
在数据末尾使用"消息结束"标记的问题在于,这个标记本身可能被拆分到两次读取之中。无论采用哪种方式,您都需要重构代码,改为在数据(字节)层面进行处理,并且在读完字符串的全部数据之前,不要尝试把数据转换为字符串。
如您所知,单个UTF-8字符占用1、2、3或4个字节。对于您的情况,您需要处理1字节或2字节的字符。而且,您收到的字节序列不一定与"字符边界"对齐。但是,正如rmaddy所指出的那样,以 `String.Encoding.utf8` 解码的字节序列必须在正确的字符边界上开始和结束。
现在,有两种选择来处理这种情况。正如rmaddy所建议的那样,一种是先发送长度,然后对传入的数据字节进行计数。这样做的缺点是您还必须修改发送(服务器)端,而这未必可行。
另一个选择是逐字节扫描传入的序列,同时跟踪字符边界,然后拼出合法的UTF-8字节序列。幸运的是,UTF-8的设计使您只需查看字节流中的任意一个字节,就能轻松判断字符边界在哪里。具体而言,1、2、3和4字节UTF-8字符的第一个字节在二进制表示中分别以0xxxxxxx、110xxxxx、1110xxxx和11110xxx开头,而第二到第四个字节全都是10xxxxxx的形式。这会让您的工作轻松很多。
如果您从1字节的UTF-8字符中挑选一个作为"消息结束"(EOM)标记,就可以轻松而可靠地检测到EOM,因为它只占一个字节,并且不会出现在任何2到4字节字符的内部。