查询类DataInputStream的方法readUTF()

有人知道它在底层是如何工作的吗？我已经读过API文档，但讲得不是很清楚。有人能用更简单的方式解释一下吗？提前谢谢。

首先读取一个无符号short，它是字符串编码后的字节长度（注意是字节数，而不是字符数）
在这些字节全部处理完之前，重复以下步骤：
读取一个字节。如果字节与位模式0xxxxxxx匹配，则它单独构成1个字符。如果字节与位模式110xxxxx匹配，则该字符由2个字节（unicode）组成。如果字节与位模式1110xxxx匹配，则该字符由3个字节组成。多字节字符的后续字节必须与位模式10xxxxxx匹配，否则会抛出UTFDataFormatException。每组装好一个新字符，就把它追加到要返回的字符串的末尾

查看该方法背后的源代码可能会有所帮助：

 /**
  * Reads one length-prefixed string from {@code in} and decodes it into a
  * {@code String}.
  *
  * <p>The input begins with an unsigned 16-bit value giving the number of
  * encoded bytes that follow. Those bytes are read in full and then decoded
  * one character at a time using three layouts:
  * <ul>
  *   <li>{@code 0xxxxxxx} - one byte yields one char;</li>
  *   <li>{@code 110xxxxx 10xxxxxx} - two bytes yield one char;</li>
  *   <li>{@code 1110xxxx 10xxxxxx 10xxxxxx} - three bytes yield one char.</li>
  * </ul>
  * A lead byte of the form {@code 10xxxxxx} or {@code 1111xxxx} is rejected.
  *
  * <p>When {@code in} is a {@code DataInputStream}, its {@code bytearr} and
  * {@code chararr} fields are used as working buffers; if {@code bytearr} is
  * shorter than the encoded length, both fields are replaced with arrays of
  * twice that length. For any other {@code DataInput}, fresh arrays of
  * exactly the encoded length are allocated.
  *
  * @param in the source to read the length prefix and encoded bytes from
  * @return the decoded string; it may contain fewer chars than there were
  *         encoded bytes
  * @throws UTFDataFormatException if a multi-byte character runs past the
  *         end of the encoded bytes, a continuation byte is not of the form
  *         {@code 10xxxxxx}, or a lead byte has an unsupported bit pattern
  * @throws IOException if reading from {@code in} fails
  */
 public final static String readUTF(DataInput in) throws IOException {
     final int encodedLength = in.readUnsignedShort();

     // Pick the working buffers: reuse the stream's own when possible.
     final byte[] encoded;
     final char[] decoded;
     if (in instanceof DataInputStream) {
         DataInputStream stream = (DataInputStream) in;
         if (encodedLength > stream.bytearr.length) {
             stream.bytearr = new byte[encodedLength * 2];
             stream.chararr = new char[encodedLength * 2];
         }
         encoded = stream.bytearr;
         decoded = stream.chararr;
     } else {
         encoded = new byte[encodedLength];
         decoded = new char[encodedLength];
     }

     in.readFully(encoded, 0, encodedLength);

     int pos = 0;        // index of the next encoded byte to examine
     int produced = 0;   // number of chars written to decoded so far

     // Fast path: a non-negative byte is a single-byte character, so copy
     // the leading run of them without any bit inspection.
     while (pos < encodedLength && encoded[pos] >= 0) {
         decoded[produced++] = (char) encoded[pos++];
     }

     // General path: classify each lead byte by its high bits.
     while (pos < encodedLength) {
         final int lead = encoded[pos] & 0xff;

         if (lead < 0x80) {
             // 0xxxxxxx
             decoded[produced++] = (char) lead;
             pos++;
         } else if ((lead & 0xE0) == 0xC0) {
             // 110xxxxx 10xxxxxx
             final int end = pos + 2;
             if (end > encodedLength) {
                 throw new UTFDataFormatException(
                     "malformed input: partial character at end");
             }
             final int second = encoded[pos + 1];
             if ((second & 0xC0) != 0x80) {
                 throw new UTFDataFormatException(
                     "malformed input around byte " + end);
             }
             decoded[produced++] =
                 (char) (((lead & 0x1F) << 6) | (second & 0x3F));
             pos = end;
         } else if ((lead & 0xF0) == 0xE0) {
             // 1110xxxx 10xxxxxx 10xxxxxx
             final int end = pos + 3;
             if (end > encodedLength) {
                 throw new UTFDataFormatException(
                     "malformed input: partial character at end");
             }
             final int second = encoded[pos + 1];
             final int third = encoded[pos + 2];
             if ((second & 0xC0) != 0x80 || (third & 0xC0) != 0x80) {
                 throw new UTFDataFormatException(
                     "malformed input around byte " + (end - 1));
             }
             decoded[produced++] = (char) (((lead & 0x0F) << 12)
                                           | ((second & 0x3F) << 6)
                                           | (third & 0x3F));
             pos = end;
         } else {
             // 10xxxxxx or 1111xxxx cannot start a character
             throw new UTFDataFormatException(
                 "malformed input around byte " + pos);
         }
     }

     // Only the first `produced` slots of the buffer hold decoded chars.
     return new String(decoded, 0, produced);
 }

相关内容

最新更新

热门标签：