服务器正在向TcpClient发送无效的gzip



我正试图通过实现web tcp客户端来了解更多关于web和tcp如何工作的信息。

目前,我的web请求功能如下:

    public string SendWebRequest(SocketWebRequest request)
    {
        using (NetworkStream ns = tc.GetStream())
        {
                using (System.IO.StreamReader sr = new System.IO.StreamReader(ns))
                {
                    request.WriteTo(ns);
                    ns.Flush();
                    var statusLine = sr.ReadLine();
                    ProcessStatusLine(statusLine);
                    Headers = ReadHeaders(sr);
                    ProcessCookies(request.Host);
                    int contentLength = 0;
                    if (Headers.ContainsKey("Content-Length"))
                    {
                        foreach (var cl in Headers["Content-Length"])
                        {
                            int buf;
                            if (int.TryParse(cl,out buf))
                            {
                                contentLength = buf;
                                break;
                            }
                        }
                    }
                    if (contentLength==0)
                    {
                        return "";
                    }
                    byte[] content = new byte[contentLength];
                    if (IsGziped())
                    {
                        MemoryStream decompressed = new MemoryStream();
                        using (var zs = new GZipStream(ns, CompressionMode.Decompress))
                        {
                            while (true)
                            {
                                var buf = new byte[1024];
                                int read = zs.Read(buf, 0, buf.Length);
                                if (read == 0)
                                {
                                    break;
                                }
                                decompressed.Write(buf, 0, read);
                            }
                        }
                        content = decompressed.ToArray();
                    }
                    else
                    {
                        using (BinaryReader rdr = new BinaryReader(ns))
                        {
                            rdr.Read(content, 0, content.Length);
                        }
                    }
                    var encoding = GetEncoding();
                    return encoding.GetString(content.ToArray());
                }
        }
    }

请求如下:

GET http://www.youtube.com/ HTTP/1.1
Accept: application/x-ms-application, image/jpeg, application/xaml+xml, image/gif, image/pjpeg, application/x-ms-xbap, */*
Accept-Language: en-US
User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)
Accept-Encoding: gzip, deflate
Connection: Keep-Alive
Host:www.youtube.com

响应标头如下所示:

HTTP/1.1 200 OK
Date: Sat, 25 Aug 2012 19:46:51 GMT
Server: Apache
X-Content-Type-Options: nosniff
Content-Encoding: gzip
Set-Cookie: use_hitbox=d5c5516c3379125f43aa0d495d100d6ddAEAAAAw; path=/; domain=.youtube.com
Set-Cookie: VISITOR_INFO1_LIVE=av7rkkf4Sfw; path=/; domain=.youtube.com; expires=Mon, 22-Apr-2013 19:46:51 GMT
Expires: Tue, 27 Apr 1971 19:44:06 EST
Cache-Control: no-cache
P3P: CP="This is not a P3P policy! See //support.google.com/accounts/bin/answer.py?answer=151657&hl=en-US for more info."
X-Frame-Options: SAMEORIGIN
Content-Length: 18977
Content-Type: text/html; charset=utf-8

在这之后,第一个int read = zs.Read(buf, 0, buf.Length);有时会工作,但通常会失败,例外情况如下:

The magic number in GZip header is not correct. Make sure you are passing in a GZip stream. I've tried reading the data as string, and it looks encoded.

Youtube通过浏览器运行良好。将数据作为字符串读取时,它看起来已编码。

为什么我会得到这个,我应该如何解决?

更新

看起来这是传输过程中的某种错误。在10种情况中的5种情况下,它有效,在其他5种情况中,它在没有明显原因的情况下失败

这是IsGziped() 的代码

 bool IsGziped()
    {
        foreach (var h in Headers["Content-Encoding"])
        {
            if (h.ToLowerInvariant().Contains("gzip"))
            {
                return true;
            }
        }
        return false;
    }

StreamReader不一定只读取所需的字节数。由于内部缓冲,它可以读取更多内容。这导致压缩字节从NetworkStream ns中取出并放入StreamReader内部缓冲器中。

在取得字节之后,GZipStream不能读取它们。

您可能需要使用在二进制级别上工作的自定义标头解析解决方案。没有办法限制StreamReader只读取尽可能少的字节。

CCD_ 8不与其他读取器一起使用。

您可以使用以下代码(带标头的响应流)无缝分离。

// Read response.
var buffer2 = new byte[4096];
var hd = new MemoryStream();
var response = new MemoryStream();
var endHeader = false;
do
{
    // Your networkstream object instead > "stream".
    bytes = stream.Read(buffer2, 0, buffer2.Length);
    if (!endHeader)
    {
        var startIndex = 0;
        if (IsContainsHeaderCrLf(buffer2, out startIndex))
        {
            endHeader = true;
            hd.Write(buffer2, 0,startIndex);
            response.Write(buffer2, startIndex + 4, bytes - startIndex - 4);
        }
        else
        {
            hd.Write(buffer2, 0, bytes);
        }
    }
    else
    {
        response.Write(buffer2, 0, bytes);
    }
} while (bytes != 0);
var headertxt = System.Text.Encoding.UTF8.GetString(hd.ToArray());
var unziptxt = "";
var responsetxt = "";
if (headertxt.Contains("gzip"))
{
    unziptxt = System.Text.Encoding.UTF8.GetString(Decompress(response.ToArray()));
}
else
{
    responsetxt = System.Text.Encoding.UTF8.GetString(response.ToArray());
}
return headertxt + "rnrn" + unziptxt + responsetxt;
//...
private bool IsContainsHeaderCrLf(byte[] buffer, out int startIndex)
{
    for (var i = 0; i <= buffer.Length - 4; i++)
    {
        if (buffer[i] == 13 & buffer[i + 1] == 10 && buffer[i + 2] == 13 && buffer[i + 3] == 10)
        {
            startIndex = i;
            return true;
        }
    }
    startIndex = -1;
    return false;
}

奖金解压缩代码。

static byte[] Decompress(byte[] gzip)
{
    // Create a GZIP stream with decompression mode.
    // ... Then create a buffer and write into while reading from the GZIP stream.
    using (var stream = new GZipStream(new MemoryStream(gzip), CompressionMode.Decompress))
    {
        const int size = 4096;
        var buffer = new byte[size];
        using (var memory = new MemoryStream())
        {
            var count = 0;
            do
            {
                count = stream.Read(buffer, 0, size);
                if (count > 0)
                {
                    memory.Write(buffer, 0, count);
                }
            }
            while (count > 0);
            return memory.ToArray();
        }
    }
}

相关内容

最新更新