我正试图通过实现web tcp客户端来了解更多关于web和tcp如何工作的信息。
目前,我的web请求功能如下:
public string SendWebRequest(SocketWebRequest request)
{
using (NetworkStream ns = tc.GetStream())
{
using (System.IO.StreamReader sr = new System.IO.StreamReader(ns))
{
request.WriteTo(ns);
ns.Flush();
var statusLine = sr.ReadLine();
ProcessStatusLine(statusLine);
Headers = ReadHeaders(sr);
ProcessCookies(request.Host);
int contentLength = 0;
if (Headers.ContainsKey("Content-Length"))
{
foreach (var cl in Headers["Content-Length"])
{
int buf;
if (int.TryParse(cl,out buf))
{
contentLength = buf;
break;
}
}
}
if (contentLength==0)
{
return "";
}
byte[] content = new byte[contentLength];
if (IsGziped())
{
MemoryStream decompressed = new MemoryStream();
using (var zs = new GZipStream(ns, CompressionMode.Decompress))
{
while (true)
{
var buf = new byte[1024];
int read = zs.Read(buf, 0, buf.Length);
if (read == 0)
{
break;
}
decompressed.Write(buf, 0, read);
}
}
content = decompressed.ToArray();
}
else
{
using (BinaryReader rdr = new BinaryReader(ns))
{
rdr.Read(content, 0, content.Length);
}
}
var encoding = GetEncoding();
return encoding.GetString(content.ToArray());
}
}
}
请求如下:
GET http://www.youtube.com/ HTTP/1.1
Accept: application/x-ms-application, image/jpeg, application/xaml+xml, image/gif, image/pjpeg, application/x-ms-xbap, */*
Accept-Language: en-US
User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)
Accept-Encoding: gzip, deflate
Connection: Keep-Alive
Host:www.youtube.com
响应标头如下所示:
HTTP/1.1 200 OK
Date: Sat, 25 Aug 2012 19:46:51 GMT
Server: Apache
X-Content-Type-Options: nosniff
Content-Encoding: gzip
Set-Cookie: use_hitbox=d5c5516c3379125f43aa0d495d100d6ddAEAAAAw; path=/; domain=.youtube.com
Set-Cookie: VISITOR_INFO1_LIVE=av7rkkf4Sfw; path=/; domain=.youtube.com; expires=Mon, 22-Apr-2013 19:46:51 GMT
Expires: Tue, 27 Apr 1971 19:44:06 EST
Cache-Control: no-cache
P3P: CP="This is not a P3P policy! See //support.google.com/accounts/bin/answer.py?answer=151657&hl=en-US for more info."
X-Frame-Options: SAMEORIGIN
Content-Length: 18977
Content-Type: text/html; charset=utf-8
在这之后,第一个int read = zs.Read(buf, 0, buf.Length);
有时会工作,但通常会失败,例外情况如下:
The magic number in GZip header is not correct. Make sure you are passing in a GZip stream. I've tried reading the data as string, and it looks encoded.
Youtube通过浏览器运行良好。将数据作为字符串读取时,它看起来已编码。
为什么我会得到这个,我应该如何解决?
更新
看起来这是传输过程中的某种错误。在10种情况中的5种情况下,它有效,在其他5种情况中,它在没有明显原因的情况下失败
这是IsGziped()
的代码
bool IsGziped()
{
foreach (var h in Headers["Content-Encoding"])
{
if (h.ToLowerInvariant().Contains("gzip"))
{
return true;
}
}
return false;
}
StreamReader
不一定只读取所需的字节数。由于内部缓冲,它可以读取更多内容。这导致压缩字节从NetworkStream ns
中取出并放入StreamReader
内部缓冲器中。
在取得字节之后,GZipStream
不能读取它们。
您可能需要使用在二进制级别上工作的自定义标头解析解决方案。没有办法限制StreamReader
只读取尽可能少的字节。
CCD_ 8不与其他读取器一起使用。
您可以使用以下代码(带标头的响应流)无缝分离。
// Read response.
var buffer2 = new byte[4096];
var hd = new MemoryStream();
var response = new MemoryStream();
var endHeader = false;
do
{
// Your networkstream object instead > "stream".
bytes = stream.Read(buffer2, 0, buffer2.Length);
if (!endHeader)
{
var startIndex = 0;
if (IsContainsHeaderCrLf(buffer2, out startIndex))
{
endHeader = true;
hd.Write(buffer2, 0,startIndex);
response.Write(buffer2, startIndex + 4, bytes - startIndex - 4);
}
else
{
hd.Write(buffer2, 0, bytes);
}
}
else
{
response.Write(buffer2, 0, bytes);
}
} while (bytes != 0);
var headertxt = System.Text.Encoding.UTF8.GetString(hd.ToArray());
var unziptxt = "";
var responsetxt = "";
if (headertxt.Contains("gzip"))
{
unziptxt = System.Text.Encoding.UTF8.GetString(Decompress(response.ToArray()));
}
else
{
responsetxt = System.Text.Encoding.UTF8.GetString(response.ToArray());
}
return headertxt + "rnrn" + unziptxt + responsetxt;
//...
private bool IsContainsHeaderCrLf(byte[] buffer, out int startIndex)
{
for (var i = 0; i <= buffer.Length - 4; i++)
{
if (buffer[i] == 13 & buffer[i + 1] == 10 && buffer[i + 2] == 13 && buffer[i + 3] == 10)
{
startIndex = i;
return true;
}
}
startIndex = -1;
return false;
}
奖金解压缩代码。
static byte[] Decompress(byte[] gzip)
{
// Create a GZIP stream with decompression mode.
// ... Then create a buffer and write into while reading from the GZIP stream.
using (var stream = new GZipStream(new MemoryStream(gzip), CompressionMode.Decompress))
{
const int size = 4096;
var buffer = new byte[size];
using (var memory = new MemoryStream())
{
var count = 0;
do
{
count = stream.Read(buffer, 0, size);
if (count > 0)
{
memory.Write(buffer, 0, count);
}
}
while (count > 0);
return memory.ToArray();
}
}
}