我想解析一些像 Pluralsight 这样的 HTML 站点(例如 https://app.pluralsight.com/id?)。我该如何先以编程方式登录站点(不使用 WebBrowser 控件),然后请求另一个 URL(例如 Pluralsight 的其他页面),并使用 HtmlAgilityPack 获取响应并进行解析?
我已经写好了登录代码,但不知道下一步该怎么做。
/// <summary>
/// Performs a form-based login with <see cref="HttpWebRequest"/> and attaches a
/// single <see cref="CookieContainer"/> to the request so that cookies set by the
/// server can be reused by later requests that share the same container.
/// </summary>
public class Login
{
    // Cookie jar attached to the login request; reuse it on follow-up requests.
    private readonly CookieContainer Cookies = new CookieContainer();

    /// <summary>
    /// Posts the login form (URL-encoded) to the login page.
    /// </summary>
    /// <param name="username">Value sent as the <c>IDToken1</c> form field.</param>
    /// <param name="password">Value sent as the <c>IDToken2</c> form field.</param>
    public void SiteLogin(string username, string password)
    {
        Uri site = new Uri("https://app.pluralsight.com/id?");
        HttpWebRequest wr = (HttpWebRequest)WebRequest.Create(site);
        // HTTP method tokens are case-sensitive, so use the canonical upper-case form.
        wr.Method = "POST";
        wr.ContentType = "application/x-www-form-urlencoded";
        wr.Referer = "https://app.pluralsight.com/id?";
        wr.CookieContainer = Cookies;

        var parameters = new Dictionary<string, string>
        {
            {"realm", "vzw"},
            {"goto", ""},
            {"gotoOnFail", ""},
            {"gx_charset", "UTF-8"},
            {"rememberUserNameCheckBoxExists", "Y"},
            {"IDToken1", username},
            {"IDToken2", password}
        };

        using (var requestStream = wr.GetRequestStream())
        // UTF8Encoding(false) suppresses the byte-order mark. Encoding.UTF8 would make
        // StreamWriter emit a BOM in front of the first field name and corrupt the form body.
        using (var writer = new StreamWriter(requestStream, new UTF8Encoding(false)))
        {
            writer.Write(ParamsToFormEncoded(parameters));
        }

        using (var response = (HttpWebResponse)wr.GetResponse())
        {
            if (response.StatusCode == HttpStatusCode.OK)
            {
                // The response body is available here via response.GetResponseStream().
                // Follow-up requests should set their CookieContainer to Cookies.
            }
        }
    }

    /// <summary>
    /// Serialises the parameters as an application/x-www-form-urlencoded body:
    /// <c>key=value</c> pairs joined with <c>&amp;</c>, with spaces written as <c>+</c>.
    /// </summary>
    /// <param name="parameters">Form field names and values; values must not be null.</param>
    /// <returns>The encoded form body.</returns>
    private static string ParamsToFormEncoded(Dictionary<string, string> parameters)
    {
        return string.Join("&", parameters
            .Select(kvp => FormEncode(kvp.Key) + "=" + FormEncode(kvp.Value))
            .ToArray());
    }

    // Percent-encodes one form component, then rewrites the encoded space ("%20") as "+".
    private static string FormEncode(string value)
    {
        return Uri.EscapeDataString(value).Replace("%20", "+");
    }
}
您必须先通过 HttpWebResponse.GetResponseStream 获取响应流,然后通过 HtmlDocument 的 Load 方法加载文档。
// Hand the raw response body to HtmlAgilityPack so it can be parsed.
// `response` is the HttpWebResponse returned by the login request.
var doc = new HtmlAgilityPack.HtmlDocument();
var responseStream = response.GetResponseStream();
doc.Load(responseStream);
//further processing...