我正在尝试抓取一个 ASP.NET(.aspx)网站,该网站上的按钮通过 JavaScript 的 __doPostBack 触发回发。其中一个按钮会返回当前页面的可打印视图,另一个按钮则把 .csv 文件推送给客户端。
我在这里看到过一个描述 csv 下载问题的提问,但它没有得到解答:《在 PhantomJS 中通过 javascript 链接下载文件》
所以我专注于尝试在 PhantomJS 中获取可打印视图,因为它看起来更简单(它就显示在浏览器窗口中,一定有办法!)
按钮代码:
<!-- "Print Results" link: its href is a javascript: URL that calls
     __doPostBack with this control's name as the first argument and an
     empty string as the second. -->
<a id="ctl00_ctl00_ctl00_MainContentPlaceHolder_PrintResultsLinkButton"
title="Print Results" class="btn-blue"
href="javascript:
__doPostBack('ctl00$ctl00$ctl00$MainContentPlaceHolder$PrintResultsLinkButton','')
">
<span>Print Results</span>
</a>
我可以用 PhantomJS/CasperJS 点击这个链接,但似乎不起作用。我认为点击按钮时一定会发送一个带有全部请求头的请求,但我不知道如何接收响应。求助!
我的 CasperJS 代码,它能一直运行到我本应得到结果页面的那一步:https://gist.github.com/xShirase/7156131
我也尝试过直接执行(evaluate)这个 js 函数,它在 Chrome 控制台中可以工作,但在 PhantomJS 中仍然得不到结果……
迄今为止的最后一次尝试:我先加载一次页面,以获取 cookie 和隐藏输入字段的值,然后尝试自己发送 POST 请求。截图中的输出仍然相同,所以我知道我的请求没问题,但为什么我得不到正确的结果?
代码:
// Create the Casper instance before using it: nothing earlier in this
// script defines `casper`, so calling casper.start() on its own would fail.
var casper = require('casper').create();
casper.start();
/**
 * Entry step: opens the fee-schedule search-criteria page, clicks the
 * `a.btn` link and then queues grabResults as a follow-up step.
 *
 * Declared with `var` so the assignment does not create an implicit global
 * (an undeclared assignment throws a ReferenceError in strict-mode code).
 */
var capture = function () {
    var url = 'http://www.cms.gov/apps/physician-fee-schedule/search/search-criteria.aspx';
    casper.open(url).thenClick('a.btn', function () {
        // Queue the results step from inside the click callback, as before.
        this.then(grabResults);
    });
};
// Shared state written by grabResults and read by grabPRResults:
// [value of the hidden __VIEWSTATE input, document.cookie string].
// Declared explicitly instead of being created as an implicit global.
var a;

/**
 * Step: prints the current URL, opens the search-results page, saves a
 * screenshot to page.png and stores the page's __VIEWSTATE value and
 * cookie string in `a`. Finally queues grabPRResults as the next step.
 *
 * Must be invoked with `this` bound to the Casper instance (as Casper does
 * for step functions).
 */
var grabResults = function () {
    this.echo(this.getCurrentUrl());
    this.open('http://www.cms.gov/apps/physician-fee-schedule/search/search-results.aspx?Y=0&T=0&HT=2&CT=3&H1=00100&H2=11400&M=5').then(function () {
        this.capture('page.png');
        // Runs inside the page: relies on the page providing jQuery as `$`.
        a = this.evaluate(function () {
            var v = $('input:hidden#__VIEWSTATE').val();
            var d = document.cookie;
            return [v, d];
        });
    });
    this.then(grabPRResults);
};
/**
 * Step: replays the "Print Results" postback by hand. POSTs the form fields
 * of the search-results page (with __EVENTTARGET set to the print link
 * button), then waits 25 s and queues `lest`.
 *
 * Reads the global `a`, expected to be
 * [__VIEWSTATE value, document.cookie string] as stored by grabResults.
 * Must be invoked with `this` bound to the Casper instance.
 *
 * NOTE(review): pages with ASP.NET event validation enabled also expect an
 * __EVENTVALIDATION field; confirm whether this page emits one.
 */
var grabPRResults = function () {
    var resultsUrl = 'http://www.cms.gov/apps/physician-fee-schedule/search/search-results.aspx?Y=0&T=0&HT=2&CT=3&H1=00100&H2=11400&M=5';
    this.open(resultsUrl, {
        method: 'post',
        headers: {
            // Content-Length, Host and Connection are intentionally NOT set:
            // they describe the actual transport and body, and a hard-coded
            // Content-Length cannot match a body built from a dynamic
            // __VIEWSTATE. Accept-Encoding is also left to the browser so
            // that it never advertises an encoding (e.g. sdch, copied from
            // Chrome) that it may be unable to decode.
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Origin': 'http://www.cms.gov',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': resultsUrl,
            'Accept-Language': 'fr,en-US;q=0.8,en;q=0.6',
            'Cookie': a[1]
        },
        data: {
            '__EVENTTARGET':'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$PrintResultsLinkButton',
            '__EVENTARGUMENT':'',
            '__LASTFOCUS':'',
            '__VIEWSTATE':a[0],
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$PFSSResultsCPEWrapper_ClientState':'false',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$YearDropDown':'2013',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$TypeOfInfoDropDown':'pi',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPCTypeDropDown':'range',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$CarrierTypeDropDown':'all',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPC1Textbox':'00100',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPC2Textbox':'11400',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPC3Textbox':'',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPC4Textbox':'',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$HCPC5Textbox':'',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$ModifierDropDown':'%',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$CarrierDropDown':'default',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$CarrierLocalityDropDown':'default',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$ResultsControl1$PFSSGridView$ctl01$ tbGotoPage':'',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$ResultsControl1$PFSSGridView$ctl01$PFSSGridViewtopddlTopPageSize':'10',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$ResultsControl1$PFSSGridView$ctl14$ tbGotoPageBottom':'',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$ResultsControl1$PFSSGridView$ctl14$PFSSGridViewbottomddlBottomPageSize':'10',
            'ctl00$ctl00$ctl00$CMSGMainContentPlaceHolder$ToolContentPlaceHolder$PFSSContentPlaceHolder$DownloadsWidget1$DownloadsCPEWrapper_ClientState':'false'
        }
    }).then(function () {
        // Fixed pause before the final screenshot step.
        this.wait(25000);
        this.then(lest);
    });
};
/**
 * Final step: saves a screenshot of the current page to ppp.png.
 * Declared with `var` so the assignment does not create an implicit global.
 * Must be invoked with `this` bound to the Casper instance.
 */
var lest = function () {
    this.capture('ppp.png');
};
// Queue `capture` as a step, then run the queued steps.
casper.then(capture);
casper.run();
据我所知,您的主要问题是如何在回发完成时得到通知。我制作了一个简单的 aspx 页面来模拟耗时较长的回发,它应该适用于您的情况。等到回发完成之后,就可以使用标准的 CasperJS 功能进行抓取。我对公开发布针对政府网站的抓取说明有些顾虑,希望我的测试页面足以帮助您解决问题。
CasperJS
// Create the Casper instance used by the rest of this script.
var casper = require('casper').create({
    // verbose: true,   // enable to have log messages printed as they occur
    logLevel: "debug"
});

// The navigation itself is started further down by
// casper.start(url, callback), so no bare casper.start() call is needed here.

// Relay console.log output from the page context to the CasperJS console.
casper.on('remote.message', function (message) {
    this.echo(message);
});
/**
 * Step helper: prints the URL of the page currently loaded.
 * Declared with `var` so the assignment does not create an implicit global.
 * Must be invoked with `this` bound to the Casper instance.
 */
var grabResults = function () {
    this.echo(this.getCurrentUrl());
};
// Load the test page, click Button1 and wait until the ASP.NET AJAX
// postback triggered by the click has finished.
casper.start('http://localhost:13851/default.aspx', function () {
    // Register the endRequest listener BEFORE clicking. If it were added in
    // the step after the click, a fast postback could already have finished
    // and the flag below would never be set, ending in a timeout.
    this.evaluate(function () {
        window.onPostBackComplete = false;
        Sys.WebForms.PageRequestManager.getInstance().add_endRequest(function () {
            console.log("client: doPostback complete");
            window.onPostBackComplete = true;
        });
    });

    this.thenClick('#Button1', function () {
        // Poll the page-side flag until the postback has completed.
        this.waitFor(function check() {
            return this.evaluate(function () {
                return window.onPostBackComplete;
            });
        }, function then() {
            this.echo("doPostback complete");
            this.echo("value of test label: " + this.fetchText('#Label1'));
        }, function timeout() {
            this.echo("-- > timeout");
        },
        5000); // give up after 5 seconds
    });
});
// Run the queued steps; the callback fires once all of them are done.
casper.run(function () {
    this.echo("finished");
    // When a completion callback is supplied, CasperJS no longer exits on
    // its own, so terminate explicitly or the process keeps running.
    this.exit();
});
Default.aspx
<%@ Page Language="C#" AutoEventWireup="true" %>
<!DOCTYPE html>
<script runat="server">
    // Server-side click handler for Button1: updates Label1's text and then
    // blocks for one second before returning.
    protected void Button1_Click(object sender, EventArgs e)
    {
        Label1.Text = "Slow loaded text";

        // simulate a slow server
        System.Threading.Thread.Sleep(TimeSpan.FromSeconds(1));
    }
</script>
<%-- Test page markup: one server-side form containing a ScriptManager and
     an UpdatePanel that wraps Label1 and Button1. --%>
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
<title>Sample page</title>
</head>
<body>
<form id="form1" runat="server">
<asp:ScriptManager ID="ScriptManager1" runat="server"></asp:ScriptManager>
<div>
<%-- Label1 and Button1 sit inside the UpdatePanel; Button1's click is wired
     to the Button1_Click handler via OnClick. --%>
<asp:UpdatePanel ID="UpdatePanel1" runat="server" >
<ContentTemplate>
<asp:Label ID="Label1" runat="server" Text="Default Label"></asp:Label>
<br />
<asp:Button ID="Button1" runat="server" Text="Button" OnClick="Button1_Click" />
</ContentTemplate>
</asp:UpdatePanel>
</div>
</form>
</body>
</html>
参见:http://forums.asp.net/t/1245557.aspx?how+to+detect+the+end+of+__doPostBack+in+Javascript