我正尝试使用 Unirest 和 Cheerio 抓取谷歌搜索结果。在这里,我想得到抓取到的标题数组。但是,当我在 Unirest 代码块外部用 console.log 打印时,什么也没有输出;而在 Unirest 代码块内部打印时,却能得到数据。
这是我的代码:
const unirest = require('unirest')
const cheerio = require('cheerio')
// Scraped titles; filled in asynchronously by the request callback below.
var titles = []
unirest
.get('https://www.google.com/search?q=oxylabs')
.headers({'Accept': 'application/json', 'Content-Type': 'application/json'})
.proxy(proxy)// proxy value hidden
.then((response) =>
{
// Load the response body into cheerio and store one title per '.uEierd' element.
const $ = cheerio.load(response.body)
$('.uEierd').each((i,el) =>
{
titles[i] = $(el)
.find('.ZINbbc div.v5yQqb a.cz3goc div.CCgQ5 span')
.text()
})
})
// NOTE: this loop runs synchronously, right after the request has been started
// and before the .then() callback above has executed, so `titles` is still
// empty at this point and nothing is printed.
for (let i = 0; i < titles.length; i++)
{
console.log(titles[i]);
}
您的代码不起作用,因为 unirest 请求是异步的:它的回调要等到 for 循环执行完之后才会运行,所以循环执行时 titles 仍然是空数组。在线 IDE 中的代码和完整示例:
const unirest = require("unirest");
const cheerio = require("cheerio");
/**
 * Requests the Google results page for "oxylabs" and collects the text of the
 * `.v0nnCb span` descendants of every `.uEierd` element in the response body.
 *
 * @returns {Promise<string[]>} Resolves with one title string per matched
 *   `.uEierd` element.
 * @throws Rejects with the original error, after logging it with
 *   `console.error`, when the request or the parsing fails.
 */
async function getData() {
  try {
    // The unirest request object is thenable, so it can be awaited directly
    // instead of being wrapped in a hand-written `new Promise(...)`.
    // Chain `.proxy(...)` after `.headers(...)` if a proxy is required.
    const response = await unirest
      .get("https://www.google.com/search?q=oxylabs")
      .headers({
        "Accept": "application/json",
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36",
      });

    const $ = cheerio.load(response.body);
    return $(".uEierd")
      .toArray()
      .map((el) => $(el).find(".v0nnCb span").text());
  } catch (error) {
    // Keep the original behaviour: log, then propagate the same error object.
    console.error(error);
    throw error;
  }
}
/**
 * Waits for getData() and prints every title it resolves with, one per line.
 *
 * A failure is reported on stderr and flagged through a non-zero exit code
 * instead of being left as an unhandled promise rejection.
 *
 * @returns {Promise<void>} Settles once printing (or error reporting) is done;
 *   never rejects because of a getData() failure.
 */
async function logData() {
  try {
    const titles = await getData();
    for (const title of titles) {
      console.log(title);
    }
  } catch (error) {
    console.error("Failed to fetch titles:", error);
    // Signal failure to the shell without aborting pending I/O.
    process.exitCode = 1;
  }
}
logData();
输出:
Oxylabs Premium Proxies - Residential Proxy Network
72M+ Residential IPs Network - 7 Day-Free Trial - Join Now
或者,您可以使用 SerpApi 的 Google Ad Results API。如果您不想自己处理验证码(captcha)、轮换代理,也不想从头编写并维护解析器,那么使用 API 会更容易。查看 Playground 了解更多信息。
用法:
const SerpApi = require("google-search-results-nodejs");
// The API key is read from the API_KEY environment variable.
const mySecret = process.env['API_KEY'] // your API key from serpapi.com
const search = new SerpApi.GoogleSearch(mySecret);
// Search parameters handed to search.json() below.
const params = {
engine: "google", // search engine
q: "oxylabs", // search query
location: "Austin, Texas, United States", // location parameter
google_domain: "google.com", // google domain of the search
gl: "us", // country of the search
hl: "en", // language of the search
};
/**
 * Callback for search.json(): prints the `title` of every entry in `data.ads`.
 *
 * @param {object} data - Response object; `data.ads` may be missing, in which
 *   case an empty array is printed.
 * @returns {void}
 */
const getAdTitles = (data) => {
  const ads = data.ads ?? [];
  const titles = ads.map(({ title }) => title);
  console.log(titles);
};
search.json(params, getAdTitles);
输出:
[
'Oxylabs Premium Proxies - Residential Proxy Network',
'72M+ Residential IPs Network - 7 Day-Free Trial - Join Now'
]
免责声明:我在 SerpApi 工作。