如何使用Puppeteer和Cheerio从多个页面抓取数据并在一个页面中显示



我尝试从两个不同的网页中抓取数据,并在自己的页面中显示/比较。它可以很好地显示来自一个网站的数据,当我添加另一个URL时崩溃了。(好吧,仍然显示从第一个网站获得的信息,但第二个网站保持空白(那么如何编写代码从多个网站获取数据呢?这是代码

//modules for express, path, ejs, puppeteer, cheerio
//steup const
const express = require('express');
const app = express();
const puppeteer = require('puppeteer');
const cheerio = require('cheerio');
const path = require('path')
let browser;
app.set('views', path.join(__dirname, 'views'));
app.set('view engine', 'ejs');
//async getData
async function getData(url, page){
try{
await page.goto(url, {waitUnitl : 'load', timeout : 0})
const html = await page.evaluate( () => document.body.innerHTML);
const $ = cheerio.load(html);
let text1 = $("#feature-bullets > ul > li:nth-child(6) > span").text();
let text2 = $("#feature-bullets > ul > li:nth-child(8) > span").text();
let text3 = $("#feature-bullets > ul > li:nth-child(10) > span").text();
let text4 = $("#feature-bullets > ul > li:nth-child(9) > span").text();
let img = $("#landingImage").attr("src");
let aprice = $("#priceblock_dealprice").text();
let aunit = $("#variation_style_name > div > span > span.a-truncate-cut > span").text();
let ashipping = $("#deliveryMessageMirId > b").text();
let awarranty = $("#creturns-policy-anchor-text").text();
let eprice = $("#a-autoid-10-announce > span.a-color-base > span").text();
return{text1, text2, text3, text4, aprice, aunit, ashipping, awarranty, img, eprice}
}
catch(error){
console.log(error);
}
}
//routes
app.get('/results', async function(req, res){
browser = await puppeteer.launch({headless : true})
const page = await browser.newPage();
//calling the getData function and sending URL and the page
let data = await getData('https://www.amazon.com/TP-Link-Deco-Whole-Home-System/dp/B06WVCB862/ref=sr_1_3?dchild=1&keywords=mesh+wifi+system&qid=1606836634&sr=8-3', page, 'https://www.amazon.com/gp/product/1616770813/ref=ppx_yo_dt_b_asin_title_o00_s00?ie=UTF8&psc=1', page );
res.render('results', {data : data});
})
//server
app.listen(3000, ()=>{
console.log('server running');
})
----

希望我问的是正确的,这是我在这里问的第一个问题。我刚开始学习node.js,所以还没有掌握太多知识。非常感谢

您需要更改getData函数的调用,当它一次只接受2个参数时,您不能传递4个参数。

因此,您可以通过将对以下两个页面的调用分开来解决此问题:

let data1 = await getData('https://www.amazon.com/TP-Link-Deco-Whole-Home-System/dp/B06WVCB862/ref=sr_1_3?dchild=1&keywords=mesh+wifi+system&qid=1606836634&sr=8-3', page);
let data2 = await getData('https://www.amazon.com/gp/product/1616770813/ref=ppx_yo_dt_b_asin_title_o00_s00?ie=UTF8&psc=1', page );
let data = [data1, data2]
res.render('results', {data})

最新更新