用axios抓取产品标题和url



这是我从 YouTube 教程中获得的代码,最初用于从新闻网站抓取新闻。

// Third-party modules: HTTP server, HTTP client and HTML parser.
const express = require('express')
const axios = require('axios')
const cheerio = require('cheerio')

// Port comes from the PORT environment variable; 8000 is used when it is unset or empty.
const PORT = process.env.PORT || 8000

// The Express application that the routes are registered on.
const app = express()

// Category listing pages to scrape.
//   name    - label stored as `source` on every scraped product
//   address - URL of the category page to download
//   base    - site origin used to turn relative hrefs into absolute URLs
const categories = [
  {
    name: 'jamtangan',
    address: 'https://www.blibli.com/c/2/aksesoris-jam-tangan/AK-1000005/JA-1000225',
    // Product links in the sample markup are site-relative ("/p/..."), so the
    // origin is required to produce usable absolute URLs.
    base: 'https://www.blibli.com'
  }
]

// Scraped products accumulate here and are returned by the /products route.
const products = []
// Download every category page and collect each link whose title mentions "jam".
// Results are appended to the module-level `products` array as
// { title, url, source } objects once each request resolves.
categories.forEach(category => {
  axios.get(category.address)
    .then(response => {
      const $ = cheerio.load(response.data)
      // Relative hrefs (e.g. "/p/...") are resolved against the configured base,
      // falling back to the category page address when base is empty.
      const baseUrl = category.base || category.address

      $('a').each(function () {
        const link = $(this)
        const href = link.attr('href')
        // Anchors without an href cannot produce a product URL.
        if (!href) return

        // In the sample markup the product name lives in .product__title; reading
        // the whole anchor would also pull in price, location and rating text.
        const titleNode = link.find('.product__title')
        const title = (titleNode.length > 0 ? titleNode : link)
          .text()
          .replace(/\s+/g, ' ')
          .trim()
        // Case-insensitive so that both "jam" and "Jam Tangan" match.
        if (!title.toLowerCase().includes('jam')) return

        let url
        try {
          url = new URL(href, baseUrl).href
        } catch (err) {
          // Malformed hrefs are skipped instead of aborting the whole page.
          return
        }

        products.push({
          title,
          url,
          source: category.name
        })
      })
    })
    .catch(err => {
      // Without this handler a failed request is an unhandled promise rejection.
      console.error(`Failed to scrape ${category.address}: ${err.message}`)
    })
})


// NOTE(review): no app.listen(PORT) call is visible in this snippet — confirm the
// server is started elsewhere, otherwise these routes are never served.

// Root endpoint: responds with a JSON-encoded welcome string.
// The text previously advertised a climate-change news API, which this is not.
app.get('/', (req, res) => {
  res.json('Welcome to Products API')
})

// Responds with whatever has been pushed into `products` so far. The scraping
// requests are asynchronous, so the list can still be empty right after startup.
app.get('/products', (req, res) => {
  res.json(products)
})

我该如何修改这个 GET 请求,才能得到名称中包含字符串 "jam" 的产品列表(产品名称及其 URL)?下面是商品目录页面中一个商品的 HTML:

<div data-v-016547a4="" tabindex="0" class="product__item"><a data-v-016547a4="" href="/p/baterai-maxell-lr44-baterai-kancing-maxel-jam-cts-99-kf909b-kalkulator/ps--LUO-70001-00038?ds=LUO-70001-00038-00001&amp;source=CATEGORY&amp;sid=cf162d5637763f56&amp;cnc=false&amp;pickupPointCode=PP-3137527&amp;pid=LUO-70001-00038" class="" target="_blank"><div data-v-016547a4="" class="product__item-container"><div data-v-016547a4="" class="product__image"><div data-v-016547a4="" class="product__tags"><div data-v-016547a4=""><!----></div></div> <div data-v-016547a4="" class="product__wishlist"><i data-v-016547a4="" class="bli-wishlist-icon"></i></div> <div data-v-016547a4="" id="PRODUCT_IMAGE_LUO-70001-00038" class="product__image-color product__image-color__gridview"><div data-v-016547a4="" class="product__itemImage product__image__grid-view"><img data-v-016547a4="" title="Baterai Maxell LR44 baterai kancing Maxel jam CTS-99 KF909B kalkulator" src="https://www.static-src.com/wcsstore/Indraprastha/images/catalog/medium//97/MTA-7927225/maxell_baterai_maxell_lr44_baterai_kancing_maxel_jam_cts-99_kf909b_kalkulator_full01_qrfnk13y.jpg" alt="Baterai Maxell LR44 baterai kancing Maxel jam CTS-99 KF909B kalkulator"></div> <!----> <div data-v-016547a4="" class="product__color"><div data-v-2ecbe0a1="" data-v-016547a4="" class="attribute"><!----> <!----></div></div></div> <!----></div> <div data-v-016547a4="" class="product__description"><div data-v-016547a4="" class="product__content"><!----> <!----> <div data-v-016547a4="" title="Baterai Maxell LR44 baterai kancing Maxel jam CTS-99 KF909B kalkulator" class="product__title">
Baterai Maxell LR44 baterai kancing Maxel jam CTS-99 KF909B kalkulator
</div> <div data-v-016547a4="" class="product__body"><p data-v-016547a4="" class="product__body__price"><strong data-v-016547a4="" class="product__body__price__display">
Rp1.500
<!----></strong> <span data-v-016547a4="" class="product__body__price__discount"><span data-v-016547a4="" class="product__body__price__slashed">Rp5.000</span> <span data-v-016547a4="" class="product__body__price__slashed-percentage">
70%
</span></span></p> <div data-v-016547a4="" class="product__body__location_container"><!----> <img data-v-016547a4="" src="https://www.static-src.com//siva/asset//11_2020/icon-top-rated-bronze.png" alt="Bronze" class="product__body__location_merchantIcon"> <span data-v-016547a4="" class="product__body__location"><!----> <span data-v-016547a4="" class="product__body__location__text">
Kab. Tangerang
</span></span></div> <div data-v-016547a4="" class="product__body__rating"><div data-v-016547a4="" class="product__body__rating__stars seperator"><img data-v-016547a4="" src="" alt="rating icon" class="product__body__rating__stars__icon"> <span data-v-016547a4="" class="product__body__rating__stars__rating">4.3</span> <span data-v-016547a4="" class="product__body__rating__stars__count">(13)</span></div> <div data-v-016547a4="" class="product__body__rating__sold"><span data-v-016547a4="" class="product__body__rating__sold__count">Terjual 363</span></div></div> <div data-v-016547a4="" class="product__body__rating__badge-gridView"><!----> <!----></div></div></div> <!----></div></div></a> <div data-v-016547a4="" class="product__add-to-cart-section"><button data-v-016547a4="" class="product__body__button">Tambah ke Bag</button></div> <!----></div>
// Credit for the tutorial this code is adapted from.
const tutorial = "Great tutorial from Ania Kubow";
// NOTE(review): if this snippet is pasted into the same module as the earlier
// one, `products` is declared twice — keep only one declaration.
const products = []
// Page to scrape. `category` is not defined at module scope and `adress` was a
// misspelling, so read the address of the first configured category instead.
const url = categories[0].address
// Root endpoint: replies with a JSON-encoded welcome string.
app.get('/', function (_req, res) {
  res.json('Welcome to Products API')
})
/**
 * GET /products — downloads the page at `url`, extracts every product whose
 * title contains "jam" (case-insensitive) and responds with a JSON array of
 * { title, price } objects.
 *
 * The misspelled path "/prodcuts" is kept as an alias so existing callers
 * continue to work.
 *
 * Responds with status 200 and the array on success, or status 500 and
 * { status: 500, error } when the page cannot be fetched or parsed.
 */
app.get(['/products', '/prodcuts'], async function (req, res) {
  // Headers must be set before the body is written.
  res.setHeader('Access-Control-Allow-Origin', '*')
  res.setHeader('Content-Type', 'application/json;charset=utf-8')

  try {
    const response = await axios.get(url)
    const $ = cheerio.load(response.data)
    // Built per request so repeated calls do not accumulate duplicates.
    const found = []

    $('a').each(function () {
      const card = $(this)
      // Class selectors need the leading dot; text() includes the surrounding
      // newlines from the markup, so trim it.
      const title = card.find('.product__title').text().trim()
      if (!title.toLowerCase().includes('jam')) return

      const price = card.find('.product__body__price__display').text().trim()
      found.push({
        title,
        price,
      })
    })

    res.status(200).json(found)
  } catch (err) {
    console.error(`Failed to scrape ${url}: ${err.message}`)
    res.status(500).json({ status: 500, error: 'Failed to fetch products' })
  }
})

请确认 const url 变量的值是否正确,并检查是否需要对 text() 返回的结果使用 trim() 或 split('\n') 方法来清理数据。