我正在尝试抓取 Newegg 网站,让程序在商品出现“添加到购物车”选项时通知我。我一直遇到的问题是:我想让程序不断循环,直到“添加到购物车”可用为止,但我不知道该如何开始编写这个循环。
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
from selenium import webdriver
import time
def checker(my_url='https://www.newegg.com/p/pl?d=rtx++2070'):
    """Download a Newegg listing page and report which items can be carted.

    Args:
        my_url: URL of the listing page to fetch. Defaults to the RTX 2070
            search used by this script.

    Returns:
        A list of ``(product_name, link)`` tuples, one per item container
        whose button area contains the text "Add to cart". The list is
        empty when no such item is found on the page.
    """
    # The context manager closes the connection even if read() raises.
    with uReq(my_url) as uClient:
        page_html = uClient.read()

    page_soup = soup(page_html, "html.parser")
    available = []
    for container in page_soup.findAll('div', {'class': "item-container"}):
        title = container.find('a', {'class': 'item-title'})
        button_area = container.find('div', {'class': 'item-button-area'})
        # Skip containers missing either node instead of raising IndexError.
        if title is None or button_area is None:
            continue
        if "Add to cart" not in button_area.text:
            continue
        # As in the original, the link is the first anchor in the container.
        link = container.find('a').get('href')
        available.append((title.text, link))
    return available


# Pause between polls so the site is not hit in a tight loop.
POLL_INTERVAL_SECONDS = 30


if __name__ == "__main__":
    # Re-check the page until at least one item can be added to the cart.
    while True:
        in_stock = checker()
        if in_stock:
            for product_name, link in in_stock:
                print(product_name, link)
            break
        print('retrying')
        time.sleep(POLL_INTERVAL_SECONDS)
您可以使用 XPath 轴(axes)方法来简化操作,可以参考这个有用的网站。下面是一个可以用于解决您问题的示例脚本:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

PATH = "chromedriver.exe"

# For every button labelled "Add to cart", climb to its enclosing
# "item-action" div, step back to the sibling "item-info" div, and select
# the anchor inside it (the element whose text is printed below).
AVAILABLE_TITLE_XPATH = (
    "//button[normalize-space()='Add to cart']"
    "/ancestor::div[@class='item-action']"
    "/preceding-sibling::div[@class='item-info']/a"
)

# NOTE(review): passing the driver path positionally is the Selenium 3
# calling convention; newer Selenium releases may require a Service
# object instead — confirm against the installed version.
driver = webdriver.Chrome(PATH)
try:
    driver.get("https://www.newegg.com/p/pl?d=rtx++2070")
    # find_elements(By.XPATH, ...) replaces the removed
    # find_elements_by_xpath helper.
    title_links = driver.find_elements(By.XPATH, AVAILABLE_TITLE_XPATH)
    for title_link in title_links:
        itemTitle = title_link.text
        # Blank line before each title (the original printed a literal "n").
        print("\n" + itemTitle)
finally:
    # Always shut the browser down so no chromedriver process is leaked.
    driver.quit()