将鼠标悬停在选项卡上,然后抓取产品



你好,我正在尝试使用Selenium自动抓取产品标题和价格。我使用了ActionChains和move_to_element,但不知何故,它抛出了超时异常(TimeoutException)。有更好的方法吗?我要抓取的是选项卡中的标题:

https://denago.com/collections/ebikes

#For Dynamic webpage, import selenium
!apt-get update
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
!pip install selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC
# Set up a headless Chrome driver.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
# Let Selenium locate chromedriver on PATH; newer Selenium 4 releases no
# longer accept the driver path as a positional argument.
driver = webdriver.Chrome(options=options)
driver.implicitly_wait(10)

driver.get('https://denago.com/collections/ebikes')
# Wait (up to 5 s) for the navigation entry to be visible, then hover over it.
# The locator is an absolute XPath; this wait is the call that raises
# TimeoutException when the element is not found or not visible.
ourbike = WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.XPATH, "/html/body/div[6]/div/header/nav/ul/li[1]/a/span")))
ActionChains(driver).move_to_element(ourbike).perform()
# Print the text of every element carrying the 'mm-title' class.
titles = driver.find_elements(By.CLASS_NAME, 'mm-title')
for title in titles:
    print(title.text)

有几个问题:

  1. 浏览器以较小的默认窗口大小打开,因此您要悬停的元素没有显示在页面上。所以需要设置窗口大小:options.add_argument('window-size=1200,1980')
  2. 有一条关于cookie的消息与页面上的元素重叠。最好关闭它:driver.find_element(By.ID, 'CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll').click()
  3. 您使用的XPATH无法找到要悬停的元素。使用(By.XPATH, '(//li[@itemid="m9RVB"])')可以很容易地找到它,但页面上有两个这样的元素,而第一个是隐藏的。因此,您需要悬停在第二个元素上,即在定位器末尾加上[2]:(By.XPATH, '(//li[@itemid="m9RVB"])[2]')
    因此,以下是代码:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Set up a headless Chrome driver.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
# Use a larger window so the menu entry to hover over is laid out on the page.
options.add_argument('window-size=1200,1980')
# Let Selenium locate chromedriver on PATH; newer Selenium 4 releases no
# longer accept the driver path as a positional argument.
driver = webdriver.Chrome(options=options)
try:
    driver.implicitly_wait(10)
    driver.get('https://denago.com/collections/ebikes')
    # Accept the cookie dialog so it no longer overlaps the page elements.
    driver.find_element(By.ID, 'CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll').click()
    # Two elements match this itemid and the first one is hidden, hence [2].
    ourbike = WebDriverWait(driver, 5).until(EC.visibility_of_element_located((By.XPATH, '(//li[@itemid="m9RVB"])[2]')))
    ActionChains(driver).move_to_element(ourbike).perform()
    # Print the text of every element carrying the 'mm-title' class.
    for title in driver.find_elements(By.CLASS_NAME, 'mm-title'):
        print(title.text)
finally:
    # Always close the browser, even if a lookup or wait fails.
    driver.quit()

我想你要找的是类似这样的代码:

# Needed libs
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Create a headless Chrome driver.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)
try:
    # Maximize the window, because otherwise the page will be different.
    # NOTE(review): in headless mode this may have no effect; an explicit
    # window-size argument may be needed instead -- confirm.
    driver.maximize_window()
    # Navigate to the product listing.
    driver.get('https://denago.com/collections/ebikes')
    # Wait for the first title to be present before collecting the rest.
    WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, "(//h5)[1]")))
    # Collect all title elements; only their count is used below.
    titles = driver.find_elements(By.XPATH, '//h5')
    # XPath positions are 1-based, so iterate from 1 to len(titles) inclusive.
    # The n-th title is paired with the n-th price element.
    for position in range(1, len(titles) + 1):
        product_name = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, f'(//h5)[{position}]'))).text
        product_price = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.XPATH, f'(//div[@class="price"])[{position}]'))).text
        print(f"Product {position}: {product_name} - Price: {product_price}")
finally:
    # Always close the browser, even if a wait times out.
    driver.quit()

该页面上有5辆自行车。下面是一种更 Pythonic(也更符合 Selenium 习惯)的方式来获取这些标题(如果需要,还可以获取每辆自行车的其他信息):

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Configure Chrome with a fixed window size.
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument('disable-notifications')
chrome_options.add_argument("window-size=1280,720")
webdriver_service = Service("chromedriver/chromedriver")  # path to where you saved the chromedriver binary
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
wait = WebDriverWait(driver, 25)

try:
    driver.get('https://denago.com/collections/ebikes')
    # Best-effort dismissal of the cookie dialog: any failure here is
    # reported and the scrape continues.
    try:
        wait.until(EC.element_to_be_clickable((By.ID, "CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"))).click()
        print('accepted cookies')
    except Exception:
        print('no cookie button!')
    # Wait for the title links inside the product cards, then print each one.
    bikes = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//div[@class="grid-view-item product-card"]//h5/a')))
    for bike in bikes:
        print(bike.text.strip())
finally:
    # Close the browser so the Chrome process is not left running.
    driver.quit()

终端打印输出:

accepted cookies
DENAGO CITY MODEL 1 STEP-THRU EBIKE
DENAGO CITY MODEL 1 TOP-TUBE EBIKE
DENAGO COMMUTE MODEL 1 STEP-THRU EBIKE
DENAGO FAT TIRE STEP-THRU EBIKE
DENAGO COMMUTE MODEL 1 TOP-TUBE EBIKE

Selenium 文档:https://www.selenium.dev/documentation/

相关内容

最新更新