元素在抓取网站时不可交互(可点击)



有人能告诉我代码出了什么问题吗?

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import csv
import requests
from bs4 import BeautifulSoup
# Pre-allocate a 1000 x 11 table of zeros. Note that `rows` is first the row
# count (an int) and is then rebound to the table itself (a list of lists).
rows, cols = (1000,11)
rows = [[0 for i in range(cols)] for j in range(rows)]
# Local paths to the chromedriver executable and to the Brave browser binary.
driver_path = 'C:/Users/adith/Downloads/chromedriver_win32/chromedriver.exe'
brave_path = 'C:/Program Files/BraveSoftware/Brave-Browser/Application/brave.exe'
# Make chromedriver launch the Brave binary instead of its default browser.
option = webdriver.ChromeOptions()
option.binary_location = brave_path
browser = webdriver.Chrome(executable_path=driver_path, options=option)
# Open the forum page in the Selenium-controlled browser.
browser.get('https://www.dell.com/community/Laptops/ct-p/Laptops')
# NOTE(review): `cookies` is not referenced by any code visible in this snippet.
# The value looks like a login/session token -- confirm it is not a live
# credential before publishing it.
cookies = {
'lithiumLogin:vjauj58549': '~2TtiW3OEsenvCn5Ir~fBcCal7YbmhAmxNWLe4LgaSRCss_g69Gqm2CAs-fDA_FtccFLDK3AoWuzXHz72fb'
}

# Repeatedly click the forum's "Load more" button; stops after 13 clicks or as
# soon as the last navigation link is no longer "Load more".
# (Indentation was lost when this snippet was pasted; the lines are kept as-is.)
def load():
count = 0
while True:
# The page is downloaded again with `requests`, separately from the Selenium
# session, so this HTML is a fresh copy rather than the browser's current DOM.
page = requests.get(browser.current_url)
soup = BeautifulSoup(page.content)
# Texts of all elements with class "lia-link-navigation"; the last one is
# treated as the "Load more" button.
button = [z.text for z in soup.find_all(class_='lia-link-navigation')]
print(button[-1])
if(button[-1]=='Load more'):
count = count + 1
# The click is issued immediately, with no scroll and no wait; this is the
# call that raises ElementClickInterceptedException in the traceback below.
browser.find_element(By.XPATH, '//*[@id="btn-load-more"]').click()
if(count>12):
break
else:
break

# Run load(); if it raises anything at all, run it one more time.
# NOTE: a bare `except:` also catches KeyboardInterrupt/SystemExit, and an
# exception raised by the second load() call is not handled.
try:
load()
except:
load()

它可以点击加载按钮3次,但我得到了一个错误,如下所示。

Load more
Load more
Load more
Load more
Load more
Load more
---------------------------------------------------------------------------
ElementClickInterceptedException          Traceback (most recent call last)
<ipython-input-6-ef6cae973b0d> in <module>
18 try:
---> 19     load()
20 except:
<ipython-input-6-ef6cae973b0d> in load()
11             count = count + 1
---> 12             browser.find_element(By.XPATH, '//*[@id="btn-load-more"]').click()
13             if(count>15):
~\anaconda3\lib\site-packages\selenium\webdriver\remote\webelement.py in click(self)
87         """Clicks the element."""
---> 88         self._execute(Command.CLICK_ELEMENT)
89 
~\anaconda3\lib\site-packages\selenium\webdriver\remote\webelement.py in _execute(self, command, params)
395         params['id'] = self._id
--> 396         return self._parent.execute(command, params)
397 
~\anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in execute(self, driver_command, params)
434         if response:
--> 435             self.error_handler.check_response(response)
436             response['value'] = self._unwrap_value(
~\anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py in check_response(self, response)
246             raise exception_class(message, screen, stacktrace, alert_text)  # type: ignore[call-arg]  # mypy is not smart enough here
--> 247         raise exception_class(message, screen, stacktrace)
248 
ElementClickInterceptedException: Message: element click intercepted: Element is not clickable at point (470, 1629)
(Session info: chrome=103.0.5060.114)

有人能解释一下代码出了什么问题吗?此外,有人能帮助我完全消除BeautifulSoup库的使用,以获得所需的解决方案吗?

由于您可以点击加载按钮3次,这意味着定位器策略

(By.XPATH, "//*[@id='btn-load-more']")

是正确的,问题并不出在定位器上。


解决方案

理想情况下,要点击一个可点击的元素,您需要使用 WebDriverWait 等待 element_to_be_clickable() 条件成立,然后再点击。您可以使用以下解决方案:

# `browser` is the WebDriver instance created in the question's script.
# Wait up to 20 seconds for the button to become clickable, then click it.
WebDriverWait(browser, 20).until(EC.element_to_be_clickable((By.XPATH, "//*[@id='btn-load-more']"))).click()

注意:您必须添加以下导入:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC

您已经通过 selenium 打开了浏览器,却又使用 requests 和 bs4 重新下载页面;requests 拿到的是一份全新的页面,并不反映浏览器中点击之后的状态。您应该只使用 selenium(不再使用 requests)来完成同样的工作:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import csv
import requests
from bs4 import BeautifulSoup
# Pre-allocate a 1000 x 11 table of zeros. `rows` starts out as the row count
# and is then rebound to the table itself (a list of `cols`-wide lists).
rows, cols = (1000, 11)
rows = [[0] * cols for _ in range(rows)]

# Local paths to the chromedriver executable and to the Brave browser binary.
driver_path = 'C:/Users/adith/Downloads/chromedriver_win32/chromedriver.exe'
brave_path = 'C:/Program Files/BraveSoftware/Brave-Browser/Application/brave.exe'

# Make chromedriver launch the Brave binary instead of its default browser.
option = webdriver.ChromeOptions()
option.binary_location = brave_path

# Selenium 4 deprecates the `executable_path` keyword of webdriver.Chrome; the
# driver location is passed through a Service object instead.
from selenium.webdriver.chrome.service import Service

browser = webdriver.Chrome(service=Service(driver_path), options=option)
browser.get('https://www.dell.com/community/Laptops/ct-p/Laptops')
def load(max_clicks=13, timeout=20):
    """Click the page's "Load more" button until it is gone or the budget is used.

    All state is read from the live Selenium session (`browser`), so the
    check reflects the page as it currently is in the browser.

    Args:
        max_clicks: Maximum number of clicks to perform. The default of 13
            matches the original ``count > 12`` cut-off.
        timeout: Seconds to wait for the button to become clickable before
            Selenium raises ``TimeoutException``.
    """
    for _ in range(max_clicks):
        # The last "lia-link-navigation" element is expected to be the
        # "Load more" button while more content is available.
        labels = [
            link.text
            for link in browser.find_elements(By.CLASS_NAME, 'lia-link-navigation')
        ]
        if not labels:
            # Nothing matched: there is no button to click.
            break
        print(labels[-1])
        if labels[-1] != 'Load more':
            break

        # Scroll to the end of the page, where the button is, then wait until
        # it is actually clickable instead of clicking right away.
        browser.execute_script("window.scrollTo(0,document.body.scrollHeight);")
        WebDriverWait(browser, timeout).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="btn-load-more"]'))
        ).click()

# Best-effort retry: if the first pass raises, run load() once more; a failure
# on the second attempt propagates. `except Exception` (rather than a bare
# `except:`) keeps Ctrl+C / SystemExit from being swallowed and retried.
try:
    load()
except Exception:
    load()

点击之前,请尝试先滚动到页面的末尾(按钮所在的位置):

def load():
    """Click "Load more" up to 13 times, scrolling to the page end before each click.

    The button check uses a copy of the page downloaded with `requests`,
    while the scroll and the click are performed in the Selenium `browser`.
    """
    # `time` is not imported by this snippet, so import it here; without it
    # the `time.sleep(1)` call below raises NameError.
    import time

    count = 0
    while True:
        page = requests.get(browser.current_url)
        soup = BeautifulSoup(page.content)
        button = [z.text for z in soup.find_all(class_='lia-link-navigation')]
        if not button:
            # No navigation links found: avoid IndexError on button[-1].
            break
        print(button[-1])
        if button[-1] != 'Load more':
            break

        count = count + 1
        # Scroll to the end of the page, where the button is, and give the
        # page a moment to settle before clicking.
        browser.execute_script("window.scrollTo(0,document.body.scrollHeight);")
        time.sleep(1)
        browser.find_element(By.XPATH, '//*[@id="btn-load-more"]').click()
        if count > 12:
            break

最新更新