我正在尝试进行网页抓取(web scraping)。到目前为止,我已经写好了从一个页面中提取数据并翻到下一页的代码。但是,当我把这个过程放进循环、对其余所有页面执行相同操作时,它会报错。到目前为止,我的代码如下:
import time
import requests
import pandas
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import json
# Script from the question: open the RE/MAX search results, click through the
# listings on each results page, collect a few text fields from every listing,
# then move on to the next results page.
#
# NOTE: the scraping logic is intentionally left as asked. This is the version
# that raises StaleElementReferenceException at `link.click()` once the results
# page changes (see the traceback that follows).

# Raw string so the backslashes of the Windows path are kept literally.
driver = webdriver.Chrome(r'C:\DRIVERS\chromedriver.exe')
driver.get('https://www.remax.pt/comprar?searchQueryState={%22regionName%22:%22%22,%22businessType%22:1,%22listingClass%22:1,%22page%22:1,%22sort%22:{%22fieldToSort%22:%22ContractDate%22,%22order%22:1},%22mapIsOpen%22:false}')
driver.maximize_window()
driver.implicitly_wait(5)
wait = WebDriverWait(driver, 10)

# Presumably the "decline" button of the cookie banner (judging by its id).
cookies = driver.find_element_by_id('rcc-decline-button')
cookies.click()

# One [details, price, title, address] row per visited listing.
element_list = []

for j in range(1, 2569):
    try:
        # Only odd XPath positions 1, 3, ..., 39 are visited.
        for i in range(1, 40, 2):
            link = driver.find_element_by_xpath("(//div[@class='listing-search-searchdetails-component'])[{0}]".format(i))
            link.click()
            try:
                detalhes = driver.find_element_by_id('details')
                preço = driver.find_element_by_id('listing-price')
                tipo = driver.find_element_by_id('listing-title')
                freguesia = driver.find_element_by_xpath('//h5[@class="listing-address"]')
                imoveis = [detalhes.text, preço.text, tipo.text, freguesia.text]
                element_list.append(imoveis)
            finally:
                # Always return to the results page, even if a field was missing.
                driver.back()
    finally:
        # Runs whether or not the listing loop failed: go to the next results page.
        wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@class='page-link'][.//span[.='Next']]"))).click()
第一页中的所有值都能成功抓取,但在翻到下一页时,会显示以下错误:
错误:
---------------------------------------------------------------------------
StaleElementReferenceException Traceback (most recent call last)
<ipython-input-7-052f5032275d> in <module>
12 for i in range(1,40,2):
13 link = driver.find_element_by_xpath("(//div[@class='listing-search-searchdetails-component'])[{0}]".format(i))
---> 14 link.click()
15 try:
16 detalhes = driver.find_element_by_id('details')
~\anaconda3\lib\site-packages\selenium\webdriver\remote\webelement.py in click(self)
78 def click(self):
79 """Clicks the element."""
---> 80 self._execute(Command.CLICK_ELEMENT)
81
82 def submit(self):
~\anaconda3\lib\site-packages\selenium\webdriver\remote\webelement.py in _execute(self, command, params)
631 params = {}
632 params['id'] = self._id
--> 633 return self._parent.execute(command, params)
634
635 def find_element(self, by=By.ID, value=None):
~\anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in execute(self, driver_command, params)
319 response = self.command_executor.execute(driver_command, params)
320 if response:
--> 321 self.error_handler.check_response(response)
322 response['value'] = self._unwrap_value(
323 response.get('value', None))
~\anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py in check_response(self, response)
240 alert_text = value['alert'].get('text')
241 raise exception_class(message, screen, stacktrace, alert_text)
--> 242 raise exception_class(message, screen, stacktrace)
243
244 def _value_or_default(self, obj, key, default):
StaleElementReferenceException: Message: stale element reference: element is not attached to the page document
(Session info: chrome=90.0.4430.72)
报错中所说的失效(stale)元素指的是哪个元素?
我发布了一个改进版本,不过我不能说对它完全满意。我至少尝试了另外三种方法,但如果不执行 JavaScript,就无法单击“下一页”(Next)按钮。我把尝试过的这些方法以注释的形式保留在代码中,因为我想让你看到它们。
import time
import requests
import pandas
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import json
# Intermediate version from the answer: same scraping flow as the question,
# but each listing is waited for before it is clicked, the detail view is
# closed through its close icon instead of navigating back, and the "Next"
# link is clicked through JavaScript.
driver = webdriver.Chrome(executable_path='/snap/bin/chromium.chromedriver')
driver.get(
    'https://www.remax.pt/comprar?searchQueryState={%22regionName%22:%22%22,%22businessType%22:1,%22listingClass%22:1,%22page%22:1,%22sort%22:{%22fieldToSort%22:%22ContractDate%22,%22order%22:1},%22mapIsOpen%22:false}')
driver.maximize_window()
driver.implicitly_wait(5)
wait = WebDriverWait(driver, 15)

# Presumably the "decline" button of the cookie banner (judging by its id).
cookies = driver.find_element_by_id('rcc-decline-button')
cookies.click()

# One [details, price, title, address] row per visited listing.
element_list = []

for j in range(1, 2569):
    try:
        for i in range(1, 40, 2):
            # Block until the i-th listing card can be clicked, then look it up.
            wait.until(EC.element_to_be_clickable((By.XPATH, "(//div[@class='listing-search-searchdetails-component'])[{0}]".format(i))))
            link = driver.find_element_by_xpath(
                "(//div[@class='listing-search-searchdetails-component'])[{0}]".format(i))
            link.click()
            try:
                detalhes = driver.find_element_by_id('details')
                preco = driver.find_element_by_id('listing-price')
                tipo = driver.find_element_by_id('listing-title')
                freguesia = driver.find_element_by_xpath('//h5[@class="listing-address"]')
                imoveis = [detalhes.text, preco.text, tipo.text, freguesia.text]
                element_list.append(imoveis)
            finally:
                # Always close the detail view, even if a field was missing.
                driver.find_element_by_css_selector(".modal-close-icon").click()
    finally:
        # Runs whether or not the listing loop failed: scroll to the bottom of
        # the page and move on to the next results page.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        next_btn = driver.find_element_by_xpath("//a[@class='page-link'][.//span[.='Next']]")
        # The commented-out lines below are alternatives that were tried and
        # deliberately left in for reference; only the JavaScript click at the
        # end is active.
        # next_btn.send_keys(Keys.PAGE_DOWN)
        # driver.execute_script("arguments[0].scrollIntoView();", next_btn)
        wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@class='page-link'][.//span[.='Next']]/span")))
        # actions = ActionChains(driver)
        # actions.move_to_element(next_btn)
        # actions.click().perform()
        driver.execute_script("arguments[0].click();", next_btn)
另外,请注意,我对你的部分代码做了内部修改,使其更加稳定(添加了一些定位器)。目前,它已经能够单击“下一页”按钮。
你还需要进一步完善它。我的意思是,翻页之后需要重新获取所有房源条目并逐个遍历。为此,必须等待下一页完全加载。这一点我目前还没有答案,需要更多的时间。
下面这个很好的问题讨论了 Selenium 点击与 JS 点击之间的区别:WebDriver click() 与 JavaScript click()
你的问题还没有被完全解答,代码只是变得更稳定了。你原来对“下一页”的点击几乎不起作用。
更新
在尝试了许多页面加载方法和其他方法几个小时后,我发现了真正的问题所在。for i in range(1,40,2)
是最大的问题。
您试图点击id为21的列表,但只有21个。所以,我把它改为for i in range(1, 20, 2)
,并在进入新页面后添加了一次等待,现在一切正常。我保留了调试用的输出代码,这样整个过程对你来说一目了然。抱歉,我没有更多时间去检查房源条目本身,但现在应该很容易处理了。
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
# Final version from the answer: walk the RE/MAX search results page by page,
# open every other listing card on each page, record a few text fields from
# its detail view, close the view, and then go to the next results page.

# XPath matching the listing cards on a results page.
LISTING_XPATH = "//div[@class='listing-search-searchdetails-component']"
# XPath matching the pagination link that contains a "Next" span.
NEXT_XPATH = "//a[@class='page-link'][.//span[.='Next']]"

driver = webdriver.Chrome(executable_path='/snap/bin/chromium.chromedriver')
driver.get(
    'https://www.remax.pt/comprar?searchQueryState={%22regionName%22:%22%22,%22businessType%22:1,%22listingClass%22:1,%22page%22:1,%22sort%22:{%22fieldToSort%22:%22ContractDate%22,%22order%22:1},%22mapIsOpen%22:false}')
driver.maximize_window()
driver.implicitly_wait(15)
wait = WebDriverWait(driver, 15)

# find_element(By.<strategy>, value) is used throughout instead of the
# find_element_by_* helpers, which newer Selenium releases no longer provide.
# Presumably the "decline" button of the cookie banner (judging by its id).
cookies = driver.find_element(By.ID, 'rcc-decline-button')
cookies.click()

# One [details, price, title, address] row per visited listing.
element_list = []

for j in range(1, 2569):
    try:
        print("Searching Page " + str(j))
        # Wait for the listing cards of the freshly loaded page before touching
        # any of them; this is the wait that was added for each new page.
        wait.until(EC.visibility_of_all_elements_located((By.XPATH, LISTING_XPATH)))
        # Only odd XPath positions 1, 3, ..., 19 are visited.
        for i in range(1, 20, 2):
            # wait.until returns the element once it is clickable, so no
            # second lookup of the same card is needed.
            link = wait.until(EC.element_to_be_clickable(
                (By.XPATH, "({0})[{1}]".format(LISTING_XPATH, i))))
            print("Listing number " + str(i))
            link.click()
            try:
                detalhes = driver.find_element(By.ID, 'details')
                preco = driver.find_element(By.ID, 'listing-price')
                tipo = driver.find_element(By.ID, 'listing-title')
                freguesia = driver.find_element(By.XPATH, '//h5[@class="listing-address"]')
                imoveis = [detalhes.text, preco.text, tipo.text, freguesia.text]
                element_list.append(imoveis)
            finally:
                # Always close the detail view, even if a field was missing.
                driver.find_element(By.CSS_SELECTOR, ".modal-close-icon").click()
    finally:
        # Runs whether or not the listing loop failed: scroll to the bottom of
        # the page and move on to the next results page.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        next_btn = driver.find_element(By.XPATH, NEXT_XPATH)
        wait.until(EC.element_to_be_clickable((By.XPATH, NEXT_XPATH + "/span")))
        # Clicked through JavaScript because the "Next" button could not be
        # clicked any other way.
        driver.execute_script("arguments[0].click();", next_btn)
附言:到这个时候你能完成的已经很好了。