Selenium 在 Chrome 中可以正常工作,但在无头 Chrome 中不行



当我使用普通 Chrome 时,下面这段代码可以正常工作;但是,当我切换到无头 Chrome 时,会抛出 selenium.common.exceptions.StaleElementReferenceException: Message: stale element reference: element is not attached to the page document,并且不返回任何内容。知道这是为什么吗?

代码:

class IOLSpider(scrapy.Spider):
    """Scrapy spider for iolproperty.co.za Western Cape listings.

    Listing pages are handled through Scrapy responses, while each property
    page is additionally loaded in a headless Chrome instance so that the
    "Show email address" link can be clicked and the revealed address read.
    """

    name = 'iol'
    start_urls = [
        'http://www.iolproperty.co.za/all_properties/For_Sale/Western_Cape',
        'http://www.iolproperty.co.za/all_properties/Rentals/Western_Cape',
    ]

    def __init__(self, *args, **kwargs):
        """Start the headless Chrome driver used by ``parse_property``.

        Positional and keyword arguments are forwarded to ``scrapy.Spider``
        so that spider arguments supplied by Scrapy keep working.
        """
        super().__init__(*args, **kwargs)
        # path to driver
        chrome_options = webdriver.ChromeOptions()
        chrome_options.headless = True
        # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
        self.driver = webdriver.Chrome('/path/chromedriver', options=chrome_options)

    def closed(self, reason):
        """Quit the browser when the spider closes so Chrome is not leaked."""
        self.driver.quit()

    def parse(self, response):
        ...  # body elided in the original post

    def parse_area(self, response):
        ...  # body elided in the original post

    def parse_property(self, response):
        """Open a property page in Chrome and read the agent's e-mail address.

        ``response.url`` is the link of a single property, like this one:
        https://www.iolproperty.co.za/view-property.jsp?PID=2000026825

        Raises:
            selenium.common.exceptions.TimeoutException: if the link or the
                revealed address does not appear within 10 seconds.
        """
        item = response.meta.get('item')
        self.driver.get(response.url)
        # Hide the 'footcont' element before clicking.
        # NOTE(review): presumably it overlaps the link - confirm on the page.
        self.driver.execute_script("document.getElementById('footcont').setAttribute('hidden', true)")
        wait = WebDriverWait(self.driver, 10)
        wait.until(EC.element_to_be_clickable((By.XPATH, '//a[text()="Show email address"]'))).click()
        # Wait for the mailto anchor instead of looking it up immediately:
        # an immediate lookup can grab a node that the page replaces right
        # after the click, which surfaces as StaleElementReferenceException.
        contact_email = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//span[@id="viewagmail" and @style="display: block;"]/a[starts-with(@href, "mailto")]'))).text
        # NOTE(review): the visible snippet ends here; storing contact_email
        # on item and yielding it is presumably done by code not shown.

您试图抓取的站点检测到您正在使用无头浏览器,因此没有正常响应。下面的做法过去对我很有效,但您可能需要根据自己的具体需求进行调整。

# Property page whose agent e-mail address is revealed by clicking a link.
url = "https://www.iolproperty.co.za/view-property.jsp?PID=2000026825"

# Headless Chrome with the automation switch and extension turned off.
options = Options()
for cli_flag in ('--no-sandbox', "--headless"):
    options.add_argument(cli_flag)
for option_name, option_value in {
    "excludeSwitches": ["enable-automation"],
    'useAutomationExtension': False,
}.items():
    options.add_experimental_option(option_name, option_value)

# User agent sent in place of the browser's default one.
desktop_user_agent = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/83.0.4103.53 Safari/537.36'
)

# Locators for the link to click and the anchor that holds the address.
show_email_link = (By.XPATH, '//a[text()="Show email address"]')
revealed_mailto = (
    By.XPATH,
    '//span[@id="viewagmail" and @style="display: block;"]/a[starts-with(@href, "mailto")]',
)

with webdriver.Chrome(options=options) as driver:
    # Override the user agent before the first navigation.
    driver.execute_cdp_cmd('Network.setUserAgentOverride', {"userAgent": desktop_user_agent})
    driver.get(url)
    driver.execute_script("document.getElementById('footcont').setAttribute('hidden', true)")
    wait = WebDriverWait(driver, 10)
    elem = wait.until(EC.element_to_be_clickable(show_email_link))
    elem.click()  # Could be chained onto the wait above, but isn't necessary.
    # Wait until the mailto anchor is present, then read its text.
    contact_email = wait.until(EC.presence_of_element_located(revealed_mailto)).text

最新更新