您好,我试图从以下页面中删除一些信息:http://verify.sos.ga.gov/verification/
我的代码如下:
import sys
reload(sys)
sys.setdefaultencoding('utf8')
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
import time
import csv
url = 'http://verify.sos.ga.gov/verification/'
def init_Selenium():
global driver
driver = webdriver.Chrome("/Users/rodrigopeniche/Downloads/chromedriver")
driver.get(url)
def select_profession():
select = Select(driver.find_element_by_name('t_web_lookup__profession_name'))
options = select.options
for index in range(1, len(options) - 1):
select = Select(driver.find_element_by_name('t_web_lookup__profession_name'))
select.select_by_index(index)
select_license_type()
def select_license_type():
select = Select(driver.find_element_by_name('t_web_lookup__license_type_name'))
options = select.options
for index in range(1, len(options) - 1):
select = Select(driver.find_element_by_name('t_web_lookup__license_type_name'))
select.select_by_index(index)
search_button = driver.find_element_by_id('sch_button')
driver.execute_script('arguments[0].click();', search_button)
scrap_licenses_results()
def scrap_licenses_results():
table_rows = driver.find_elements_by_tag_name('tr')
for index, row in enumerate(table_rows):
if index < 9:
continue
else:
attributes = row.find_elements_by_xpath('td')
try:
name = attributes[0].text
license_number = attributes[1].text
profession = attributes[2].text
license_type = attributes[3].text
status = attributes[4].text
address = attributes[5].text
license_details_page_link = attributes[0].find_element_by_id('datagrid_results__ctl3_name').get_attribute('href')
driver.get(license_details_page_link)
data_rows = driver.find_elements_by_class_name('rdata')
issued_date = data_rows[len(data_rows) - 3].text
expiration_date = data_rows[len(data_rows) - 2].text
last_renewal_day = data_rows[len(data_rows) - 1].text
print name, license_number, profession, license_type, status, address, issued_date, expiration_date, last_renewal_day
driver.back()
except:
pass
init_Selenium()
select_profession()
当我执行脚本时,它适用于第一次迭代,但在第二个迭代中失败。 attributes = row.find_elements_by_xpath('td')
行中的scrap_licenss_results((函数中引起错误的确切位置。
任何帮助将不胜感激
StaleElementReferenceException归因于循环迭代之前收集的行列表。最初,您创建了所有行的列表,名为table_rows。
table_rows = driver.find_elements_by_tag_name('tr')
现在,在第一次迭代期间,您的第一行元素是新鲜的,可以由驾驶员找到。在第一次迭代结束时,您正在执行driver.back()
,页面更改/刷新HTML DOM。现在所有先前收集的参考文献现在都丢失了。现在,您的table_rows列表中的所有行都是陈旧的。因此,在第二次迭代中,您正面临这样的例外。
您必须在循环中移动查找行操作,以便每次在目标应用程序上找到新的参考。psuedocode应执行这样的操作。
total_rows = driver.find_elements_by_tag_name('tr').length()
for i in total_rows
driver.find_element_by_xpath('//tr[i]')
.. further code..