我正在尝试自动从NOAA网站提取数据(我之前使用了requests库,但似乎有一个错误,所以我改用Selenium来自动提取报告)。
from selenium import webdriver
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Launch Chrome and load the NOAA search page.
driver = webdriver.Chrome()
driver.get('https://www.ncdc.noaa.gov/cdo-web/search')
# Select type of data: locate the dropdown first, then wrap it in Select.
dataset_dropdown = driver.find_element(By.CSS_SELECTOR, '#selectedDataset')
t = Select(dataset_dropdown)
t.select_by_visible_text('Daily Summaries')
基本的下拉列表(数据类型,见上面的代码)和输入字段(搜索项)非常简单,没有问题。我最头疼的是日期范围选择器。我可以用下面这行代码打开它并进入日历:
# Click the date-range container to open the calendar.
date_range_box = driver.find_element(By.CSS_SELECTOR, '#dateRangeContainer')
date_range_box.click()
我无论如何都无法打开它的年份或月份下拉列表。我尝试过Select、ActionChains和其他一些方法,比如:
# The statements below are separate attempts that did not work; they are kept
# verbatim as a record of what was tried.

# Attempt 1: locate the start-date container by its full CSS path.
# The returned element is discarded, so nothing is done with it.
driver.find_element(By.CSS_SELECTOR, '#dateRangeContainer > div > div > div.noaa-datepicker-start-container.center.clearfix')
# Attempt 2: wrap a <div> in Select.
# NOTE(review): the selector ends in "div", and Select only accepts <select>
# elements, so this presumably raises UnexpectedTagNameException - confirm.
# NOTE(review): the "dp..." id carries a different numeric suffix in attempt 4,
# so it looks auto-generated and probably changes between page loads - confirm.
Select(driver.find_element(By.CSS_SELECTOR, '#dp1662812794185 > div > div'))
# Attempt 3: wait up to 5 seconds for the year dropdown to become clickable.
# NOTE(review): "#select.ui-datepicker-year" matches an element whose *id* is
# "select"; the tag selector "select.ui-datepicker-year" was presumably intended.
WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#select.ui-datepicker-year")))
# Attempt 4: find the year <select> under another "dp..." id, then use
# ActionChains to move to it, click it and type the year.
start_field = driver.find_element(By.CSS_SELECTOR, '#dp1662493859959 > div > div > div > select.ui-datepicker-year')
ActionChains(driver).move_to_element(start_field).click().send_keys('2020').perform()
我也在谷歌上搜索了很多,但没有发现任何有效的方法。通常情况下,它会抛出NoSuchElementException。实际上,我已经把检查窗格中的每一个元素都逐行复制出来尝试,想看看是否有哪个能起作用(结果都没有)。以下是我参考过的其他一些资料:
- 在带有python的selenium网络驱动程序中使用带显式等待的link_text时出现TimeOutException
- 使用Python Selenium选择DropDown值
- https://selenium-python.readthedocs.io/locating-elements.html#locating-elements
- UnexpectedTagNameException:消息:Select仅适用于<select>元素,不适用于<li>——使用Selenium从下拉列表中选择li元素时出错
- https://www.swtestacademy.com/datepicker-using-selenium/
- https://www.selenium.dev/selenium/docs/api/py/webdriver_support/selenium.webdriver.support.expected_conditions.html
- 如何使用python selenium从日历中选择特定日期
Selenium是否仍在后台页面上寻找选择器,而没有切换到弹出窗口?它似乎不是iframe(我认为),但如果是这样的话,我该如何指示Selenium去查看弹出窗口?
修订后的代码(感谢Barry在下面给出的解决方案):
from selenium import webdriver
from selenium.common.exceptions import NoSuchShadowRootException, NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select, WebDriverWait
# Configure Chrome, then start it through an explicit chromedriver service.
chrome_options = Options()
for chrome_flag in ('--no-sandbox', 'disable-notifications', 'window-size=1280,720'):
    chrome_options.add_argument(chrome_flag)
webdriver_service = Service('C:/Program Files (x86)/Google/chromedriver.exe')
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
actions = ActionChains(driver)
wait = WebDriverWait(driver, 10)  # explicit waits give up after 10 seconds
driver.get('https://www.ncdc.noaa.gov/cdo-web/search')

# Search parameters; the dates are written as YYYY-MM-DD.
start_date = '2020-01-01'
end_date = '2020-12-31'
station = 'USW00014739'

# Select type of data
dataset_element = driver.find_element(By.CSS_SELECTOR, '#selectedDataset')
t = Select(dataset_element)
t.select_by_visible_text('Daily Summaries')
# NOTE(review): the same dropdown is selected a second time by index right
# below; presumably index 3 is also 'Daily Summaries' - confirm, otherwise it
# overrides the choice made above.
dataset_is_clickable = EC.element_to_be_clickable((By.CSS_SELECTOR, "select[id='selectedDataset']"))
dataset = Select(wait.until(dataset_is_clickable))
dataset.select_by_index(3)

# Select date range: wait for the range input to be clickable and click it.
daterange_is_clickable = EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[class="noaa-daterange-input"]'))
daterange = wait.until(daterange_is_clickable)
daterange.click()

# Two-digit month number -> abbreviated month name used as the visible text
# when choosing a month in the picker.
months = {
    '01': 'Jan',
    '02': 'Feb',
    '03': 'Mar',
    '04': 'Apr',
    '05': 'May',
    '06': 'Jun',
    '07': 'Jul',
    '08': 'Aug',
    '09': 'Sep',
    '10': 'Oct',
    '11': 'Nov',
    '12': 'Dec',
}
def select_date(calendar, date):
    """Choose ``date`` in the start or end calendar of the date-range picker.

    Args:
        calendar: ``'start'`` targets the start-date calendar; any other
            value targets the end-date calendar.
        date: Date string laid out as ``YYYY-MM-DD``. It is sliced by
            position, so the zero padding of month and day matters.

    Relies on the module-level ``wait`` (WebDriverWait) and ``months``
    (month number -> abbreviated name) objects.
    """
    container = '.noaa-datepicker-start-container' if calendar == 'start' else '.noaa-datepicker-end-container'
    # Day links are matched by their unpadded number ("5", not "05").
    day_select = str(int(date[8:]))
    parent = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, container)))
    # Each control is looked up immediately before it is used.
    year = Select(parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectYear"]'))
    year.select_by_visible_text(date[:4])
    month = Select(parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectMonth"]'))
    month.select_by_visible_text(months.get(date[5:7]))
    # The leading "." keeps the XPath relative to `parent`. A bare "//a" is
    # evaluated from the document root even when called on an element, so it
    # could match a day link that belongs to the other calendar.
    day = parent.find_element(By.XPATH, f'.//a[text() = "{day_select}"]')
    day.click()
# Decide which calendar gets filled in first.
# NOTE(review): only the day-of-month parts (characters 8 onward) are compared
# here, not the full dates - confirm that this ordering rule is intended.
end_day_is_larger = int(end_date[8:]) > int(start_date[8:])
if end_day_is_larger:
    selection_order = (('end', end_date), ('start', start_date))
else:
    selection_order = (('start', start_date), ('end', end_date))
for which_calendar, date_value in selection_order:
    select_date(which_calendar, date_value)

# Apply the chosen range, type the station id as the search term, submit the
# search, and close the browser window.
apply_button = driver.find_element(By.CSS_SELECTOR, '#noaa-daterange-form > button.noaa-daterange-btn.noaa-daterange-applybtn')
apply_button.click()
search_box = driver.find_element(By.CSS_SELECTOR, '#selectedSearchString')
search_box.send_keys(station)
submit_button = driver.find_element(By.CSS_SELECTOR, '#searchSubmit')
submit_button.click()
driver.close()
这是在该页面上选择日期的一种方法(我只选择了开始日期,您可以仿照这段代码处理结束日期):
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchShadowRootException, NoSuchElementException
# Configure Chrome, then start it through an explicit chromedriver service.
chrome_options = Options()
for chrome_flag in ("--no-sandbox", 'disable-notifications', "window-size=1280,720"):
    chrome_options.add_argument(chrome_flag)
# Path to where you saved the chromedriver binary.
webdriver_service = Service("chromedriver/chromedriver")
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
actions = ActionChains(browser)
wait = WebDriverWait(browser, 20)  # explicit waits give up after 20 seconds
# Load the search page.
url = 'https://www.ncdc.noaa.gov/cdo-web/search'
browser.get(url)

# Choose the dataset by position in the dropdown, once it is clickable.
dataset = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "select[id='selectedDataset']"))))
dataset.select_by_index(3)

# Click the date-range input to open the picker.
daterange = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'input[class="noaa-daterange-input"]')))
daterange.click()

# Every lookup below is scoped to the start-date calendar container.
start_date_parent = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ".noaa-datepicker-start-container")))
start_year = Select(start_date_parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectYear"]'))
start_year.select_by_visible_text('2009')
print('selected 2009')
start_month = Select(start_date_parent.find_element(By.CSS_SELECTOR, 'select[data-handler="selectMonth"]'))
start_month.select_by_visible_text('Jul')
print('selected July')
# The leading "." keeps the XPath relative to `start_date_parent`. A bare
# "//a" is evaluated from the document root even when called on an element,
# so it could match a day link outside the start calendar.
start_day = start_date_parent.find_element(By.XPATH, './/a[text() = "13" ]')
start_day.click()
print('selected the 13th')
这将选择2009-07-13作为开始日期,并在终端中打印:
selected 2009
selected July
selected the 13th
您现在也应该能够编写结束日期的代码,在"搜索"/"搜索术语"中选择/输入信息,然后单击"应用"。
Selenium文档:https://www.selenium.dev/documentation/