尽管使用了正确的 CSS 选择器/XPATH,但我无法找到元素,并且我正在抓取的 html 中没有 iframe。如何获取元素?



下面是我的完整代码,供参考。除倒数第二行之外,其余部分都能正常运行,而倒数第二行正是我要问的问题所在。代码如下:

from selenium import webdriver
import os
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import xlsxwriter
from datetime import datetime
import time
# Flat Selenium script: searches the FINRA/Morningstar bond center for "STWD",
# reads a few columns of the first result row, opens that bond's detail view
# and tries to read the first cell of its trade-history table.
# NOTE(review): the indentation of the `for` and `if` bodies below was lost in
# this paste; the statements after them presumably belong inside those blocks.

# Date string; it is assigned here but not referenced again in this snippet.
trade_date_lim = "4/10/2021"

# Start Chrome through a local chromedriver binary and open the bond center page.
chrome_driver = os.path.abspath('C:/Users/ross/Desktop/chromedriver.exe')
browser = webdriver.Chrome(chrome_driver)
browser.get('https://finra-markets.morningstar.com/BondCenter/Default.jsp')
# Click the third entry of the tab menu once it is clickable (5 s timeout).
WebDriverWait(browser, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#TabContainer > div > div.rtq-tab-wrap > div.rtq-tab-menus-wrap > ul > li:nth-child(3) > a > span'))).click()
# Type the search term "STWD" into the #firscreener-cusip field.
WebDriverWait(browser, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#firscreener-cusip'))).send_keys("STWD")
# Click the second input of the advanced-search button row (presumably the submit button -- confirm).
WebDriverWait(browser, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#ms-finra-advanced-search-form > div.ms-finra-advanced-search-btn > input:nth-child(2)"))).click()
# Click the input inside #ms-agreement (presumably accepts a usage agreement -- confirm).
WebDriverWait(browser, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#ms-agreement > input"))).click()
# Click the 7th header cell of the results grid twice, pausing 2 s after each
# click (presumably toggles the sort order of that column -- confirm).
WebDriverWait(browser, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#ms-finra-search-results > div > div.qs-resultData > div.qs-resultData-body > div.rtq-grid.rtq-grid-auto-h > div.rtq-grid-hd > div > div:nth-child(7) > div"))).click()
time.sleep(2)
WebDriverWait(browser, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#ms-finra-search-results > div > div.qs-resultData > div.qs-resultData-body > div.rtq-grid.rtq-grid-auto-h > div.rtq-grid-hd > div > div:nth-child(7) > div"))).click()
time.sleep(2)
# Text of the whole scrollable grid body; not used again in this snippet.
whole_chart = WebDriverWait(browser, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#ms-finra-search-results > div > div.qs-resultData > div.qs-resultData-body > div.rtq-grid.rtq-grid-auto-h > div.rtq-scrollpanel > div.rtq-grid-scroll"))).text

# Count the direct <div> children of the grid container; count_divs is not
# used by the loop below, which is hard-coded to a single iteration.
parent = browser.find_element_by_xpath('//*[@id="ms-finra-search-results"]/div/div[3]/div[1]/div[1]/div[2]/div[2]/div')
count_divs = len(parent.find_elements_by_xpath("./div"))

# range(1) means only row_num == 0 (the first grid row) is processed.
for row_num in range(1):
# Read the text of columns 3, 7, 8, 9 and 11 of the current row.
#gets values that I'm looking for
symbol = WebDriverWait(browser, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#ms-finra-search-results > div > div.qs-resultData > div.qs-resultData-body > div.rtq-grid.rtq-grid-auto-h > div.rtq-scrollpanel > div.rtq-grid-scroll > div > div:nth-child(" + str(row_num + 1) + ") > div:nth-child(3)"))).text
maturity = WebDriverWait(browser, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#ms-finra-search-results > div > div.qs-resultData > div.qs-resultData-body > div.rtq-grid.rtq-grid-auto-h > div.rtq-scrollpanel > div.rtq-grid-scroll > div > div:nth-child(" + str(row_num + 1) + ") > div:nth-child(7)"))).text
moody_rating = WebDriverWait(browser, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#ms-finra-search-results > div > div.qs-resultData > div.qs-resultData-body > div.rtq-grid.rtq-grid-auto-h > div.rtq-scrollpanel > div.rtq-grid-scroll > div > div:nth-child(" + str(row_num + 1) + ") > div:nth-child(8)"))).text
sandp_rating = WebDriverWait(browser, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#ms-finra-search-results > div > div.qs-resultData > div.qs-resultData-body > div.rtq-grid.rtq-grid-auto-h > div.rtq-scrollpanel > div.rtq-grid-scroll > div > div:nth-child(" + str(row_num + 1) + ") > div:nth-child(9)"))).text
bond_yield = WebDriverWait(browser, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#ms-finra-search-results > div > div.qs-resultData > div.qs-resultData-body > div.rtq-grid.rtq-grid-auto-h > div.rtq-scrollpanel > div.rtq-grid-scroll > div > div:nth-child(" + str(row_num + 1) + ") > div:nth-child(11)"))).text
#looks to see if all values are non-empty and if moody rating and sandp rating are not equal to 'WR' and 'NR'
if symbol.strip() and maturity.strip() and moody_rating.strip() and sandp_rating.strip() and bond_yield.strip() and moody_rating != "WR" and sandp_rating != "NR":
# Click the link in the 2nd column of the row to open the bond detail view.
WebDriverWait(browser, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#ms-finra-search-results > div > div.qs-resultData > div.qs-resultData-body > div.rtq-grid.rtq-grid-auto-h > div.rtq-scrollpanel > div.rtq-grid-scroll > div > div:nth-child(" + str(row_num + 1) + ") > div:nth-child(2) > div > a"))).click()
# Switch into the iframe with id "ms-bond-detail-iframe" and click the trade-history link inside it.
WebDriverWait(browser, 5).until(EC.frame_to_be_available_and_switch_to_it((By.ID, "ms-bond-detail-iframe")))
WebDriverWait(browser, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR,"#tradeHistory_link"))).click()
# Leaves the iframe and returns to the top-level document of the CURRENT window.
# NOTE(review): this does not change window focus. If the click above opens a
# new tab/window, the lookup below still runs against the original window and
# would time out -- confirm; browser.switch_to.window(...) would be needed then.
browser.switch_to.default_content()
time.sleep(10)
#bond information has everything we need. Now we check to see the last time this bond was actually traded
# Wait up to 20 s for the first cell of the first table row under #ms-glossary to become visible.
last_trade_date = WebDriverWait(browser, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, '#ms-glossary > div > table > tbody > tr:nth-child(1) > td:nth-child(1) > div')))
# NOTE(review): this prints the WebElement object itself; use last_trade_date.text to print the date string.
print(last_trade_date)

引发的错误是超时异常。

我尝试过用 CSS 选择器和 XPath 两种方式查找,并且确信每个路径的格式都是正确的。我在 HTML 中没有找到 iframe,所以应该不必考虑 iframe 的问题。我加入了 time.sleep(10) 这一等待,只是为了确保搜索之后网页已经完全加载。为了更保险,我还加上了 visibility_of_element_located 的显式等待。我也尝试过使用 presence_of_element_located 和 element_to_be_clickable。我快疯了,有人能帮忙吗?

罗斯

这里有两个问题:

首先,改变这个:

browser.switch_to.default_content()

对此:

browser.switch_to.window(browser.window_handles[-1])

switch_to.default_content() 只用于从 iframe 切回当前窗口的主文档,而这里的情况并非如此:点击链接后打开的是一个新的选项卡。browser.switch_to.window(browser.window_handles[-1]) 会把焦点切换到最后打开的选项卡。

第二,你的最后一行应该是:

print(last_trade_date.text)

代替:

print(last_trade_date)

打印:

1/15/2021

顺便说一句,我认为time.sleep(10)没有必要,我把它完全去掉了,它运行得很好

我已经帮你调试了最后一个 if 块。问题在于,执行下面这次点击时会打开一个新的选项卡:

WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR,"#tradeHistory_link"))).click()

因此您需要将web驱动程序的焦点更改为新选项卡:

driver.switch_to.window(new_window)

代码:

# Proceed only when every scraped value is non-empty and the Moody's / S&P
# ratings are not equal to 'WR' / 'NR' respectively.
if symbol.strip() and maturity.strip() and moody_rating.strip() and sandp_rating.strip() and bond_yield.strip() and moody_rating != "WR" and sandp_rating != "NR":
    # Click the link in the 2nd column of the current row to open the bond detail view.
    WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#ms-finra-search-results > div > div.qs-resultData > div.qs-resultData-body > div.rtq-grid.rtq-grid-auto-h > div.rtq-scrollpanel > div.rtq-grid-scroll > div > div:nth-child(" + str(row_num + 1) + ") > div:nth-child(2) > div > a"))).click()
    # The trade-history link lives inside the "ms-bond-detail-iframe" frame.
    WebDriverWait(driver, 5).until(EC.frame_to_be_available_and_switch_to_it((By.ID, "ms-bond-detail-iframe")))
    # Remember the current window handle so the newly opened one can be told apart.
    windows_before = driver.current_window_handle
    WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#tradeHistory_link"))).click()
    # The click opens a second window/tab; wait for it to exist, then move
    # the driver's focus to it (switching windows also leaves the iframe).
    WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
    windows_after = driver.window_handles
    new_window = [x for x in windows_after if x != windows_before][0]
    driver.switch_to.window(new_window)
    # Bond information has everything we need. Now check the last time this
    # bond was actually traded. The explicit wait below already blocks until
    # the cell is visible, so no fixed sleep is needed.
    last_trade_date = WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#ms-glossary > div > table > tbody > tr:nth-child(1) > td:nth-child(1) > div")))
    print(last_trade_date.text)

输出:

1/15/2021
Process finished with exit code 0

我还建议不要为每个操作创建WebDriverWait对象。相反,你可以这样做:

wait = WebDriverWait(driver, 30)

之后在所有需要等待的地方都复用这个 wait 对象,如下所示:

wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "#ms-glossary > div > table > tbody > tr:nth-child(1) > td:nth-child(1) > div")))

通过这种方式可以优化代码:不必为每次操作重复创建 WebDriverWait 对象,从而降低空间复杂度。

最新更新