如何从网页中获取元素



我正在尝试从雅虎财经(Yahoo Finance)检索前 5 个股票代码(TTD、WST、TDG、ODFL、VMI),并使用 selenium 将它们放在列表中。

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains 
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# Open the Yahoo Finance gainers page, click the fourth column header twice
# (presumably toggling the sort order), then print the text of the first few
# elements carrying the 'Fw(600)' class as a list.

WAIT_SECONDS = 10  # fail fast: an hour-long wait only hides a wrong locator
TOP_N = 5  # number of entries to collect

driver = webdriver.Chrome()
driver.get('https://finance.yahoo.com/gainers')

# The id must not contain stray whitespace, otherwise the XPath never matches.
header_locator = (
    By.XPATH,
    '//*[@id="scr-res-table"]/div[1]/table/thead/tr/th[4]',
)
for _ in range(2):
    # Re-locate the header before every click instead of reusing a reference.
    WebDriverWait(driver, WAIT_SECONDS).until(
        EC.element_to_be_clickable(header_locator)
    ).click()

# find_elements returns a list of elements, so .text has to be read from each
# element rather than from the list itself.
# NOTE(review): 'Fw(600)' contains parentheses, which a plain class-name
# locator is expected to reject; matching the class token through an
# attribute selector avoids that -- confirm against the live page.
links = driver.find_elements(By.CSS_SELECTOR, '[class~="Fw(600)"]')
symbols = [link.text for link in links[:TOP_N]]
print(symbols)

这是一个非常棘手的问题。如果你点击change链接,不确定点击多少次才能获得你想要的元素,因为这些信息并不总是在第一次点击时出现。

试试下面的代码,希望这能有所帮助。

from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# Ticker symbols whose table rows should be printed.
WANTED_SYMBOLS = {'TTD', 'WST', 'TDG', 'ODFL', 'VMI'}
# Upper bound on header clicks so the script cannot spin forever when the
# expected ticker never shows up in the table.
MAX_ATTEMPTS = 20

driver = webdriver.Chrome()
try:
    driver.get('https://finance.yahoo.com/gainers')
    driver.maximize_window()

    # Dismiss the cookie consent dialog; if it never appears, carry on.
    try:
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//button[text()='I agree']"))
        ).click()
    except TimeoutException:
        pass  # no consent dialog was shown, nothing to dismiss

    driver.execute_script("window.scrollTo(0, 250)")
    element = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable(
            (
                By.XPATH,
                '//*[@id="scr-res-table"]/div[1]/table/thead/tr/th[4]'
                '/span[text()="Change"]',
            )
        )
    )
    # Click through JavaScript rather than a native WebElement click.
    driver.execute_script("arguments[0].click();", element)

    # Keep clicking the "Change" header until the 'TTD' link is present,
    # then print every row whose first cell is one of the wanted symbols.
    for _ in range(MAX_ATTEMPTS):
        try:
            print("running...")
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.XPATH, "//a[text()='TTD']"))
            )
            tablerows = WebDriverWait(driver, 10).until(
                EC.visibility_of_all_elements_located(
                    (By.XPATH, "//div[@id='fin-scr-res-table']//table[1]/tbody//tr")
                )
            )
            for row in tablerows:
                if row.find_element(By.XPATH, "./td[1]").text in WANTED_SYMBOLS:
                    # Read each cell's text once; every .text is a driver call.
                    cell_texts = (
                        td.text for td in row.find_elements(By.XPATH, ".//td")
                    )
                    coldata = [text for text in cell_texts if text != '']
                    print(coldata)
            break
        except (
            TimeoutException,
            NoSuchElementException,
            StaleElementReferenceException,
        ):
            # Only lookup failures trigger a retry; anything else (including
            # Ctrl-C) propagates instead of being silently swallowed.
            print("exception block")
            driver.execute_script("arguments[0].click();", element)
    else:
        print("gave up after %d attempts" % MAX_ATTEMPTS)
finally:
    # Always shut the browser and driver process down, even on failure.
    driver.quit()

运行后,控制台会打印出类似下面的内容。您可以删除不需要的 print 语句。

running...
exception block
running...
exception block
running...
['WST', 'West Pharmaceutical Services, Inc.', '187.61', '+17.49', '+10.28%', '431,280', '509,179', '13.854B', '58.45']
['TTD', 'The Trade Desk, Inc.', '260.30', '+16.11', '+6.60%', '2.135M', '2.115M', '11.971B', '114.55']
['TDG', 'TransDigm Group Incorporated', '318.01', '+13.75', '+4.52%', '262,441', '847,228', '17.073B', '24.77']
['ODFL', 'Old Dominion Freight Line, Inc.', '138.97', '+10.72', '+8.36%', '765,680', '939,080', '16.607B', '27.21']
['VMI', 'Valmont Industries, Inc.', '115.11', '+9.64', '+9.14%', '143,588', '138,449', '2.478B', '16.30']

更新

只从表中获取前五条记录。

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

TOP_N = 5  # number of leading table rows to print

driver = webdriver.Chrome()
try:
    driver.get('https://finance.yahoo.com/gainers')
    driver.maximize_window()

    # Dismiss the cookie consent dialog; if it never appears, carry on.
    try:
        WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//button[text()='I agree']"))
        ).click()
    except TimeoutException:
        pass  # no consent dialog was shown, nothing to dismiss

    # Wait until the result table's body rows are visible.
    tablerows = WebDriverWait(driver, 10).until(
        EC.visibility_of_all_elements_located(
            (By.XPATH, "//div[@id='fin-scr-res-table']//table[1]/tbody//tr")
        )
    )

    # Print the non-empty cell texts of the first TOP_N rows, one list per row.
    for row in tablerows[:TOP_N]:
        # Read each cell's text once; every .text is a driver call.
        cell_texts = (td.text for td in row.find_elements(By.XPATH, ".//td"))
        coldata = [text for text in cell_texts if text != '']
        print(coldata)
finally:
    # Always shut the browser and driver process down, even on failure.
    driver.quit()

输出

['RLLCF', 'Rolls-Royce Holdings plc', '0.0125', '+0.0050', '+66.67%', '38.796M', '3.637M', '272.509B', 'N/A']
['IMMU', 'Immunomedics, Inc.', '27.00', '+5.01', '+22.78%', '20.966M', '3.661M', '5.776B', 'N/A']
['PBI-PB', 'Pitney Bowes Inc. NT 43', '15.20', '+2.33', '+18.12%', '116,008', 'N/A', '2.833B', '11.53']
['KZMYY', 'KAZ Minerals PLC', '2.7500', '+0.3552', '+14.83%', '34,267', '44,605', '2.555B', '4.70']
['FWONK', 'Formula One Group', '29.39', '+3.59', '+13.91%', '3.22M', '2.083M', '6.742B', 'N/A']

相关内容

  • 没有找到相关文章

最新更新