所以问题很简单。我有一段需要在无头(headless)模式下运行的代码。该程序在非无头模式下(即 Selenium 自动打开浏览器窗口时)运行完美,但是当我启用无头模式时,它甚至不会启动。
import requests
from bs4 import BeautifulSoup
import csv
from selenium import webdriver
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
# Scrape the header cells of the ownership chart table from Fidelity's
# eResearch page for AAPL, logging in first if the site redirects to its
# login form.
from selenium.common.exceptions import TimeoutException

# Location of the chromedriver binary; it must match the installed Chrome.
# NOTE(review): the path separators were missing in the original text and are
# restored here assuming a standard Windows layout -- confirm the real path.
chromepath = r"C:\Users\hp\Desktop\webScrape\chromedriver\chromedriver.exe"

options = Options()
# Uncomment the next line to run Chrome without a visible window.
#options.add_argument("--headless")
options.add_argument('--disable-gpu')
options.add_argument('--log-level=3')
options.add_argument('--lang=en')

# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(executable_path=chromepath, options=options)
try:
    url = "https://eresearch.fidelity.com/eresearch/evaluate/fundamentals/ownership.jhtml?stockspage=ownership&symbols=AAPL"
    driver.get(url)
    print("driver got url")

    # If the site redirected to this exact login URL, sign in and then
    # request the target page again.
    login_url = "https://login.fidelity.com/ftgw/Fidelity/RtlCust/Login/Init/df.chf.ra/trial?AuthRedUrl=https://oltx.fidelity.com/ftgw/webxpress/AuthorizeMember&AuthOrigUrl=https://snapshot.fidelity.com/fidresearch/gotoBL/snapshot/landing.jhtml#/dividends?symbol=AAPL"
    if driver.current_url == login_url:
        username = driver.find_element(By.ID, "userId")
        password = driver.find_element(By.ID, "password")
        username.send_keys("xxxx")
        password.send_keys("xxxx")
        driver.find_element(By.XPATH, '//*[@id="Login"]/ol/li[4]/button/b').click()
        driver.get(url)

    # Wait (up to 60 s) for the tab link to become clickable, then click it.
    tab_link = WebDriverWait(driver, 60).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="tab1"]/a'))
    )
    tab_link.click()
    print("clicked")

    # Give the page up to 60 s to render the target container. This stays
    # best-effort: on timeout we report it and parse whatever has loaded.
    try:
        WebDriverWait(driver, 60).until(
            EC.presence_of_element_located(
                (By.XPATH, '/html/body/table/tbody/tr/td[4]/div[5]')
            )
        )
    except TimeoutException:
        print("timed out waiting for the page content; parsing what is available")

    soup = BeautifulSoup(driver.page_source, "html.parser")
    print("bs got the site")
    requests.packages.urllib3.disable_warnings()

    # Per the author's notes: table 1 is in the 'left-side' div with class
    # 'chart-table'; table 2 is in the 'right-side' div with class
    # 'institutional-table'. Only table 1 is read here.
    tabbed_boxes = soup.find_all('div', attrs={'class': 'tabbed-box'})
    left_sides = (
        tabbed_boxes[0].find_all('div', attrs={'class': 'left-side'})
        if tabbed_boxes else []
    )
    chart_tables = (
        left_sides[0].find_all('table', attrs={'class': 'chart-table'})
        if left_sides else []
    )

    if not chart_tables:
        # Any missing level of the expected markup ends up here instead of
        # raising IndexError/NameError further down.
        print("no table div here!")
    else:
        # Print every non-empty header cell of the chart table.
        for row in chart_tables[0].find_all('tr'):
            for cell in row.find_all('th'):
                if cell.text:
                    print(cell.text)
finally:
    # quit() ends the whole WebDriver session (browser and chromedriver
    # process), even when an error occurred above.
    driver.quit()
print("done ^_^")
因此,当无头选项被注释时,它可以完美运行,但是当它没有被注释时,程序甚至不会启动。我在控制台上唯一能看到的是:
driver = webdriver.Chrome(executable_path=chromepath, chrome_options=options)
DevTools listening on ws://127.0.0.1:62897/devtools/browser/df3dccb4-4b97-4b06-8ca3-545d64ca2807
程序甚至始终执行不到第一条打印语句,即:
print("driver got url")
有人可以帮忙吗?
解决方案:
定义驱动程序时,我将关键字参数 chrome_options 更改为 options,并下载了正确的浏览器驱动程序(与我的浏览器版本匹配)。
详细说明:
我复制了(第一部分)代码并遇到了类似的问题。做了一些更改后,它可以(以无头模式)运行了。下面是可以运行的代码的第一部分(已修改):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import os, platform
# Start headless Chrome and load two URLs in turn, printing a progress line
# before and after each step so a hang can be pinned to a specific call.

# Directory containing this script; the chromedriver binary is expected to
# sit next to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
if platform.system() == 'Windows':
    # Join with a real path separator, then normalise backslashes to forward
    # slashes.
    chromepath = os.path.join(BASE_DIR, 'chromedriver.exe').replace("\\", "/")
else:
    # NOTE(review): the original only showed the Windows case; an extensionless
    # 'chromedriver' binary is assumed on other platforms -- confirm.
    chromepath = os.path.join(BASE_DIR, 'chromedriver')

options = Options()
options.add_argument('--headless')
options.add_argument('--disable-gpu')
options.add_argument('--log-level=3')
options.add_argument('--lang=en')

print("define driver")
driver = webdriver.Chrome(executable_path=chromepath, options=options)

print("define url")
url1 = "https://eresearch.fidelity.com/eresearch/evaluate/fundamentals/ownership.jhtml?stockspage=ownership&symbols=AAPL"
print("define url2")
url2 = "https://youtube.com"

# Load the control URL first, then the target URL.
print("driver: start getting url2")
driver.get(url2)
print("driver got url2")

print("driver: start getting url1")
driver.get(url1)
print("driver got url1")
注意:我假设chromedriver可执行文件与您的.py文件位于同一目录中。
附加问题:
我还添加了一些打印语句来检查代码执行到了哪一部分。这样做之后我发现,驱动程序似乎一直无法获取您的 URL(等待了 5 分钟,它仍在运行)。当使用不同的 URL 时,代码确实可以运行,因此造成这种(差异)情况的原因可能在于您尝试访问的网站。
当以非无头模式运行代码时,Web 驱动程序访问您的 URL 没有问题。您可能需要为此另外发帖提问。