如何将webdriver_manager的路径修改为云功能环境下的自定义路径



我正在尝试在 Cloud Functions(云函数)上创建一个无头(headless)网页爬虫。我使用 Selenium 进行自动化,浏览器驱动程序由 webdriver_manager 提供。

你能告诉我如何根据虚拟环境更改 wdm 的 cachePath 设置吗?下面是我的代码和我得到的错误。

import os
import logging
# selenium 4
# The WDM_* / GH_TOKEN variables below are set before webdriver_manager is
# imported further down.
# NOTE(review): hard-coded token in source; presumably consumed by
# webdriver_manager for GitHub API requests -- confirm, and avoid committing
# real tokens.
os.environ['GH_TOKEN'] = "gkjkjhjkhjhkjhuihjhgjhg"
# Presumably silences webdriver_manager's own logging -- confirm against its docs.
os.environ['WDM_LOG'] = str(logging.NOTSET)
# NOTE(review): presumably makes webdriver_manager cache drivers under the
# project directory instead of the user's home directory -- confirm.
os.environ['WDM_LOCAL'] = '1'
# NOTE(review): presumably disables SSL verification for driver downloads -- confirm.
os.environ['WDM_SSL_VERIFY'] = '0'
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
def hello_world(request):
    """Responds to any HTTP request.

    Starts a headless Chrome session, and when the JSON body contains a
    ``url`` key loads a page and prints its title.

    Args:
        request (flask.Request): HTTP request object.
    Returns:
        The response text or any set of values that can be turned into a
        Response object using
        `make_response <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>`.
    """
    # instance of Options class allows
    # us to configure Headless Chrome
    options = Options()
    print("options")
    options.headless = True
    # NOTE(review): this is the statement reported in the traceback
    # (main.py -> install() -> save_file -> os.makedirs). The cache path is a
    # Windows-style relative path; presumably the deployed filesystem is not
    # writable at that location -- confirm which directories are writable in
    # the target runtime.
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager("2.26", cache_valid_range=1, path = r".\temp\Drivers").install()
    ), options=options)
    print("driver was initiated")


    # this parameter tells Chrome that
    # it should be run without UI (Headless)


    # initializing webdriver for Chrome with our options
    # driver = webdriver.Chrome(options=options)

    # driver = webdriver.Chrome(ChromeDriverManager(path = r"/temp/data").install())      

    request_json = request.get_json()

    if request_json and 'url' in request_json:
        # NOTE(review): `url` is read from the payload but never used; the
        # next line loads a fixed site instead.
        url = request_json['url']
        driver.get('https://www.geeksforgeeks.org')
        print(driver.title)
        driver.close()
        return f'Success!'
    else:
        # NOTE(review): the driver created above is not closed on this path.
        return f'Not run'

错误日志-

Traceback (most recent call last):
  File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 2525, in wsgi_app
    response = self.full_dispatch_request()
  File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 1822, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 1820, in full_dispatch_request
    rv = self.dispatch_request()
  File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 1796, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)
  File "/layers/google.python.pip/pip/lib/python3.10/site-packages/functions_framework/__init__.py", line 98, in view_func
    return function(request._get_current_object())
  File "/workspace/main.py", line 28, in hello_world
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager("2.26", cache_valid_range=1, path = r".\temp\Drivers").install()
  File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/chrome.py", line 39, in install
    driver_path = self._get_driver_path(self.driver)
  File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/core/manager.py", line 31, in _get_driver_path
    binary_path = self.driver_cache.save_file_to_cache(driver, file)
  File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/core/driver_cache.py", line 45, in save_file_to_cache
    archive = save_file(file, path)
  File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/core/utils.py", line 38, in save_file
    os.makedirs(directory, exist_ok=True)
  File "/layers/google.python.runtime/python/lib/python3.10/os.py", line 215, in makedirs
    makedirs(head, exist_ok=exist_ok)
  File "/layers/google.python.runtime/python/lib/python3.10/os.py", line 215, in makedirs
    makedirs(head, exist_ok=exist_ok)
  File "/layers/google.python.runtime/python/lib/python3.10/os.py", line 215, in makedirs
    makedirs(head, exist_ok=exist_ok)

我认为这个错误是由于 webdriver_manager 试图把驱动程序保存到某个固定(静态)的缓存路径引起的。我已经用下面的参数更改了路径设置:
path = r".\temp\Drivers"

如何正确地做?

我想明白了…

import os
import logging
# selenium 4
# Set before webdriver_manager is imported below; presumably silences its
# logging -- confirm against the webdriver_manager docs.
os.environ['WDM_LOG'] = str(logging.NOTSET)
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver import Chrome
def hello_world(request):
    """Scrape review texts from the page named in the request's JSON body.

    The payload is validated before any browser is started, so requests
    without a ``url`` key return immediately. When a URL is present, headless
    Chrome loads it, scrolls the review pane to trigger lazy loading, and the
    collected review texts are printed.

    Args:
        request (flask.Request): HTTP request object.
    Returns:
        ``'Success!'`` once the page has been scraped, or ``'Not run'`` when
        the request has no JSON body or the body has no ``url`` key.
    Raises:
        Any exception raised by Selenium or by parsing the review count
        propagates to the caller; the browser session is shut down first.
    """
    request_json = request.get_json()

    # Validate first: starting Chrome is pointless when there is nothing to scrape.
    if not request_json or 'url' not in request_json:
        return 'Not run'
    url = request_json['url']

    # this parameter tells Chrome that
    # it should be run without UI (Headless)
    opts = Options()
    opts.add_experimental_option("detach", True)
    opts.headless = True

    # initializing webdriver for Chrome with our options
    driver = webdriver.Chrome(
        service=ChromeService(ChromeDriverManager(cache_valid_range=1).install()),
        options=opts,
    )
    print("driver was initiated")

    try:
        driver.get(url)
        # to make sure content is fully loaded we wait after navigating
        time.sleep(3)

        # Find scroll layout
        scrollable_div = driver.find_element('xpath', '//*[@id="QA0Szd"]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]')

        # The first space-separated token of this element's text is parsed as
        # the total review count (thousands separators removed).
        total_reviews = int(driver.find_element('xpath', '//*[@id="QA0Szd"]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div[2]/div/div[2]/div[2]').text.split(' ')[0].replace(',', ''))
        time.sleep(3)
        print(total_reviews)

        # Scroll as many times as necessary to load all reviews, capped at 500
        # passes (worst case 500 * 1.5 s = 750 s of sleeping).
        # NOTE(review): that worst case may exceed the function's configured
        # timeout -- confirm and lower the cap if needed.
        for _ in range(min(total_reviews, 500)):
            driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div)
            time.sleep(1.5)

        response = BeautifulSoup(driver.page_source, 'html.parser')
        restaurant__reviews = [
            review.text for review in response.find_all('span', class_='wiI7pd')
        ]
        print(restaurant__reviews)
    finally:
        # quit() ends the whole session even when scraping raised part-way.
        driver.quit()

    return 'Success!'

相关内容

  • 没有找到相关文章

最新更新