Selenium 报错:名称未定义(NameError)- Python


import re
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
# Question's script: open a page in Chrome via Selenium and list the e-mail
# addresses found in it. Kept exactly as posted, including the bug that causes
# the NameError reported below.
# NOTE(review): this path is a non-raw string containing backslashes ("\U"
# begins a unicode escape in Python 3) - presumably the original used a raw
# string or escaped backslashes; confirm.
ser = Service("C:\Users\czoca\PycharmProjects\pythonProject4\chromedriver.exe")
op = webdriver.ChromeOptions()
# Start a Chrome session using the service and the (unmodified) options above.
s = webdriver.Chrome(service=ser, options=op)
# Load the page that is to be searched for e-mail addresses.
s.get('https://autosalpa.fi/fi/')
# Pattern intended to match e-mail addresses.
# NOTE(review): the escape backslashes appear to be missing here - TODO confirm.
EMAIL_REGEX = r'''(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[x01-x08x0bx0cx0e-x1fx21x23-x5bx5d-x7f]|\[x01-x09x0bx0cx0e-x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])).){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[x01-x08x0bx0cx0e-x1fx21-x5ax53-x7f]|\[x01-x09x0bx0cx0e-x7f])+)])'''
list_of_emails = []
# BUG: `page_source` is never assigned anywhere above, so the next line raises
# "NameError: name 'page_source' is not defined".
for re_match in re.finditer(EMAIL_REGEX, page_source):
list_of_emails.append(re_match.group())
# Print the collected matches as a 1-based numbered list.
for i, email in enumerate(list_of_emails):
print(f'{i+1}:{email}')

这是我的代码和错误:

File "C:\Users\czoca\PycharmProjects\imagesscraping\main.py", line 14, in <module>
for re_match in re.finditer(EMAIL_REGEX, page_source):
^^^^^^^^^^^

NameError: name 'page_source' is not defined

我不知道这里出了什么问题。

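下面的代码可以正常运行。报错的原因是原代码从未给 page_source 赋值:必须先通过 driver.page_source 取得页面源码,才能把它传给 re.finditer。另外,在 Selenium 4 中不必手动下载 WebDriver,可以用 webdriver_manager 自动安装与所用浏览器对应的驱动。在 Python 中使用 Selenium 4 的必要步骤见此处的说明。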

import re
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
# Start Chrome; webdriver_manager installs a chromedriver and returns its
# path, so no hard-coded driver location is needed.
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))

# RFC 5322-style e-mail pattern. The escapes (\. \xNN \[ \] \\) are required:
# without them the pattern has unbalanced parentheses and `re` refuses to
# compile it. It only matches lowercase addresses, as in the original.
EMAIL_REGEX = r'''(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])'''

try:
    driver.get('https://autosalpa.fi/fi/')
    # The page HTML must be read from the driver explicitly; `page_source`
    # is not a name that exists on its own.
    page_source = driver.page_source
finally:
    # Always close the browser, even if loading the page fails.
    driver.quit()

# Collect every match in document order, then print a 1-based numbered list.
list_of_emails = [re_match.group() for re_match in re.finditer(EMAIL_REGEX, page_source)]
for i, email in enumerate(list_of_emails, start=1):
    print(f'{i}:{email}')


最新更新