Python Selenium 无法执行循环



我正在编写一段代码,用于从网站上提取电影的原始标题。手动输入标题的测试版本运行良好,但当我尝试用循环依次搜索列表中的标题时,遇到了一些问题。完整代码如下:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
# Script from the question: for each title in films_test, open filmweb.pl,
# search for the title, open a result and print the info table and the
# original title ('tytuł oryg.').
# NOTE(review): indentation was lost in this listing; the nesting below is
# reconstructed from the statement order — confirm against the real script.
films_test = ["Downton Abbey",
              "Squid Game"]
chrome_options = Options()
# Presumably keeps the browser window open after the script exits — confirm.
chrome_options.add_experimental_option("detach", True)
# NOTE(review): `path` is never used in this listing, and the path separators
# appear to be missing from the string.
path = "C:Program Files (x86)chromedriver.exe"
driver = webdriver.Chrome(options=chrome_options,
                          service=Service(ChromeDriverManager().install()))
# NOTE(review): this argument is added after the driver has already been
# created from chrome_options, so it likely has no effect on the running
# browser — confirm.
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
for film in films_test:
    # Every iteration starts again from the home page.
    driver.get("https://www.filmweb.pl/")
    driver.maximize_window()
    try:
        # Accept the cookie notice.
        driver.find_element(By.ID, "didomi-notice-agree-button").click()
        # NOTE(review): this header click (presumably the search opener —
        # confirm) sits in the same try as the cookie click. If the cookie
        # button is not found, the line above raises and this click is
        # skipped, which would leave the search input below unavailable.
        driver.find_element(By.XPATH, "/html/body/div[4]/header/div[2]/div[2]").click()
        driver.implicitly_wait(5)
        print("cookies closed")
    except:
        # NOTE(review): a WebDriverWait that is constructed without calling
        # .until()/.until_not() does not wait for anything.
        WebDriverWait(driver, 3)
        print("no cookies popup")
        pass
    # Wait up to 3 s for the search input, located by an absolute XPath.
    # This is the statement the traceback in the question points at.
    search = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[7]/div/div/div[1]/div[1]/div[1]/form/div[2]/input')))
    search.send_keys(film)
    # Click an element inside the search panel (presumably the first
    # suggestion — confirm).
    driver.find_element(By.XPATH, "/html/body/div[7]/div/div/div[1]/div[1]/div[3]/div[1]/div[2]/a/div[1]/span").click()
    # No-op: no .until() call (see note above).
    WebDriverWait(driver, 5)
    try:
        # Close the ad overlay if it becomes clickable within 10 s.
        close_ad = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '/html/body/div[7]/span/span')))
        close_ad.click()
        print("ad closed")
    except:
        # No-op: no .until() call (see note above).
        WebDriverWait(driver, 3)
        print("ad closed automatically or didnt appear")
        pass
    # Collect header/value texts from the info section and pair them up.
    dictionary = {}
    list_keys = []
    list_values = []
    if driver.find_element(By.CLASS_NAME, 'filmCoverSection__type').get_attribute("textContent") == "Serial":
        print("title is a series")
        for element in driver.find_elements(By.CLASS_NAME, 'filmInfo__header'):
            key = element.get_attribute("textContent")
            list_keys.append(key)
            # Drop entries mentioning "Sezon" so they are not paired below.
            if "Sezon" in key:
                list_keys.remove(key)
                print("removed from key list")
            else:
                pass
        for element in driver.find_elements(By.CLASS_NAME, 'filmInfo__info'):
            value = element.get_attribute("textContent")
            list_values.append(value)
            # Same "Sezon" filtering for the value column.
            if "Sezon" in value:
                list_values.remove(value)
                print("removed from value list")
            else:
                pass
    else:
        print("title not a series")
        for element in driver.find_elements(By.CLASS_NAME, 'filmInfo__header'):
            key = element.get_attribute("textContent")
            list_keys.append(key)
        for element in driver.find_elements(By.CLASS_NAME, 'filmInfo__info'):
            value = element.get_attribute("textContent")
            list_values.append(value)
    # zip() pairs by position and stops at the shorter list.
    dictionary = dict(zip(list_keys, list_values))
    print(dictionary)
    # Raises KeyError if the page has no 'tytuł oryg.' header.
    org_title = dictionary['tytuł oryg.']
    print(org_title)

脚本提取列表中第一个元素的原始标题,但未能提取第二个元素的标题。这似乎是有问题的部分:

# Excerpt of the failing statements from the loop body (uses `driver` and `film`).
# Wait up to 3 s for the search input (absolute XPath), then type the title.
search = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[7]/div/div/div[1]/div[1]/div[1]/form/div[2]/input')))
search.send_keys(film)
# Click an element inside the search panel, located by absolute XPath.
driver.find_element(By.XPATH, "/html/body/div[7]/div/div/div[1]/div[1]/div[3]/div[1]/div[2]/a/div[1]/span").click()

因为它会返回以下错误信息:

Traceback (most recent call last):
  File "C:\Users\kuban\PycharmProjects\baza_danych_filmowych\filmweb_lista.py", line 36, in <module>
    search = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.XPATH, '/html/body/div[7]/div/div/div[1]/div[1]/div[1]/form/div[2]/input')))
  File "C:\Users\kuban\AppData\Local\Programs\Python\Python39\lib\site-packages\selenium\webdriver\support\wait.py", line 90, in until
    raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message: 
Stacktrace:
Backtrace:
Ordinal0 [0x00D75FD3+2187219]
Ordinal0 [0x00D0E6D1+1763025]
Ordinal0 [0x00C23E78+802424]
Ordinal0 [0x00C51C10+990224]
Ordinal0 [0x00C51EAB+990891]
Ordinal0 [0x00C7EC92+1174674]
Ordinal0 [0x00C6CBD4+1100756]
Ordinal0 [0x00C7CFC2+1167298]
Ordinal0 [0x00C6C9A6+1100198]
Ordinal0 [0x00C46F80+946048]
Ordinal0 [0x00C47E76+949878]
GetHandleVerifier [0x010190C2+2721218]
GetHandleVerifier [0x0100AAF0+2662384]
GetHandleVerifier [0x00E0137A+526458]
GetHandleVerifier [0x00E00416+522518]
Ordinal0 [0x00D14EAB+1789611]
Ordinal0 [0x00D197A8+1808296]
Ordinal0 [0x00D19895+1808533]
Ordinal0 [0x00D226C1+1844929]
BaseThreadInitThunk [0x76D46739+25]
RtlGetFullPathName_UEx [0x77D48FEF+1215]
RtlGetFullPathName_UEx [0x77D48FBD+1165]

有时,即使是在搜索列表中的第一个元素时,脚本也会恰好在这部分代码处随机停止。有人知道该怎么修复吗?

以下代码有效(设置是针对 Linux 的,但您可以根据自己的环境进行调整)。考虑到您使用的那些冗长的绝对 XPath,我不确定您自己的代码中的确切问题在哪里。您可以像下面这样修改这些 XPath。我怀疑这个问题是由于页面中的 HTML 发生了变化,所以您需要在每次搜索后重新初始化(重新定位)用于搜索的变量:

import time as t

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Search filmweb.pl for each title in films_test and open the first result.
# Setup is Linux-oriented; adjust the chromedriver path/options to your machine.
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")
webdriver_service = Service("chromedriver/chromedriver")  ## path to where you saved chromedriver binary
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)
url = 'https://www.filmweb.pl/'
films_test = ["Downton Abbey",
              "Squid Game"]
# One explicit wait (20 s timeout) reused for every lookup.
wait = WebDriverWait(browser, 20)

try:
    browser.get(url)
    try:
        wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@id='didomi-notice-agree-button']"))).click()
        print('cookies accepted')
    except WebDriverException:
        # Best effort: the notice may not be shown (timeout) or not be
        # clickable; either way carry on with the search.
        print('no cookies for you!')

    for film in films_test:
        # The page changes after each search, so the opener and the input are
        # located afresh at the start of every iteration instead of reusing
        # references from the previous page. Doing it here (rather than at the
        # end of the loop) also avoids a pointless re-open after the last film.
        search_opener = wait.until(EC.element_to_be_clickable((By.XPATH, "//button[@id='searchOpener']")))
        search_opener.click()
        t.sleep(2)  # give the search panel time to open
        search_input = wait.until(EC.element_to_be_clickable((By.XPATH, "//input[@class='form__input form__input--empty']")))
        search_input.send_keys(film)
        first_search_result = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='resultsSection']/div[@class='resultItem']")))
        first_search_result.click()
        print(f'clicked the first result for film {film}')
        t.sleep(5)  # let the result page load before the next search
    print('all done')
finally:
    # Always shut the browser down, even when a wait above times out, so no
    # Chrome/chromedriver process is left behind.
    browser.quit()

终端打印输出:

cookies accepted
clicked the first result for film Downton Abbey
clicked the first result for film Squid Game

相关内容

  • 没有找到相关文章

最新更新