使用 Python 抓取 "cwjobs" 网站的网页时不起作用


from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import csv
# Scrape the job listings from a cwjobs search results page and save them
# to job_listings.csv (one row per listing).

# Create a new instance of the Chrome driver
driver = webdriver.Chrome()

# try/finally guarantees the browser is closed even when the wait below
# times out or an element lookup fails.
try:
    # Navigate to the website
    url = "https://www.cwjobs.co.uk/jobs/cloud-security-engineer?searchOrigin=Resultlist_top-search"
    driver.get(url)

    # Wait (up to 10 seconds) for the job listings to become visible.
    # until() raises TimeoutException if nothing matching the selector
    # becomes visible in time.
    # NOTE(review): verify that ".jobsearch-SerpJobCard" matches the markup
    # actually served by this page; a selector that never matches will
    # time out regardless of how long the wait is.
    job_listings = WebDriverWait(driver, 10).until(
        EC.visibility_of_all_elements_located((By.CSS_SELECTOR, ".jobsearch-SerpJobCard"))
    )

    # Create a CSV file and write the header. An explicit encoding avoids
    # UnicodeEncodeError on platforms whose default encoding cannot
    # represent every character in the scraped text.
    with open('job_listings.csv', mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Title', 'Company', 'Location', 'Salary', 'Summary'])

        # Loop through the job listings and write the data to the CSV file
        for job in job_listings:
            title = job.find_element(By.CSS_SELECTOR, '.title a').text
            company = job.find_element(By.CSS_SELECTOR, '.company').text
            location = job.find_element(By.CSS_SELECTOR, '.location').text

            # Salary is optional. find_element() raises when nothing
            # matches, so use find_elements(), which returns an empty list.
            salary_elements = job.find_elements(By.CSS_SELECTOR, '.salaryText')
            salary = salary_elements[0].text if salary_elements else ''

            summary = job.find_element(By.CSS_SELECTOR, '.summary').text
            writer.writerow([title, company, location, salary, summary])
finally:
    # Close the browser
    driver.quit()

得到:

Traceback (most recent call last):
  File "D:\OneDrive\Documents\Python\test.py", line 16, in <module>
    job_listings = WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, ".jobsearch-SerpJobCard")))
  File "C:\Users\salma\AppData\Roaming\Python\Python37\site-packages\selenium\webdriver\support\wait.py", line 95, in until
    raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:

这是它创建的文件

输入图片描述

我建议修改第16行,将 WebDriverWait(driver, 10) 改为 WebDriverWait(driver, 20),把超时时间从10秒延长到20秒。如果这样仍然不够,可以继续增大超时时间,直到找到一个可行的值。