import requests
from bs4 import BeautifulSoup
import csv
from itertools import zip_longest
# Scrape the first Indeed result page for "web development" jobs, then visit
# each posting to look up its salary, and export everything to a CSV file.
job_title = []
company_name = []
location_name = []
job_skill = []
links = []
salary = []


def _text_at(tags, index):
    """Return the stripped text of ``tags[index]``, or None if it is absent."""
    return tags[index].text.strip() if index < len(tags) else None


result = requests.get("https://www.indeed.com/jobs?q=web+development&l=&from=searchOnHP")
source = result.content
soup = BeautifulSoup(source, "lxml")

# The attribute filter is a dict mapping attribute name to value (a set
# literal such as {"class", "..."} is not an attribute filter).
job_titles = soup.find_all("a", {"class": "jcs-JobTitle"})
company_names = soup.find_all("span", {"class": "companyName"})
location_names = soup.find_all("div", {"class": "companyLocation"})
job_skills = soup.find_all("div", {"class": "job-snippet"})

for i, title_tag in enumerate(job_titles):
    job_title.append(title_tag.text.strip())
    links.append("https://www.indeed.com" + title_tag.attrs["href"])
    # The parallel result lists may be shorter than job_titles; record None
    # for a missing entry instead of raising IndexError.
    company_name.append(_text_at(company_names, i))
    location_name.append(_text_at(location_names, i))
    job_skill.append(_text_at(job_skills, i))

for link in links:
    result = requests.get(link)
    source = result.content
    soup = BeautifulSoup(source, "lxml")
    salaries = soup.find("span", {"class": "icl-u-xs-mr--xs attribute_snippet"})
    # find() returns None when the posting has no salary element; append None
    # so the salary column stays aligned with the other columns.
    salary.append(salaries.text if salaries is not None else None)

my_file = [job_title, company_name, location_name, job_skill, salary]
exported = zip_longest(*my_file)

# newline="" is what the csv module expects for files it writes; an explicit
# encoding keeps non-ASCII job text from failing on some platforms.
with open("/Users/Rich/Desktop/testing/indeed.csv", "w", newline="", encoding="utf-8") as myfile:
    writer = csv.writer(myfile)
    writer.writerow(["Job titles", "Company names", "Location names", "Job skills", "salaries"])
    writer.writerows(exported)
我正在抓取这个网站,以获取职位名称、公司名称、工作地点和职位技能,并通过抓取每个职位的详情页来获取薪资。当我直接打印查找结果时一切正常:salaries = soup.find("span", {"class": "icl-u-xs-mr--xs attribute_snippet"}),但当我尝试只追加其中的文本时,出现了这个错误:AttributeError: 'NoneType' object has no attribute 'text'。请帮我解决这个问题,我将不胜感激。
主要问题在于并非每个职位都有薪资信息,因此 find() 可能返回 None,你必须处理这种情况,例如使用条件表达式(if condition);
此外,你也可以完全不用这些并行的列表。
salary.text if salary else None
import csv

import requests
from bs4 import BeautifulSoup

# Column order of the exported CSV; fixed up front so the header can be
# written even when the page yields no result cards.
fieldnames = ['title', 'company', 'location', 'skills', 'salary']


def _text_or_none(tag):
    """Return the stripped text of *tag*, or None when find() found nothing."""
    return tag.get_text(strip=True) if tag is not None else None


result = requests.get("https://www.indeed.com/jobs?q=web+development&l=&from=searchOnHP")
source = result.content
soup = BeautifulSoup(source, "lxml")

# One dict per result card, so every field comes from the same posting.
data = []
for e in soup.select('ul.jobsearch-ResultsList .slider_item'):
    salary = e.find("div", {"class": "salary-snippet-container"})
    data.append({
        'title': _text_or_none(e.find("a", {"class": "jcs-JobTitle"})),
        'company': _text_or_none(e.find("span", {"class": "companyName"})),
        'location': _text_or_none(e.find("div", {"class": "companyLocation"})),
        'skills': _text_or_none(e.find("div", {"class": "job-snippet"})),
        # Not every card shows a salary; compare against None explicitly
        # rather than relying on the truthiness of a Tag.
        'salary': salary.text if salary is not None else None,
    })

# newline='' is what the csv module expects for files it writes.
with open('indeed.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)
import csv

import requests
from bs4 import BeautifulSoup

# Column order of the exported CSV; declared explicitly instead of being read
# from the first row, which does not exist when nothing was scraped.
fieldnames = ['title', 'company', 'location', 'skills', 'salary']


def _text_or_none(tag):
    """Return the stripped text of *tag*, or None when the element is missing."""
    if tag is None:
        return None
    return tag.get_text(strip=True)


result = requests.get("https://www.indeed.com/jobs?q=web+development&l=&from=searchOnHP")
source = result.content
soup = BeautifulSoup(source, "lxml")

# Collect one record per result card so the fields of a posting stay together.
data = []
for e in soup.select('ul.jobsearch-ResultsList .slider_item'):
    salary = e.find("div", {"class": "salary-snippet-container"})
    data.append({
        'title': _text_or_none(e.find("a", {"class": "jcs-JobTitle"})),
        'company': _text_or_none(e.find("span", {"class": "companyName"})),
        'location': _text_or_none(e.find("div", {"class": "companyLocation"})),
        'skills': _text_or_none(e.find("div", {"class": "job-snippet"})),
        # The salary element is optional on a card; store None when absent.
        'salary': salary.text if salary is not None else None,
    })

# Open with newline='' as the csv module expects for output files.
with open('indeed.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)