尝试解析div,但我得到一个错误


import requests
from bs4 import BeautifulSoup
from texttable import Texttable
url = "https://realpython.github.io/fake-jobs/"
site = requests.get(url) #send a request to the site
table = Texttable() #create a table
table.set_chars(['-', '|', '+', '='])
table.header(['Titel','Company','Location'])
table.set_cols_dtype(['t','i','a'])
table.set_cols_align(["c", "c", "c"])
table.set_cols_valign(["m", "m", "m"])
table.set_cols_width([20,20,20])
table.set_deco(Texttable.BORDER|Texttable.HEADER |Texttable.HLINES| Texttable.VLINES)

with open('Shore.txt', 'w') as f: #create a file
pass
soup = BeautifulSoup(site.content, "html.parser")
results = soup.find(id="ResultsContainer")
job_elements = results.find_all("div", class_="card-content") #find all div with class "card-content"

for job_element in job_elements:
title_element = job_element.find("h2", class_="title") #get the different elements from divs with class "card-content"
company_element = job_element.find("h3", class_="company") #get the different elements from divs with class "card-content"
location_element = job_element.find("p", class_="location") #get the different elements from divs with class "card-content"
item_element = job_element.find("a", class_="card-footer-item") #get the link with divs from class "card-content"

item_site = requests.get(item_element["href"]) #send a request to the site from link
item_soup = BeautifulSoup(item_site.content, "html.parser")
results_site = item_soup.find(id="ResultsContainer") 
item_element_elements = results_site.find("div", class_="content")
item_element_element = item_element_elements.find("p", class_=False) #get the text without class

print(title_element.text.strip()) #get it all data received into the console
print(company_element.text.strip()) #get all data received into the console
print(location_element.text.strip()) #get all data received into the console
print(item_element_element.text.strip()) # get all data received into the console
table.add_row([title_element.text.strip(),company_element.text.strip(),location_element.text.strip()]) #add rows in corrects rows "add_rows"
with open('Shore.txt', 'w') as f: #enter all data received into a table file
f.write(table.draw())
f.write(str(len(job_elements)))
f.close
print(len(job_elements)) #get the number of elements with the class

错误:

line 38, in <module>
item_element_elements = results_site.find("div", class_="content")
AttributeError: 'NoneType' object has no attribute 'find'

尝试解析div:

item_element_elements = results_site.find("div", class_="content")
item_element_element = item_element_elements.find("p", class_=False)

但是我得到一个错误。Can't find the "find" attribute。我能够解析所有其他元素。我不知道如何解决这个问题。

尽量选择更具体的元素-这里的问题是您选择了第一个链接,而不是指向细节的链接:

item_element = job_element.select_one("a.card-footer-item[href*='fake-jobs/jobs']")

item_element = job_element.find_all("a", class_="card-footer-item")[-1]

您也可以对.find()执行此操作,但可能会检查css selectors

from bs4 import BeautifulSoup
import requests

url = "https://realpython.github.io/fake-jobs/"
soup = BeautifulSoup(requests.get(url).text)
for job_element in soup.select('#ResultsContainer .card-content')[:1]:
#...
item_element = job_element.select_one("a.card-footer-item[href*='fake-jobs/jobs']") #get the link with divs from class "card-content"
item_soup = BeautifulSoup(requests.get(item_element.get("href")).text)
item_element_element = item_soup.select_one("div.content p").text #get the text without class
print(item_element_element)

相关内容

最新更新