你好!我无法从 BS4 的标签中获取 href。这是我的代码:
import requests
from bs4 import BeautifulSoup
# Page requested by parse().
URL = 'https://auto.ria.com/newauto/marka-jeep/'

# Headers attached to every request issued through get_html().
HEADERS = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
    'accept': '*/*',
}
def get_html(url, params=None):
    """Send a GET request for *url* and hand back the response object.

    The module-level HEADERS are attached to the request, and *params* is
    forwarded to requests.get unchanged.
    """
    return requests.get(url, params=params, headers=HEADERS)
def get_content(html):
    """Collect a title/link dict per ``proposition_area`` element and print them.

    *html* is the page markup as a string. Nothing is returned; the list
    of dicts is only printed.
    """
    document = BeautifulSoup(html, 'html.parser')
    cars = []
    for item in document.find_all(class_='proposition_area'):
        heading = item.find('h3', class_='proposition_name')
        cars.append({
            'title': heading.get_text(strip=True),
            # NOTE: this is the call reported to fail with
            # "AttributeError: 'NoneType' object has no attribute
            # 'getAttribute'" -- find() found no matching <a> here.
            'link': item.find('a', class_='proposition_link').getAttribute("href")
        })
    print(cars)
def parse():
    """Download URL and extract its content, or print 'error' on a non-200 status."""
    page = get_html(URL)
    succeeded = page.status_code == 200
    if not succeeded:
        print('error')
    else:
        get_content(page.text)


# Run the scraper as soon as the script is executed.
parse()
输出:
'link': item.find('a', class_='proposition_link').getAttribute("href")
错误:
AttributeError: 'NoneType' object has no attribute 'getAttribute'
您有两个问题:
- 您正在执行 items = soup.find_all(class_='proposition_area'),然后在这些元素上循环,并在每个元素内搜索 item.find('a', class_='proposition_link')。但 proposition_link 并不在您所循环的 proposition_area 类的元素之中,所以 find() 返回了 None。因此,请改为:items = soup.find_all(class_='proposition')
- 不要使用 .getAttribute() 方法,而是使用 .get():
for item in items: cars.append({ 'title': item.find('h3', class_='proposition_name').get_text(strip=True), 'link': item.find('a', class_='proposition_link').get("href") })
这里有一个完整的工作示例(其输出列在代码之后):
import requests
from bs4 import BeautifulSoup
# Page requested by parse().
URL = 'https://auto.ria.com/newauto/marka-jeep/'

# Headers attached to every request issued through get_html().
# The user-agent product tokens must be separated by single spaces,
# including the one between the platform comment "(...; x64)" and
# "AppleWebKit".
HEADERS = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.124 Safari/537.36'
    ),
    'accept': '*/*',
}
def get_html(url, params=None, timeout=10):
    """Fetch *url* with the module-level HEADERS and return the response.

    Args:
        url: Address to request.
        params: Optional query-string parameters forwarded to requests.get.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout unless one is passed, so without it a
            stalled server would block the script indefinitely.

    Returns:
        The response object from requests.get; the status code is not
        checked here.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    return requests.get(url, headers=HEADERS, params=params, timeout=timeout)
def get_content(html):
    """Parse a listing page and print the title and link of each car.

    Every element with class ``proposition`` is treated as one car card.
    Cards that lack either the ``h3.proposition_name`` heading or the
    ``a.proposition_link`` anchor are skipped rather than raising
    AttributeError, because find() returns None when nothing matches.

    Args:
        html: Markup of the listing page as a string.

    Returns:
        None. The collected list of ``{'title': ..., 'link': ...}`` dicts
        is printed.
    """
    soup = BeautifulSoup(html, 'html.parser')
    cars = []
    for item in soup.find_all(class_='proposition'):
        name_tag = item.find('h3', class_='proposition_name')
        link_tag = item.find('a', class_='proposition_link')
        if name_tag is None or link_tag is None:
            # Incomplete card: nothing useful to report for it.
            continue
        cars.append({
            'title': name_tag.get_text(strip=True),
            # Tag.get() yields None instead of raising when the anchor
            # has no href attribute.
            'link': link_tag.get("href"),
        })
    print(cars)
def parse():
    """Fetch URL and pass its markup to get_content().

    Prints 'error' instead when the response status is not 200.
    """
    response = get_html(URL)
    if response.status_code != 200:
        print('error')
        return
    get_content(response.text)


# Run the scraper as soon as the script is executed.
parse()
[{'title': 'Jeep Gladiator 2021', 'link': '/newauto/auto-jeep-gladiator-1862595.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1859603.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1863650.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1842428.html'}, {'title': 'Jeep Renegade 2021', 'link': '/newauto/auto-jeep-renegade-1838198.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1853604.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1838190.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1811781.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1857232.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1860925.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1836192.html'}, {'title': 'Jeep Renegade 2021', 'link': '/newauto/auto-jeep-renegade-1857781.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1838297.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1860927.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1860588.html'}, {'title': 'Jeep Gladiator 2021', 'link': '/newauto/auto-jeep-gladiator-1856629.html'}, {'title': 'Jeep Renegade 2021', 'link': '/newauto/auto-jeep-renegade-1857246.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1857805.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1829808.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1862123.html'}]