如何通过 BS4 从 <a> 标签 (html) 获取 href?



大家好!我无法在 BS4 中从标签获取 href。这是我的代码:

import requests
from bs4 import BeautifulSoup
# Address of the listing page that parse() downloads.
URL = 'https://auto.ria.com/newauto/marka-jeep/'
# Headers sent with every request made by get_html(): a browser-like
# user-agent string and a wildcard accept value.
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)                        AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'accept':'*/*'}
def get_html(url, params=None):
    """Request ``url`` with the module-level HEADERS and return the response.

    Args:
        url: Address to fetch.
        params: Optional query-string parameters forwarded to ``requests.get``.

    Returns:
        The response object itself (not its text), so the caller can check
        ``status_code`` before reading ``text``.
    """
    r = requests.get(url, headers=HEADERS, params=params)
    return r
def get_content(html):
    """Parse ``html`` and print a list of ``{'title', 'link'}`` dicts.

    Args:
        html: Markup of the listing page as a string.

    Every element with class ``proposition_area`` contributes one entry.
    """
    soup = BeautifulSoup(html, 'html.parser')
    items = soup.find_all(class_='proposition_area')
    cars = []
    for item in items:
        cars.append({
            'title': item.find('h3', class_='proposition_name').get_text(strip=True),
            # This is the line the question is about: find() returns None
            # here, which produces the AttributeError quoted further down.
            'link': item.find('a', class_='proposition_link').getAttribute("href")
        })
    print(cars)
def parse():
    """Download URL and hand its markup to get_content().

    Prints ``'error'`` instead when the response status is not 200.
    """
    html = get_html(URL)
    if html.status_code == 200:
        get_content(html.text)
    else:
        print('error')


parse()

输出:

'link': item.find('a', class_='proposition_link').getAttribute("href")

错误:

AttributeError: 'NoneType' object has no attribute 'getAttribute'

您有两个问题:

  1. 您先执行 items = soup.find_all(class_='proposition_area'),然后在这些结果上循环,并在每个元素内搜索类 proposition_link:

    # Build one dict per matched element; both lookups are scoped to ``item``.
    for item in items:
        cars.append({
            'title': item.find('h3', class_='proposition_name').get_text(strip=True),
            'link': item.find('a', class_='proposition_link').get("href")
        })
    

    item.find('a', class_='proposition_link') 要找的元素并不在您循环遍历的 proposition_area 元素之中(因此 find() 返回 None),所以应改为搜索:

    items = soup.find_all(class_='proposition')
    

  2. 不要使用 .getAttribute() 方法(BeautifulSoup 的 Tag 没有该方法),而应使用 .get()
  • 这里有一个完整的工作示例:

    import requests
    from bs4 import BeautifulSoup
    # Address of the listing page that parse() downloads.
    URL = 'https://auto.ria.com/newauto/marka-jeep/'
    # Headers sent with every request made by get_html(). The user-agent keeps
    # a single space between the platform token and "AppleWebKit".
    HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'accept':'*/*'}
    def get_html(url, params=None):
        """Request ``url`` with the module-level HEADERS and return the response.

        Args:
            url: Address to fetch.
            params: Optional query-string parameters forwarded to ``requests.get``.

        Returns:
            The response object itself (not its text), so the caller can check
            ``status_code`` before reading ``text``.
        """
        r = requests.get(url, headers=HEADERS, params=params)
        return r
    def get_content(html):
        """Parse ``html`` and print a list of ``{'title', 'link'}`` dicts.

        Args:
            html: Markup of the listing page as a string.

        Every element with class ``proposition`` contributes one entry; the
        link is the raw ``href`` value of its ``proposition_link`` anchor.
        """
        soup = BeautifulSoup(html, 'html.parser')
        items = soup.find_all(class_='proposition')
        cars = []
        for item in items:
            cars.append({
                'title': item.find('h3', class_='proposition_name').get_text(strip=True),
                # Tag.get() reads an attribute value from the matched <a>.
                'link': item.find('a', class_='proposition_link').get("href")
            })
        print(cars)
    def parse():
        """Download URL and hand its markup to get_content().

        Prints ``'error'`` instead when the response status is not 200.
        """
        html = get_html(URL)
        if html.status_code == 200:
            get_content(html.text)
        else:
            print('error')


    parse()
    

    输出:

    [{'title': 'Jeep Gladiator 2021', 'link': '/newauto/auto-jeep-gladiator-1862595.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1859603.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1863650.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1842428.html'}, {'title': 'Jeep Renegade 2021', 'link': '/newauto/auto-jeep-renegade-1838198.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1853604.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1838190.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1811781.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1857232.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1860925.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1836192.html'}, {'title': 'Jeep Renegade 2021', 'link': '/newauto/auto-jeep-renegade-1857781.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1838297.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1860927.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1860588.html'}, {'title': 'Jeep Gladiator 2021', 'link': '/newauto/auto-jeep-gladiator-1856629.html'}, {'title': 'Jeep Renegade 2021', 'link': '/newauto/auto-jeep-renegade-1857246.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1857805.html'}, {'title': 'Jeep Grand Cherokee 2021', 'link': '/newauto/auto-jeep-grand-cherokee-1829808.html'}, {'title': 'Jeep Wrangler 2021', 'link': '/newauto/auto-jeep-wrangler-1862123.html'}]
    

    相关内容

    • 没有找到相关文章