BeautifulSoup属性错误



我正在尝试使用 BeautifulSoup 和 requests 库来抓取谷歌购物(Google Shopping)的搜索结果。这是我的代码,非常简单:

from bs4 import BeautifulSoup
import requests
import lxml
import json
def gshop(q):
    """Scrape one Google Shopping results page and return it as JSON text.

    Spaces in ``q`` are swapped for ``+`` and the result is spliced into a
    ``tbm=shop`` Google search URL, which is fetched with a browser-style
    User-Agent header. Every ``div.sh-dgr__content`` element in the response
    is treated as one listing.

    Returns:
        A JSON array string (2-space indent, non-ASCII characters kept as
        is) of objects with the keys Title, Price, Rating, Supplier, Link.

    Raises:
        AttributeError: a listing lacks one of the text elements, so
            ``find`` returned ``None`` and ``.text`` was read from it.
        TypeError: a listing lacks the link element, so ``None`` was
            subscripted with ``'href'``.
    """
    search_term = q.replace(' ', '+')
    browser_headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
    }
    url = f'https://www.google.com/search?q={search_term}&tbm=shop'

    page = requests.get(url, headers=browser_headers)
    soup = BeautifulSoup(page.text, 'lxml')

    listings = []
    for card in soup.findAll('div', class_='sh-dgr__content'):
        # Lookups are deliberately unguarded and run in this fixed order, so
        # the first missing element is the one that raises.
        name_tag = card.find('h4', class_='A2sOrd')
        name = name_tag.text
        cost_tag = card.find('span', class_='a8Pemb')
        cost = cost_tag.text
        seller_tag = card.find('div', class_='aULzUe IuHnof')
        seller = seller_tag.text
        anchor = card.find('a', class_='eaGTj mQaFGe shntl')
        # The href is appended to the Google origin to form the full link.
        link = 'https://google.com' + anchor['href']
        stars_tag = card.find('span', class_='Rsc7Yb')
        stars = stars_tag.text

        record = {}
        record["Title"] = name
        record["Price"] = cost
        record["Rating"] = stars
        record["Supplier"] = seller
        record["Link"] = link
        listings.append(record)

    return json.dumps(listings, indent=2, ensure_ascii=False)


print(gshop('toys'))

这引发了一个错误:

Traceback (most recent call last):
File "c:/Users/Maanav/Desktop/ValRal/main.py", line 45, in <module>
print(gshop('toys'))
File "c:/Users/Maanav/Desktop/ValRal/main.py", line 34, in gshop
rating = container.find('span', class_='Rsc7Yb').text
AttributeError: 'NoneType' object has no attribute 'text'

请查看谷歌购物搜索结果页面的源代码,以便更好地理解我的代码。问题出在哪里?

由 @simpleApp 在评论中解决:

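有时,谷歌购物列表中的商品可能没有评分,或者卖家可能没有填写供应商名称。此时 container.find(...) 找不到对应的元素,会返回 None,而对 None 访问 .text 就会抛出 AttributeError,导致程序中断。为了防止这种情况发生,我们必须对每个字段处理元素缺失的情况(例如使用异常处理),在缺失时把该字段设为 None。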

from bs4 import BeautifulSoup
import requests
import lxml
import json
def _text_or_none(tag):
    """Return ``tag.text``, or ``None`` when the lookup found no element."""
    return tag.text if tag is not None else None


def gshop(q, timeout=10):
    """Scrape one Google Shopping results page and return it as JSON text.

    The search URL is requested with ``tbm=shop`` and a browser-style
    User-Agent header. Every ``div.sh-dgr__content`` element in the response
    is treated as one listing. A field whose element is absent from a
    listing (for example a product without a rating) is reported as
    ``None`` instead of aborting the whole scrape.

    Args:
        q: Search phrase. It is passed through ``params`` so that spaces and
            reserved characters such as ``&`` or ``#`` are URL-encoded
            rather than corrupting the query string.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout the call could block indefinitely.

    Returns:
        A JSON array string (2-space indent, non-ASCII characters kept as
        is) of objects with the keys Title, Price, Rating, Supplier, Link.

    Raises:
        requests.exceptions.RequestException: the HTTP request failed or
            timed out.
    """
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
    }

    response = requests.get(
        'https://www.google.com/search',
        params={'q': q, 'tbm': 'shop'},
        headers=headers,
        timeout=timeout,
    ).text
    soup = BeautifulSoup(response, 'lxml')

    data = []
    for container in soup.find_all('div', class_='sh-dgr__content'):
        # Explicit None checks replace the bare ``except:`` clauses, which
        # also swallowed KeyboardInterrupt and genuine programming errors.
        anchor = container.find('a', class_='eaGTj mQaFGe shntl')
        href = anchor.get('href') if anchor is not None else None
        buy = 'https://google.com' + href if href is not None else None

        data.append({
            "Title": _text_or_none(container.find('h4', class_='A2sOrd')),
            "Price": _text_or_none(container.find('span', class_='a8Pemb')),
            "Rating": _text_or_none(container.find('span', class_='Rsc7Yb')),
            "Supplier": _text_or_none(
                container.find('div', class_='aULzUe IuHnof')),
            "Link": buy,
        })
    return json.dumps(data, indent=2, ensure_ascii=False)

相关内容

最新更新