Bs4在尝试获取下一个url时失败

这是我的代码

def parser():
    """Scrape quotes.toscrape.com and store quotes that are not saved yet.

    Every pass downloads one page and walks its quote elements by position.
    A quote whose text is not stored yet is saved together with its author,
    who is created the first time the name is seen. When the per-page counter
    has reached five at the start of an iteration, the loop flag is cleared
    and the rest of the page is skipped. After each page the "next" link is
    appended to the URL.
    """
    keep_going = True
    url = 'https://quotes.toscrape.com'
    while keep_going:
        page = requests.get(url)
        soup = BeautifulSoup(page.text, 'html.parser')
        quote_tags = soup.find_all('span', {'class': 'text'})
        saved = 0  # new quotes stored from the current page
        for index, quote_tag in enumerate(quote_tags):
            if saved >= 5:
                keep_going = False
                break
            text = quote_tag.string
            if Quote.objects.filter(quote=text).exists():
                # Already stored; nothing to do for this quote.
                continue
            # Authors are matched to quotes purely by their position on the page.
            author_name = soup.find_all('small', {'class': 'author'})[index].string
            if Author.objects.filter(name=author_name).exists():
                owner = Author.objects.get(name=author_name)
            else:
                owner = Author.objects.create(name=author_name)
            Quote.objects.create(quote=text, author_id=owner.id)
            saved += 1

        # NOTE: find() returns None when the page has no "next" item, so this
        # line raises AttributeError ('NoneType' object has no attribute 'a')
        # on the last page. `+=` also appends every new suffix to a URL that
        # already carries the previous one.
        url += soup.find('li', {'class': 'next'}).a['href']

我需要获取下一页，但运行时出现了这个异常：AttributeError: 'NoneType' object has no attribute 'a'（"NoneType"对象没有属性"a"）。

该如何解决这个问题？另外，我的代码是否还有可以优化的地方？谢谢。

到达最后一页时，将没有Next按钮，因此您需要在尝试访问下一页的href之前检查退出条件。一种可能是在当前的最后一行之前添加以下行:

# find() yields None when the page has no "next" pager item, i.e. on the last page.
next_page = soup.find('li', {'class': 'next'})
if next_page is None:
    # Clearing the flag only prevents another pass of the loop; the href lookup
    # that follows in this pass must still be skipped, so `return` is the safer exit.
    flag = False  # or return

或者直接使用 return 结束函数。注意：仅把 flag 设为 False 并不会跳过本次循环中后面的那一行，访问 next_page.a 时仍会抛出同样的异常，因此直接 return 更稳妥。

当然，您还需要修改最后一行，让它使用这个变量，并确保不会把每一页的后缀不断追加到 url 上。例如，可以只在发起请求时再拼接后缀：

def parser():
    """Fetch the quote pages one after another until no "next" link is left.

    The base URL never changes; only the relative path of the page to fetch
    is tracked, and it is joined to the base when the request is made.
    """
    flag = True
    base_url = 'https://quotes.toscrape.com'
    next_path = ''  # empty for the first page
    while flag:
        response = requests.get(base_url + next_path)
        soup = BeautifulSoup(response.text, 'html.parser')
        # other code

        pager_item = soup.find('li', {'class': 'next'})
        if pager_item is None:
            # Last page reached: there is no "next" item to follow.
            return
        # Replace (rather than extend) the path so suffixes never pile up.
        next_path = pager_item.a['href']

相关内容

最新更新

热门标签：