这是我的代码
def parser():
    """Scrape quotes.toscrape.com and store quotes not yet in the database.

    Pages are fetched one after another by following the pager's "next"
    link.  On each page every quote is paired with its author by position;
    quotes whose text is already stored are skipped, and authors are
    created on demand.

    Crawling stops as soon as either
      * a single page has contributed ``max_new_per_page`` new quotes, or
      * the current page has no "next" link (the last page was reached).

    Raises:
        requests.RequestException: if a page cannot be fetched (network
            error, timeout or non-2xx status).
    """
    base_url = 'https://quotes.toscrape.com'
    max_new_per_page = 5

    url = base_url
    while True:
        # A timeout keeps a stalled connection from hanging the crawl, and
        # raise_for_status() avoids parsing an error page as if it were data.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Parse both lists once per page instead of once per quote.
        quotes = soup.find_all('span', {'class': 'text'})
        authors = soup.find_all('small', {'class': 'author'})

        # NOTE(review): the counter restarts on every page, exactly as in the
        # original code; hoist it above the loop if the limit is meant to
        # apply to the whole run.
        q_count = 0
        for quote, author in zip(quotes, authors):
            if Quote.objects.filter(quote=quote.string).exists():
                continue
            # get_or_create replaces the exists()/create()/get() sequence.
            a, _ = Author.objects.get_or_create(name=author.string)
            Quote.objects.create(quote=quote.string, author_id=a.id)
            q_count += 1
            if q_count >= max_new_per_page:
                # Stop right away: nothing more to store, and the pager
                # must not be touched once the limit is hit.
                return

        # The last page has no "next" item, so find() returns None there.
        next_item = soup.find('li', {'class': 'next'})
        if next_item is None or next_item.a is None:
            return
        # The href is root-relative (e.g. '/page/2/'); rebuild the URL from
        # the base each time rather than appending to the previous URL.
        url = base_url + next_item.a['href']
我需要获取下一页,但出现了这个异常:"NoneType"对象没有属性"a"。
如何解决这个问题?另外,也许我的代码还可以优化。谢谢。
到达最后一页时,将没有Next按钮,因此您需要在尝试访问下一页的href之前检查退出条件。一种可能是在当前的最后一行之前添加以下行:
# Look up the pager's "next" item first; find() returns None when the page
# has no such element (i.e. on the last page).
next_page = soup.find('li', {'class': 'next'})
# No "next" item: clear `flag`, which the enclosing `while flag:` loop tests.
if not next_page: flag = False # or return
或者直接使用 return。
当然,您还需要更新最后一行以使用该变量,并确保不会不断地把下一页的后缀追加到 url 上。例如,可以在发起请求时再拼接后缀:
def parser():
    """Fetch quotes.toscrape.com page by page until no "next" link is left.

    The base URL stays fixed; only the path of the page to request is kept
    in ``suffix`` and appended at request time, so it never accumulates.
    """
    flag = True
    url = 'https://quotes.toscrape.com'
    suffix = ''
    while flag:
        response = requests.get(url + suffix)
        soup = BeautifulSoup(response.text, 'html.parser')
        # other code
        next_link = soup.find('li', {'class': 'next'})
        if next_link:
            # Remember only the next page's path for the following request.
            suffix = next_link.a['href']
        else:
            # No "next" item on this page: this was the last one.
            return