我正在进行一个Django项目,该项目收集一些数据并将其存储在SQLite数据库中:-
models.py文件
from django.db import models
from django.utils.text import slugify
class News(models.Model):
title = models.CharField(max_length=120)
datess = models.CharField(max_length=120)
linkss = models.CharField(max_length=120)
slug = models.SlugField(blank=True, null=True)
def save(self, *args, **kwargs):
if not self.slug and self.title:
self.slug = slugify(self.title)
super(News, self).save(*args, **kwargs)
class Meta:
verbose_name_plural = "news"
def __str__(self):
return f'{self.title}'
def get_absolute_url(self):
return f"/news/{self.slug}"
这是views.py文件
from django.shortcuts import render
from .models import News
from django.core.paginator import Paginator
from django.db.models import Q
# For scraping part
import requests
from bs4 import BeautifulSoup
def news_list(request, *args, **kwargs):
# fOR scraping part - START::::::::::::::::::::::::::::::::::::::::::::::::::::::::
response = requests.get("http://www.iitg.ac.in/home/eventsall/events")
soup = BeautifulSoup(response.content,"html.parser")
cards = soup.find_all("div", attrs={"class": "newsarea"})
iitg_title = []
iitg_date = []
iitg_link = []
for card in cards[0:6]:
iitg_date.append(card.find("div", attrs={"class": "ndate"}).text)
iitg_title.append(card.find("div", attrs={"class": "ntitle"}).text.strip())
iitg_link.append(card.find("div", attrs={"class": "ntitle"}).a['href'])
# fOR scraping part - END::::::::::::::::::::::::::::::::::::::::::::::::::::::::
# fOR storing the scraped data directly into the dtatbase from the views.py file - START---------------------------------------------------------------
for i in range(len(iitg_title)):
News.objects.create(title = iitg_title[i], datess = iitg_date[i], linkss = iitg_link[i])
# fOR storing the scraped data directly into the dtatbase from the views.py file - END-----------------------------------------------------------------
queryset = News.objects.all() #Getting all the objects from the database
search_query = request.GET.get('q')
if search_query:
queryset = queryset.filter(
Q(title__icontains = search_query) |
Q(description__icontains = search_query)
)
paginator = Paginator(queryset, 5) #Adding pagination
page_number = request.GET.get('page')
queryset = paginator.get_page(page_number)
context = {
'object_list': queryset
}
return render(request, 'news_list.html', context)
每次刷新网页时,都会创建相同的对象,一次又一次,我尝试了一些方法来检查数据是否已经存在于数据库中,但我无法确定具体的condition
。
如果用于创建对象的数据在每次迭代中都相同,则应使用此函数get_or_create
,而不是此处所述的create
。它将用CCD_ 4上传递的参数搜索数据库。如果不存在,django将为您创建一个新对象。在您的示例中,代码应该如下所示:
for i in range(len(iitg_title)):
News.objects.get_or_create(title = iitg_title[i], datess = iitg_date[i], linkss = iitg_link[i])