Django 序列化器在批量创建(bulk_create)时的验证性能优化



我正在编写一个 API,用来接收压缩(zip)的 CSV 文件,并通过 bulk_create 批量写入大量数据行。但是,在调用 serializer.save() 之前先调用 serializer.is_valid() 验证数据时,由于 device_id(即 sensorReading_device 字段)上的外键约束,验证耗时非常长。

我尝试过 prefetch_related()、select_related() 以及嵌套序列化器,但性能相近甚至更差。我怀疑是模型序列化器在验证外键时,对每一行数据都单独查询一次数据库,从而产生了 N+1 次数据库往返。唯一有效的办法是把模型中的外键去掉,改用 CharField 实现,这样速度非常快,但也意味着不再有外键约束。去掉外键是正确的方向吗,还是我遗漏了什么?

这是我的代码,任何建议都非常感谢!

serializer.py

class SensorReadingListSerializer(serializers.ListSerializer):
    """List serializer that persists all validated rows in one bulk insert."""

    def create(self, validated_data):
        """Create the readings described by ``validated_data``.

        Each item of ``validated_data`` is expanded into the keyword
        arguments of an unsaved ``SensorReading``; the whole batch is then
        handed to ``bulk_create`` and its result is returned.
        """
        pending = []
        for attrs in validated_data:
            pending.append(SensorReading(**attrs))
        return SensorReading.objects.bulk_create(pending)
class SensorReadingSerializer(serializers.ModelSerializer):
    """Serializer for ``SensorReading`` rows.

    When instantiated with ``many=True`` the rows are saved through
    ``SensorReadingListSerializer`` (see ``Meta.list_serializer_class``).
    """

    device_qs = Device.objects.all()
    # ``sensorReading_device`` is a ForeignKey on the model (one device per
    # reading) and callers send a single device id per row, so the field must
    # accept one primary key rather than a list -- hence no ``many=True``.
    # NOTE(review): a primary-key related field resolves each incoming value
    # against ``queryset``; with many rows this is presumably the source of
    # the slow validation -- confirm with query profiling.
    sensorReading_device = PrimaryKeyRelatedField(label='SensorReading device', queryset=device_qs)

    class Meta:
        model = ReadingsModel.SensorReading
        fields = ('id', 'device_timestamp', 'server_timestamp', 'payload', 'sensorReading_device')
        list_serializer_class = SensorReadingListSerializer

model.py

class Device(models.Model):
    """A device, keyed by a string ``device_id``."""

    # String primary key (at most 120 characters) used instead of an
    # auto-generated integer id.
    device_id = models.CharField(
        max_length=120,
        primary_key=True,
    )
    # Both relations cascade: removing the referenced row removes the device.
    device_deviceType = models.ForeignKey(
        DeviceType,
        on_delete=models.CASCADE,
    )
    device_softwareVersion = models.ForeignKey(
        SoftwareVersion,
        on_delete=models.CASCADE,
    )

class SensorReading(models.Model):
    """One sensor reading: two timestamps, a JSON payload and its device."""

    # ``datetime.today`` is passed uncalled, so the default is computed for
    # each new object rather than once at import time.
    device_timestamp = models.DateTimeField(
        default=datetime.today,
    )
    server_timestamp = models.DateTimeField(
        default=datetime.today,
    )
    payload = JSONField()
    # Deleting the referenced Device also deletes its readings.
    sensorReading_device = models.ForeignKey(
        Device,
        on_delete=models.CASCADE,
    )
    # Alternative tried for speed (drops the foreign-key constraint):
    # sensorReading_device = models.CharField(max_length=120)

view.py

class SensorReadingViewSet(viewsets.ModelViewSet):
    """
    API endpoint that allows SensorReading to be viewed or edited.
    """
    serializer_class = SensorReadingSerializer
    queryset = SensorReading.objects.all()
    parser_classes = (MultiPartParser, FormParser,)

    @staticmethod
    def _bad_request(message):
        """Return a JSON error whose HTTP status matches the 400 in its body."""
        return JsonResponse(
            {'status': status.HTTP_400_BAD_REQUEST, 'message': message},
            status=status.HTTP_400_BAD_REQUEST,
        )

    @staticmethod
    def _rows_from_zip(request_file, device_id):
        """Turn every line of every archive member into one serializer input.

        Returns a list of dicts shaped like
        ``{'payload': {'data': <line>}, 'sensorReading_device': device_id}``.
        """
        # Imported locally so this block does not depend on a module-level
        # import; ``io.StringIO`` is the documented home of the class that
        # ``csv.StringIO`` only exposed by accident.
        import io

        rows = []
        # The context manager closes the archive even if reading/decoding fails.
        with zipfile.ZipFile(request_file) as zf:
            for csv_file_name in zf.namelist():
                with zf.open(csv_file_name) as reading_csv:
                    text = io.StringIO(reading_csv.read().decode())
                    for row in text:
                        rows.append({'payload': {'data': row.rstrip()},
                                     'sensorReading_device': device_id})
        return rows

    @list_route(methods=['post'])
    def post_readings_zip(self, request):
        """Bulk-create readings from an uploaded zip of text/CSV files.

        Expects a multipart request with a ``zipped_file`` upload and a
        ``device_id`` form field. Every line of every file in the archive
        becomes one reading for that device.

        Returns a ``JsonResponse``: 201 with the serialized readings on
        success, 400 with serializer errors when validation fails, and 400
        with a ``message`` when a field is missing, the device is unknown or
        the upload is not a zip archive.
        """
        if not sys.warnoptions:
            warnings.simplefilter("ignore")

        # ``.get`` avoids an unhandled KeyError (HTTP 500) on missing fields.
        request_file = request.FILES.get('zipped_file')
        device_id = request.data.get('device_id')
        if request_file is None or device_id is None:
            return self._bad_request("Bad Request: 'zipped_file' and 'device_id' are required")

        if not Device.objects.filter(device_id=device_id).exists():
            return self._bad_request('Device ID does not exist, please register')
        print('device exist')

        if not zipfile.is_zipfile(request_file):
            return self._bad_request('Bad Request: Uploaded file is not a zip')
        print('file is zip')

        print("Processing data: " + str(datetime.datetime.now()))
        data = self._rows_from_zip(request_file, device_id)
        print("Done processing csvs: " + str(datetime.datetime.now()))

        serializer = SensorReadingSerializer(data=data, many=True)
        print("Validating serializer: " + str(datetime.datetime.now()))
        if not serializer.is_valid():
            return JsonResponse(serializer.errors, status=400, safe=False)

        print("Saving: " + str(datetime.datetime.now()))
        serializer.save()
        print("Done: " + str(datetime.datetime.now()))
        return JsonResponse({'status': serializer.data}, status=201, safe=False)

您是否尝试过为相关字段添加 db_index(数据库索引)?可以快速查阅一下 Django 文档。另外,您是否使用 django-debug-toolbar 或类似的工具分析过您的查询?

最新更新