修复了它
对于一个教育项目,我正在尝试将抓取的数据存储在MS SQL数据库中。首先,我希望每个独特的项目都放在products_tb
中。插入唯一产品后,SQL 必须为所述项目生成唯一 ID,productgroupid
。products_tb
表只会产生永远不会改变的产品信息,例如productid, category, name and description
。在第二个表中,我将在工作后创建该表,我将存储以下数据:productgroupid, price, timestamp
。这样做的原因是这些可能会时不时地改变。有了productgroupid
我可以随时对所有数据进行分组并创建图表等。
问题是我无法让我的pipelines.py
工作。但是,我确实设法使用注释的代码块将数据插入我的SQL数据库:
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
它似乎正在使用以下代码
pipelines.py
import pymssql
class KrcPipeline(object):
def __init__(self):
self.conn = pymssql.connect(host='DESKTOP-P1TF28R', user='sa', password='123', database='kaercher')
self.cursor = self.conn.cursor()
def process_item(self, item, spider):
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
sql_statement = f'''
BEGIN
IF NOT EXISTS (SELECT * FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']})
BEGIN
INSERT INTO [kaercher].[dbo].[products_tb] (productid, category, name, description)
OUTPUT (Inserted.productgroupid)
VALUES ({item['productid']}, '{item['category']}', '{item['name']}', '{item['description']}')
END
ELSE
BEGIN
SELECT productgroupid FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']}
END
END
'''
self.cursor.execute(sql_statement)
self.conn.commit()
return item
items.py
import scrapy
class KrcItem(scrapy.Item):
productid=scrapy.Field()
name=scrapy.Field()
description=scrapy.Field()
price=scrapy.Field()
producttype=scrapy.Field()
timestamp=scrapy.Field()
category=scrapy.Field()
pass
编辑:
我错过了另一个小错误。"如果不存在"需要更改为"如果不存在S"。
import pymssql
class KrcPipeline(object):
def __init__(self):
self.conn = pymssql.connect(host='DESKTOP-P1TF28R', user='sa', password='123', database='kaercher')
self.cursor = self.conn.cursor()
def process_item(self, item, spider):
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
sql_statement = f'''
BEGIN
IF NOT EXISTS (SELECT * FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']})
BEGIN
INSERT INTO [kaercher].[dbo].[products_tb] (productid, category, name, description)
OUTPUT (Inserted.productgroupid)
VALUES ({item['productid']}, {item['category']}, {item['name']}, {item['description']})
END
ELSE
BEGIN
SELECT productgroupid FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']}
END
END
'''
self.cursor.execute(sql_statement)
self.conn.commit()
return item
源语言:
定义sql_statement时,未正确调用项字典中的值。试试这个:
import pymssql
class KrcPipeline(object):
def __init__(self):
self.conn = pymssql.connect(host='DESKTOP-P1TF28R', user='sa', password='123', database='kaercher')
self.cursor = self.conn.cursor()
def process_item(self, item, spider):
# self.cursor.execute("INSERT INTO products_tb(productid, category, name, description, price, timestamp) VALUES (%s, %s, %s, %s, %s, %s)",
# (item['productid'], item['category'], item['name'], item['description'], item['price'], item['timestamp']))
sql_statement = f'''
BEGIN
IF NOT EXIST (SELECT * FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']})
BEGIN
INSERT INTO [kaercher].[dbo].[products_tb] (productid, category, name, description)
OUTPUT (Inserted.productgroupid)
VALUES ({item['productid']}, {item['category']}, {item['name']}, {item['description']})
END
ELSE
BEGIN
SELECT productgroupid FROM [kaercher].[dbo].[products_tb]
WHERE productid = {item['productid']}
END
END
'''
self.cursor.execute(sql_statement)
self.conn.commit()
return item