# Load subscriber records from a CSV file and bulk-insert them into MySQL.
import pymysql
import time
import csv,os
from multiprocessing import Pool  # kept: the author plans a parallel-insert version

# Parameterized INSERT template. Never build SQL by string concatenation:
# the original version was injectable via the CSV contents, and a prepared
# statement also lets the driver batch rows efficiently.
INSERT_SQL = (
    "INSERT INTO dummydb.thirdparty_subscriber_o"
    "(first_name,last_name,country_code,validity_start_date,"
    "validity_end_date,mobile) VALUES (%s,%s,%s,%s,%s,%s)"
)


def read_rows(csv_path):
    """Parse the CSV and return a list of parameter tuples for INSERT_SQL.

    Column mapping follows the original script: col 0 = first name,
    col 2 = mobile, col 6 = validity end date.  Last name is always
    blank, country code is always "IN", and the validity start date
    is the fixed value "2018-03-03 00:00:00".
    """
    rows = []
    # 'with' guarantees the file handle is closed; newline='' is the
    # documented way to open files for the csv module (replaces 'rU').
    with open(csv_path, newline='') as fh:
        reader = csv.reader(fh, delimiter=",")
        next(reader)  # skip the header row
        for row in reader:
            rows.append((row[0], "", "IN", "2018-03-03 00:00:00", row[6], row[2]))
    return rows


def insert_rows(db, rows):
    """Insert all rows with one executemany() call and a single commit.

    The original committed after every single INSERT; one batched
    round-trip plus one commit is the biggest speedup available here,
    before reaching for multiprocessing.
    """
    with db.cursor() as cursor:
        cursor.executemany(INSERT_SQL, rows)
    db.commit()


def main():
    start = time.time()
    # Prompt string fixed: the original ended in "onlyn" — a broken "\n" escape.
    csv_file = input("Please enter absolute path for the input File "
                     "(Just file name, to be precise), this script can "
                     "process csv files only\n")
    rows = read_rows(csv_file)
    db = pymysql.connect(host="localhost", user="root", passwd="root", db="dummydb")
    try:
        insert_rows(db, rows)
        print("Inserted")
    finally:
        db.close()  # always release the connection, even on error
    print(time.time() - start)


if __name__ == "__main__":
    main()
I want to use the multiprocessing Pool module to read the file and insert its records faster than the sequential version.
Suppose the CSV contains N records. Read the CSV and split the records into M chunks, each of size S.
You then have M lists, each of size S. Create a process pool and pass the lists to the pool, where "func" is the function that performs the INSERTs into the DB.