如何在postgresql中加载csv时更改列数据类型



我有一个大脚本,结果是数据存储在数据框中,然后存储在csv中。然后将csv文件打开并写入PostgreSQL。但有一个问题,一列的数据类型是int4,并打开csv后的列格式是"文本"。我不能改变数据库中的数据类型,它们必须完全是整型。请告诉我怎么做。

total.to_csv("C:/Users/.../total_type19.csv", index = False, sep =';')
conn5 = psycopg2.connect(dbname='', user='',
password='', host='', port = '')
cursor5 = conn5.cursor()

with open("C:/Users/.../total_type19.csv", "r",encoding = 'utf-8') as file:
reader = csv.reader(file, delimiter = ";")
for row in reader:
# print(row)
cursor5.execute(
'INSERT INTO interaction_fillword (test_id,data_size,data_matrix, data_words_selection, data_colors, data_answers) VALUES(%s,%s, %s, %s, %s, %s)',
(row[0], row[1], row[2], row[3], row[4], row[5]))
conn5.commit()

test_id列必须为int4格式

['312229', "['[{from:[3,4],to:[7,4],color:2},{from:[3,6],to:[10,6],color:3},{from:[5,8],to:[9,8],color:5},{from:[5,11],to:[10,11],color:6},{from:[1,0],to:[1,11],color:0},{from:[10,1],to:[10,6],color:4},{from:[3,0],to:[8,0],color:1}],']", '['["v","b","c","c","a","h","i","e","r","s","f","j"],["d","i","w","s","s","r","i","f","y","y","f","c"],["j","b","m","w","d","q","s","q","t","w","e","m"],["x","l","m","m","l","s","o","x","d","q","u","t"],["l","i","f","p","l","a","c","e","t","u","t","o"],["m","o","s","b","r","t","c","y","z","v","r","r"],["j","t","x","c","a","r","t","a","b","l","e","o"],["b","h","k","m","d","b","r","y","q","u","i","y"],["y","è","s","r","h","g","o","m","m","e","w","h"],["u","q","p","c","s","c","x","b","k","e","d","o"],["u","u","o","l","q","v","y","y","b","y","e","h"],["r","e","o","u","j","b","u","r","e","a","u","k"]],']', '['"#ff0000","#00fe00","#0000ff","#d2ea9a","#407f76","#211f95","#e1f233"']', '['"place","cartable","gomme","bureau","bibliothèque","feutre","cahier"']']

这是csv中的一行示例。看起来很糟糕,但这是应该的方式

你能把你的数据改成int还是像"m22"非整数吗?

# to remove non-numeric digits from string
with open("C:/Users/.../total_type19.csv", "r",encoding = 'utf-8') as file:
reader = csv.reader(file, delimiter = ";")
header = next(reader )
print(f"HEADER {header}")
counter = 1 #or whatever number you want to start with
for row in reader:
print(row)
test_id =row[0]
test_id = ''.join([i for i in test_id if i.isdigit()])
if test_id == '':
counter +=1
test_id = counter
else:
test_id = int(test_id)
print(test_id)
cursor5.execute(
'INSERT INTO interaction_fillword (test_id,data_size,data_matrix, data_words_selection, data_colors, data_answers) VALUES(%s,%s, %s, %s, %s, %s)',
(test_id, row[1], row[2], row[3], row[4], row[5]))

使用copy_expert from ' psycopg2.

import psycopg2
conn5 = psycopg2.connect(dbname='', user='',
password='', host='', port = '')
cursor5 = conn5.cursor()
with open("C:/Users/.../total_type19.csv", "r") as csv_file:
cursor5.copy_expert("COPY interaction_fillword FROM STDIN WITH CSV HEADER", csv_file)

CSV HEADER将做几件事:

  1. 自动跳过标题行
  2. 将空的非引号字符串作为NULL。

copy_expert使用Postgres COPY进行批量数据导入(或导出)比插入快得多。缺点是COPY要么全部要么没有,要么整个导入/导出成功,要么一个错误将回滚整个事情。

最新更新