如何从csv转换到嵌套json字典(初学者)



我的结果:

[
{
"FIRST NAME": "JOHN",
"PRY SCHOOL": "OLIVETTE",
"HIGH SCHOOL": "BAPTIST",
"VEHICLEMAKE ": "TOYOTA",
"VEHICLE COL": "BLACK",
"TV MAKE": "SAMSUNG"
},
{
"FIRST NAME": "KOFI",
"PRY SCHOOL": "ACADAMY", 
"HIGH SCHOOL": "MAYFLOWER",
"VEHICLEMAKE ": "HONDA",
"VEHICLE COL": "YELLOW",
"TV MAKE": "TECHWOOD"
},
{
"FIRST NAME": "BISI",
"PRY SCHOOL": "IGBOBI",
"HIGH SCHOOL": "ANGUS",
"VEHICLEMAKE ": "HYUNDAI",
"VEHICLE COL": "BLUE",
"TV MAKE": "THERMOC"
}
]

预期结果:

[
{
"FIRST NAME": "JOHN",
"SCHOOL": {
"primary": "OLIVETTE",
"HIGH SCHOOL": "BAPTIST"
},
"VEHICLE": {
"MAKE": "TOYOTA",
"COL": "BLACK"
},
"TV MAKE": "SAMSUNG"
},
{
"FIRST NAME": "KOFI",
"SCHOOL": {
"primary": "ACADAMY",
"HIGH SCHOOL": "MAYFLOWER"
},
"VEHICLE": {
"MAKE": "HONDA",
"COL": "YELLOW"
},
"TV MAKE": "TECHWOOD"
},
{
"FIRST NAME": "BISI",
"SCHOOL": {
"primary": "IGBOBI",
"HIGH SCHOOL": "ANGUS"
},
"VEHICLE": {
"MAKE": "HYUNDAI",
"COL": "BLUE"
},
"TV MAKE": "THERMO"
}
]
我的代码:

import csv
import json

# Path of the CSV file to convert.
filenames = 'csvfilepath.csv'

# Collects one flat dict per CSV row; DictReader takes the keys from the
# first (header) line of the file.
my_dic = []
with open(filenames, encoding='utf-8') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        my_dic.append(row)

# Dump every row as a JSON array of flat objects (no nesting is built here).
with open('jasonfilepath.json', 'w', encoding='utf-8') as file_object:
    json.dump(my_dic, file_object, indent=4)

我的数据:

firstname  PRY SCHOOL HIGH SCHOOL VEHICLEMAKE VEHICLECOL  TVMAKE
JOHN       OLIVETTE   BAPTIST     TOYOTA      BLACK       SAMSUNG
KOFI       ACADAMY    MAYFLOWER    HONDA      YELLOW      TECHWOOD
BISI       IGBOBI     ANGUS        HYUNDAI    BLUE         THERMO

注意:行数大于1000行

我想确保学校(包括小学和中学)和车辆(品牌和颜色)的嵌套结构

DictReader无法读取嵌套数据,需要手动构建具有所需结构的字典。对于这种情况,我将使用简单的csv.reader

代码:

import csv
import json

# Open input and output in one `with` statement so both files are closed
# together; the trailing backslash continues the statement on the next line.
with open(r"csvfilepath.csv", newline="") as inp_f, \
        open(r"jsonfilepath.json", "w") as out_f:
    # Assumes the input is tab-separated: the delimiter must be the tab
    # character "\t", not the letter "t".
    reader = csv.reader(inp_f, delimiter="\t")
    next(reader, None)  # skip header (the default avoids an error on an empty file)
    my_dic = []
    for row in reader:
        if len(row) >= 6:  # skip rows that are missing columns
            # Columns are addressed by position and regrouped into the
            # nested SCHOOL / VEHICLE structure.
            my_dic.append({
                "FIRST NAME": row[0],
                "SCHOOL": {
                    "primary": row[1],
                    "HIGH SCHOOL": row[2]
                },
                "VEHICLE": {
                    "MAKE": row[3],
                    "COL": row[4]
                },
                "TV MAKE": row[5]
            })
    if my_dic:  # only write when at least one complete row was read
        json.dump(my_dic, out_f, indent=4)

像下面这样(你确定'TV MAKE'的位置吗?)

import json

# Flat input records. Note that the "VEHICLEMAKE " key carries a trailing
# space, so the lookup below must use exactly that spelling.
data = [
    {
        "FIRST NAME": "JOHN",
        "PRY SCHOOL": "OLIVETTE",
        "HIGH SCHOOL": "BAPTIST",
        "VEHICLEMAKE ": "TOYOTA",
        "VEHICLE COL": "BLACK",
        "TV MAKE": "SAMSUNG",
    },
    {
        "FIRST NAME": "KOFI",
        "PRY SCHOOL": "ACADAMY",
        "HIGH SCHOOL": "MAYFLOWER",
        "VEHICLEMAKE ": "HONDA",
        "VEHICLE COL": "YELLOW",
        "TV MAKE": "TECHWOOD",
    },
    {
        "FIRST NAME": "BISI",
        "PRY SCHOOL": "IGBOBI",
        "HIGH SCHOOL": "ANGUS",
        "VEHICLEMAKE ": "HYUNDAI",
        "VEHICLE COL": "BLUE",
        "TV MAKE": "THERMOC",
    },
]


def _nest(record):
    """Return *record* regrouped under nested SCHOOL and VEHICLE dicts."""
    school = {
        'PRIMARY': record['PRY SCHOOL'],
        'HIGH SCHOOL': record['HIGH SCHOOL'],
    }
    vehicle = {
        'MAKE': record['VEHICLEMAKE '],
        'COL': record['VEHICLE COL'],
    }
    # Insertion order determines the key order in the printed JSON.
    return {
        'SCHOOL': school,
        'VEHICLE': vehicle,
        'FIRST NAME': record['FIRST NAME'],
        'TV MAKE': record['TV MAKE'],
    }


new = list(map(_nest, data))
print(json.dumps(new, indent=4))

输出
[
{
"SCHOOL": {
"PRIMARY": "OLIVETTE",
"HIGH SCHOOL": "BAPTIST"
},
"VEHICLE": {
"MAKE": "TOYOTA",
"COL": "BLACK"
},
"FIRST NAME": "JOHN",
"TV MAKE": "SAMSUNG"
},
{
"SCHOOL": {
"PRIMARY": "ACADAMY",
"HIGH SCHOOL": "MAYFLOWER"
},
"VEHICLE": {
"MAKE": "HONDA",
"COL": "YELLOW"
},
"FIRST NAME": "KOFI",
"TV MAKE": "TECHWOOD"
},
{
"SCHOOL": {
"PRIMARY": "IGBOBI",
"HIGH SCHOOL": "ANGUS"
},
"VEHICLE": {
"MAKE": "HYUNDAI",
"COL": "BLUE"
},
"FIRST NAME": "BISI",
"TV MAKE": "THERMOC"
}
]

假设输入文件是制表符分隔的,您可以在读取输入文件时通过稍微修改您的逻辑来构造嵌套:

# Relies on `csv`, `filenames` and `my_dic` already being defined, as in the
# question's script.
with open(filenames, encoding='utf-8') as csv_file:
    # Tab-separated input: the delimiter is the tab character "\t", not the
    # letter "t".
    csv_reader = csv.DictReader(csv_file, delimiter="\t")
    for row in csv_reader:
        # Look columns up by header name and regroup them into the nested
        # SCHOOL / VEHICLE structure while reading.
        my_dic.append({
            "FIRST NAME": row['firstname'],
            "SCHOOL": {
                "PRIMARY": row['PRY SCHOOL'],
                "HIGH SCHOOL": row['HIGH SCHOOL'],
            },
            "VEHICLE": {
                "MAKE": row['VEHICLEMAKE'],
                "COL": row['VEHICLECOL'],
            },
            "TV MAKE": row["TVMAKE"],
        })

最新更新