如何将CSV转换为深度嵌套的JSON



我正在使用的Python代码:

import csv
import json
class SetEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, set):
return list(obj)
return json.JSONEncoder.default(self, obj)
def str_to_bool(s):
if s == "TRUE":
return True
elif s == "FALSE":
return False
else:
return None
file = "sample"
csvfile = open(f'csv/{file}.csv', encoding='utf-8-sig')
next(csvfile, None) 
jsonfile = open(f'output/{file}.json', 'w')
fieldnames = ("key", "name", "loc_type",
"loc_id", "cities_name", "shippingMethods_name",
"isExcluded", "cutoffWindows_startTime", "cutoffWindows_endTime",
"cutoffWindows_capacity", "cutoffWindows_slots", "category",
"furniture", "removeFallbacks", "cutoffWindows")
reader = csv.DictReader(csvfile, fieldnames)
for row in reader:
row['furniture'] = str_to_bool(row.pop('furniture'))
cities_name = row.pop('cities_name')
row['cities'] = [{'name': cities_name}]
for smethod in row['cities']:
shippingMethods_name = row.pop('shippingMethods_name')
isExcluded = row.pop('isExcluded')
removeFallbacks = row.pop('removeFallbacks')
smethod['shippingMethods'] = [{'name': shippingMethods_name, 'isExcluded': str_to_bool(isExcluded), 'removeFallbacks': str_to_bool(removeFallbacks)}]
for cwindows in smethod['shippingMethods']:
cutoffWindows = row.pop('cutoffWindows')
startTime = row.pop('cutoffWindows_startTime')
endTime = row.pop('cutoffWindows_endTime')
capacity = row.pop('cutoffWindows_capacity')
cwindows['cutoffWindows'] = [{'startTime': startTime, 'endTime': endTime, 'capacity': capacity}]
for s in cwindows['cutoffWindows']:
slots = row.pop('cutoffWindows_slots')
s['slots'] = [{slots}]
json.dump(row, jsonfile, indent=4, cls=SetEncoder)
jsonfile.write(',')

示例文件:

key,name,loc_type,loc_id,cities_name,shippingMethods_name,isExcluded,cutoffWindows_startTime,cutoffWindows_endTime,cutoffWindows_capacity,cutoffWindows_slots,category,furniture,removeFallbacks,Mode,Country,sm_omscode,slots_omscode
store-fashion,UAE - Store (Fashion),S,8502,dubai,next-day-delivery,FALSE,0:01,23:59,20,9pm12am,fashion,,,Normal,BloomingDales AE,NEXTDAY,SLOT21-24
store-fashion,UAE - Store (Fashion),S,8502,dubai,1-3-days-delivery,FALSE,0:01,23:59,,,fashion,,TRUE,Normal,BloomingDales AE,1TO3DAYS,
store-fashion,UAE - Store (Fashion),S,8502,non-dubai,next-day-delivery,FALSE,0:01,21:00,40,12am12am,fashion,,,Normal,BloomingDales AE,NEXTDAY,NEXTDAY
store-fashion,UAE - Store (Fashion),S,8502,non-dubai,1-3-days-delivery,TRUE,0:01,23:59,,,fashion,TRUE,TRUE,Normal,BloomingDales AE,1TO3DAYS,

电流输出:

{
"key": "store-fashion",
"name": "UAE - Store (Fashion)",
"loc_type": "S",
"loc_id": "8502",
"category": "fashion",
"null": [
"BloomingDales AE",
"NEXTDAY",
"SLOT21-24"
],
"furniture": null,
"cities": [
{
"name": "dubai",
"shippingMethods": [
{
"name": "next-day-delivery",
"isExcluded": false,
"removeFallbacks": null,
"cutoffWindows": [
{
"startTime": "0:01",
"endTime": "23:59",
"capacity": "20",
"slots": [
[
"9pm12am"
]
]
}
]
}
]
}
]
},{
"key": "store-fashion",
"name": "UAE - Store (Fashion)",
"loc_type": "S",
"loc_id": "8502",
"category": "fashion",
"null": [
"BloomingDales AE",
"1TO3DAYS",
""
],
"furniture": null,
"cities": [
{
"name": "dubai",
"shippingMethods": [
{
"name": "1-3-days-delivery",
"isExcluded": false,
"removeFallbacks": true,
"cutoffWindows": [
{
"startTime": "0:01",
"endTime": "23:59",
"capacity": "",
"slots": [
[
""
]
]
}
]
}
]
}
]
},{
"key": "store-fashion",
"name": "UAE - Store (Fashion)",
"loc_type": "S",
"loc_id": "8502",
"category": "fashion",
"null": [
"BloomingDales AE",
"NEXTDAY",
"NEXTDAY"
],
"furniture": null,
"cities": [
{
"name": "non-dubai",
"shippingMethods": [
{
"name": "next-day-delivery",
"isExcluded": false,
"removeFallbacks": null,
"cutoffWindows": [
{
"startTime": "0:01",
"endTime": "21:00",
"capacity": "40",
"slots": [
[
"12am12am"
]
]
}
]
}
]
}
]
},{
"key": "store-fashion",
"name": "UAE - Store (Fashion)",
"loc_type": "S",
"loc_id": "8502",
"category": "fashion",
"null": [
"BloomingDales AE",
"1TO3DAYS",
""
],
"furniture": true,
"cities": [
{
"name": "non-dubai",
"shippingMethods": [
{
"name": "1-3-days-delivery",
"isExcluded": true,
"removeFallbacks": true,
"cutoffWindows": [
{
"startTime": "0:01",
"endTime": "23:59",
"capacity": "",
"slots": [
[
""
]
]
}
]
}
]
}
]
}

预期结果:

[{
"key": "store-fashion",
"name": "UAE - Store (Fashion)",
"loc_type": "S",
"loc_id": "8502",
"cities": [{
"name": "dubai",
"shippingMethods": [{
"name": "same-day-delivery",
"isExcluded": false,
"cutoffWindows": [{
"startTime": "00:01",
"endTime": "10:00",
"capacity": 20,
"slots": [
"1pm6pm"
]
},
{
"startTime": "00:01",
"endTime": "16:00",
"capacity": 20,
"slots": [
"6pm9pm"
]
},
{
"startTime": "00:01",
"endTime": "16:00",
"capacity": 15,
"slots": [
"9pm12am"
]
}
]
},
{
"name": "next-day-delivery",
"isExcluded": false,
"cutoffWindows": [{
"startTime": "00:01",
"endTime": "21:00",
"capacity": 20,
"slots": [
"9am1pm"
]
},
{
"startTime": "00:01",
"endTime": "23:59",
"capacity": 20,
"slots": [
"1pm5pm"
]
},
{
"startTime": "00:01",
"endTime": "23:59",
"capacity": 20,
"slots": [
"6pm9pm"
]
},
{
"startTime": "00:01",
"endTime": "23:59",
"capacity": 20,
"slots": [
"9pm12am"
]
}
]
},
{
"name": "1-3-days-delivery",
"isExcluded": false,
"removeFallbacks": true
}
]
},
{
"name": "non-dubai",
"shippingMethods": [{
"name": "next-day-delivery",
"isExcluded": false,
"cutoffWindows": [{
"startTime": "00:01",
"endTime": "21:00",
"capacity": 40,
"slots": [
"12am12am"
]
}]
},
{
"name": "1-3-days-delivery",
"isExcluded": false,
"removeFallbacks": true
}
]
}
]
}]

问题:

  1. 我希望创建一个带有嵌套数据的对象,但即使键相同,数据也会根据csv数据的行数重复
  2. 我已经在字段名中定义了字段,但不属于字段名的数据仍在json文件中
  3. csv数据中的空字段仍以json形式出现
  4. 数据应该在数组[]内,并且它应该以"NOT"结尾"如果有多个键

非常感谢您。

我认为这是一个太大的问题,无法指望任何人完全解决,但我想帮助您开始。我认为这个问题是认识到,从平面2D数据中,我们需要创建从左到右的层次结构:一个密钥列表,每个密钥都有一个城市列表,每个城市都有一份运输方式列表,等等

我不能一下子把这些都记在脑子里,所以我要把这个问题分解成很多步骤,实际上从最后开始,从最后的Python dict向后工作到CSV文件。

在将最后一个Python dict传递给json.dump((之前,它需要看起来像下面的精简示例:

final = [
{
"key": "store-fashion",
"loc_id": "8502",
"loc_type": "S",
"cities": [
{
"name": "dubai",
"shippingMethods": [
{"name": "1-3-days-delivery", "isExcluded": False},
{"name": "next-day-delivery", "isExcluded": False},
],
},
{
"name": "non-dubai",
"shippingMethods": [
{"name": "1-3-days-delivery", "isExcluded": False},
{"name": "next-day-delivery", "isExcluded": False},
],
},
],
}
]

(您的数据在运输方法下有更多的层次结构——截止窗口,然后是插槽——但我将停留在运输方法上。(

我们如何到达最终结构?我看到创建了以下三个数据结构,然后将它们拼接在一起:

  • 钥匙字典,带有对城市的命名引用
  • 城市格言,对运输方法的命名参考
  • 运输方法的格言,与
keys = {
"store-fashion": {
"loc_id": "8502",
"loc_type": "S",
"_cities": set(["dubai", "non-dubai"]),
}
}
cities = {
"dubai": {
"_shippingMethods": set(["next-day-delivery", "1-3-days-delivery"]),
},
"non-dubai": {
"_shippingMethods": set(["next-day-delivery", "1-3-days-delivery"]),
},
}
ship_methods = {
"next-day-delivery": {"isExcluded": False},
"1-3-days-delivery": {"isExcluded": False},
}

keys有指向各个城市的cities引用,cities有指向各个装运方法的_shippingMethods引用。

为了将它们缝合在一起,我从顶部开始使用键,创建一个新的_key对象,并立即迭代其城市,并对每个城市执行同样的操作。对于每个嵌套层次结构,此过程都将继续。每个new_object只有在它下面的所有内容都被命名后才被附加到对象列表中(从下到下创建层次结构(:

final_test = []
for key_name in keys:
key_props = keys[key_name]
new_key = {"key": key_name}
new_key.update(key_props)
del new_key["_cities"]
new_key["cities"] = []
for city_name in sorted(key_props["_cities"]):
city_props = cities[city_name]
new_city = {"name": city_name}
new_city["shippingMethods"] = []
for shipmeth_name in sorted(city_props["_shipMethods"]):
shipmeth_props = ship_methods[shipmeth_name]
new_shipmeth = {"name": shipmeth_name}
new_shipmeth.update(shipmeth_props)
new_city["shippingMethods"].append(new_shipmeth)
new_key["cities"].append(new_city)
final_test.append(new_key)
assert final_test == final

这很管用!

转到CSV文件,并将这些数据行转换为键、城市和ship_methods。同样,我认为这个问题太大了,无法一次性完成所有工作。我会将整个CSV文件读入一个变量一次,然后在该变量的行上循环,以创建所需的分组。

我将使用csv。DictReader((,就像你以前一样,但我会按原样使用标题名称:

with open("input.csv", newline="") as f:
reader = csv.DictReader(f)
csv_data = list(reader)
print(csv_data)
[
{'key': 'store-fashion', 'name': 'UAE - Store (Fashion)', 'loc_type': 'S', 'loc_id': '8502', 'cities_name': 'dubai',     ...
{'key': 'store-fashion', 'name': 'UAE - Store (Fashion)', 'loc_type': 'S', 'loc_id': '8502', 'cities_name': 'dubai',     ...
{'key': 'store-fashion', 'name': 'UAE - Store (Fashion)', 'loc_type': 'S', 'loc_id': '8502', 'cities_name': 'non-dubai', ...
{'key': 'store-fashion', 'name': 'UAE - Store (Fashion)', 'loc_type': 'S', 'loc_id': '8502', 'cities_name': 'non-dubai', ...
]

对于每个分组/层次结构,我需要迭代行,只关注组成该分组的列。

对于密钥,我只使用{key: row[key] for key in ["loc_id", "loc_type"]}:复制我想要的列

keys_test = {}
for row in csv_data:
Key = row["key"]
if Key not in keys_test:
keys_test[Key] = {key: row[key] for key in ["loc_id", "loc_type"]}
keys_test[Key]["_cities"] = set()
keys_test[Key]["_cities"].add(row["cities_name"])
assert keys_test == keys

除了名称之外,城市没有任何数据可以复制:

cities_test = {}
for row in csv_data:
City = row["cities_name"]
if City not in cities_test:
cities_test[City] = {"_shippingMethods": set()}
cities_test[City]["_shippingMethods"].add(row["shippingMethods_name"])
assert cities_test == cities

对于布尔值转换,我直接在分类中进行:

ship_methods_test = {}
for row in csv_data:
Shipmeth = row["shippingMethods_name"]
if Shipmeth not in ship_methods_test:
ship_methods_test[Shipmeth] = {}
ship_methods_test[Shipmeth]["isExcluded"] = row["isExcluded"] == "TRUE"
# add set for cut-off windows
# add cut-off window to set
assert ship_methods_test == ship_methods

这是一个草图,展示了所有东西放在一起的样子:

# read CSV
# iterate CSV and group keys
# iterate CSV and group cities
# iterate CSV and group ship methods
# iterate CSV and group cut-off windows
# iterate CSV and group slots
# stitch together
# for key in keys
# for city in cities
# for ship_method in ship_methods
# for cutoff_window in cutoff_windows
# for slot in slots
# append slot
# append cutoff_window
# append ship_method
# append city
# append key

相关内容

  • 没有找到相关文章

最新更新