如何使用 Python 将通用的(任意结构的)JSON 解析为 CSV



我有一些JSON文件需要解析成CSV。
以下是我的代码:

import json
from collections import defaultdict
def flatten_json1(y):
    """Collect every scalar found in a parsed JSON value into per-key lists.

    The structure is walked recursively.  Each scalar is appended to the list
    stored under the key of the innermost dict that holds it; scalars inside
    a list are filed under the key of that list.  Parent keys are not joined
    in, so equal key names at different depths share one list.

    Args:
        y: A parsed JSON value (dict, list or scalar).

    Returns:
        A ``defaultdict(list)`` mapping key names to the values seen for
        them, in traversal order.  The lists are NOT padded, so keys that
        occur a different number of times yield lists of different lengths.
        A scalar with no enclosing key is stored under ``''``.
    """
    out = defaultdict(list)

    def flatten(x, name=''):
        if isinstance(x, dict):
            for key, value in x.items():
                # Only the immediate key is passed down; the parent name is
                # deliberately dropped.
                flatten(value, key)
        elif isinstance(x, list):
            for item in x:
                # List elements inherit the key of the list itself.
                flatten(item, name)
        else:
            out[name].append(x)

    flatten(y)
    return out

import json
from pprint import pprint
# Load the sample document and pretty-print its flattened form.
with open('testjson.json') as f:
    data = json.load(f)
# The file is only needed for parsing, so flatten and print after it closes.
pprint(flatten_json1(data))

这是我正在使用的一个示例JSON:

{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}

运行时,它为我提供以下输出:

defaultdict(<type 'list'>, {u'type': [u'None', u'Glazed', u'Sugar', 
u'Powdered Sugar', u'Chocolate with Sprinkles', u'Chocolate', u'Maple', 
u'Regular', u'Chocolate', u'Blueberry', u"Devil's Food", u'donut'], 
u'id': [u'5001', u'5002', u'5005', u'5007', u'5006', u'5003', u'5004', 
u'1001', u'1002', u'1003', u'1004', u'0001'], u'ppu': [0.55], u'name': 
[u'Cake']})

但是正如您所看到的,"ppu" 和 "name" 这两个键对应的列表长度与其他两个键的列表长度不相等。

那么,我如何才能使它成为可能,如下所示:

defaultdict(<type 'list'>, {u'type': [u'None', u'Glazed', u'Sugar', 
u'Powdered Sugar', u'Chocolate with Sprinkles', u'Chocolate', u'Maple', 
u'Regular', u'Chocolate', u'Blueberry', u"Devil's Food", u'donut'], 
u'id': [u'5001', u'5002', u'5005', u'5007', u'5006', u'5003', u'5004', 
u'1001', u'1002', u'1003', u'1004', u'0001'], u'ppu': 
[0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55,0.55], u'name': [u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake']})

每个长度为12。请帮忙。

提前感谢。

由于在构建平面dict结果时不知道最长数组的最大长度,因此可以在构建结果后循环遍历结果键,并用最后一个元素填充每个数组。

在第二个示例中,子节点里出现的新键在根节点中并不存在,这会导致各行无法对齐。我为此添加了一个判断,用来确定缺失的键是否出在根节点上;如果是,就在该列表的开头插入一个空字符串以保持对齐。请注意,如果您的所有条目的键都是完全动态的,则需要一个更健壮的解决方案,但目前看来这还为时过早。

def flatten_json(data):
    """Flatten a parsed JSON value into equal-length per-key columns.

    Scalars are gathered under the key of the innermost dict that holds them
    (list elements inherit the key of their list; parent keys are not joined
    in).  Afterwards every column is brought up to the length of the longest
    one so the result can be written out row by row.

    Alignment rules applied to each column:
      * If it is exactly one element shorter than the longest column, an
        empty string is inserted at the front.  This is a heuristic for a
        key that is missing from the root object but present in every child.
      * Any remaining shortfall is filled by repeating the column's last
        value.

    Args:
        data: A parsed JSON value (dict, list or scalar).

    Returns:
        A ``defaultdict(list)`` mapping key names to lists that all have the
        same length.  Empty when ``data`` contains no scalar values.
    """
    result = defaultdict(list)

    def flatten(x, name=""):
        if isinstance(x, dict):
            for k, v in x.items():
                flatten(v, k)
        elif isinstance(x, list):
            for v in x:
                flatten(v, name)
        else:
            result[name].append(x)

    flatten(data)

    # default=0 lets input without any scalar (e.g. {} or []) return an empty
    # result instead of raising ValueError from max() on an empty sequence.
    max_length = max((len(column) for column in result.values()), default=0)
    for column in result.values():
        if max_length - len(column) == 1:
            # Assume the single missing entry belongs to the root row.
            column.insert(0, "")
        # Columns are created on first append, so column[-1] always exists.
        column.extend([column[-1]] * (max_length - len(column)))
    return result

输出:

defaultdict(<class 'list'>,
{'id': ['0001', '1001', '1002', '1003', '1004', '5001', '5002',
'5005', '5007', '5006', '5003', '5004'],
'name': ['Cake', 'Cake', 'Cake', 'Cake', 'Cake', 'Cake', 'Cake',
'Cake', 'Cake', 'Cake', 'Cake', 'Cake'],
'ppu': [0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55,
0.55, 0.55],
'type': ['donut', 'Regular', 'Chocolate', 'Blueberry',
"Devil's Food", 'None', 'Glazed', 'Sugar',
'Powdered Sugar', 'Chocolate with Sprinkles', 'Chocolate',
'Maple']})

defaultdict(<class 'list'>,
{'active': [False, True, True, True, False, True],
'ages': ['123', '123', '123', '123', '123', '123'],
'availableDate': ['2018-24-11', '2018-24-12', '2018-24-13',
'2018-24-14', '2018-24-15', '2018-24-16'],
'build': ['Jack12', 'Jack12', 'Jack12', 'Jack12', 'Jack12',
'Jack12'],
'country': ['', 'IND1', 'IND2', 'IND3', 'IND4', 'IND5'],
'hierID': ['jack', 'jack', 'jack', 'jack', 'jack', 'jack'],
'locID': ['Jack123', 'Jack123', 'Jack123', 'Jack123', 'Jack123',
'Jack123'],
'org': ['', 'jack1', 'jack2', 'jac3', 'jack4', 'jack5']})

试试看!

最新更新