Flatten/Denormalize Dict/Json in Python



嗨,我正在尝试将 JSON 反规范化/扁平化为字典。下面是我用来扁平化 JSON 的通用函数,但它不知为何无法正确处理嵌套的 JSON。目标是将嵌套的字典/JSON 展平为与 CSV 兼容的对象。

我使用了以下代码:

def flatten(s):
    """Yield ``(key, value)`` pairs from *s*, descending into nested dicts.

    Only values that are ``dict`` instances are recursed into. Every other
    value -- including a list that itself contains dicts -- is yielded
    unchanged under its own key, and the keys of the intermediate dicts are
    dropped.
    """
    for key in s:
        value = s[key]
        if isinstance(value, dict):
            # Nested mapping: surface its leaves instead of the mapping itself.
            yield from flatten(value)
        else:
            yield (key, value)

# dict() consumes the (key, value) generator directly.
new_data = dict(flatten(mydict))
print(new_data)

使用此输入字典

# Sample input: one transaction holding a list of two invoice lines.
mydict = {
    'G_TRANSACTIONS': {
        'INVOICE_NUMBER': '31002',
        'TRANSACTION_CLASS': 'Invoice',
        'LIST_G_LINES': {
            'G_LINES': [
                {
                    'LN_LINE_NUMBER': '1',
                    'LN_LINE_TYPE': 'Line',
                    'LN_DESCRIPTION': 'PKG980N-MAIN STREET 16C POCKET PLUGS',
                    'LN_UNIT_OF_MEASURE': 'EA',
                    'LN_QUANTITY': '3',
                    'LN_NET_SELLING_PRICE': '200',
                    'LN_NET_EXTENDED_AMOUNT': '600',
                    'LIST_G_LINES_ITEM_NUMBER': {
                        'G_LINES_ITEM_NUMBER': {
                            'ITEM_NUMBER': '136864001'
                        }
                    }
                },
                {
                    'LN_LINE_NUMBER': '1',
                    'LN_LINE_TYPE': 'Other Line',
                    'LN_DESCRIPTION': 'Johar Town Lahore',
                    'LN_UNIT_OF_MEASURE': 'EA',
                    'LN_QUANTITY': '3',
                    'LN_NET_SELLING_PRICE': '200',
                    'LN_NET_EXTENDED_AMOUNT': '999',
                    'LIST_G_LINES_ITEM_NUMBER': {
                        'G_LINES_ITEM_NUMBER': {
                            'ITEM_NUMBER': '99999999'
                        }
                    }
                }
            ]
        },
        'TR_LN_AMOUNT': '600'
    }
}

我得到的输出是:

{
'INVOICE_NUMBER': '31002',
'TRANSACTION_CLASS': 'Invoice',
'G_LINES': [
{
'LN_LINE_NUMBER': '1',
'LN_LINE_TYPE': 'Line',
'LN_DESCRIPTION': 'PKG980N-MAIN STREET 16C POCKET PLUGS',
'LN_UNIT_OF_MEASURE': 'EA',
'LN_QUANTITY': '3',
'LN_NET_SELLING_PRICE': '200',
'LN_NET_EXTENDED_AMOUNT': '600',
'LIST_G_LINES_ITEM_NUMBER': {
'G_LINES_ITEM_NUMBER': {
'ITEM_NUMBER': '136864001'
}
}
},
{
'LN_LINE_NUMBER': '1',
'LN_LINE_TYPE': 'Other Line',
'LN_DESCRIPTION': 'Johar Town Lahore',
'LN_UNIT_OF_MEASURE': 'EA',
'LN_QUANTITY': '3',
'LN_NET_SELLING_PRICE': '200',
'LN_NET_EXTENDED_AMOUNT': '999',
'LIST_G_LINES_ITEM_NUMBER': {
'G_LINES_ITEM_NUMBER': {
'ITEM_NUMBER': '99999999'
}
}
}
],
'TR_LN_AMOUNT': '600'
}

期望的输出

{
'INVOICE_NUMBER': '31002',
'TRANSACTION_CLASS': 'Invoice',
'LN_LINE_NUMBER': '1',
'LN_LINE_TYPE': 'Line',
'LN_DESCRIPTION': 'PKG980N-MAIN STREET 16C POCKET PLUGS',
'LN_UNIT_OF_MEASURE': 'EA',
'LN_QUANTITY': '3',
'LN_NET_SELLING_PRICE': '200',
'LN_NET_EXTENDED_AMOUNT': '600',
'ITEM_NUMBER': '136864001',
'TR_LN_AMOUNT': '600'
}
,
{
'INVOICE_NUMBER': '31002',
'TRANSACTION_CLASS': 'Invoice',
'LN_LINE_NUMBER': '1',
'LN_LINE_TYPE': 'Other Line',
'LN_DESCRIPTION': 'Johar Town Lahore',
'LN_UNIT_OF_MEASURE': 'EA',
'LN_QUANTITY': '3',
'LN_NET_SELLING_PRICE': '200',
'LN_NET_EXTENDED_AMOUNT': '999',
'ITEM_NUMBER': '99999999',
'TR_LN_AMOUNT': '600'
}

您可以尝试此脚本,它会将 mydict 扁平化为正确的形式:

# Sample input: one transaction holding a list of two invoice lines.
mydict = {
    'G_TRANSACTIONS': {
        'INVOICE_NUMBER': '31002',
        'TRANSACTION_CLASS': 'Invoice',
        'LIST_G_LINES': {
            'G_LINES': [
                {
                    'LN_LINE_NUMBER': '1',
                    'LN_LINE_TYPE': 'Line',
                    'LN_DESCRIPTION': 'PKG980N-MAIN STREET 16C POCKET PLUGS',
                    'LN_UNIT_OF_MEASURE': 'EA',
                    'LN_QUANTITY': '3',
                    'LN_NET_SELLING_PRICE': '200',
                    'LN_NET_EXTENDED_AMOUNT': '600',
                    'LIST_G_LINES_ITEM_NUMBER': {
                        'G_LINES_ITEM_NUMBER': {
                            'ITEM_NUMBER': '136864001',
                        },
                    },
                },
                {
                    'LN_LINE_NUMBER': '1',
                    'LN_LINE_TYPE': 'Other Line',
                    'LN_DESCRIPTION': 'Johar Town Lahore',
                    'LN_UNIT_OF_MEASURE': 'EA',
                    'LN_QUANTITY': '3',
                    'LN_NET_SELLING_PRICE': '200',
                    'LN_NET_EXTENDED_AMOUNT': '999',
                    'LIST_G_LINES_ITEM_NUMBER': {
                        'G_LINES_ITEM_NUMBER': {
                            'ITEM_NUMBER': '99999999',
                        },
                    },
                },
            ],
        },
        'TR_LN_AMOUNT': '600',
    },
}

def flatten(d, depth=0):
    """Yield ``(row, depth)`` candidates for flattening nested dicts/lists.

    Each yielded ``row`` is a flat dict of scalar values collected so far and
    ``depth`` is the nesting level the row was created at. The first pair is
    always the row for *d* itself at ``depth``. A list contributes one row
    per element at ``depth + 1``; everything nested below an element is
    merged into that single row. Callers that want complete records keep
    only the rows whose depth is the greatest one yielded.

    Args:
        d: The value to flatten. Anything that is neither a dict nor a list
            produces a single empty row.
        depth: Nesting level assigned to the row for *d* itself.

    Yields:
        ``(dict, int)`` tuples as described above.

    Note:
        Only scalar values are back-filled into rows that already exist; a
        nested dict or list is merged only into rows created after it is
        visited.
    """
    rv = [({}, depth)]
    if isinstance(d, dict):
        for k, v in d.items():
            if not isinstance(v, (dict, list)):
                # A scalar at this level belongs to every row built so far.
                for row, _ in rv:
                    row[k] = v
            else:
                # All rows coming from this one child extend the same parent
                # snapshot, so a key present in one list element can never
                # leak into a sibling element that lacks it.
                snapshot = rv[-1][0]
                for child_row, child_depth in flatten(v, depth + 1):
                    rv.append(({**snapshot, **child_row}, child_depth))
    elif isinstance(d, list):
        for v in d:
            # Each element starts from an empty row and absorbs every row
            # produced beneath it, collapsing deeper nesting into one record.
            row = {}
            for child_row, _ in flatten(v, depth + 1):
                row.update(child_row)
            rv.append((row, depth + 1))
    yield from rv
from itertools import groupby
from pprint import pprint

# Order the candidate rows deepest-first; groupby() only groups adjacent
# items, so its input has to be sorted on the same depth value.
ranked = sorted(flatten(mydict), key=lambda pair: -pair[1])

out = []
for _depth, group in groupby(ranked, key=lambda pair: pair[1]):
    out.extend(row for row, _ in group)
    break  # only the first (deepest) group is kept

for d in out:
    pprint(d)
    print('-' * 80)

输出:

{'INVOICE_NUMBER': '31002',
'ITEM_NUMBER': '136864001',
'LN_DESCRIPTION': 'PKG980N-MAIN STREET 16C POCKET PLUGS',
'LN_LINE_NUMBER': '1',
'LN_LINE_TYPE': 'Line',
'LN_NET_EXTENDED_AMOUNT': '600',
'LN_NET_SELLING_PRICE': '200',
'LN_QUANTITY': '3',
'LN_UNIT_OF_MEASURE': 'EA',
'TRANSACTION_CLASS': 'Invoice',
'TR_LN_AMOUNT': '600'}
--------------------------------------------------------------------------------
{'INVOICE_NUMBER': '31002',
'ITEM_NUMBER': '99999999',
'LN_DESCRIPTION': 'Johar Town Lahore',
'LN_LINE_NUMBER': '1',
'LN_LINE_TYPE': 'Other Line',
'LN_NET_EXTENDED_AMOUNT': '999',
'LN_NET_SELLING_PRICE': '200',
'LN_QUANTITY': '3',
'LN_UNIT_OF_MEASURE': 'EA',
'TRANSACTION_CLASS': 'Invoice',
'TR_LN_AMOUNT': '600'}
--------------------------------------------------------------------------------

最新更新