在Python中提取嵌套的json/list



我在Python中有以下json/list结构:

{
u'week': 45,
u'value': 
{
u'team': u'accounts', 
u'KPI': 4, 
u'Mgr': 1, 
u'change': 0, 
u'risk': 1000, 
u'subGroups': [
{
u'team': u'HR', 
u'KPI': 4, 
u'Mgr': 1, 
u'change': 0, 
u'risk': 2000, 
u'subGroups': [
{
u'team': u'Marketing', 
u'KPI': 4, 
u'Mgr': 1, 
u'change': 0, 
u'risk': 3000, 
u'subGroups': []
}
]
}
]
}
},
{
u'week': 44, 
u'value': {
u'team': u'accounts', 
u'KPI': 4, 
u'Mgr': 1, 
u'change': 0, 
u'risk': 4000, 
u'subGroups': [
{
u'team': u'HR', 
u'KPI': 4, 
u'Mgr': 1, 
u'change': 0, 
u'risk': 5000, 
u'subGroups': [
{
u'team': u'Marketing', 
u'KPI': 4, 
u'Mgr': 1, 
u'change': 0, 
u'risk': 6000, 
u'subGroups': []
}
]
}
]
}
},
{
u'week': 34, 
u'value': {
u'team': u'accounts', 
u'KPI': 29, 
u'Mgr': 1, 
u'change': 0, 
u'risk': 20000, 
u'subGroups': [
{
u'team': u'HR', 
u'KPI': 29, 
u'Mgr': 1, 
u'change': 0, 
u'risk': 20000, 
u'subGroups': [
{
u'team': u'Marketing', 
u'KPI': 29, 
u'Mgr': 1, 
u'change': 0, 
u'risk': 20000, 
u'subGroups': []
}
]
}
]
}
}
]

我需要提取一些值来创建以下

[
{
'team':'accounts', 
risk : [
1000,
4000,
20000
]
},
{
'team': 'HR', 
'risks'[
2000,
5000,
2000
]
},
{
'team' : 'Marketing', 
risk : [
3000,
6000,
2000
]
}
]

在实践中,可以有任何数量的周和任何数量的亚组。此外,由于Docker容器的限制,我只需要使用标准的Python 2库。

我一直在努力让它发挥作用,所以任何帮助都将不胜感激,谢谢。

您可以使用一个函数来展开嵌套的json,然后重新构建它。在这里,我把它扔到了一张桌子上,然后你可以随心所欲地切片和骰子:

import pandas as pd
import re

data = [{u'week': 45, u'value': {u'team': u'accounts', u'KPI': 4, u'Mgr': 1, u'change': 0, u'risk': 1000, u'subGroups': [{u'team': u'HR', u'KPI': 4, u'Mgr': 1, u'change': 0, u'risk': 2000, u'subGroups': [{u'team': u'Marketing', u'KPI': 4, u'Mgr': 1, u'change': 0, u'risk': 3000, u'subGroups': []}]}]}},
{u'week': 44, u'value': {u'team': u'accounts', u'KPI': 4, u'Mgr': 1, u'change': 0, u'risk': 4000, u'subGroups': [{u'team': u'HR', u'KPI': 4, u'Mgr': 1, u'change': 0, u'risk': 5000, u'subGroups': [{u'team': u'Marketing', u'KPI': 4, u'Mgr': 1, u'change': 0, u'risk': 6000, u'subGroups': []}]}]}},
{u'week': 34, u'value': {u'team': u'accounts', u'KPI': 29, u'Mgr': 1, u'change': 0, u'risk': 20000, u'subGroups': [{u'team': u'HR', u'KPI': 29, u'Mgr': 1, u'change': 0, u'risk': 20000, u'subGroups': [{u'team': u'Marketing', u'KPI': 29, u'Mgr': 1, u'change': 0, u'risk': 20000, u'subGroups': []}]}]}}]

def flatten_json(y):
out = {}
def flatten(x, name=''):
if type(x) is dict:
for a in x:
flatten(x[a], name + a + '_')
elif type(x) is list:
i = 0
for a in x:
flatten(a, name + str(i) + '_')
i += 1
else:
out[name[:-1]] = x
flatten(y)
return out

flat = flatten_json(data)
columns_list = list(flat.keys())
rows = {}
for item in columns_list:
row_idx = re.findall(r'(d+)_', item )[0]
column = re.findall(r'd+_(.*)', item )[0]
row_idx = int(row_idx)
value = flat[item]
if row_idx in rows:
rows[row_idx][column] = value
else:
rows[row_idx] = {}
rows[row_idx][column] = value
results = pd.DataFrame()       
for idx, row in rows.items():
results = results.append(pd.DataFrame(row, index=[idx]), sort=True)

输出:

print (results.to_string())
value_KPI  value_Mgr  value_change  value_risk  value_subGroups_0_KPI  value_subGroups_0_Mgr  value_subGroups_0_change  value_subGroups_0_risk  value_subGroups_0_subGroups_0_KPI  value_subGroups_0_subGroups_0_Mgr  value_subGroups_0_subGroups_0_change  value_subGroups_0_subGroups_0_risk value_subGroups_0_subGroups_0_team value_subGroups_0_team value_team  week
0          4          1             0        1000                      4                      1                         0                    2000                                  4                                  1                                     0                                3000                          Marketing                     HR   accounts    45
1          4          1             0        4000                      4                      1                         0                    5000                                  4                                  1                                     0                                6000                          Marketing                     HR   accounts    44
2         29          1             0       20000                     29                      1                         0                   20000                                 29                                  1                                     0                               20000                          Marketing                     HR   accounts    34

最新更新