将包含多个嵌套列表的字典转换为pandas数据框架



我有一个像这样的字典:

my_dict = {
"end_time": "2022-10-21T20:00:00",
"time_samples": [
"2022-10-21T15:00:00+00:00",
"2022-10-21T16:00:00+00:00",
"2022-10-21T17:00:00+00:00",
"2022-10-21T18:00:00+00:00",
"2022-10-21T19:00:00+00:00",
],
"groupings": [
{
"grouping": "day_of_week",
"groupings": ["day_of_week"],
"grouping_value": "5",
"grouping_values": [5],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
17.533546325878596,
17.19327731092437,
13.44502617801047,
11.5010395010395,
9.649484536082475,
],
}
],
},
{
"grouping": "device_type",
"groupings": ["device_type"],
"grouping_value": "bicycle",
"grouping_values": ["bicycle"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
2.3736263736263736,
0.5454545454545454,
None,
None,
None,
],
}
],
},
{
"grouping": "device_type",
"groupings": ["device_type"],
"grouping_value": "moped",
"grouping_values": ["moped"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
19.160377358490564,
18.88888888888889,
14.799076212471132,
12.702640642939151,
10.669714285714285,
],
}
],
},
{
"grouping": "geo_fence",
"groupings": ["geo_fence"],
"grouping_value": "/geo_features/1448150377",
"grouping_values": ["/geo_features/1448150377"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
17.533546325878596,
17.19327731092437,
13.44502617801047,
11.5010395010395,
9.649484536082475,
],
}
],
},
{
"grouping": "hour_of_day",
"groupings": ["hour_of_day"],
"grouping_value": "17",
"grouping_values": [17],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [17.533546325878596, None, None, None, None],
}
],
},
{
"grouping": "hour_of_day",
"groupings": ["hour_of_day"],
"grouping_value": "18",
"grouping_values": [18],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [None, 17.19327731092437, None, None, None],
}
],
},
{
"grouping": "hour_of_day",
"groupings": ["hour_of_day"],
"grouping_value": "19",
"grouping_values": [19],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [None, None, 13.44502617801047, None, None],
}
],
},
{
"grouping": "hour_of_day",
"groupings": ["hour_of_day"],
"grouping_value": "20",
"grouping_values": [20],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [None, None, None, 11.5010395010395, None],
}
],
},
{
"grouping": "hour_of_day",
"groupings": ["hour_of_day"],
"grouping_value": "21",
"grouping_values": [21],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [None, None, None, None, 9.649484536082475],
}
],
},
{
"grouping": "no_grouping",
"groupings": ["no_grouping"],
"grouping_value": "all",
"grouping_values": ["all"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
17.533546325878596,
17.19327731092437,
13.44502617801047,
11.5010395010395,
9.649484536082475,
],
}
],
},
{
"grouping": "provider",
"groupings": ["provider"],
"grouping_value": "/providers/12",
"grouping_values": ["/providers/12"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
15.849056603773583,
16.383561643835616,
13.254545454545454,
10.914027149321267,
10.232142857142858,
],
}
],
},
{
"grouping": "provider",
"groupings": ["provider"],
"grouping_value": "/providers/180",
"grouping_values": ["/providers/180"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
2.3736263736263736,
0.5454545454545454,
None,
None,
None,
],
}
],
},
{
"grouping": "provider",
"groupings": ["provider"],
"grouping_value": "/providers/19",
"grouping_values": ["/providers/19"],
"metrics": [
{
"metric_type": "vehicle_rotation",
"units": {
"numerators": [{"unit": "metric_count"}],
"denominators": [
{"unit": "vehicles"},
{"unit": "seconds", "value": 86400.0},
],
},
"values": [
22.471698113207548,
21.464788732394368,
16.394366197183096,
14.545454545454547,
11.1288056206089,
],
}
],
},
],
}

我想将此字典转换为pandas数据框,其中time_samples键中的时间戳作为一列,values键中的值(由grouping_value键分组)作为其他列(并以grouping_value作为列名),如下所示:

time_samples               5                   bicycle              ...
2022-10-21T15:00:00+00:00  17.533546325878596  2.3736263736263736   ...
2022-10-21T16:00:00+00:00  17.19327731092437   0.5454545454545454   ...
2022-10-21T17:00:00+00:00  13.44502617801047   None            ...
2022-10-21T18:00:00+00:00  11.5010395010395    None            ...
2022-10-21T19:00:00+00:00  9.649484536082475   None            ...

我开始如下,但挣扎如何进行。什么好主意吗?

df = pd.json_normalize(response_dict)
df['groupings'].explode().apply(pd.Series)

这是使用Pandas concat的一种方法:

import pandas as pd

df = pd.concat(
[
pd.DataFrame({"time_samples": my_dict["time_samples"]}),
pd.DataFrame(
{
grouping["grouping_value"]: grouping["metrics"][0]["values"]
for grouping in my_dict["groupings"]
}
),
],
axis=1,
)

:

print(df)
# Output
time_samples          5   bicycle      moped  
0  2022-10-21T15:00:00+00:00  17.533546  2.373626  19.160377   
1  2022-10-21T16:00:00+00:00  17.193277  0.545455  18.888889   
2  2022-10-21T17:00:00+00:00  13.445026       NaN  14.799076   
3  2022-10-21T18:00:00+00:00  11.501040       NaN  12.702641   
4  2022-10-21T19:00:00+00:00   9.649485       NaN  10.669714   
/geo_features/1448150377         17         18         19        20  
0                 17.533546  17.533546        NaN        NaN       NaN   
1                 17.193277        NaN  17.193277        NaN       NaN   
2                 13.445026        NaN        NaN  13.445026       NaN   
3                 11.501040        NaN        NaN        NaN  11.50104   
4                  9.649485        NaN        NaN        NaN       NaN   
21        all  /providers/12  /providers/180  /providers/19  
0       NaN  17.533546      15.849057        2.373626      22.471698  
1       NaN  17.193277      16.383562        0.545455      21.464789  
2       NaN  13.445026      13.254545             NaN      16.394366  
3       NaN  11.501040      10.914027             NaN      14.545455  
4  9.649485   9.649485      10.232143             NaN      11.128806 

相关内容

  • 没有找到相关文章

最新更新