[{"id" 1、"email":"email1"},{"id" 2,"email":"email2"}] 3600
[{"id" 2,"email":"email2"},{"id" 3,"email":"email3"}] 7200
我有这个数据框架:
explode
'参加者'转换为单独的行,然后用json_normalize
转换为列,用groupby.agg
聚合数据并转换to_dict
:
out = (df
.explode('attendees', ignore_index=True)
.pipe(lambda d: d.join(pd.json_normalize(d.pop('attendees'))))
.groupby('email')
.agg(**{'num_events_with': ('email', 'count'),
'duration_of_events': ('duration', 'sum')
})
.to_dict(orient='index')
)
输出:
{'email1': {'num_events_with': 1, 'duration_of_events': 3600},
'email2': {'num_events_with': 2, 'duration_of_events': 10800},
'email3': {'num_events_with': 1, 'duration_of_events': 7200}}
示例
col = ['event', 'attendees', 'duration']
data = [['meeting', [{"id":1, "email": "email1"}, {"id":2, "email": "email2"}], 3600], ['lunch', [{"id":2, "email": "email2"}, {"id":3, "email": "email3"}],7200]]
df = pd.DataFrame(data, columns=col)
代码df1 = df.explode('attendees')
grouper = df1['attendees'].str['email']
col1 = ['num_events_with', 'duration_of_events']
out = (df1.groupby(grouper)['duration'].agg(['count', sum]).T.set_axis(col1).to_dict())
out
:
{'email1': {'num_events_with': 1, 'duration_of_events': 3600},
'email2': {'num_events_with': 2, 'duration_of_events': 10800},
'email3': {'num_events_with': 1, 'duration_of_events': 7200}}
如果需要一行,请使用
(df.explode('attendees').assign(attendees=lambda x:x['attendees'].str['email'])
.groupby('attendees')['duration'].agg(['count',sum])
.T.set_axis(['num_events_with', 'duration_of_events']).to_dict())