Pandas JSON规范化并合并回表



我导入了JSON文件,并将其规范化到这一点。

Team    Goals
0   A   {'Time': '06:09', 'P': [{'Nr': 1}, {'Nr': 5}], 'Nr': 2, 'Scored': 'Game'}
0   A   {'Time': '11:07', 'P': [{'Nr': 2}, {'Nr': 3}, {'Nr': 1}], 'Nr': 7, 'Scored': 'Game'}
1   B   {'Time': '20:04', 'P': [{'Nr': 9}, {'Nr': 2}], 'Nr': 4, 'Scored': 'Game'}
1   B   {'Time': '35:38', 'P': [{'Nr': 3}, {'Nr': 8}, {'Nr': 4}], 'Nr': 4, 'Scored': 'Game'}
1   B   {'Time': '63:21', 'P': {'Nr': 4}, 'Nr': 9, 'Scored': 'Penalty'}

这就是我想要实现的:

Team    Goals.Time  Goals.P1    Goals.P2    Goals.P3    Goals.Nr    Goals.Scored
0   A       '06:09'         1           5           NA          2           'Game'
0   A       '11:07'         2           3           1           7           'Game'
1   B       '20:04'         9           2           NA          4           'Game'
1   B       '35:38'         3           8           4           4           'Game'
1   B       '63:21'         4           NA          NA          9           'Penalty'

我尝试过pandasjson_normalize,但它省略了Team列。

import pandas as pd
pd.json_normalize(df['Goals'])
Time                                      P  Nr Scored  P.Nr
0  06:09             [{'Nr': 1}, {'Nr': 5}]  2  'Game'   NaN
1  11:07  [{'Nr': 2}, {'Nr': 3}, {'Nr': 1}]  7  'Game'   NaN
2  20:04             [{'Nr': 9}, {'Nr': 2}]  4  'Game'   NaN
3  35:38  [{'Nr': 3}, {'Nr': 8}, {'Nr': 4}]  4  'Game'   NaN
4  63:21                                NaN  9  'Game'  34.0

有没有一种方法可以规范化并保留团队列之后不可能合并它们,因为索引不同,而且它们没有公共列

df['Team'] 
0       A
0       A
1       B
1       B
1       B
# save df as [dict, ... ,dict] format
data = df.to_dict(orient='record')
dfn = pd.json_normalize(data)
print(dfn)
#   Team Goals.Time                            Goals.P  Goals.Nr Goals.Scored  
# 0    A      06:09             [{'Nr': 1}, {'Nr': 5}]         2         Game   
# 1    A      11:07  [{'Nr': 2}, {'Nr': 3}, {'Nr': 1}]         7         Game   
# 2    B      20:04             [{'Nr': 9}, {'Nr': 2}]         4         Game   
# 3    B      35:38  [{'Nr': 3}, {'Nr': 8}, {'Nr': 4}]         4         Game   
# 4    B      63:21                                NaN         9      Penalty   
#    Goals.P.Nr  
# 0         NaN  
# 1         NaN  
# 2         NaN  
# 3         NaN  
# 4         4.0  
# handle Goals.P is null
cond = dfn['Goals.P'].isnull()
dfn.loc[cond, 'Goals.P'] = dfn.loc[cond, 'Goals.P.Nr'].map(lambda x: [{'Nr':x}])
print(dfn)
#  Team Goals.Time                            Goals.P  Goals.Nr Goals.Scored  
# 0    A      06:09             [{'Nr': 1}, {'Nr': 5}]         2         Game   
# 1    A      11:07  [{'Nr': 2}, {'Nr': 3}, {'Nr': 1}]         7         Game   
# 2    B      20:04             [{'Nr': 9}, {'Nr': 2}]         4         Game   
# 3    B      35:38  [{'Nr': 3}, {'Nr': 8}, {'Nr': 4}]         4         Game   
# 4    B      63:21                      [{'Nr': 4.0}]         9      Penalty   
#    Goals.P.Nr  
# 0         NaN  
# 1         NaN  
# 2         NaN  
# 3         NaN  
# 4         4.0

def expand_col(Ser, prefix=''):
"""pandas expand list column
"""
obj = Ser.explode().reset_index()
obj['cnt'] = 1
obj['tag'] = obj.groupby('index')['cnt'].cumsum()
df_obj = obj.set_index(['index', 'tag'])['Goals.P'].map(lambda x: x.get('Nr')).unstack()
df_obj.columns.name = None
df_obj.index.name = None
df_obj.columns = prefix + df_obj.columns.astype(str)
df_obj.index = Ser.index
return df_obj
df_obj = expand_col(dfn['Goals.P'], prefix='Goals.P')
print(df_obj)
#   Goals.P1  Goals.P2  Goals.P3
# 0       1.0       5.0       NaN
# 1       2.0       3.0       1.0
# 2       9.0       2.0       NaN
# 3       3.0       8.0       4.0
# 4       4.0       NaN       NaN
df_result = pd.concat([dfn, df_obj], axis=1)
print(df_result)
#   Team Goals.Time                            Goals.P  Goals.Nr Goals.Scored  
# 0    A      06:09             [{'Nr': 1}, {'Nr': 5}]         2         Game   
# 1    A      11:07  [{'Nr': 2}, {'Nr': 3}, {'Nr': 1}]         7         Game   
# 2    B      20:04             [{'Nr': 9}, {'Nr': 2}]         4         Game   
# 3    B      35:38  [{'Nr': 3}, {'Nr': 8}, {'Nr': 4}]         4         Game   
# 4    B      63:21                      [{'Nr': 4.0}]         9      Penalty   
#    Goals.P.Nr  Goals.P1  Goals.P2  Goals.P3  
# 0         NaN       1.0       5.0       NaN  
# 1         NaN       2.0       3.0       1.0  
# 2         NaN       9.0       2.0       NaN  
# 3         NaN       3.0       8.0       4.0  
# 4         4.0       4.0       NaN       NaN  

最新更新