聚合多列Pandas

  • 本文关键字:Pandas python pandas csv
  • 更新时间 :
  • 英文 :


目前我的csv是这样的:

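| title | field1 | field2 | field3 | field4 |
|-------|--------|--------|--------|--------|
|       | A1     | A11    | 553    | 0      |
|       | A1     | A12    | 94     | 0      |
|       | A1     | A13    | 30     | 0      |
|       | A1     | {n/a}  | 0      | 9586   |
|       | A2     | A21    | 200    | 0      |
|       | A2     | {n/a}  | 0      | 3950   |
|       | A3     | A31    | 35     | 0      |
|       | A3     | {n/a}  | 0      | 2929   |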

可以使用如下代码:

def fun(df, cols_to_aggregate, cols_order):
    """Aggregate per (field1, field2) and broadcast the field4 total per field1.

    Args:
        df: DataFrame containing at least ``field1``, ``field2`` and the
            columns named in ``cols_to_aggregate``.
        cols_to_aggregate: Mapping of column name to aggregation, passed
            straight to ``GroupBy.agg``.
        cols_order: Column labels (in order) to keep in the result.

    Returns:
        A new DataFrame with one row per (field1, field2) pair, a constant
        ``title`` column set to ``'A'``, and ``field4`` replaced by the sum of
        the aggregated ``field4`` values within each ``field1`` group.
    """
    # Parenthesised chain: the original split this call across two lines
    # without a continuation, which is a syntax error.
    grouped = (
        df.groupby(['field1', 'field2'], as_index=False)
        .agg(cols_to_aggregate)
    )
    grouped['title'] = 'A'
    # Aggregate field4 into a column shared by every row of the same field1.
    grouped['field4'] = grouped.groupby('field1')['field4'].transform('sum')
    return grouped[cols_order]

def create_csv(df, month_date):
    """Build the aggregated report frame by delegating to ``fun``.

    Args:
        df: Input DataFrame, passed through to ``fun`` unchanged.
        month_date: Accepted for interface compatibility; not used here.

    Returns:
        Whatever ``fun`` returns for the 'sum' aggregation of ``field3`` and
        ``field4``, with columns ordered title, field1, field2, field3, field4.
    """
    cols_to_aggregate = {'field3': 'sum', 'field4': 'sum'}
    # 'field4' is included in the output column order.
    cols_order = ['title', 'field1', 'field2', 'field3', 'field4']
    return fun(df, cols_to_aggregate, cols_order)
# Hide the '{n/a}' placeholder rows when displaying the aggregated result.
print(
    create_csv(df, '2015-01')
    .loc[lambda frame: frame['field2'] != '{n/a}']
)
  title field1 field2  field3  field4
0     A     A1    A11     553    9586
1     A     A1    A12      94    9586
2     A     A1    A13      30    9586
4     A     A2    A21     200    3950
6     A     A3    A31      35    2929

或者,如果需要每个 field1 分组中的第一个非 0 值,可以使用:

def fun(df, cols_to_aggregate, cols_order):
    """Aggregate per (field1, field2) and broadcast the first field4 per field1.

    Args:
        df: DataFrame containing at least ``field1``, ``field2`` and the
            columns named in ``cols_to_aggregate``.
        cols_to_aggregate: Mapping of column name to aggregation, passed
            straight to ``GroupBy.agg``.
        cols_order: Column labels (in order) to keep in the result.

    Returns:
        A new DataFrame with one row per (field1, field2) pair, a constant
        ``title`` column set to ``'A'``, and ``field4`` replaced by the result
        of ``transform('first')`` over the aggregated ``field4`` values within
        each ``field1`` group.
    """
    # Parenthesised chain: the original split this call across two lines
    # without a continuation, which is a syntax error.
    grouped = (
        df.groupby(['field1', 'field2'], as_index=False)
        .agg(cols_to_aggregate)
    )
    grouped['title'] = 'A'
    # Share one field4 value across every row of the same field1.
    grouped['field4'] = grouped.groupby('field1')['field4'].transform('first')
    return grouped[cols_order]

def create_csv(df, month_date):
    """Build the aggregated report frame by delegating to ``fun``.

    Args:
        df: Input DataFrame, passed through to ``fun`` unchanged.
        month_date: Accepted for interface compatibility; not used here.

    Returns:
        Whatever ``fun`` returns when ``field3`` is summed and ``field4`` is
        aggregated with 'first', with columns ordered title, field1, field2,
        field3, field4.
    """
    cols_to_aggregate = {'field3': 'sum', 'field4': 'first'}
    cols_order = ['title', 'field1', 'field2', 'field3', 'field4']
    return fun(df, cols_to_aggregate, cols_order)
# Replace 0 in field4 with NaN before aggregating, then hide the '{n/a}'
# placeholder rows when displaying the result.
print(
    create_csv(df.replace({'field4': {0: np.nan}}), '2015-01')
    .loc[lambda frame: frame['field2'] != '{n/a}']
)
  title field1 field2  field3  field4
0     A     A1    A11     553  9586.0
1     A     A1    A12      94  9586.0
2     A     A1    A13      30  9586.0
4     A     A2    A21     200  3950.0
6     A     A3    A31      35  2929.0

相关内容

  • 没有找到相关文章

最新更新