我有一个 pandas DataFrame,如下表所示。
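| racer     | race_time_1 | race_time_2 | 1st_Place | 2nd_Place | … |
|-----------|-------------|-------------|-----------|-----------|---|
| joe shmo  | 0:24:12     | NaN         | 1         | 0         | … |
| joe shmo  | NaN         | 0:32:43     | 0         | 0         | … |
| joe shmo  | NaN         | 0:30:21     | 0         | 1         | … |
| sally sue | NaN         | 0:29:54     | 1         | 0         | … |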
可以使用以下代码:
def f(x):
    """Format a duration as a zero-padded ``HH:MM:SS`` string.

    Args:
        x: Any object exposing ``total_seconds()`` (for example a
            ``datetime.timedelta`` or ``pandas.Timedelta``).

    Returns:
        The duration as ``HH:MM:SS``. Fractional seconds are truncated.
        Hours are not wrapped at 24, so a 26-hour duration is rendered as
        ``26:00:00``. Negative durations are rendered with a leading ``-``
        followed by the formatted magnitude.
    """
    total = int(x.total_seconds())
    # Format the magnitude and prepend the sign: floor-based divmod on a
    # negative number would otherwise yield e.g. "-1:59:55" for -5 seconds.
    sign = "-" if total < 0 else ""
    hours, remainder = divmod(abs(total), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{sign}{hours:02d}:{minutes:02d}:{seconds:02d}"
# Cast every column whose name contains 'Place' to a numeric dtype.
cols1 = df.filter(like='Place').columns
df[cols1] = df[cols1].apply(pd.to_numeric)

# Parse every column whose name contains 'time' as a timedelta (missing
# entries become a zero duration), then store the values as int64 so they
# can be summed like ordinary numbers.
cols = df.filter(like='time').columns
df[cols] = (
    df[cols]
    .fillna('0')
    .apply(pd.to_timedelta)
    .astype(np.int64)
)

# Sum all columns per racer; rows with a missing racer key are dropped.
df = (
    df.groupby('racer', dropna=True)
    .sum()
)

# Turn the summed integers back into timedeltas and render each with f().
df[cols] = df[cols].apply(
    lambda column: pd.to_timedelta(column).map(f)
)
print(df)
race_time_1 race_time_2 1st_Place 2nd_Place
racer
joe shmo 00:24:12 01:03:04 1 1
sally sue 00:00:00 00:29:54 1 0