time      value
10:00:00  6
10:00:00  5
10:00:00  2
10:00:00  4
10:00:01  6
10:00:01  2
10:00:01  9
10:00:04  4
10:00:04  5
10:00:04  1
我有一个 CSV 文件,看起来像这样(大约 400000 行;从 10 点到晚上 19 点每秒都有记录,并且同一个时间戳会重复出现很多次)。
如果我理解正确(IIUC),可以这样做:
import pandas as pd
def _hour_and_min(times):
    """Return the 'HH:MM' part of a Series of 'HH:MM:SS' strings."""
    return pd.to_datetime(times, format='%H:%M:%S').dt.strftime("%H:%M")


# Sample input: repeated 'HH:MM:SS' timestamps, each with an integer value.
df = pd.DataFrame(
    [
        ['10:00:00', 6],
        ['10:00:00', 5],
        ['10:00:00', 2],
        ['10:00:00', 4],
        ['10:00:01', 6],
        ['10:00:01', 2],
        ['10:00:01', 9],
        ['10:00:04', 4],
        ['10:00:04', 5],
        ['10:00:04', 1],
        ['10:01:00', 1],
    ],
    columns=['time', 'value'],
)

# Baseline per minute: the first value among the rows stamped at second :00.
on_the_minute = pd.to_datetime(df.time, format='%H:%M:%S').dt.second == 0
data = df[on_the_minute].groupby('time').agg("first").reset_index()
data["hour_and_min"] = _hour_and_min(data.time)

# Last value recorded for every distinct timestamp.
res = df.groupby('time').agg("last").reset_index()
res["hour_and_min"] = _hour_and_min(res.time)

# Pair each timestamp with the baseline of its own minute.
# After the merge, the *_x columns come from `res` and *_y from `data`.
merged = pd.merge(res, data, on="hour_and_min")

# Column-wise arithmetic and string concatenation replace a row-by-row
# `apply(axis=1)`, which runs a Python-level call for every row of the
# merged frame and becomes slow on large inputs.
result = pd.DataFrame({
    'value': merged.value_x - merged.value_y,
    'time': merged.time_y + "-" + merged.time_x,
})

# Bare expression kept so the frame is still displayed when this runs as the
# last statement of a notebook cell / REPL session.
result
输出:
value time
0 -2 10:00:00-10:00:00
1 3 10:00:00-10:00:01
2 -5 10:00:00-10:00:04