假设我有一个 pandas DataFrame,如下所示:
lst = [45.45454545454545, 45.45454545454545, 45.45454545454545, 45.45454545454545, 45.45454545454545, 36.36363636363637, 36.36363636363637, 36.36363636363637, 27.27272727272727, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 27.27272727272727, 0.0, 0.0, 27.27272727272727, 0.0, 0.0, 0.0, 0.0, 27.27272727272727, 0.0, 0.0, 0.0, 36.36363636363637, 0.0, 27.27272727272727, 0.0, 27.27272727272727, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 27.27272727272727, 27.27272727272727, 54.54545454545454, 27.27272727272727, 36.36363636363637, 36.36363636363637, 54.54545454545454, 36.36363636363637, 45.45454545454545, 45.45454545454545, 36.36363636363637, 36.36363636363637, 45.45454545454545, 45.45454545454545, 36.36363636363637, 45.45454545454545, 36.36363636363637, 45.45454545454545, 36.36363636363637, 45.45454545454545, 36.36363636363637, 36.36363636363637, 36.36363636363637, 0.0, 36.36363636363637, 27.27272727272727, 0.0, 36.36363636363637, 0.0, 36.36363636363637, 36.36363636363637, 0.0, 0.0, 27.27272727272727, 0.0, 36.36363636363637, 0.0, 0.0, 0.0, 0.0, 36.36363636363637, 36.36363636363637, 0.0, 36.36363636363637, 36.36363636363637, 27.27272727272727, 27.27272727272727, 36.36363636363637, 36.36363636363637, 36.36363636363637, 36.36363636363637, 0.0, 27.27272727272727, 0.0, 0.0, 0.0, 27.27272727272727, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 27.27272727272727, 36.36363636363637, 0.0, 0.0, 0.0, 0.0, 0.0]
# Build a single-column frame from the sample values and label the index axis,
# which is used as the time axis throughout the examples.
df = pd.DataFrame({'%': lst}).rename_axis('Time/ps')
df
现在,我想知道"%"列第一次降到 0.0 是在什么时间,但前提是 0.0 至少连续出现 5 行。我尝试用下面这段代码来实现,它只是部分有效:
# Print every run of consecutive zero rows.
# The cumulative count of non-zero rows stays constant across a run of zeros,
# so it serves as a group label for the zero rows that share one run.
for k, v in df[df['%'] == 0.0].groupby((df['%'] != 0.0).cumsum()):
    print(f'[group {k}]')
    print(v)
    print('\n')  # blank separator between groups
然而,问题是,我不知道如何找出"%"列中 0.0 至少连续出现 5 行的位置。这段代码会打印出所有出现 0.0 的分组,我可以滚动查看,但我希望能自动完成。我想要的输出类似这样:Time/ps: 9
谢谢你的建议
一个简单直接的做法是把你的代码改成这样:
# Walk the runs of consecutive zeros in order and stop at the first one that
# is at least five rows long.
for k, v in df[df['%'] == 0.0].groupby((df['%'] != 0.0).cumsum()):
    if len(v) >= 5:  # "at least 5" includes a run of exactly five rows
        # k is only the group label (count of non-zero rows seen so far);
        # the time at which the run starts is the first index label of the group.
        print("Time/ps:", v.index[0])
        break
一个更好的方式可能是这样的:
# Keep every zero row that belongs to a run of at least five consecutive zeros.
# The comparison is `>= 5` (not `> 5`) so that a run of exactly five rows is
# retained too; in the sample data this also keeps the final run, rows 109-113.
df[df['%'] == 0.0].groupby((df['%'] != 0.0).cumsum()).filter(lambda x: len(x) >= 5)
我沿用了你的分组代码,然后用 filter 过滤掉长度小于 5 的组。
这给出了这个数据帧:
Time/ps
9 0.0
10 0.0
11 0.0
12 0.0
13 0.0
14 0.0
15 0.0
16 0.0
17 0.0
35 0.0
36 0.0
37 0.0
38 0.0
39 0.0
40 0.0
41 0.0
99 0.0
100 0.0
101 0.0
102 0.0
103 0.0
104 0.0
105 0.0
106 0.0
你说你想要第一次出现:
# Keep only the zero rows that belong to a run of at least five consecutive
# zeros (`>= 5`, so a run of exactly five counts), then take the index label
# of the very first surviving row.
zero_runs = df[df['%'] == 0.0].groupby((df['%'] != 0.0).cumsum())
index = zero_runs.filter(lambda run: len(run) >= 5).index[0]
print('Time/ps:', index)
# Time/ps: 9
如果所有值都是非负的,则可以计算长度为 5 的滚动窗口之和,并使用 argmin
找到第一个和为零的窗口:
k = 5
# Sum over each trailing window of k rows (the first k - 1 entries are NaN).
window_sums = df['%'].rolling(k).sum()
# Position of the first minimal window sum; this marks the END of that window.
window_end = window_sums.argmin()
# NOTE(review): if no window sums to zero, this still returns the start of the
# smallest-sum window rather than signalling "not found" - confirm acceptable.
df.index[window_end - k + 1]
(如有负值,需先调用 .abs(),再调用 rolling()。)
import pandas as pd
import numpy as np
number = 0.0  # value whose run is searched for (e.g. 45.45454545454545)
run_length = 5  # minimum number of consecutive rows that must equal `number`
df = pd.DataFrame(lst, columns=['p'])  # 'p' is a more convenient column name
# Sum of the absolute differences between each row and each of the previous
# run_length - 1 rows; it is exactly 0.0 only when all of them hold the same
# value as the current row.
df['dif'] = sum(np.abs(df.p.diff(lag)) for lag in range(1, run_length))
positions = df.index[(df.p == number) & (df.dif == 0.0)]
# positions[0] is the LAST row of the first qualifying run, so step back to its
# start. This relies on the default integer index created above and raises
# IndexError when no such run exists.
first_index = positions[0] - (run_length - 1)