按时间顺序查找DataFrame的最小值和最大值



我有一个pandas数据帧,在其中提取最小值和极值。到目前为止效果不错,但问题是我如何将它们按日期(按时间顺序(放入列表中?它们被分为两个列表,我只想要一个按时间顺序排列的价格值列表

import pandas as pd
import numpy as np
import yfinance
from scipy.signal import argrelextrema
import matplotlib.dates as mpl_dates

def extract_data():
ticker = 'GBPJPY=X'
ticker = yfinance.Ticker(ticker)
start_date = '2022-09-25'
end_date = '2022-10-08'
df = ticker.history(interval='1h', start=start_date, end=end_date)
df['Date'] = pd.to_datetime(df.index)
df['Date'] = df['Date'].apply(mpl_dates.date2num)
df = df.loc[:, ['Date', 'Open', 'High', 'Low', 'Close']]
# Call function to find Min-Max Extrema
find_extrema(df)
def find_extrema(df):
n = 10  # number of points to be checked before and after
# Find local peaks
df['min'] = df.iloc[argrelextrema(df.Close.values, np.less_equal,
order=n)[0]]['Close']
df['max'] = df.iloc[argrelextrema(df.Close.values, np.greater_equal,
order=n)[0]]['Close']
min_values_list = []
max_values_list = []
# Add min value to list
for item in df['min']:
check_NaN = np.isnan(item) # check if values is empty
if check_NaN == True:
pass
else:
min_values_list.append(item)
# Add max values to list
for item in df['max']:
check_NaN = np.isnan(item) # check if values is empty
if check_NaN == True:
pass
else:
max_values_list.append(item)
print(f"Min: {min_values_list}")
print(f"Max: {max_values_list}")

extract_data()

选项1

  • 首先,使用df.to_numpy将列minmax转换为np.array
  • 使用应用于布尔掩码(使用np.isnan创建(的np.logical_or从数组中进行选择,从而消除所有NaN值
arr = df[['min','max']].to_numpy()
value_list = arr[np.logical_not(np.isnan(arr))].tolist()
print(value_list)
[159.7030029296875,
154.8979949951172,
160.7830047607422,
165.43800354003906,
149.55799865722656,
162.80499267578125,
156.6529998779297,
164.31900024414062,
156.125,
153.13499450683594,
161.3520050048828,
156.9340057373047,
162.52200317382812,
155.7740020751953,
160.98500061035156,
161.83700561523438]

选项2

更麻烦:

n = 10
# get the indices for `min` and `max` in two arrays
_min = argrelextrema(df.Close.values, np.less_equal, order=n)[0]
_max = argrelextrema(df.Close.values, np.greater_equal, order=n)[0]
# create columns (assuming you need this for other purposes as well)
df['min'] = df.iloc[_min]['Close']
df['max'] = df.iloc[_max]['Close']
# create lists for `min` and `max`
min_values_list = df['min'].dropna().tolist()
max_values_list = df['max'].dropna().tolist()
# join the lists
value_list2 = min_values_list + max_values_list
value_idxs = _min.tolist() + _max.tolist()
# finally, sort `value_list2` based on `value_idxs`
value_list2 = [x for _, x in sorted(zip(value_idxs, value_list2))]
# check if result is the same:
value_list2 == value_list
# True

假设您有maxmin列,那么类似的内容呢?

df['max_or_min'] = np.where(df['max'].notna(), df['max'], df['min'])
min_max_values = df['max_or_min'].dropna().values.tolist()

最新更新