使用numpy改进worldquant 101阿尔法因子的实现



直观地说,Pandas数据帧NumPy矩阵应该适合101个阿尔法因子的实现。以下是我迄今为止使用NumPy的最佳实现。但是,性能太低。在我的英特尔酷睿i7 windows机器上,用5000(交易日期(乘200(股票(矩阵作为输入,运行alpha#4因子大约需要45秒。



import numpy as np
def rankdata(a, method='average', *, axis=None):
# this rankdata refer to scipy.stats.rankdata (https://github.com/scipy/scipy/blob/v1.9.1/scipy/stats/_stats_py.py#L9047-L9153)
if method not in ('average', 'min', 'max', 'dense', 'ordinal'):
raise ValueError('unknown method "{0}"'.format(method))
if axis is not None:
a = np.asarray(a)
if a.size == 0:
np.core.multiarray.normalize_axis_index(axis, a.ndim)
dt = np.float64 if method == 'average' else np.int_
return np.empty(a.shape, dtype=dt)
return np.apply_along_axis(rankdata, axis, a, method)
arr = np.ravel(np.asarray(a))
algo = 'mergesort' if method == 'ordinal' else 'quicksort'
sorter = np.argsort(arr, kind=algo)
inv = np.empty(sorter.size, dtype=np.intp)
inv[sorter] = np.arange(sorter.size, dtype=np.intp)
if method == 'ordinal':
return inv + 1
arr = arr[sorter]
obs = np.r_[True, arr[1:] != arr[:-1]]
dense = obs.cumsum()[inv]
if method == 'dense':
return dense
# cumulative counts of each unique value
count = np.r_[np.nonzero(obs)[0], len(obs)]
if method == 'max':
return count[dense]
if method == 'min':
return count[dense - 1] + 1
# average method
return .5 * (count[dense] + count[dense - 1] + 1)
def rank(x):
return rankdata(x,method='min',axis=1)/np.size(x, 1)
def rolling_rank(na):
return rankdata(na.transpose(),method='min',axis=0)[-1].transpose()    
def ts_rank(x, window=10):
a_rolled = np.lib.stride_tricks.sliding_window_view(x, window,axis = 0)
return np.append(np.full([window-1,np.size(x, 1)],np.nan),rolling_rank(a_rolled),axis = 0)

def alpha004(data):
return -1 * ts_rank(rank(data), 9)
import time
# The input is a 5000 by 200 matrix, where the row index represents trade date and the column index represents security ID. 
data=np.random.random((5000, 200))
start_time = time.time()
print("--- %s seconds ---" % (time.time() - start_time))
--- 44.85099506378174 seconds ---


def WQAlpha4(low){
return -mrank(rowRank(low, percent=true), true, 9)
// The input is a 5000 by 200 matrix, where the row index represents trade date and the column index represents security ID.
low = rand(1000.0,5000:200);
timer WQAlpha4(low);
Time elapsed: 44.036 ms (0.044s)


return np.apply_along_axis(rankdata, axis, a, method)




def rank(x):
return (data.argsort(axis=1).argsort(axis=1) + 1) / np.size(x, 1)

def ts_rank(x, window=10):
a_rolled = np.lib.stride_tricks.sliding_window_view(x, window, axis = 0)
rolling_rank_fast = (a_rolled.argsort(axis=2).argsort(axis=2) + 1)[:, :, -1]
# Fill initial window - 1 rows with nan
initial_window = np.full([window-1,np.size(x, 1)],np.nan)
return np.append(initial_window,rolling_rank_fast,axis = 0)

def alpha004(data):
return -1 * ts_rank(rank(data), 9)

