我正在尝试从 DataFrame(df)中获取列名,并将它们与 `spearmanr` 相关函数返回的结果数组对应起来。我需要让列名(a–j)与相关系数(`spearman`)和 p 值(`spearman_pvalue`)一一对应地一起返回。有没有直观的方法来完成这个任务?
from scipy.stats import pearsonr,spearmanr
import numpy as np
import pandas as pd
# Demo data: 100 rows x 10 columns of random integers in [0, 100),
# with the columns labelled 'a' through 'j'.
df = pd.DataFrame(
    data=np.random.randint(low=0, high=100, size=(100, 10)),
    columns=list('abcdefghij'),
)
def binary(row):
    """Return 1 when ``row`` is at least 50, otherwise 0.

    The function is applied element-wise to a single column via
    ``Series.apply``, so despite its name ``row`` is one scalar value.
    """
    return 1 if row >= 50 else 0
# Derive a 0/1 target from column 'a', then run spearmanr on the feature
# columns together with that target and show both returned arrays.
df['target'] = df['a'].apply(binary)
features = df.drop(columns=['target'])
spearman, spearman_pvalue = spearmanr(features, df['target'])
for result in (spearman, spearman_pvalue):
    print(result)
看来您需要:
from scipy.stats import spearmanr
# Demo data: 100 rows x 10 columns of random integers in [0, 100),
# with the columns labelled 'a' through 'j'.
df = pd.DataFrame(np.random.randint(0, 100, size=(100, 10)),
                  columns=list('abcdefghij'))

# Vectorized 0/1 flag for column 'a'; faster than apply() with a Python
# function.
df['target'] = (df['a'] >= 50).astype(int)

# The feature columns and the target are correlated together, so both
# results are square matrices with one row/column per column of df.
spearman, spearman_pvalue = spearmanr(df.drop(['target'], axis=1), df.target)

# Take the matrix width from df rather than hard-coding 11, so the snippet
# keeps working when columns are added or removed.
n_vars = len(df.columns)
df1 = pd.DataFrame(spearman.reshape(-1, n_vars), columns=df.columns)
df2 = pd.DataFrame(spearman_pvalue.reshape(-1, n_vars), columns=df.columns)

# Label the rows with the same names as the columns. Assigning the index
# directly avoids set_index(), whose argument is normally read as
# "column labels to move into the index".
df1.index = df.columns
df2.index = df.columns
或:
# Alternative: label rows and columns in one step while building each frame.
# The matrix width is taken from df instead of the hard-coded 11, so this
# still works when the number of columns changes.
df1 = pd.DataFrame(spearman.reshape(-1, len(df.columns)),
                   columns=df.columns,
                   index=df.columns)
df2 = pd.DataFrame(spearman_pvalue.reshape(-1, len(df.columns)),
                   columns=df.columns,
                   index=df.columns)