我们可以使用RandomizedSearchCV
得到模型的最佳参数。
def test_model():
    """Run a randomized hyper-parameter search for Ridge and print the best result.

    Relies on the module-level `space` (parameter distributions) and `cv`
    (RepeatedKFold splitter).
    """
    # Bug fix: the original called make_friedman1(), which is undefined here;
    # the data helper defined in this file is generate_friedman1().
    X_train, X_test, y_train, y_test = generate_friedman1()
    model = Ridge()
    search = RandomizedSearchCV(model, space, n_iter=500,
                                scoring='neg_mean_absolute_error',
                                n_jobs=-1, cv=cv)
    result = search.fit(X_train, y_train)
    print('Best Score: %s' % result.best_score_)
    print('Best Hyperparameters: %s' % result.best_params_)
现在,我试图使用 X_test 数据获得每种不同参数组合的测试分数(即 MSE 和 R2)。
def test_model():
    """Fit the randomized search on the training data and return a one-row
    DataFrame with the test-set MSE and R2 of the refit best estimator.

    Relies on the module-level `space` and `cv`.
    """
    # Bug fix: make_friedman1() is undefined; the helper is generate_friedman1().
    X_train, X_test, y_train, y_test = generate_friedman1()
    result_dfs = []
    model = Ridge()
    search = RandomizedSearchCV(model, space, n_iter=500,
                                scoring='neg_mean_absolute_error',
                                n_jobs=-1, cv=cv)
    result = search.fit(X_train, y_train)
    print('Best Score: %s' % result.best_score_)
    print('Best Hyperparameters: %s' % result.best_params_)
    # Bug fix: the original called search.fit(...) a second time just to
    # predict, which reruns the entire 500-candidate search. The fitted
    # search already exposes the refit best estimator via predict().
    test_result = search.predict(X_test)
    fold_df = pd.DataFrame()
    fold_df["MSE"] = [mean_squared_error(y_test, test_result)]
    fold_df["R2"] = [r2_score(y_test, test_result)]
    result_dfs.append(fold_df)
    rep_df = pd.concat(result_dfs, axis=0, ignore_index=True)
    return rep_df
我得到的输出是
Best Score: -0.495580216817403
Best Hyperparameters: {'alpha': 28.590361345568553, 'fit_intercept': False, 'normalize': True, 'solver': 'cholesky'}
MSE R2
0 0.460333 0.504366
但是我想从参数空间 space 中获得所有不同参数配置的测试分数,并将它们保存在一个 DataFrame 中。由于 n_iter=500,我有 500 个参数设置组合。我想将下面一行中的这些参数用于 fit 和 predict。最后,我将为每个不同的参数组合得到 500 组 MSE 和 R2。
test_result = search.fit(X_train, y_train).predict(X_test)
你能告诉我如何使用RandomizedSearchCV
获得每个不同参数组合的所有测试分数吗?
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import pandas as pd
from scipy.stats import loguniform
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import RandomizedSearchCV
# Hyper-parameter distributions sampled by RandomizedSearchCV:
# discrete choices are given as lists, `alpha` as a log-uniform distribution.
space = {
    'solver': ['svd', 'cholesky', 'lsqr', 'sag'],
    'alpha': loguniform(1e-5, 100),
    'fit_intercept': [True, False],
    'normalize': [True, False],
}
# 5-fold cross-validation repeated 3 times -> 15 evaluations per candidate.
cv = RepeatedKFold(n_splits=5, n_repeats=3)
def generate_friedman1():
    """Build a Friedman #1 regression problem and return a 70/30 split.

    Returns (X_train, X_test, y_train, y_test), i.e. exactly the tuple
    produced by train_test_split.
    """
    X, y = datasets.make_friedman1(n_samples=300)
    return train_test_split(X, y, test_size=0.3)
def test_model():
    """Run the randomized hyper-parameter search, print the best candidate,
    and return a one-row DataFrame with the test-set MSE and R2 of the
    refit best estimator.

    Relies on the module-level `space` (parameter distributions) and `cv`
    (RepeatedKFold splitter).
    """
    # Bug fix: the original called make_friedman1(), which does not exist;
    # the data helper defined above is generate_friedman1().
    X_train, X_test, y_train, y_test = generate_friedman1()
    result_dfs = []
    model = Ridge()
    search = RandomizedSearchCV(model, space, n_iter=500,
                                scoring='neg_mean_absolute_error',
                                n_jobs=-1, cv=cv)
    result = search.fit(X_train, y_train)
    print('Best Score: %s' % result.best_score_)
    print('Best Hyperparameters: %s' % result.best_params_)
    # Bug fix: do not call search.fit(...) a second time -- that reruns the
    # whole 500-candidate search. The fitted search predicts with its refit
    # best estimator.
    test_result = search.predict(X_test)
    fold_df = pd.DataFrame()
    # Consistency fix: every other snippet (and the sample output shown in
    # this document) uses the column name "MSE"; the original here had "MSqE".
    fold_df["MSE"] = [mean_squared_error(y_test, test_result)]
    fold_df["R2"] = [r2_score(y_test, test_result)]
    result_dfs.append(fold_df)
    rep_df = pd.concat(result_dfs, axis=0, ignore_index=True)
    return rep_df
# Script entry point: run the search and print the resulting metrics DataFrame.
if __name__ == "__main__":
    print(test_model())
您可以将所有参数保存在一个变量
all_param_combination = search.cv_results_['params']
然后你可以使用一个循环到fit
和predict
使用模型
# Bug fix: fold_dfs was used without ever being initialized, which raises
# NameError on the first iteration.
fold_dfs = []
# Refit a fresh Ridge for each sampled parameter dict and score it on the
# held-out test set.
for params in all_param_combination:
    reg_preds = Ridge(**params).fit(X_train, y_train).predict(X_test)
    fold_df = pd.DataFrame()
    fold_df["MSE"] = [mean_squared_error(y_test, reg_preds)]
    fold_df["R2"] = [r2_score(y_test, reg_preds)]
    fold_dfs.append(fold_df)
# One row per parameter combination (500 rows for n_iter=500).
rep_df = pd.concat(fold_dfs, axis=0, ignore_index=True)
属性.cv_results_
将具有每个cv折叠和每个参数测试的结果。例如,search.cv_results_['params']
将保存随机搜索中测试的所有值的字典,search.cv_results_['split0_test_score']
将保存split0的分数。
如果您需要进一步的帮助,请指定您想要看到的DataFrame的列,如果需要,我可以协助!