Function to generate Optuna grids given an sklearn pipeline



I'm using sklearn together with Optuna for hyperparameter optimization (HPO). I'd like to write a custom function that takes an sklearn pipeline as input and returns an Optuna-specific search space. Returning an sklearn-specific parameter grid (i.e. a dictionary) seems more straightforward (duh); this is what I have so far:

def grid_from_estimator(estimator, type = 'sklearn'):
    estimator_name = estimator.named_steps['estimator'].__class__.__name__
    if type == 'sklearn':
        if estimator_name == 'LogisticRegression':
            params = {
                'estimator__penalty': ['l1', 'elasticnet'],
                'estimator__C': np.logspace(-4, 4, 20)
            }
        elif estimator_name == 'LGBMClassifier':
            params = {
                'estimator__n_estimators': np.arange(100, 1000, 200),
                'estimator__boosting_type': ['gbdt', 'dart'],
                'estimator__max_depth': np.arange(6, 12),
                'estimator__num_leaves': np.arange(30, 150, 5),
                'estimator__learning_rate': [1e-2/2, 1e-2, 1e-1/2, 1e-1, 0.5, 1],
                'estimator__min_child_samples': np.arange(20, 100, 5),
                'estimator__subsample': np.arange(0.65, 1, 0.05),
                'estimator__colsample_bytree': np.arange(0.4, 0.75, 0.05),
                'estimator__reg_alpha': [0, 1e-1, 1, 2, 5, 7, 10, 50, 100],
                'estimator__reg_lambda': [0, 1e-1, 1, 5, 10, 20, 50, 100],
                'estimator__iterations': np.arange(100, 800, 100),
                'estimator__objective': 'binary'
            }
    elif type == 'optuna':
        if estimator_name == 'LogisticRegression':
            params = {
                'estimator__penalty': trial.suggest_categorical('penalty', ['l1', 'elasticnet']),
                'estimator__C': trial.suggest.suggest_loguniform('c', -4, 4)
            }
        elif estimator_name == 'LGBMClassifier':
            params = {
                'estimator__n_estimators': trial.suggest_int('n_estimators', 100, 1000),
                'estimator__boosting_type': trial.suggest_categorical('boosting_type', ['gbdt', 'dart']),
                'estimator__max_depth': trial.suggest_int('max_depth', 6, 12),
                'estimator__num_leaves': trial.suggest_int('num_leaves', 30, 150, 5),
                'estimator__learning_rate': trial.suggest_float('learning_rate', 1e-4, 1),
                'estimator__min_child_samples': trial.suggest_int('min_child_samples', 20, 100),
                'estimator__subsample': trial.suggest_float('subsample', 0.5, 1),
                'estimator__colsample_bytree': trial.suggest_float('colsample_bytree', 0.4, 0.75),
                'estimator__reg_alpha': trial.suggest_float('reg_alpha', 1e-2, 10),
                'estimator__reg_lambda': trial.suggest_float('reg_lambda', 1e-2, 10)
            }
    return params

The "trial.suggest_..." parts keep "complaining" and raise errors; while I understand why (there is no trial object available inside the function), I can't see a way around it. Is this even possible? Any ideas? Thanks for your support!

An example approach using Optuna's ask-and-tell interface.

Code

import optuna
import numpy as np

def optuna_objective(estimator_name, params):
    if estimator_name == 'LogisticRegression':
        x = params['x']
        y = params['y']
        return (x - 2) ** 2 + y
    if estimator_name == 'LGBMClassifier':
        # estimator__n_estimators = params['estimator__n_estimators']
        # return accuracy
        pass
    return None

def grid_from_estimator(estimator_name, type_='sklearn', study=None):
    params, trial = None, None
    if type_ == 'sklearn':
        if estimator_name == 'LogisticRegression':
            params = {
                'estimator__penalty': ['l1', 'elasticnet'],
                'estimator__C': np.logspace(-4, 4, 20)
            }
        elif estimator_name == 'LGBMClassifier':
            params = {
                'estimator__n_estimators': np.arange(100, 1000, 200),
                'estimator__boosting_type': ['gbdt', 'dart'],
                'estimator__max_depth': np.arange(6, 12),
                'estimator__num_leaves': np.arange(30, 150, 5),
                'estimator__learning_rate': [1e-2/2, 1e-2, 1e-1/2, 1e-1, 0.5, 1],
                'estimator__min_child_samples': np.arange(20, 100, 5),
                'estimator__subsample': np.arange(0.65, 1, 0.05),
                'estimator__colsample_bytree': np.arange(0.4, 0.75, 0.05),
                'estimator__reg_alpha': [0, 1e-1, 1, 2, 5, 7, 10, 50, 100],
                'estimator__reg_lambda': [0, 1e-1, 1, 5, 10, 20, 50, 100],
                'estimator__iterations': np.arange(100, 800, 100),
                'estimator__objective': 'binary'
            }
    elif type_ == 'optuna':
        trial = study.ask()
        if estimator_name == 'LogisticRegression':
            params = {
                'x': trial.suggest_float('x', -10, 10),
                'y': trial.suggest_float('y', -10, 10)
            }
            # params = {
            #     'estimator__penalty': trial.suggest_categorical('estimator__penalty', ['l1', 'elasticnet']),
            #     'estimator__C': trial.suggest_float('estimator__C', -4, 4)
            # }
        elif estimator_name == 'LGBMClassifier':
            params = {
                'estimator__n_estimators': trial.suggest_int('estimator__n_estimators', 100, 1000),
                'estimator__boosting_type': trial.suggest_categorical('estimator__boosting_type', ['gbdt', 'dart']),
                'estimator__max_depth': trial.suggest_int('estimator__max_depth', 6, 12),
                'estimator__num_leaves': trial.suggest_int('estimator__num_leaves', 30, 150, 5),
                'estimator__learning_rate': trial.suggest_float('estimator__learning_rate', 1e-4, 1),
                'estimator__min_child_samples': trial.suggest_int('estimator__min_child_samples', 20, 100),
                'estimator__subsample': trial.suggest_float('estimator__subsample', 0.5, 1),
                'estimator__colsample_bytree': trial.suggest_float('estimator__colsample_bytree', 0.4, 0.75),
                'estimator__reg_alpha': trial.suggest_float('estimator__reg_alpha', 1e-2, 10),
                'estimator__reg_lambda': trial.suggest_float('estimator__reg_lambda', 1e-2, 10)
            }
    return params, trial

# (1) sklearn example
print('SKLEARN')
estimator_name = 'LogisticRegression'
optimizer_type = 'sklearn'
params, _ = grid_from_estimator(estimator_name, type_=optimizer_type)
print(params)
print()

# (2) Optuna example with ask-and-tell interface.
print('OPTUNA')
study = optuna.create_study(direction='maximize')
n_trials = 10
estimator_name = 'LogisticRegression'
optimizer_type = 'optuna'
for _ in range(n_trials):
    params, trial = grid_from_estimator(estimator_name, type_=optimizer_type, study=study)
    objective_value = optuna_objective(estimator_name, params)
    study.tell(trial, objective_value)  # tell the pair of trial and objective value
    print(f'trialnum: {trial.number}, params: {params}, value: {objective_value}')

best_params = study.best_params
best_x = best_params["x"]
best_y = best_params["y"]
best_value = study.best_value
best_trial_num = study.best_trial.number
print(f"best x: {best_x}, best y: {best_y}, (x - 2)^2 + y: {(best_x - 2) ** 2 + best_y}, best_value: {best_value}, best_trial_num: {best_trial_num}")  # trial num starts at 0

Output

SKLEARN
{'estimator__penalty': ['l1', 'elasticnet'], 'estimator__C': array([1.00000000e-04, 2.63665090e-04, 6.95192796e-04, 1.83298071e-03,
4.83293024e-03, 1.27427499e-02, 3.35981829e-02, 8.85866790e-02,
2.33572147e-01, 6.15848211e-01, 1.62377674e+00, 4.28133240e+00,
1.12883789e+01, 2.97635144e+01, 7.84759970e+01, 2.06913808e+02,
5.45559478e+02, 1.43844989e+03, 3.79269019e+03, 1.00000000e+04])}
OPTUNA
[I 2021-11-25 19:03:09,673] A new study created in memory with name: no-name-f5046b21-f579-4c74-8046-79420c256d4a
trialnum: 0, params: {'x': 2.905894660287128, 'y': -4.537699327718261}, value: -3.7170541921815303
trialnum: 1, params: {'x': -9.275103438355583, 'y': -5.925000918692578}, value: 121.2029566269253
trialnum: 2, params: {'x': -2.9531168045205103, 'y': 5.253730464314739}, value: 29.78709654353821
trialnum: 3, params: {'x': 3.766902399344163, 'y': 3.778408673279479}, value: 6.900352762087639
trialnum: 4, params: {'x': -0.897563829823584, 'y': -0.887774211794973}, value: 7.508101936106943
trialnum: 5, params: {'x': -2.2256917634354645, 'y': 3.8017184220598903}, value: 21.658189301626216
trialnum: 6, params: {'x': -6.333366980619912, 'y': 9.87067058585388}, value: 79.3156758195401
trialnum: 7, params: {'x': 2.570258991787558, 'y': -0.1959178948625162}, value: 0.1292774228520457
trialnum: 8, params: {'x': 2.94430596072913, 'y': 4.318454050149043}, value: 5.210167797617609
trialnum: 9, params: {'x': 5.972023459737699, 'y': 4.165369460555215}, value: 19.942339825261854
best x: -9.275103438355583, best y: -5.925000918692578, (x - 2)^2 + y: 121.2029566269253, best_value: 121.2029566269253, best_trial_num: 1
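
To tie this back to the original question: the same ask-and-tell loop can drive a real pipeline instead of the toy (x - 2)^2 + y objective. A rough sketch is below; the pipeline, the make_classification toy data and the roc_auc scoring are my own assumptions (and it needs lightgbm installed), not part of the code above.

from lightgbm import LGBMClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_classification(n_samples=500, random_state=0)  # toy data, for illustration only

pipe = Pipeline([('scaler', StandardScaler()),
                 ('estimator', LGBMClassifier(objective='binary'))])

study = optuna.create_study(direction='maximize')
for _ in range(20):
    # grid_from_estimator is the function defined above; its keys already carry the 'estimator__' prefix
    params, trial = grid_from_estimator('LGBMClassifier', type_='optuna', study=study)
    pipe.set_params(**params)
    score = cross_val_score(pipe, X, y, cv=3, scoring='roc_auc').mean()
    study.tell(trial, score)  # report the cross-validated score for this trial
print(study.best_params)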

I think something along these lines should work:

def grid_from_estimator(estimator, trial, type = 'sklearn'):
    pass

def your_objective_function(trial):
    params = grid_from_estimator('LogisticRegression', trial, 'optuna')
    # Rest of the code here.

def tune_model():
    study = optuna.create_study()
    study.optimize(your_objective_function, n_trials=20)

tune_model()
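
Filling in that sketch, the objective could look roughly like this. Everything concrete here is an assumption on my part (the pipeline with an 'estimator' step, the make_classification toy data, the saga solver and accuracy scoring); the point is only how the trial object travels into grid_from_estimator and how the sampled values are applied with set_params:

import optuna
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = make_classification(n_samples=500, random_state=0)  # toy data, for illustration only

# Hypothetical pipeline; the step name 'estimator' matches the 'estimator__' parameter prefix.
pipe = Pipeline([('scaler', StandardScaler()),
                 ('estimator', LogisticRegression(solver='saga', max_iter=5000))])

def grid_from_estimator(estimator_name, trial, type_='optuna'):
    # Only the Optuna / LogisticRegression branch is sketched here.
    if type_ == 'optuna' and estimator_name == 'LogisticRegression':
        params = {
            'estimator__penalty': trial.suggest_categorical('estimator__penalty', ['l1', 'elasticnet']),
            'estimator__C': trial.suggest_float('estimator__C', 1e-4, 1e4, log=True),
        }
        if params['estimator__penalty'] == 'elasticnet':
            params['estimator__l1_ratio'] = trial.suggest_float('estimator__l1_ratio', 0.0, 1.0)
        return params

def your_objective_function(trial):
    params = grid_from_estimator('LogisticRegression', trial, 'optuna')
    pipe.set_params(**params)  # apply the sampled values to the pipeline steps
    return cross_val_score(pipe, X, y, cv=5, scoring='accuracy').mean()

study = optuna.create_study(direction='maximize')
study.optimize(your_objective_function, n_trials=20)
print(study.best_params)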
