I am trying to use lgbm and optuna for a classification task.
Here is my model:
from optuna.integration import LightGBMPruningCallback
import optuna.integration.lightgbm as lgbm
import optuna

def objective(trial, X_train, y_train, X_test, y_test):
    param_grid = {
        # "device_type": trial.suggest_categorical("device_type", ['gpu']),
        "n_estimators": trial.suggest_categorical("n_estimators", [10000]),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 20, 3000, step=20),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 100, 10000, step=1000),
        "lambda_l1": trial.suggest_int("lambda_l1", 0, 100, step=5),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.2, 0.95, step=0.1),
        "bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.2, 0.95, step=0.1),
        "max_features": trial.suggest_categorical("max_features", choices=["auto", "sqrt", "log2"]),
        "n_jobs": -1,
        "random_state": 1121218,
    }
    model = lgbm.LGBMClassifier(objective="multiclass", **param_grid)
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_test, y_test)],
        eval_metric="multi_logloss",
        early_stopping_rounds=5,
        callbacks=[
            LightGBMPruningCallback(trial, "multi_logloss")
        ],  # Add a pruning callback
    )
    preds = model.predict_proba(X_test)
    return preds, model
Then I call the model:
%%time
study = optuna.create_study(direction="maximize", study_name="LGBM Classifier")
func = lambda trial: objective(trial, X_train, y_train, X_test, y_test)
preds, model = study.optimize(func, n_trials=100)
But I get the following error:
RuntimeError: scikit-learn estimators should always specify their parameters in the signature of their __init__ (no varargs).
<class 'optuna.integration._lightgbm_tuner.sklearn.LGBMClassifier'> with constructor (self, *args:Any, **kwargs:Any) -> None doesn't follow this convention.
I understand the error, but I am not sure what the correct way to do what I want is.
To fix this, you have to specify the hyperparameters explicitly instead of passing them as variable-length arguments.
Could you try replacing this code:
model = lgbm.LGBMClassifier(objective="multiclass", **param_grid)
with:
model = lgbm.LGBMClassifier(objective="multiclass", n_estimators=param_grid["n_estimators"], learning_rate=param_grid["learning_rate"], max_depth=param_grid["max_depth"], min_data_in_leaf=param_grid["min_data_in_leaf"],lambda_l1=param_grid["lambda_l1"],min_gain_to_split=param_grid["min_gain_to_split"],bagging_fraction=param_grid["bagging_fraction"],bagging_freq=param_grid["bagging_freq"],feature_fraction=param_grid["feature_fraction"],max_features=param_grid["max_features"],n_jobs=param_grid["n_jobs"],random_state=param_grid["random_state"])