我正在做一些预测建模,想在 scikit-learn 中对不同类型的回归器进行基准测试,只是为了看看有哪些可用的回归器,以及它们在给定预测任务上的表现。
我受到这个 Kaggle kernel(notebook)的启发,其作者基本上是手动导入了一堆分类器(大约 10 个)并对它们进行基准测试。
我找不到 sklearn 中所有回归器的完整列表,因此想把 import 语句自动化,让程序自动返回一个我可以使用的类的列表。
我尝试动态导入类:
from importlib import import_module
import sklearn
def all_regressors():
    """Collect the names of scikit-learn estimators that look like regressors.

    Every entry of ``sklearn.__all__`` is imported as ``sklearn.<entry>`` and
    that module's ``__all__`` is scanned for names containing ``'Regress'``.

    Returns:
        list: the matching names as plain strings (``__all__`` holds names,
        not objects), followed by the ``sklearn.svm.SVR`` class itself.
    """
    regressors = []
    for module_name in sklearn.__all__:
        try:
            exported = import_module(f'sklearn.{module_name}').__all__
        except Exception:
            # Best effort: skip entries that cannot be imported as a submodule
            # or that do not define __all__. Unlike a bare ``except:``, this
            # still lets KeyboardInterrupt / SystemExit propagate.
            continue
        # NOTE: these are strings taken from __all__, not class objects.
        regressors.extend(name for name in exported if 'Regress' in name)
    # 'SVR' does not contain 'Regress', so it is added by hand. Importing the
    # submodule explicitly avoids relying on the loop above having loaded it.
    regressors.append(import_module('sklearn.svm').SVR)
    return regressors


print(all_regressors())
但是我得到的只是字符串形式的类名,而不是类本身:
['RandomForestRegressor', 'ExtraTreesRegressor', 'BaggingRegressor',
'GradientBoostingRegressor', 'AdaBoostRegressor',
'GaussianProcessRegressor', 'IsotonicRegression', 'ARDRegression',
'HuberRegressor', 'LinearRegression', 'LogisticRegression',
'LogisticRegressionCV', 'PassiveAggressiveRegressor',
'RandomizedLogisticRegression', 'SGDRegressor', 'TheilSenRegressor',
'RANSACRegressor', 'MultiOutputRegressor', 'KNeighborsRegressor',
'RadiusNeighborsRegressor', 'MLPRegressor', 'DecisionTreeRegressor',
'ExtraTreeRegressor', <class 'sklearn.svm.classes.SVR'>]
如何获得实际的类?
您可以使用 sklearn.utils 中的 all_estimators 函数:
from sklearn.utils import all_estimators
def get_all_regressors_sklearn():
    """Build one default-constructed instance of each scikit-learn regressor.

    Iterates over the ``(name, class)`` pairs returned by
    ``all_estimators(type_filter='regressor')``, prints each name, and calls
    the class with no arguments.

    Returns:
        list: the instances that could be created; classes whose no-argument
        constructor raises are silently left out.
    """
    instances = []
    for label, regressor_cls in all_estimators(type_filter='regressor'):
        print('Appending', label)
        try:
            instances.append(regressor_cls())
        except Exception:
            # Presumably estimators that need constructor arguments; skip them.
            continue
    return instances


all_regs = get_all_regressors_sklearn()
print(all_regs)
输出:
[ARDRegression(), AdaBoostRegressor(), BaggingRegressor(), BayesianRidge(), CCA(), DecisionTreeRegressor(), DummyRegressor(), ElasticNet(), ElasticNetCV(), ExtraTreeRegressor(), ExtraTreesRegressor(), GammaRegressor(), GaussianProcessRegressor(), GradientBoostingRegressor(), HistGradientBoostingRegressor(), HuberRegressor(), IsotonicRegression(), KNeighborsRegressor(), KernelRidge(), Lars(), LarsCV(), Lasso(), LassoCV(), LassoLars(), LassoLarsCV(), LassoLarsIC(), LinearRegression(), LinearSVR(), MLPRegressor(), MultiTaskElasticNet(), MultiTaskElasticNetCV(), MultiTaskLasso(), MultiTaskLassoCV(), NuSVR(), OrthogonalMatchingPursuit(), OrthogonalMatchingPursuitCV(), PLSCanonical(), PLSRegression(), PassiveAggressiveRegressor(), PoissonRegressor(), QuantileRegressor(), RANSACRegressor(), RadiusNeighborsRegressor(), RandomForestRegressor(), Ridge(), RidgeCV(), SGDRegressor(), SVR(), TheilSenRegressor(), TransformedTargetRegressor(), TweedieRegressor()]
我发现必须在模块对象上使用 getattr:
from importlib import import_module
import sklearn
def all_regressors():
    """Collect scikit-learn classes whose exported name contains 'Regress'.

    Every entry of ``sklearn.__all__`` is imported as ``sklearn.<entry>``; for
    each name in that module's ``__all__`` containing ``'Regress'`` the
    attribute is looked up on the module with ``getattr`` so that the class
    object, not just its name, is returned.

    The name filter is a heuristic: it also matches classes such as
    ``LogisticRegression`` (a classifier) and misses regressors whose name
    lacks the substring, which is why ``SVR`` is appended explicitly.

    Returns:
        list: the matching class objects, followed by ``sklearn.svm.SVR``.
    """
    regressors = []
    for module_name in sklearn.__all__:
        try:
            module = import_module(f'sklearn.{module_name}')
            exported = module.__all__
        except Exception:
            # Best effort: skip entries that cannot be imported as a submodule
            # or that do not define __all__. Unlike a bare ``except:``, this
            # still lets KeyboardInterrupt / SystemExit propagate.
            continue
        for name in exported:
            if 'Regress' not in name:
                continue
            try:
                regressors.append(getattr(module, name))
            except Exception:
                # A name listed in __all__ could not be resolved; skip only
                # that name and keep scanning the rest of the module.
                continue
    # 'SVR' does not contain 'Regress', so it is added by hand. Importing the
    # submodule explicitly avoids relying on the loop above having loaded it.
    regressors.append(import_module('sklearn.svm').SVR)
    return regressors


print(all_regressors())
[<class 'sklearn.ensemble.forest.RandomForestRegressor'>, <class
'sklearn.ensemble.forest.ExtraTreesRegressor'>, <class
'sklearn.ensemble.bagging.BaggingRegressor'>, <class
'sklearn.ensemble.gradient_boosting.GradientBoostingRegressor'>,
<class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, <class
'sklearn.gaussian_process.gpr.GaussianProcessRegressor'>, <class
'sklearn.isotonic.IsotonicRegression'>, <class
'sklearn.linear_model.bayes.ARDRegression'>, <class
'sklearn.linear_model.huber.HuberRegressor'>, <class
'sklearn.linear_model.base.LinearRegression'>, <class
'sklearn.linear_model.logistic.LogisticRegression'>, <class
'sklearn.linear_model.logistic.LogisticRegressionCV'>, <class
'sklearn.linear_model.passive_aggressive.PassiveAggressiveRegressor'>,
<class 'sklearn.linear_model.randomized_l1.RandomizedLogisticRegression'>, <class
'sklearn.linear_model.stochastic_gradient.SGDRegressor'>, <class
'sklearn.linear_model.theil_sen.TheilSenRegressor'>, <class
'sklearn.linear_model.ransac.RANSACRegressor'>, <class
'sklearn.multioutput.MultiOutputRegressor'>, <class
'sklearn.neighbors.regression.KNeighborsRegressor'>, <class
'sklearn.neighbors.regression.RadiusNeighborsRegressor'>, <class
'sklearn.neural_network.multilayer_perceptron.MLPRegressor'>, <class
'sklearn.tree.tree.DecisionTreeRegressor'>, <class
'sklearn.tree.tree.ExtraTreeRegressor'>, <class
'sklearn.svm.classes.SVR'>]