为机器学习模型制作流水线


from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
# Registry of candidate classifiers and the hyper-parameter grid to search for
# each one.
#
# The estimators are wrapped with make_pipeline(), which names every step after
# the *lowercased* class name of the estimator. Grid keys must therefore have
# the form "<lowercased class name>__<parameter>"; any other casing (such as
# "decisionTreeClassifier__...") is rejected with "Invalid parameter".
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'svc__C': [1, 10, 100, 1000],
            'svc__kernel': ['rbf', 'linear'],
        },
    },

    'logistic_regression': {
        'model': LogisticRegression(solver='liblinear', multi_class='auto'),
        'params': {
            'logisticregression__C': [1, 5, 10],
        },
    },

    'random_forest1': {
        'model': RandomForestClassifier(),
        'params': {
            'randomforestclassifier__n_estimators': [1, 5, 10],
        },
    },

    'decision_tree': {
        'model': DecisionTreeClassifier(),
        'params': {
            # Step name is all lowercase: "decisiontreeclassifier".
            'decisiontreeclassifier__criterion': ["gini", "entropy", "log_loss"],
        },
    },
}
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
import pandas as pd

# One summary row per algorithm: its name, best CV score and best parameters.
scores = []
# Best fitted pipeline for each algorithm, keyed by the algorithm's name.
best_estimators = {}

for algo, mp in model_params.items():
    # Scale the data first, then apply the classifier taken from the registry.
    # make_pipeline() names each step after the lowercased class name.
    pipe = make_pipeline(StandardScaler(), mp['model'])

    # Grid search with 5-fold cross-validation over this model's parameters.
    clf = GridSearchCV(pipe, mp['params'], cv=5, return_train_score=False)

    # NOTE(review): `features` and `target` are not defined in this snippet;
    # presumably they are created earlier in the script - confirm.
    clf.fit(features, target)
    scores.append({
        'model': algo,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_,
    })
    best_estimators[algo] = clf.best_estimator_

# Collect the per-algorithm results into a table for comparison.
df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])

错误:

Invalid parameter 'decisionTreeClassifier' for estimator Pipeline(steps=[('standardscaler', StandardScaler()),
('decisiontreeclassifier', DecisionTreeClassifier())]). Valid parameters are: ['memory', 'steps', 'verbose'].

这段代码对 SVM、逻辑回归和随机森林分类器都能正常工作,但对决策树分类器会抛出参数错误。不知道是语法问题还是其他原因。

参数名应该是 `decisiontreeclassifier__criterion`(全部小写)。`make_pipeline()` 会把每个步骤的名称设置为对应估计器类名的小写形式,因此 `decisionTreeClassifier__criterion` 无法匹配任何步骤(参见 https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.make_pipeline.html)。

最新更新