我想用投票分类器(VotingClassifier)把我的几个模型组合起来。但是这些模型不是 scikit-learn 的估计器,所以无法直接传给 VotingClassifier。于是我决定先分别拟合这些模型,再把它们交给投票分类器,但不幸的是,VotingClassifier 不支持预先拟合好的模型。应该如何修改我的估计器,才能让投票分类器在这些预拟合的模型上工作?我使用的是 Python 2.7。
这是我的代码:
import numpy as np
import os
from decimal import *
import sys
sys.path.append('/home/nida/pylibol/python')
import classifiers
from classifiers_new2 import *
from sklearn.datasets import load_svmlight_file
# Reproduction script (Python 2): fits three custom classifiers, then tries to
# combine them with a hard-voting VotingClassifier.

# Path of the libsvm-format data file to load.
filedir= '/home/nida/2018/2018200.libsvm'
# Load the feature matrix X and the label vector y from that file.
X, y = load_svmlight_file(filedir)
# NOTE(review): the 'n' after each colon in the messages below looks like a
# '\n' escape that lost its backslash -- confirm against the original script.
print 'X shape:n',X.shape
print 'y shape:n',y.shape
# Use 80% of the samples for training and the rest for testing; random_state=0
# keeps the split reproducible.
# NOTE(review): train_test_split is not imported explicitly in the visible
# imports; presumably it arrives via `from classifiers_new2 import *` -- verify.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=0)
print 'X_test shape:n',X_test.shape
print 'y_test shape:n',y_test.shape
print 'X train:n',X_train.shape
print 'y train:n',y_train.shape
# Instantiate the three custom models with their default arguments. The
# traceback reports Pa1 as an old-style `classifiers_new2.Pa1 instance`, i.e.
# it is not a scikit-learn estimator; Ogd and Arow presumably come from the
# same module.
model1 = Pa1()
model2 = Ogd()
model3= Arow()
# Fit each model on the training split.
model1.fit(X_train,y_train)
model2.fit(X_train,y_train)
model3.fit(X_train,y_train)
# Per-model predictions on the test split (not used again in this script).
pred1=model1.predict(X_test)
pred2=model2.predict(X_test)
pred3=model3.predict(X_test)
# (name, estimator) pairs handed to the ensemble.
modelsNames = [('Pa1',model1), ('OGD',model2),('Arow',model3)]
# NOTE(review): VotingClassifier is not imported explicitly in the visible
# imports either -- verify where it comes from.
votingClassifier = VotingClassifier(voting = 'hard',estimators= modelsNames)
# This call is where the TypeError is raised: per the traceback, fit() calls
# clone(clf) on every estimator, and clone() rejects Pa1 because it does not
# implement get_params.
votingClassifier.fit(X_train,y_train)
错误:
TypeError Traceback (most recent call last)
<ipython-input-22-6f0e98ffe925> in <module>()
36
37 votingClassifier = VotingClassifier(voting = 'hard',estimators= modelsNames)
---> 38 votingClassifier.fit(X_train,y_train)
/home/nida/anaconda3/envs/py2/lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyc in fit(self, X, y, sample_weight)
198 delayed(_parallel_fit_estimator)(clone(clf), X, transformed_y,
199 sample_weight=sample_weight)
--> 200 for clf in clfs if clf is not None)
201
202 self.named_estimators_ = Bunch(**dict())
/home/nida/anaconda3/envs/py2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self, iterable)
919 # remaining jobs.
920 self._iterating = False
--> 921 if self.dispatch_one_batch(iterator):
922 self._iterating = self._original_iterator is not None
923
/home/nida/anaconda3/envs/py2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in dispatch_one_batch(self, iterator)
752 tasks = BatchedCalls(itertools.islice(iterator, batch_size),
753 self._backend.get_nested_backend(),
--> 754 self._pickle_cache)
755 if len(tasks) == 0:
756 # No more tasks available in the iterator: tell caller to stop.
/home/nida/anaconda3/envs/py2/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __init__(self, iterator_slice, backend_and_jobs, pickle_cache)
208
209 def __init__(self, iterator_slice, backend_and_jobs, pickle_cache=None):
--> 210 self.items = list(iterator_slice)
211 self._size = len(self.items)
212 if isinstance(backend_and_jobs, tuple):
/home/nida/anaconda3/envs/py2/lib/python2.7/site-packages/sklearn/ensemble/voting_classifier.pyc in <genexpr>((clf,))
198 delayed(_parallel_fit_estimator)(clone(clf), X, transformed_y,
199 sample_weight=sample_weight)
--> 200 for clf in clfs if clf is not None)
201
202 self.named_estimators_ = Bunch(**dict())
/home/nida/anaconda3/envs/py2/lib/python2.7/site-packages/sklearn/base.pyc in clone(estimator, safe)
58 "it does not seem to be a scikit-learn estimator "
59 "as it does not implement a 'get_params' methods."
---> 60 % (repr(estimator), type(estimator)))
61 klass = estimator.__class__
62 new_object_params = estimator.get_params(deep=False)
TypeError: Cannot clone object '<classifiers_new2.Pa1 instance at 0x7f1c9c0e41e0>' (type <type 'instance'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' methods.
您应该让自己的估计器继承 BaseEstimator 和 ClassifierMixin 这两个类。scikit-learn 提供了大量关于如何开发自定义估计器的文档。
您的估计器应该类似于:
from sklearn.base import BaseEstimator, ClassifierMixin
class MyClassifier(BaseEstimator, ClassifierMixin):
    """Skeleton of a custom classifier that scikit-learn utilities can handle.

    Inheriting from ``BaseEstimator`` supplies ``get_params``/``set_params``,
    which ``sklearn.base.clone`` (and therefore ``VotingClassifier.fit``)
    requires. ``ClassifierMixin`` marks the class as a classifier and adds a
    default ``score`` method.
    """

    def fit(self, X, y):
        """Train the classifier on samples ``X`` with target labels ``y``.

        Placeholder: put the real training logic here.

        Returns:
            self, following the scikit-learn convention that ``fit`` returns
            the fitted estimator so calls can be chained.
        """
        return self

    def predict(self, X):
        """Predict a label for each sample in ``X``.

        Placeholder: put the real prediction logic here and return the
        predicted labels; as written it returns None.
        """
        pass