如何使用递归特征消除?



我是ML的新手,一直在尝试使用RFE方法进行特征选择。我的数据集有 5K 条记录,这是一个二元分类问题。以下是我按照在线教程编写的代码:

# Candidate numbers of features to keep (1..12); X and y are defined elsewhere
nof_list=np.arange(1,13)
high_score=0
# Variable to store the optimum number of features
nof=0
score_list =[]
for n in range(len(nof_list)):
    X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, random_state = 0)
    model = RandomForestClassifier()
    rfe = RFE(model,nof_list[n])  # second argument is n_features_to_select
    X_train_rfe = rfe.fit_transform(X_train,y_train)
    X_test_rfe = rfe.transform(X_test)  # apply the same feature mask to the test split
    model.fit(X_train_rfe,y_train)
    score = model.score(X_test_rfe,y_test)
    score_list.append(score)
    if(score>high_score):  # strict '>' keeps the smallest feature count on ties
        high_score = score
        nof = nof_list[n]
print("Optimum number of features: %d" %nof)
print("Score with %d features: %f" % (nof, high_score))

我遇到以下错误。有人可以帮忙吗?

TypeError                                 Traceback (most recent call last)
<ipython-input-332-a23dfb331001> in <module>
9     model = RandomForestClassifier()
10     rfe = RFE(model,nof_list[n])
---> 11     X_train_rfe = rfe.fit_transform(X_train,y_train)
12     X_test_rfe = rfe.transform(X_test)
13     model.fit(X_train_rfe,y_train)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
554             Training set.
555 
--> 556         y : numpy array of shape [n_samples]
557             Target values.
558 
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\feature_selection\_base.py in transform(self, X)
75         X = check_array(X, dtype=None, accept_sparse='csr',
76                         force_all_finite=not tags.get('allow_nan', True))
---> 77         mask = self.get_support()
78         if not mask.any():
79             warn("No features were selected: either the data is"
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\feature_selection\_base.py in get_support(self, indices)
44             values are indices into the input feature vector.
45         """
---> 46         mask = self._get_support_mask()
47         return mask if not indices else np.where(mask)[0]
48 
~\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\feature_selection\_rfe.py in _get_support_mask(self)
269 
270     def _get_support_mask(self):
--> 271         check_is_fitted(self)
272         return self.support_
273 
TypeError: check_is_fitted() missing 1 required positional argument: 'attributes'

你的sklearn版本是什么?

以下代码(使用人工数据)应该可以正常工作:

from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier
# Artificial data: 100 samples with 20 random features and a single class label.
X = np.random.rand(100, 20)
y = np.ones(X.shape[0])

# Candidate numbers of features to keep (1..12).
nof_list = np.arange(1, 13)
high_score = 0
# Number of features that produced the best score so far.
nof = 0
score_list = []

# The split is seeded and does not depend on the loop variable, so it is
# computed once instead of on every iteration.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

for n_features in nof_list:
    model = RandomForestClassifier()
    # n_features_to_select is passed by keyword: this works on old releases
    # and is required on newer scikit-learn, where it is keyword-only.
    rfe = RFE(model, n_features_to_select=n_features)
    X_train_rfe = rfe.fit_transform(X_train, y_train)
    # Apply the feature mask learned on the training split to the test split.
    X_test_rfe = rfe.transform(X_test)
    model.fit(X_train_rfe, y_train)
    score = model.score(X_test_rfe, y_test)
    score_list.append(score)
    # Strict '>' keeps the smallest feature count when scores tie.
    if score > high_score:
        high_score = score
        nof = n_features

print("Optimum number of features: %d" % nof)
print("Score with %d features: %f" % (nof, high_score))

最佳特征数:1

1个特征的分数:1.000000

测试的版本:

sklearn.__version__
'0.20.4'
sklearn.__version__
'0.21.3'

最新更新