我在Jupyter Notebook中使用Python 3.7。我正在根据Jason Brownlee的电子书《Machine Learning Mastery with Python》创建分类模型。代码基本上是从电子书中剪切并粘贴到Jupyter Notebook中。当我拆分数据时,模型工作正常,但是当我使用 k 折叠交叉验证时,它会生成一条 Future 警告消息,我将剪切并粘贴下面的代码和消息。我输入了error_score =np.nan,但它没有解决问题,但我不知道应该在哪里输入代码。我将不胜感激任何建议,但请记住,我是一个新手。谢谢
# 逻辑回归分类
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
df = pd.read_csv('Diabetes_Classification.csv')
array = df.values
X = array[:,0:8]
Y = array[:,8]
kfold = KFold(n_splits=10, random_state=7)
model = LogisticRegression(solver='liblinear')
error_score = np.nan
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())
# Logistic Regression Classification
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
df = pd.read_csv('Diabetes_Classification.csv')
array = df.values
X = array[:,0:8]
Y = array[:,8]
kfold = KFold(n_splits=10, random_state=7)
model = LogisticRegression(solver='liblinear')
error_score = np.nan
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())
/Users/roberthoyt/opt/anaconda3/lib/python3.7/site-
packages/sklearn/model_selection/_validation.py:530: FutureWarning: From version 0.22, errors during
fit will result in a cross validation score of NaN by default. Use error_score='raise' if you want
an exception raised or error_score=np.nan to adopt the behavior from version 0.22.
FutureWarning)
ValueError Traceback (most recent call last)
<ipython-input-105-010e5612fd63> in <module>
11 model = LogisticRegression(solver='liblinear')
12 error_score = np.nan
---> 13 results = cross_val_score(model, X, Y, cv=kfold)
14 print(results.mean())
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in
cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch,
error_score)
389 fit_params=fit_params,
390 pre_dispatch=pre_dispatch,
--> 391 error_score=error_score)
392 return cv_results['test_score']
393
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in
cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch,
return_train_score, return_estimator, error_score)
230 return_times=True, return_estimator=return_estimator,
231 error_score=error_score)
--> 232 for train, test in cv.split(X, y, groups))
233
234 zipped_scores = list(zip(*scores))
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
919 # remaining jobs.
920 self._iterating = False
--> 921 if self.dispatch_one_batch(iterator):
922 self._iterating = self._original_iterator is not None
923
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self,
iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
~/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in apply_async(self,
func,
callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
~/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in __init__(self,
batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _
fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params,
return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator,
error_score)
514 estimator.fit(X_train, **fit_params)
515 else:
--> 516 estimator.fit(X_train, y_train, **fit_params)
517
518 except Exception as e:
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py in fit(self, X, y,
sample_weight)
1531 X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype, order="C",
1532 accept_large_sparse=solver != 'liblinear')
-> 1533 check_classification_targets(y)
1534 self.classes_ = np.unique(y)
1535 n_samples, n_features = X.shape
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/multiclass.py in
check_classification_targets(y)
167 if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
168 'multilabel-indicator', 'multilabel-sequences']:
--> 169 raise ValueError("Unknown label type: %r" % y_type)
170
171
ValueError: Unknown label type: 'continuous'
问题是你的目标是连续的,你正在执行分类任务。确保您使用的列是分类的。您可能需要将其转换为整数。所有这些都在回溯中报告:
check_classification_targets(y)
167 if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
168 'multilabel-indicator', 'multilabel-sequences']:
--> 169 raise ValueError("Unknown label type: %r" % y_type)
您的目标不在接受的目标中。您的目标是连续的:
ValueError: Unknown label type: 'continuous'
检查您的目标是否是带df.dtypes
的整数,如果不是,则将其更改为整数。
Y = array[:,8].astype(int)
这是假设您没有犯对连续值进行分类任务的错误。您还可以检查是否所有值都表示 0 和 1:
np.unique(array[:, 8])