Python FutureWarning 警告消息



我在 Jupyter Notebook 中使用 Python 3.7,正在根据 Jason Brownlee 的电子书《Machine Learning Mastery with Python》创建分类模型。代码基本上是从电子书中直接复制粘贴到 Jupyter Notebook 中的。使用训练/测试拆分数据时,模型工作正常;但改用 k 折交叉验证时,会出现一条 FutureWarning 警告消息。代码和完整消息粘贴在下面。我加了一行 error_score = np.nan,但问题没有解决——我不确定这段代码应该放在哪里。如有任何建议,我将不胜感激,但请记住我是个新手。谢谢!

# Logistic regression classification
# Evaluates a liblinear LogisticRegression with 10-fold cross-validation and
# prints the mean of the per-fold scores.

from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
# NOTE(review): only `read_csv` is imported above, but the next line calls
# `pd.read_csv`; it works only if `pd` was bound elsewhere (e.g. an earlier
# notebook cell).
df = pd.read_csv('Diabetes_Classification.csv')
array = df.values
X = array[:,0:8]  # columns 0-7, passed to cross_val_score as X
Y = array[:,8]  # column 8, passed to cross_val_score as the target
# NOTE(review): random_state is given without shuffle=True; confirm against the
# KFold documentation whether it has any effect in the installed version.
kfold = KFold(n_splits=10, random_state=7)
model = LogisticRegression(solver='liblinear')
# NOTE(review): this only binds a variable named error_score (and relies on
# `np` being defined elsewhere); it is never passed to cross_val_score below,
# so it does not change that call.
error_score = np.nan
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())
# Logistic Regression Classification
# Evaluates a liblinear LogisticRegression with 10-fold cross-validation and
# prints the mean of the per-fold scores.
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
# NOTE(review): only `read_csv` is imported above, but the next line calls
# `pd.read_csv`; it works only if `pd` was bound elsewhere (e.g. an earlier
# notebook cell).
df = pd.read_csv('Diabetes_Classification.csv')
array = df.values
X = array[:,0:8]  # columns 0-7, passed to cross_val_score as X
Y = array[:,8]  # column 8, passed to cross_val_score as the target
# NOTE(review): random_state is given without shuffle=True; confirm against the
# KFold documentation whether it has any effect in the installed version.
kfold = KFold(n_splits=10, random_state=7)
model = LogisticRegression(solver='liblinear')
# NOTE(review): this only binds a variable named error_score (and relies on
# `np` being defined elsewhere); it is never passed to cross_val_score below,
# so it does not change that call.
error_score = np.nan
results = cross_val_score(model, X, Y, cv=kfold)
print(results.mean())
/Users/roberthoyt/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py:530: FutureWarning: From version 0.22, errors during fit will result in a cross validation score of NaN by default. Use error_score='raise' if you want an exception raised or error_score=np.nan to adopt the behavior from version 0.22.
  FutureWarning)

ValueError                                Traceback (most recent call last)
<ipython-input-105-010e5612fd63> in <module>
11 model = LogisticRegression(solver='liblinear')
12 error_score = np.nan
---> 13 results = cross_val_score(model, X, Y, cv=kfold)
14 print(results.mean())

~/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in 
cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, 
error_score)
389                                 fit_params=fit_params,
390                                 pre_dispatch=pre_dispatch,
--> 391                                 error_score=error_score)
392     return cv_results['test_score']
393 
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in 
cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, 
return_train_score, return_estimator, error_score)
230             return_times=True, return_estimator=return_estimator,
231             error_score=error_score)
--> 232         for train, test in cv.split(X, y, groups))
233 
234     zipped_scores = list(zip(*scores))
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
919             # remaining jobs.
920             self._iterating = False
--> 921             if self.dispatch_one_batch(iterator):
922                 self._iterating = self._original_iterator is not None
923 
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in dispatch_one_batch(self, 
iterator)
757                 return False
758             else:
--> 759                 self._dispatch(tasks)
760                 return True
761 
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in _dispatch(self, batch)
714         with self._lock:
715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
717             # A job can complete so quickly than its callback is
718             # called before we get here, causing self._jobs to
~/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in apply_async(self, 
func, 
callback)
180     def apply_async(self, func, callback=None):
181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
183         if callback:
184             callback(result)
~/opt/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py in __init__(self, 
batch)
547         # Don't delay the application, to avoid keeping the input
548         # arguments in memory
--> 549         self.results = batch()
550 
551     def get(self):
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in __call__(self)
223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
226 
227     def __len__(self):
~/opt/anaconda3/lib/python3.7/site-packages/joblib/parallel.py in <listcomp>(.0)
223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
226 
227     def __len__(self):
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
514             estimator.fit(X_train, **fit_params)
515         else:
--> 516             estimator.fit(X_train, y_train, **fit_params)
517 
518     except Exception as e:
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py in fit(self, X, y, 
sample_weight)
1531         X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype, order="C",
1532                          accept_large_sparse=solver != 'liblinear')
-> 1533         check_classification_targets(y)
1534         self.classes_ = np.unique(y)
1535         n_samples, n_features = X.shape
~/opt/anaconda3/lib/python3.7/site-packages/sklearn/utils/multiclass.py in 
check_classification_targets(y)
167     if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
168                       'multilabel-indicator', 'multilabel-sequences']:
--> 169         raise ValueError("Unknown label type: %r" % y_type)
170 
171 
ValueError: Unknown label type: 'continuous'

问题在于:您执行的是分类任务,但目标变量被识别为连续型。请确保用作目标的列是类别型的,您可能需要将其转换为整数。这些信息都可以在回溯中看到:

check_classification_targets(y)
167     if y_type not in ['binary', 'multiclass', 'multiclass-multioutput',
168                       'multilabel-indicator', 'multilabel-sequences']:
--> 169         raise ValueError("Unknown label type: %r" % y_type)

您的目标类型不在可接受的类型之列,它被识别为连续型(continuous):

ValueError: Unknown label type: 'continuous'

使用 df.dtypes 检查目标列是否为整数类型;如果不是,请将其转换为整数:

# Take column 8 of the array as the target and cast its values to int.
Y = array[:,8].astype(int)

以上假设您并没有误将本应是连续值的目标用于分类任务。您还可以检查该列的所有取值是否只有 0 和 1:

# List the distinct values found in column 8 (requires numpy bound as `np`).
np.unique(array[:, 8])

最新更新