cross_val_score 报错:为什么会这样?



我正在学习这篇机器学习教程:https://machinelearningmastery.com/machine-learning-in-python-step-by-step/ ,但遇到了一个问题。下面的代码可以在我的 MacBook Air 上运行,但在我的 Windows 机器上却无法运行。我查看了其他标题相似的问题,但似乎都与我的情况不符。

为什么会发生这种情况?如何修复?

我的全部代码:

# Script based on the "machine learning in Python step by step" tutorial:
# it prints the versions of the installed libraries, loads a CSV file and
# spot-checks six classifiers with 10-fold stratified cross-validation.

# Python version
import sys
print('Python: {}'.format(sys.version))
# scipy
import scipy
print('scipy: {}'.format(scipy.__version__))
# numpy
import numpy
print('numpy: {}'.format(numpy.__version__))
# matplotlib
import matplotlib
print('matplotlib: {}'.format(matplotlib.__version__))
# pandas
import pandas
print('pandas: {}'.format(pandas.__version__))
# scikit-learn
import sklearn
print('sklearn: {}'.format(sklearn.__version__))
# compare algorithms
from pandas import read_csv
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
# Load dataset
# Column names are passed explicitly, so the first row of the file is read
# as data rather than as a header.
url = "energyFormatted.csv"
names = ['TOTAL', 'PURCHASED', 'NUCLEAR', 'SOLAR', 'WIND', 'NATURAL_GAS', 'COAL', 'OIL']
dataset = read_csv(url, names=names)
print(dataset.shape)
# Split-out validation dataset
array = dataset.values
X = array[:, 0:4]  # features: the first four columns (TOTAL .. SOLAR)
y = array[:, 4]  # target: the fifth column (WIND)
# NOTE(review): the target column looks real-valued rather than a set of
# class labels; StratifiedKFold and scoring='accuracy' below expect a
# binary/multiclass target -- confirm this is the intended column.
# Hold out 20% of the rows for validation; random_state makes the split
# reproducible.
X_train, X_validation, Y_train, Y_validation = train_test_split(X, y, test_size=0.20, random_state=1, shuffle=True)
# Spot Check Algorithms
# Each entry is a (short label, unfitted estimator) pair.
models = []
models.append(('LR', LogisticRegression(solver='liblinear', multi_class='ovr')))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(gamma='auto')))
# evaluate each model in turn
results = []
# `names` is rebound here: from now on it holds model labels, not CSV columns.
names = []
for name, model in models:
    # A fresh splitter with the same seed gives every model identical folds.
    kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    # Report the mean accuracy and its standard deviation across the folds.
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))

报错的代码行:

cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')

错误本身:

Traceback (most recent call last):
  File "D:\Applications\pythonProject\venv\lib\site-packages\joblib\parallel.py", line 862, in dispatch_one_batch
    tasks = self._ready_batches.get(block=False)
  File "C:\Users\danie\AppData\Local\Programs\Python\Python39\lib\queue.py", line 168, in get
    raise Empty
_queue.Empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "C:\Users\danie\AppData\Roaming\JetBrains\PyCharmCE2022.2\scratches\FY23 SCI FAIR\main.py", line 63, in <module>
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring=None)
  File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 515, in cross_val_score
    cv_results = cross_validate(
  File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 266, in cross_validate
    results = parallel(
  File "D:\Applications\pythonProject\venv\lib\site-packages\joblib\parallel.py", line 1085, in __call__
    if self.dispatch_one_batch(iterator):
  File "D:\Applications\pythonProject\venv\lib\site-packages\joblib\parallel.py", line 873, in dispatch_one_batch
    islice = list(itertools.islice(iterator, big_batch_size))
  File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\_validation.py", line 266, in <genexpr>
    results = parallel(
  File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\_split.py", line 340, in split
    for train, test in super().split(X, y, groups):
  File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\_split.py", line 86, in split
    for test_index in self._iter_test_masks(X, y, groups):
  File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\_split.py", line 717, in _iter_test_masks
    test_folds = self._make_test_folds(X, y)
  File "D:\Applications\pythonProject\venv\lib\site-packages\sklearn\model_selection\_split.py", line 660, in _make_test_folds
    raise ValueError(
ValueError: Supported target types are: ('binary', 'multiclass'). Got 'continuous' instead.

CSV:

28564,0,6284.08,1713.84,19.9948,19994.8,19.9948,19.9948
28411,0,6250.42,852.33,0,20740.03,568.22,0
27515,0,6053.3,550.3,0,20361.1,550.3,0
24586,491.72,5408.92,245.86,0,17947.78,491.72,0
26653,533.06,6130.19,0,0,18923.63,1066.12,0
26836,805.08,6172.28,0,0,18785.2,1073.44,0
26073,1303.65,5736.06,0,0,17990.37,1042.92,0
27055,1352.75,6222.65,0,0,18397.4,1082.2,0
26236,1311.8,6034.28,0,0,17578.12,1311.8,0
26020,1821.4,3903,0,0,18994.6,1040.8,260.2
26538,0,4246.08,265.38,13799.76,6369.12,0,1326.9
25800,3354,5160,0,0,14964,1290,1032
26682,3468.66,5603.22,0,0,14941.92,1600.92,1067.28
24997,3499.58,5499.34,0,0,13248.41,1499.82,1249.85
25100,3765,4769,0,0,13052,1506,2008
24651,4190.67,4930.2,0,0,12325.5,1232.55,1972.08
12053,0,1084.77,0,3133.78,6508.62,0,723.18
11500,2070,2415,0,0,4255,690,2070

对连续变量使用准确率(accuracy)评分没有意义。该错误表明你的 y 值是 float 类型(即连续值)。你可以尝试使用 sklearn.metrics.mean_squared_error 之类的指标,而不是准确率。

最新更新