使用sklearn进行PCA分析时,skpca.fit报错的原因是什么



我正在用一些卫星数据做一个简单的PCA分析。所有的陆地点都已被移除,标准化后数据的平均值和标准差分别接近0和1。但是我得到了下面的错误:

from sklearn import preprocessing
scaler  = preprocessing.StandardScaler()
scaler_sst = scaler.fit(sss_data)
import joblib
joblib.dump(scaler_sst, './scaler_sst.pkl', compress=9)
scaler_sst = joblib.load('./scaler_sst.pkl')
X = scaler_sst.transform(sss_data)
print(X.mean())
print(X.std())
#X.shape
5.7725416769826885e-15
0.9999999999999993
from sklearn.decomposition import pca
skpca=pca.PCA()
skpca.fit(X)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/miniconda3/envs/py3_std_maps/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj, include, exclude)
968 
969             if method is not None:
--> 970                 return method(include=include, exclude=exclude)
971             return None
972         else:
~/miniconda3/envs/py3_std_maps/lib/python3.8/site-packages/sklearn/base.py in _repr_mimebundle_(self, **kwargs)
461     def _repr_mimebundle_(self, **kwargs):
462         """Mime bundle used by jupyter kernels to display estimator"""
--> 463         output = {"text/plain": repr(self)}
464         if get_config()["display"] == 'diagram':
465             output["text/html"] = estimator_html_repr(self)
~/miniconda3/envs/py3_std_maps/lib/python3.8/site-packages/sklearn/base.py in __repr__(self, N_CHAR_MAX)
273 
274         # use ellipsis for sequences with a lot of elements
--> 275         pp = _EstimatorPrettyPrinter(
276             compact=True, indent=1, indent_at_name=True,
277             n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
~/miniconda3/envs/py3_std_maps/lib/python3.8/site-packages/sklearn/utils/_pprint.py in __init__(self, indent, width, depth, stream, compact, indent_at_name, n_max_elements_to_show)
162         if self._indent_at_name:
163             self._indent_per_level = 1  # ignore indent param
--> 164         self._changed_only = get_config()['print_changed_only']
165         # Max number of elements in a list, dict, tuple until we start using
166         # ellipsis. This also affects the number of arguments of an estimators
KeyError: 'print_changed_only'
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/miniconda3/envs/py3_std_maps/lib/python3.8/site-packages/IPython/core/formatters.py in __call__(self, obj)
700                 type_pprinters=self.type_printers,
701                 deferred_pprinters=self.deferred_printers)
--> 702             printer.pretty(obj)
703             printer.flush()
704             return stream.getvalue()
~/miniconda3/envs/py3_std_maps/lib/python3.8/site-packages/IPython/lib/pretty.py in pretty(self, obj)
392                         if cls is not object 
393                                 and callable(cls.__dict__.get('__repr__')):
--> 394                             return _repr_pprint(obj, self, cycle)
395 
396             return _default_pprint(obj, self, cycle)
~/miniconda3/envs/py3_std_maps/lib/python3.8/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
698     """A pprint that just redirects to the normal repr function."""
699     # Find newlines and replace them with p.break_()
--> 700     output = repr(obj)
701     lines = output.splitlines()
702     with p.group():
~/miniconda3/envs/py3_std_maps/lib/python3.8/site-packages/sklearn/base.py in __repr__(self, N_CHAR_MAX)
273 
274         # use ellipsis for sequences with a lot of elements
--> 275         pp = _EstimatorPrettyPrinter(
276             compact=True, indent=1, indent_at_name=True,
277             n_max_elements_to_show=N_MAX_ELEMENTS_TO_SHOW)
~/miniconda3/envs/py3_std_maps/lib/python3.8/site-packages/sklearn/utils/_pprint.py in __init__(self, indent, width, depth, stream, compact, indent_at_name, n_max_elements_to_show)
162         if self._indent_at_name:
163             self._indent_per_level = 1  # ignore indent param
--> 164         self._changed_only = get_config()['print_changed_only']
165         # Max number of elements in a list, dict, tuple until we start using
166         # ellipsis. This also affects the number of arguments of an estimators
KeyError: 'print_changed_only'

错误发生在skpca.fit(X)部分。我重新安装了sklearn包和scikit包。我以前用过sklearn的PCA分析,但从来没有发生过这种情况。

我不确定答案,但这也许是sklearn的一个bug。可以试一试:

import sklearn
sklearn.get_config()

在我的例子中它返回一个字典:

{'assume_finite': False, 'working_memory': 1024, 'print_changed_only': False}

错误提示表明您的sklearn配置中不存在print_changed_only这个键。我使用的是Python 3.6上的sklearn "0.21.2"。也许降级sklearn版本会有帮助?

最新更新