我正在运行一段简单的KMeans代码:
# Standardize the selected numeric columns before clustering.
scaler = StandardScaler()
subset = df[num_col].copy()
subset_scaled = scaler.fit_transform(subset)
# Wrap the scaled array back into a DataFrame, keeping the original column names.
subset_scaled_df = pd.DataFrame(subset_scaled, columns=subset.columns)

# Candidate numbers of clusters to evaluate for the elbow plot.
clusters = range(1, 9)
meanDistortions = []

for k in clusters:
    model = KMeans(n_clusters=k)
    model.fit(subset_scaled_df)
    # NOTE: the labels are not used by the distortion computation below.
    prediction = model.predict(subset_scaled_df)
    # Average distortion: for every sample take the Euclidean distance to its
    # closest cluster centre, sum those distances and divide by the sample count.
    distortion = (
        sum(
            np.min(cdist(subset_scaled_df, model.cluster_centers_, "euclidean"), axis=1)
        )
        / subset_scaled_df.shape[0]
    )
    meanDistortions.append(distortion)
    # "\t" (tab) separates the two fields; the backslash was missing originally.
    print("Number of Clusters:", k, "\tAverage Distortion:", distortion)

# Plot average distortion against k to look for the "elbow".
plt.plot(clusters, meanDistortions, "bx-")
plt.xlabel("k")
plt.ylabel("Average Distortion")
plt.title("Selecting k with the Elbow Method", fontsize=20)
运行时遇到以下错误:
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
<ipython-input-173-4b988580ff32> in <module>
11 for k in clusters:
12 model = KMeans(n_clusters=k)
---> 13 model.fit(subset_scaled_df)
14 prediction = model.predict(subset_scaled_df)
15 distortion = (
/usr/local/lib/python3.9/site-packages/sklearn/cluster/_kmeans.py in fit(self, X, y, sample_weight)
1006 if self._algorithm == "full":
1007 kmeans_single = _kmeans_single_lloyd
-> 1008 self._check_mkl_vcomp(X, X.shape[0])
1009 else:
1010 kmeans_single = _kmeans_single_elkan
/usr/local/lib/python3.9/site-packages/sklearn/cluster/_kmeans.py in _check_mkl_vcomp(self, X, n_samples)
872 active_threads = int(np.ceil(n_samples / CHUNK_SIZE))
873 if active_threads < self._n_threads:
--> 874 modules = threadpool_info()
875 has_vcomp = "vcomp" in [module["prefix"] for module in modules]
876 has_mkl = ("mkl", "intel") in [
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in threadpool_info()
122 In addition, each module may contain internal_api specific entries.
123 """
--> 124 return _ThreadpoolInfo(user_api=_ALL_USER_APIS).todicts()
125
126
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in __init__(self, user_api, prefixes, modules)
338
339 self.modules = []
--> 340 self._load_modules()
341 self._warn_if_incompatible_openmp()
342 else:
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in _load_modules(self)
369 """Loop through loaded libraries and store supported ones"""
370 if sys.platform == "darwin":
--> 371 self._find_modules_with_dyld()
372 elif sys.platform == "win32":
373 self._find_modules_with_enum_process_module_ex()
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in _find_modules_with_dyld(self)
426
427 # Store the module if it is supported and selected
--> 428 self._make_module_from_path(filepath)
429
430 def _find_modules_with_enum_process_module_ex(self):
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in _make_module_from_path(self, filepath)
513 if prefix in self.prefixes or user_api in self.user_api:
514 module_class = globals()[module_class]
--> 515 module = module_class(filepath, prefix, user_api, internal_api)
516 self.modules.append(module)
517
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in __init__(self, filepath, prefix, user_api, internal_api)
603 self.user_api = user_api
604 self.internal_api = internal_api
--> 605 self._dynlib = ctypes.CDLL(filepath, mode=_RTLD_NOLOAD)
606 self.version = self.get_version()
607 self.num_threads = self.get_num_threads()
/usr/local/Cellar/python@3.9/3.9.1_4/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ctypes/__init__.py in __init__(self, name, mode, handle, use_errno, use_last_error, winmode)
372
373 if handle is None:
--> 374 self._handle = _dlopen(self._name, mode)
375 else:
376 self._handle = handle
OSError: image not already loaded
但是,如果我用下面的代码替换上面的代码,它可以正常工作:
# Variant reported to run without the error: n_clusters is hard-coded to 8,
# so the loop variable k is never passed to KMeans. Only the start of the
# loop is shown here.
clusters = range(1, 9)
meanDistortions = []
for k in clusters:
model = KMeans(n_clusters=8)
也就是说,不把"k"传给KMeans(n_clusters=),而是直接传入一个整数时,它就能正常工作。我不知道是哪里出了问题,如果有任何建议,我将不胜感激。
谢谢!
尝试更新您的scikit-learn包:pip install -U scikit-learn(PyPI上的包名是scikit-learn,而不是sklearn)
我猜它是在k=1时抛出错误,而在k>1时可以正常运行。这也解释了为什么你修改后的代码可以工作。所以你可以用range(2, 9)
作为快速解决方法。我在较新的sklearn环境(0.24.2)中也观察到了同样的情况。