KMeans - 运行时遇到 OSError: image not already loaded(映像尚未加载)



我正在运行一段简单的 KMeans 代码:

# Scaling the data set before clustering
scaler = StandardScaler()
subset = df[num_col].copy()
subset_scaled = scaler.fit_transform(subset)
subset_scaled_df = pd.DataFrame(subset_scaled, columns=subset.columns)

# Fit one KMeans model per candidate k (1..8) and record its average distortion
clusters = range(1, 9)
meanDistortions = []
for k in clusters:
    model = KMeans(n_clusters=k)
    model.fit(subset_scaled_df)
    # NOTE: `prediction` is not used below; it is kept because the traceback quotes this line
    prediction = model.predict(subset_scaled_df)
    # Average distortion: sum of each row's Euclidean distance to its nearest
    # cluster centre, divided by the number of rows
    distortion = (
        sum(
            np.min(cdist(subset_scaled_df, model.cluster_centers_, "euclidean"), axis=1)
        )
        / subset_scaled_df.shape[0]
    )
    meanDistortions.append(distortion)
    print("Number of Clusters:", k, "\tAverage Distortion:", distortion)

# Plot average distortion against k for the elbow method
plt.plot(clusters, meanDistortions, "bx-")
plt.xlabel("k")
plt.ylabel("Average Distortion")
plt.title("Selecting k with the Elbow Method", fontsize=20)

运行时遇到以下错误:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-173-4b988580ff32> in <module>
11 for k in clusters:
12     model = KMeans(n_clusters=k)
---> 13     model.fit(subset_scaled_df)
14     prediction = model.predict(subset_scaled_df)
15     distortion = (
/usr/local/lib/python3.9/site-packages/sklearn/cluster/_kmeans.py in fit(self, X, y, sample_weight)
1006         if self._algorithm == "full":
1007             kmeans_single = _kmeans_single_lloyd
-> 1008             self._check_mkl_vcomp(X, X.shape[0])
1009         else:
1010             kmeans_single = _kmeans_single_elkan
/usr/local/lib/python3.9/site-packages/sklearn/cluster/_kmeans.py in _check_mkl_vcomp(self, X, n_samples)
872         active_threads = int(np.ceil(n_samples / CHUNK_SIZE))
873         if active_threads < self._n_threads:
--> 874             modules = threadpool_info()
875             has_vcomp = "vcomp" in [module["prefix"] for module in modules]
876             has_mkl = ("mkl", "intel") in [
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in threadpool_info()
122     In addition, each module may contain internal_api specific entries.
123     """
--> 124     return _ThreadpoolInfo(user_api=_ALL_USER_APIS).todicts()
125 
126 
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in __init__(self, user_api, prefixes, modules)
338 
339             self.modules = []
--> 340             self._load_modules()
341             self._warn_if_incompatible_openmp()
342         else:
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in _load_modules(self)
369         """Loop through loaded libraries and store supported ones"""
370         if sys.platform == "darwin":
--> 371             self._find_modules_with_dyld()
372         elif sys.platform == "win32":
373             self._find_modules_with_enum_process_module_ex()
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in _find_modules_with_dyld(self)
426 
427             # Store the module if it is supported and selected
--> 428             self._make_module_from_path(filepath)
429 
430     def _find_modules_with_enum_process_module_ex(self):
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in _make_module_from_path(self, filepath)
513             if prefix in self.prefixes or user_api in self.user_api:
514                 module_class = globals()[module_class]
--> 515                 module = module_class(filepath, prefix, user_api, internal_api)
516                 self.modules.append(module)
517 
/usr/local/lib/python3.9/site-packages/threadpoolctl.py in __init__(self, filepath, prefix, user_api, internal_api)
603         self.user_api = user_api
604         self.internal_api = internal_api
--> 605         self._dynlib = ctypes.CDLL(filepath, mode=_RTLD_NOLOAD)
606         self.version = self.get_version()
607         self.num_threads = self.get_num_threads()
/usr/local/Cellar/python@3.9/3.9.1_4/Frameworks/Python.framework/Versions/3.9/lib/python3.9/ctypes/__init__.py in __init__(self, name, mode, handle, use_errno, use_last_error, winmode)
372 
373         if handle is None:
--> 374             self._handle = _dlopen(self._name, mode)
375         else:
376             self._handle = handle
OSError: image not already loaded

但是,如果我用下面的代码替换上面的代码,它可以正常工作:

clusters = range(1, 9)
meanDistortions = []
for k in clusters:
    # n_clusters is hard-coded to 8 here instead of using the loop variable k
    model = KMeans(n_clusters=8)

也就是说,如果不把 "k" 传给 KMeans(n_clusters=),而是直接传入一个固定的整数,它就能正常工作。我不知道是哪里出了问题,如果有任何建议,我将不胜感激。

谢谢!

尝试更新您的 scikit-learn 包(PyPI 上的包名是 scikit-learn,而不是 sklearn):pip install -U scikit-learn

我猜测它只在 k=1 时抛出错误,而在 k>1 时可以正常工作。这也解释了为什么你修改后的代码能够运行。所以你可以用 range(2, 9) 作为临时的解决方法。我在较新的 sklearn 环境(0.24.2)中也观察到了同样的情况。

最新更新