我正在使用 Python 的 multiprocessing 库,但遇到了下面的错误。与 multiprocessing 相关的代码部分如下:
pool = mp.Pool(mp.cpu_count()) # calculate stats for date in parallel
print('Parallelize on {} CPUs'.format(mp.cpu_count()))
date_range = [start_date + timedelta(days=x) for x in range((end_date - start_date).days + 1)]
stats = pool.starmap(get_stats_for_date, [(path, dct, tfidf_model, curr_date, threshold)
for curr_date in date_range])
pool.close()
"">
我得到的错误是:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/econ/lib/python3.7/multiprocessing/pool.py", line 121, in worker
result = (True, func(*args, **kwds))
File "/home/ubuntu/anaconda3/envs/econ/lib/python3.7/multiprocessing/pool.py", line 47, in starmapstar
return list(itertools.starmap(args[0], args[1]))
File "analysis_gensim.py", line 304, in get_stats_for_date
unclustered_articles_indices = get_articles_not_in_cluster(articles_day1, dct, tfidf_model, threshold=threshold)
File "analysis_gensim.py", line 154, in get_articles_not_in_cluster
for idx, similarities in enumerate(index):
File "/home/ubuntu/anaconda3/envs/econ/lib/python3.7/site-packages/gensim/similarities/docsim.py", line 643, in __iter__
for chunk in self.iter_chunks():
File "/home/ubuntu/anaconda3/envs/econ/lib/python3.7/site-packages/gensim/similarities/docsim.py", line 667, in iter_chunks
self.close_shard()
File "/home/ubuntu/anaconda3/envs/econ/lib/python3.7/site-packages/gensim/similarities/docsim.py", line 436, in close_shard
shard = Shard(self.shardid2filename(shardid), index)
File "/home/ubuntu/anaconda3/envs/econ/lib/python3.7/site-packages/gensim/similarities/docsim.py", line 118, in __init__
self.index = self.get_index()
File "/home/ubuntu/anaconda3/envs/econ/lib/python3.7/site-packages/gensim/similarities/docsim.py", line 164, in get_index
self.index = self.cls.load(self.fullname(), mmap='r')
File "/home/ubuntu/anaconda3/envs/econ/lib/python3.7/site-packages/gensim/utils.py", line 426, in load
obj = unpickle(fname)
File "/home/ubuntu/anaconda3/envs/econ/lib/python3.7/site-packages/gensim/utils.py", line 1384, in unpickle
return _pickle.load(f, encoding='latin1')
_pickle.UnpicklingError: invalid load key, '\xc3'.
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "analysis_gensim.py", line 358, in <module>
main()
File "analysis_gensim.py", line 348, in main
df1, df2 = aggregate_by_year(db_path, dct, tfidf_model, year, threshold=threshold)
File "analysis_gensim.py", line 180, in aggregate_by_year
stats += [aggregate_by_month(path, dct, tfidf_model, year, month, avg=True, threshold=threshold)]
File "analysis_gensim.py", line 228, in aggregate_by_month
for curr_date in date_range])
File "/home/ubuntu/anaconda3/envs/econ/lib/python3.7/multiprocessing/pool.py", line 276, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/home/ubuntu/anaconda3/envs/econ/lib/python3.7/multiprocessing/pool.py", line 657, in get
raise self._value
_pickle.UnpicklingError: invalid load key, '\xc3'.
我不明白这个问题。有人可以帮我解决这个问题吗? 谢谢
当容器内存不足时,我遇到过类似的问题。我在 26GB RAM 的硬限制下运行 16 个线程,随后开始出现反序列化(unpickling)错误。同样的代码在 64GB RAM、16 个线程的环境下运行正常 -> 因此我把容器的硬限制提高到 56GB RAM,现在可以使用 1.5GB RAM 正常运行。我还修改了代码,根据可用内存来调整 worker 的数量。
我的错误是:
Process ForkPoolWorker-7:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/usr/local/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/usr/local/lib/python3.7/multiprocessing/pool.py", line 110, in worker
task = get()
File "/usr/local/lib/python3.7/multiprocessing/queues.py", line 354, in get
return _ForkingPickler.loads(res)
_pickle.UnpicklingError: invalid load key, '\xf5'.