Python多处理,打开的文件太多



我目前正在尝试构建一个基于单个数据帧保存10000-50000个图像的scipt

为了加快这个过程,我正在尝试使用下面的脚本来实现多处理。我得到大约4000个图像之前,我得到一个错误关于太多打开的文件

# import multiprocessing
import multiprocessing as mp
# Create all image files and save to folder
week_list = df[['instrument_id', 'year', 'week_of_year']].drop_duplicates().to_numpy().tolist()
# define function that creates plot image
def create_image(instrument_id, year, week_of_year):
plt.clf()
# first slice the dataframe to only contain data from the first week
weekDF = df[(df['instrument_id'] == instrument_id) & (df['year'] == year) & (df['week_of_year'] == week_of_year)]
# plot day_pct
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(2,2))
weekDF.plot(
ax=axes[0,0],
x='previous_months_rows_id',
y='day_pct',
kind='kde'
),
weekDF.plot(
ax=axes[0,1],
x='previous_months_rows_id',
y='day_pct'
),
weekDF.plot(
ax=axes[1,0],
x='previous_months_rows_id',
y='day_volume',
kind='kde'
),
weekDF.plot(
ax=axes[1,1],
x='previous_months_rows_id',
y='day_volume'
)
axes[0,0].axis("off")
axes[1,0].axis("off")
axes[0,1].axis("off")
axes[1,1].axis("off")
axes[0,0].get_legend().remove()
axes[1,0].get_legend().remove()
axes[0,1].get_legend().remove()
axes[1,1].get_legend().remove()
plt.tight_layout()
plt.ioff()
fig.savefig('/home/henrik/Dokumenter/Stock/img/{instrument_id}_{year}_{week_of_year}.jpg'.format(instrument_id = instrument_id, year =year, week_of_year = week_of_year))
plt.close(fig)
processes = [mp.Process(target=create_image, args=(week[0], week[1], week[2])) for week in week_list]
# Run processes
for p in processes:
p.start()
# Exit the completed processes
for p in processes:
p.join()

我得到的错误:

OSError                                   Traceback (most recent call last)
<ipython-input-6-901de27246d0> in <module>
56 # Run processes
57 for p in processes:
---> 58     p.start()
59 
60 # Exit the completed processes
~/anaconda3/lib/python3.7/multiprocessing/process.py in start(self)
110                'daemonic processes are not allowed to have children'
111         _cleanup()
--> 112         self._popen = self._Popen(self)
113         self._sentinel = self._popen.sentinel
114         # Avoid a refcycle if the target function holds an indirect
~/anaconda3/lib/python3.7/multiprocessing/context.py in _Popen(process_obj)
221     @staticmethod
222     def _Popen(process_obj):
--> 223         return _default_context.get_context().Process._Popen(process_obj)
224 
225 class DefaultContext(BaseContext):
~/anaconda3/lib/python3.7/multiprocessing/context.py in _Popen(process_obj)
275         def _Popen(process_obj):
276             from .popen_fork import Popen
--> 277             return Popen(process_obj)
278 
279     class SpawnProcess(process.BaseProcess):
~/anaconda3/lib/python3.7/multiprocessing/popen_fork.py in __init__(self, process_obj)
18         self.returncode = None
19         self.finalizer = None
---> 20         self._launch(process_obj)
21 
22     def duplicate_for_child(self, fd):
~/anaconda3/lib/python3.7/multiprocessing/popen_fork.py in _launch(self, process_obj)
67     def _launch(self, process_obj):
68         code = 1
---> 69         parent_r, child_w = os.pipe()
70         self.pid = os.fork()
71         if self.pid == 0:
OSError: [Errno 24] Too many open files

关于这个问题的原因和解决方案,有什么想法吗?

以下代码基于CJR建议解决了我的问题

pool = mp.Pool(processes=12)
pool.starmap(create_image, week_list)
pool.close()

相关内容

  • 没有找到相关文章

最新更新