我目前正在尝试构建一个基于单个数据帧保存10000-50000个图像的scipt
为了加快这个过程,我正在尝试使用下面的脚本来实现多处理。我得到大约4000个图像之前,我得到一个错误关于太多打开的文件
# import multiprocessing
import multiprocessing as mp
# Create all image files and save to folder
week_list = df[['instrument_id', 'year', 'week_of_year']].drop_duplicates().to_numpy().tolist()
# define function that creates plot image
def create_image(instrument_id, year, week_of_year):
plt.clf()
# first slice the dataframe to only contain data from the first week
weekDF = df[(df['instrument_id'] == instrument_id) & (df['year'] == year) & (df['week_of_year'] == week_of_year)]
# plot day_pct
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(2,2))
weekDF.plot(
ax=axes[0,0],
x='previous_months_rows_id',
y='day_pct',
kind='kde'
),
weekDF.plot(
ax=axes[0,1],
x='previous_months_rows_id',
y='day_pct'
),
weekDF.plot(
ax=axes[1,0],
x='previous_months_rows_id',
y='day_volume',
kind='kde'
),
weekDF.plot(
ax=axes[1,1],
x='previous_months_rows_id',
y='day_volume'
)
axes[0,0].axis("off")
axes[1,0].axis("off")
axes[0,1].axis("off")
axes[1,1].axis("off")
axes[0,0].get_legend().remove()
axes[1,0].get_legend().remove()
axes[0,1].get_legend().remove()
axes[1,1].get_legend().remove()
plt.tight_layout()
plt.ioff()
fig.savefig('/home/henrik/Dokumenter/Stock/img/{instrument_id}_{year}_{week_of_year}.jpg'.format(instrument_id = instrument_id, year =year, week_of_year = week_of_year))
plt.close(fig)
processes = [mp.Process(target=create_image, args=(week[0], week[1], week[2])) for week in week_list]
# Run processes
for p in processes:
p.start()
# Exit the completed processes
for p in processes:
p.join()
我得到的错误:
OSError Traceback (most recent call last)
<ipython-input-6-901de27246d0> in <module>
56 # Run processes
57 for p in processes:
---> 58 p.start()
59
60 # Exit the completed processes
~/anaconda3/lib/python3.7/multiprocessing/process.py in start(self)
110 'daemonic processes are not allowed to have children'
111 _cleanup()
--> 112 self._popen = self._Popen(self)
113 self._sentinel = self._popen.sentinel
114 # Avoid a refcycle if the target function holds an indirect
~/anaconda3/lib/python3.7/multiprocessing/context.py in _Popen(process_obj)
221 @staticmethod
222 def _Popen(process_obj):
--> 223 return _default_context.get_context().Process._Popen(process_obj)
224
225 class DefaultContext(BaseContext):
~/anaconda3/lib/python3.7/multiprocessing/context.py in _Popen(process_obj)
275 def _Popen(process_obj):
276 from .popen_fork import Popen
--> 277 return Popen(process_obj)
278
279 class SpawnProcess(process.BaseProcess):
~/anaconda3/lib/python3.7/multiprocessing/popen_fork.py in __init__(self, process_obj)
18 self.returncode = None
19 self.finalizer = None
---> 20 self._launch(process_obj)
21
22 def duplicate_for_child(self, fd):
~/anaconda3/lib/python3.7/multiprocessing/popen_fork.py in _launch(self, process_obj)
67 def _launch(self, process_obj):
68 code = 1
---> 69 parent_r, child_w = os.pipe()
70 self.pid = os.fork()
71 if self.pid == 0:
OSError: [Errno 24] Too many open files
关于这个问题的原因和解决方案,有什么想法吗?
以下代码基于CJR建议解决了我的问题
pool = mp.Pool(processes=12)
pool.starmap(create_image, week_list)
pool.close()