Python中跨多个程序的只读共享内存



我想将数据加载到内存一次,并希望其他进程在此期间访问(只读)此数据。这些进程基本上是不同的python程序,在不同的时间被调用(当然是在加载数据之后)。

为了实现这个功能,我使用共享内存。请参见下面的代码片段:

server.py

import numpy as np
from multiprocessing import shared_memory

class DataUploader:
    """Publish two random numpy arrays in named shared-memory segments.

    Creates the segments ``<name>_uint_np`` and ``<name>_float_np`` and copies
    the arrays into them so that other, independently started processes can
    attach and read the data while this process stays alive.  This process is
    the owner: destroying the instance closes AND unlinks both segments.
    """

    def __init__(self, shared_memory_name):
        # Pre-set every handle so __del__ stays safe even if creation fails
        # below (e.g. FileExistsError when a same-named segment already exists).
        self._shm_1 = None
        self._shm_2 = None
        self._shared_1 = None
        self._shared_2 = None
        # The payload: two numpy arrays to be shared.
        self._uint_np = np.random.randint(0, 255, size=(64, 4, 28, 28)).astype(np.uint8)
        self._float_np = np.random.rand(64, 8).astype(np.float32)
        name_1 = f"{shared_memory_name}_uint_np"
        name_2 = f"{shared_memory_name}_float_np"
        self._shm_1 = shared_memory.SharedMemory(name=name_1, create=True, size=self._uint_np.nbytes)
        self._shm_2 = shared_memory.SharedMemory(name=name_2, create=True, size=self._float_np.nbytes)
        # numpy views backed by the shared buffers
        self._shared_1 = np.ndarray(self._uint_np.shape, dtype=self._uint_np.dtype, buffer=self._shm_1.buf)
        self._shared_2 = np.ndarray(self._float_np.shape, dtype=self._float_np.dtype, buffer=self._shm_2.buf)
        # copy the original data into shared memory
        self._shared_1[:] = self._uint_np[:]
        self._shared_2[:] = self._float_np[:]

    def __del__(self):
        # Release the numpy views FIRST: SharedMemory.close() raises
        # BufferError while an ndarray still exports the underlying buffer,
        # and inside __del__ that exception is swallowed, which can silently
        # skip unlink() and leak the segments.
        self._shared_1 = None
        self._shared_2 = None
        # Clean each segment independently so a missing/failed one does not
        # prevent the other from being removed.
        for shm in (getattr(self, "_shm_1", None), getattr(self, "_shm_2", None)):
            if shm is not None:
                shm.close()
                shm.unlink()  # we created the segment, so we destroy it
        self._shm_1 = None
        self._shm_2 = None
        print("Shared memory destroyed")

if __name__ == "__main__":
    uploader = DataUploader(shared_memory_name="test")
    # Block here so the process (and the shared memory it owns) stays alive.
    input(f'Press "enter" key to exit: ')

client.py

import numpy as np
from multiprocessing import shared_memory

class DataProvider:
    """Read-only view onto numpy arrays published in shared memory by server.py.

    Attaches to the existing segments ``<name>_uint_np`` and ``<name>_float_np``;
    it never unlinks them, so the data survives this process and can be read
    again by later clients.  The shapes/dtypes default to what server.py
    publishes but can be overridden.
    """

    def __init__(self, shared_memory_name, uint_shape=(64, 4, 28, 28), float_shape=(64, 8)):
        # Pre-set everything so __del__ stays safe even if attaching fails below.
        self._existing_shm_1 = None
        self._existing_shm_2 = None
        self._uint_np = None
        self._float_np = None
        self._existing_shm_1 = shared_memory.SharedMemory(name=f"{shared_memory_name}_uint_np")
        self._existing_shm_2 = shared_memory.SharedMemory(name=f"{shared_memory_name}_float_np")
        self._uint_np = np.ndarray(uint_shape, dtype=np.uint8, buffer=self._existing_shm_1.buf)
        self._float_np = np.ndarray(float_shape, dtype=np.float32, buffer=self._existing_shm_2.buf)
        # Workaround for https://bugs.python.org/issue39959: merely ATTACHING
        # registers the segments with THIS process's resource_tracker, which
        # unlinks them when this process exits ("leaked shared_memory objects"
        # warning on the first run, FileNotFoundError on the second).
        # Unregister so only the creating process destroys the memory.
        # Use the private ``_name`` (it keeps the leading "/" under which the
        # tracker registered the segment; the public ``name`` strips it).
        import os
        if os.name == "posix":  # Windows does not track shared memory
            from multiprocessing import resource_tracker
            resource_tracker.unregister(self._existing_shm_1._name, "shared_memory")
            resource_tracker.unregister(self._existing_shm_2._name, "shared_memory")

    def get_item(self, idx):
        """Return the idx-th uint8 block and float32 row (zero-copy views)."""
        uint_np = self._uint_np[idx]
        float_np = self._float_np[idx]
        return uint_np, float_np

    def __del__(self):
        # Drop the numpy views first: SharedMemory.close() raises BufferError
        # while an ndarray still exports the underlying buffer.
        self._uint_np = None
        self._float_np = None
        for shm in (getattr(self, "_existing_shm_1", None), getattr(self, "_existing_shm_2", None)):
            if shm is not None:
                shm.close()  # detach only; never unlink -- the server owns the data

if __name__ == "__main__":
    provider = DataProvider(shared_memory_name="test")
    sample_uint, sample_float = provider.get_item(0)
    # Report simple statistics to prove the shared data was read.
    print(sample_uint.std(), sample_float.std())

在执行一次server.py后,我希望多次执行client.py以访问(只读)数据。但是,在第一次执行client.py之后,出现以下警告:

$ python client.py 
73.84145388455019 0.25972846
/home/ravi/tools/anaconda/envs/py39/lib/python3.9/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 2 leaked shared_memory objects to clean up at shutdown
warnings.warn('resource_tracker: There appear to be %d '

第二次运行时,client.py抛出以下错误:

$ python client.py 
Traceback (most recent call last):
File "/home/ravi/test/client.py", line 34, in <module>
data_provider = DataProvider(shared_memory_name="test")
File "/home/ravi/test/client.py", line 16, in __init__
self._existing_shm_1 = shared_memory.SharedMemory(name=f"{shared_memory_name}_uint_np")
File "/home/ravi/tools/anaconda/envs/py39/lib/python3.9/multiprocessing/shared_memory.py", line 103, in __init__
self._fd = _posixshmem.shm_open(
FileNotFoundError: [Errno 2] No such file or directory: '/test_uint_np'

显然,共享内存在第一次访问后被破坏/不可达。

操作系统信息:

$ lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description:    Ubuntu 18.04.6 LTS
Release:    18.04
Codename:   bionic
$ uname -r
5.4.0-86-generic

是否有办法让共享内存保持存活,并从不同的程序多次(只读)访问它?

我在这个帖子里找到了答案。问题在于 resource_tracker 把附加到共享内存的进程当作子进程而不是独立进程来对待,因此调用 close() 之后,共享内存对象并不会从 resource_tracker 中注销,进程退出时仍会被清理掉。解决方案是在 close() 之后手动将它们注销:

from multiprocessing import resource_tracker
...
# Detach from the segments WITHOUT destroying them, then tell this process's
# resource_tracker to forget them, so it does not unlink the shared memory
# when this (non-child) process exits.
def __del__(self):
if self._existing_shm_1 is not None and self._existing_shm_2 is not None:
self._existing_shm_1.close()
self._existing_shm_2.close()
# NOTE: the private ._name keeps the leading "/" the tracker registered;
# the public .name property strips it, so unregister would miss the entry.
resource_tracker.unregister(self._existing_shm_1._name, "shared_memory")
resource_tracker.unregister(self._existing_shm_2._name, "shared_memory")

注意:resource_tracker.unregister(self._existing_shm_2.name, "shared_memory") 不起作用,因为公开的 .name 属性缺少前导的 "/",必须使用私有的 ._name。

编辑:我想强调,如果不手动从 resource_tracker 注销,就没有办法让 shared_memory 保持存活——否则 resource_tracker 总会在进程退出时删除它认为"泄漏"的共享内存。

最新更新