如何在 Python 中的两个独立进程之间来回交换数据



PROBLEM

有两个独立的进程并行运行,我希望它们来回通信。

代码说明

代码在 Python 2.7 中。在我的精简到最小脚本中,我使用队列进行进程之间的通信。进程 p1 将数据放入队列中。进程 p2 从队列中获取数据并对数据执行某些操作。然后,进程 p2 将修改后的数据放回队列中,最后进程 p1 从队列中取回修改后的数据。修改后的数据必须返回到进程 p1,因为此进程实际上是发送/接收请求的事件服务器。

代码

#!/usr/bin/python2.7 python2.7
# -*- coding: utf-8 -*-
# script for back-and-forth data exchange between processes
# common modules
import os
import sys
import time
from multiprocessing import Process
from multiprocessing import Queue
from datetime import datetime
someData = {}
# Load: the "process 1" side of the experiment (puts a dict on the queue and
# then waits for something to come back).
# NOTE: post() takes no queue parameter; queue1 is looked up as a module-level
# global when the method runs.
class Load():
# Put one dict on queue1, sleep 3 s, then poll queue1 until it is non-empty.
def post(self):
timestamp = str(datetime.now())
someData = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
queue1.put(someData)        # put into queue
print "#20 process 1: put in queue1 =>", someData
time.sleep(3)
while True:     # queue1 checking loop, comment out the loop if use time.sleep only
if queue1.empty() == False:
timestamp = str(datetime.now())
res = queue1.get()
# NOTE: the item just fetched is replaced by a locally built dict on the next
# line, so the print below does not show what was actually in the queue.
res = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
print "#28 get from queue1 =>", res
break
else:
print "#31 queue1 empty"
time.sleep(1)
# Disabled variant of the loop above that polls queue2 instead (attempt 3).
# while True:       # queue2 checking loop
#   if queue2.empty() == False:
#       timestamp = str(datetime.now())
#       res = queue2.get()
#       res = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
#       print "#39 get from queue2 =>", res
#       break
#   else:
#       print "#42 queue2 empty"
#       time.sleep(1)
# Unload: the "process 2" side of the experiment; get() -> doSomething() -> put().
# NOTE: like Load, it reads queue1 as a module-level global.
class Unload():
# Check queue1 exactly once (there is no loop): if an item is available, take
# it and hand over to doSomething(); otherwise just report "empty".
def get(self):
try:
if queue1.empty() == False:
data = queue1.get()     # retrieve package from queue
#queue1.close()
#queue1.join_thread()
timestamp = str(datetime.now())
# NOTE: the fetched item is discarded here; `data` is rebuilt locally.
data = {"process":"p2","class":"Unload()","method":"get()","timestamp":timestamp} 
print "#54 process 2: get from queue1 =>", data
self.doSomething(data)  # call method
else:
print "#57 queue1 empty"
pass
# NOTE: bare except hides the actual exception type and message.
except:
print "#60 queue1 error"
pass
# Sleep 3 s, build a new dict (the `data` argument is not used), keep it on
# the instance and continue with put().
def doSomething(self, data):
time.sleep(3)
timestamp = str(datetime.now())
someData = {"process":"p2","class":"Unload()","method":"doSomething()","timestamp":timestamp}
self.someData = someData
print "#68 process 2: do something =>", someData
self.put()
# Sleep 3 s, then put a freshly built dict back on queue1.
def put(self):
time.sleep(3)
timestamp = str(datetime.now())
self.someData = {"process":"p2","class":"Unload()","method":"put()","timestamp":timestamp}
print "#75 process 2: put back in queue1 =>", self.someData
res = self.someData
queue1.put(res)
# Disabled variant that answers on queue2 instead (attempt 3).
#print "#78 process 2: put back in queue2 =>", self.someData
#res = self.someData
#queue2.put(res)
#queue2.close()
#queue2.join_thread()
# main 
# Script entry point: creates the queue and the two Process objects.
if __name__ == '__main__':
queue1 = Queue()
#queue2 = Queue()
# NOTE: a `global` statement at module level has no effect.
global p1, p2
# NOTE(review): `Load().post()` is a call expression, so post() executes right
# here in the parent process and Process receives its return value (None) as
# target; the child p1 then has nothing to run. Passing the bound method
# (`target=Load().post`) would defer the call to the child, but post() would
# then also have to accept the queue passed via `args`.
p1 = Process(target=Load().post(), args=(queue1,))      # process p1
#p1 = Process(target=Load().post(), args=(queue1,queue2,))
p1.daemon = True
p1.start() 
# NOTE(review): same pattern as above; Unload().get() runs in the parent,
# and only after Load().post() has already returned.
p2 = Process(target=Unload().get(), args=(queue1,))     # process p2
#p2 = Process(target=Unload().get(), args=(queue1,queue2,))
p2.start()
p2.join()

问题:我已经查阅了其他资源,但它们都只涉及单向通信。以下是资源列表。

  1. use-get-nowait-in-python-without-raising-empty-exception
  2. in-python-how-do-you-get-data-back-from-a-specific-process-using-multiprocess
  3. 如何使用多处理队列带锁定
  4. 多处理模块支持锁
  5. 我可以暂停和恢复的线程
  6. 在两个 python 进程之间交换数据

如何让进程 1 等待并从进程 2 检索修改后的数据?我是否应该考虑另一种进程之间的通信方法,例如管道,zeroMQ?

尝试 1:在进程 1 中只使用 time.sleep()(不使用 while 循环)。只用 time.sleep 时,数据沿着队列一路传递到最后一步,但始终没有回到最终目的地进程 1。前面的步骤都正常,只缺最后一步。结果如下。

#20 process 1: put in queue1 => {'process': 'p1', 'timestamp': '2020-02-23 11:40:30.234466', 'class': 'Load()', 'method': 'post()'}
#54 process 2: get from queue1 => {'process': 'p2', 'timestamp': '2020-02-23 11:40:33.239113', 'class': 'Unload()', 'method': 'get()'}
#68 process 2: do something => {'process': 'p2', 'timestamp': '2020-02-23 11:40:36.242500', 'class': 'Unload()', 'method': 'doSomething()'}
#75 process 2: put back in queue1 => {'process': 'p2', 'timestamp': '2020-02-23 11:40:39.245856', 'class': 'Unload()', 'method': 'put()'}

尝试 2:在进程 1 中使用 while 循环。通过 while 循环检查队列时,数据进入队列后立即又被取走,永远不会到达进程 2。结果如下。

#20 process 1: put in queue1 => {'process': 'p1', 'timestamp': '2020-02-23 11:46:14.606356', 'class': 'Load()', 'method': 'post()'}
#28 get from queue1 => {'process': 'p1', 'timestamp': '2020-02-23 11:46:17.610202', 'class': 'Load()', 'method': 'post()'}
#57 queue1 empty

尝试 3:使用两个队列。队列 1 用于从进程 1 到进程 2,队列 2 用于从进程 2 到进程 1。数据进入了队列 1,但没有通过队列 2 返回,而是神秘地消失了。结果如下。

#20 process 1: put in queue1 => {'process': 'p1', 'timestamp': '2020-02-23 11:53:39.745177', 'class': 'Load()', 'method': 'post()'}
#42 queue2 empty

-----更新20200224:尝试 4、5 和 6 -----------------------------------------------------------------

尝试 4:使用两个 manager.Queue() 队列

使用两个 manager.Queue() 队列:队列 1 用于从进程 1 到进程 2,队列 2 用于从进程 2 到进程 1。数据进入了队列 1,但没有通过队列 2 返回,再次神秘地消失了。代码和结果如下。

尝试 4 的代码:

#!/usr/bin/python2.7 python2.7
# -*- coding: utf-8 -*-
# script for serialized interprocess data exchange

# common modules
import os
import sys
import time
import multiprocessing
from multiprocessing import Process
from multiprocessing import Queue
from multiprocessing import Manager
from datetime import datetime
someData = {}
manager = multiprocessing.Manager()
queue1 = manager.Queue()
queue2 = manager.Queue()
# Load: the "process 1" side; sends on queue1 and waits for an answer on queue2.
# NOTE: queue1/queue2 are module-level globals (manager.Queue() proxies), not
# parameters of post().
class Load():
# Put one dict on queue1, sleep 3 s, then poll queue2 until it is non-empty.
def post(self):
timestamp = str(datetime.now())
someData = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
queue1.put(someData)        # put into queue
print "#20 process 1: put in queue1 =>", someData
time.sleep(3)
# Disabled variant that polls queue1 instead.
# while True:       # queue1 checking loop
#   if queue1.empty() == False:
#       timestamp = str(datetime.now())
#       res = queue1.get()
#       res = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
#       print "#28 get from queue1 =>", res
#       break
#   else:
#       print "#31 queue1 empty"
#       time.sleep(1)
while True:     # queue2 checking loop
if queue2.empty() == False:
timestamp = str(datetime.now())
res = queue2.get()
# NOTE: the fetched item is overwritten by a locally built dict on the next
# line, so the print below does not show the queue content.
res = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
print "#39 get from queue2 =>", res
break
else:
print "#42 queue2 empty"
time.sleep(1)
# Unload: the "process 2" side; reads queue1 and answers on queue2.
# NOTE: both queues are module-level globals, not method parameters.
class Unload():
# Check queue1 exactly once (no loop); on success continue with doSomething().
def get(self):
try:
if queue1.empty() == False:
data = queue1.get()     # retrieve package from queue
#queue1.close()
#queue1.join_thread()
timestamp = str(datetime.now())
# NOTE: the fetched item is discarded here; `data` is rebuilt locally.
data = {"process":"p2","class":"Unload()","method":"get()","timestamp":timestamp} 
print "#54 process 2: get from queue1 =>", data
self.doSomething(data)  # call method
else:
print "#57 queue1 empty"
pass
# NOTE: bare except hides the actual exception type and message.
except:
print "#60 queue1 error"
pass
# Sleep 3 s, build a new dict (the `data` argument is not used) and call put().
def doSomething(self, data):
time.sleep(3)
timestamp = str(datetime.now())
someData = {"process":"p2","class":"Unload()","method":"doSomething()","timestamp":timestamp}
self.someData = someData
print "#68 process 2: do something =>", someData
self.put()
# Sleep 3 s, then put a freshly built dict on queue2.
def put(self):
time.sleep(3)
timestamp = str(datetime.now())
self.someData = {"process":"p2","class":"Unload()","method":"put()","timestamp":timestamp}
res = self.someData
#print "#75 process 2: put back in queue1 =>", self.someData
#queue1.put(res)
print "#78 process 2: put back in queue2 =>", self.someData
queue2.put(res)
#queue2.close()
#queue2.join_thread()
# main 
# Script entry point: (re)creates the manager queues and the two Process objects.
if __name__ == '__main__':
# NOTE: these rebind the module-level manager/queue1/queue2 created earlier in
# the script with a second manager and new queues.
manager = multiprocessing.Manager()
queue1 = manager.Queue()
queue2 = manager.Queue()
# NOTE: a `global` statement at module level has no effect.
global p1, p2
#p1 = Process(target=Load().post(), args=(queue1,))     # process p1
# NOTE(review): `Load().post()` is called here in the parent process; Process
# only receives its return value (None) as target. post() polls queue2 until an
# item arrives, and nothing has written to queue2 yet, so execution never gets
# past this line and Unload().get() below is never reached.
p1 = Process(target=Load().post(), args=(queue1,queue2,))
p1.daemon = True
p1.start() 
#p2 = Process(target=Unload().get(), args=(queue1,))        # process p2
p2 = Process(target=Unload().get(), args=(queue1,queue2,))
p2.start()
p2.join()

尝试的结果 4:

#20 process 1: put in queue1 => {'process': 'p1', 'timestamp': '2020-02-24 13:06:17.687762', 'class': 'Load()', 'method': 'post()'}
#42 queue2 empty

尝试 5:使用一个 manager.Queue() 队列。只使用一个 manager.Queue():队列 1 既用于从进程 1 到进程 2,也用于从进程 2 返回进程 1。数据进入队列 1 后立即又被取走,永远不会到达进程 2。代码和结果如下。

尝试的代码 5:

#!/usr/bin/python2.7 python2.7
# -*- coding: utf-8 -*-
# script for serialized interprocess data exchange
# common modules
import os
import sys
import time
import multiprocessing
from multiprocessing import Process
from multiprocessing import Queue
from multiprocessing import Manager
from datetime import datetime
someData = {}
manager = multiprocessing.Manager()
queue1 = manager.Queue()
#queue2 = manager.Queue()
# Load: the "process 1" side; sends on queue1 and waits on the same queue.
# NOTE: queue1 is a module-level global (manager.Queue() proxy), not a
# parameter of post().
class Load():
# Put one dict on queue1, sleep 3 s, then poll queue1 until it is non-empty.
def post(self):
timestamp = str(datetime.now())
someData = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
queue1.put(someData)        # put into queue
print "#25 process 1: put in queue1 =>", someData
time.sleep(3)
while True:     # queue1 checking loop
if queue1.empty() == False:
timestamp = str(datetime.now())
res = queue1.get()
# NOTE: the fetched item is overwritten by a locally built dict on the next
# line, so the print below does not show the queue content.
res = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
print "#33 get from queue1 =>", res
break
else:
print "#36 queue1 empty"
time.sleep(1)
# Disabled variant that polls queue2 instead.
# while True:       # queue2 checking loop
#   if queue2.empty() == False:
#       timestamp = str(datetime.now())
#       res = queue2.get()
#       res = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
#       print "#44 get from queue2 =>", res
#       break
#   else:
#       print "#47 queue2 empty"
#       time.sleep(1)
# Unload: the "process 2" side; reads queue1 and answers on the same queue.
# NOTE: queue1 is a module-level global, not a method parameter.
class Unload():
# Check queue1 exactly once (no loop); on success continue with doSomething().
def get(self):
try:
if queue1.empty() == False:
data = queue1.get()     # retrieve package from queue
#queue1.close()
#queue1.join_thread()
timestamp = str(datetime.now())
# NOTE: the fetched item is discarded here; `data` is rebuilt locally.
data = {"process":"p2","class":"Unload()","method":"get()","timestamp":timestamp} 
print "#59 process 2: get from queue1 =>", data
self.doSomething(data)  # call method
else:
print "#62 queue1 empty"
pass
# NOTE: bare except hides the actual exception type and message.
except:
print "#65 queue1 error"
pass
# Sleep 3 s, build a new dict (the `data` argument is not used) and call put().
def doSomething(self, data):
time.sleep(3)
timestamp = str(datetime.now())
someData = {"process":"p2","class":"Unload()","method":"doSomething()","timestamp":timestamp}
self.someData = someData
print "#73 process 2: do something =>", someData
self.put()
# Sleep 3 s, then put a freshly built dict back on queue1.
def put(self):
time.sleep(3)
timestamp = str(datetime.now())
self.someData = {"process":"p2","class":"Unload()","method":"put()","timestamp":timestamp}
res = self.someData
print "#81 process 2: put back in queue1 =>", self.someData
queue1.put(res)
#print "#83 process 2: put back in queue2 =>", self.someData
#queue2.put(res)
#queue2.close()
#queue2.join_thread()
# main 
# Script entry point: (re)creates the manager queue and the two Process objects.
if __name__ == '__main__':
# NOTE: these rebind the module-level manager/queue1 created earlier in the
# script with a second manager and a new queue.
manager = multiprocessing.Manager()
queue1 = manager.Queue()
#queue2 = manager.Queue()
# NOTE: a `global` statement at module level has no effect.
global p1, p2
# NOTE(review): `Load().post()` is called here in the parent process; Process
# only receives its return value (None) as target. post() therefore puts its
# dict on queue1 and, 3 s later, takes an item from queue1 itself before
# Unload().get() below has been evaluated at all.
p1 = Process(target=Load().post(), args=(queue1,))      # process p1
#p1 = Process(target=Load().post(), args=(queue1,queue2,))
p1.daemon = True
p1.start() 
# NOTE(review): same pattern; Unload().get() also runs in the parent.
p2 = Process(target=Unload().get(), args=(queue1,))     # process p2
#p2 = Process(target=Unload().get(), args=(queue1,queue2,))
p2.start()
p2.join()

尝试的结果 5:

#25 process 1: put in queue1 => {'process': 'p1', 'timestamp': '2020-02-24 14:08:13.975886', 'class': 'Load()', 'method': 'post()'}
#33 get from queue1 => {'process': 'p1', 'timestamp': '2020-02-24 14:08:16.980382', 'class': 'Load()', 'method': 'post()'}
#62 queue1 empty

尝试 6:使用队列超时

按照建议,我尝试改用带超时的队列读取。做法仍然是队列 1 用于从进程 1 到进程 2,队列 2 用于从进程 2 到进程 1。数据进入了队列 1,但没有通过队列 2 返回,再次神秘地消失了。代码和结果如下。

尝试的代码 6:

#!/usr/bin/python2.7 python2.7
# -*- coding: utf-8 -*-
# script for serialized interprocess data exchange
# common modules
import os
import sys
import time
import uuid
import Queue
#from Queue import Empty
import multiprocessing
from multiprocessing import Process
#from multiprocessing import Queue
from datetime import datetime
someData = {}
# Load: the "process 1" side; sends on queue1 and waits for an answer on queue2.
# NOTE: queue1/queue2 are module-level globals, not parameters of post().
class Load():
# Put one dict on queue1, sleep 3 s, then retry queue2.get() with a 1 s
# timeout until an item arrives.
def post(self):
timestamp = str(datetime.now())
someData = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
queue1.put(someData)        # put into queue
print "#24 process 1: put in queue1 =>", someData
time.sleep(3)
# Disabled variant that polls queue1 with empty() instead.
# while True:       # queue1 checking loop
#   if queue1.empty() == False:
#       timestamp = str(datetime.now())
#       res = queue1.get()
#       res = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
#       print "#33 get from queue1 =>", res
#       break
#   else:
#       print "#36 queue1 empty"
#       time.sleep(1)
while True:     # queue2 checking loop
try:
# Blocking get with a 1 s timeout; Queue.Empty is raised when it expires.
someData = queue2.get(True,1)
timestamp = str(datetime.now())
# NOTE: the fetched item is overwritten by a locally built dict here.
someData = {"process":"p1","class":"Load()","method":"post()","timestamp":timestamp}
print "#43 process 1: got from queue2 =>", someData
break
except Queue.Empty:
print "#46 process1: queue2 empty"
continue
# Unload: the "process 2" side; reads queue1 and answers on queue2.
# NOTE: both queues are module-level globals, not method parameters.
class Unload():
# Retry queue1.get() with a 1 s timeout until an item arrives, then continue
# with doSomething().
def get(self):
while True:     # queue1 checking loop (this loop reads queue1, not queue2)
try:
someData = queue1.get(True,1)
timestamp = str(datetime.now())
# NOTE: the fetched item is discarded here; `someData` is rebuilt locally.
someData = {"process":"p2","class":"Unload()","method":"get()","timestamp":timestamp} 
print "#56 process2: got from queue1 =>", someData
break
except Queue.Empty:
print "#59 process2: queue1 empty"
continue
self.doSomething(someData)  # call method
# Sleep 3 s, build a new dict (the `data` argument is not used) and call put().
def doSomething(self, data):
time.sleep(3)
timestamp = str(datetime.now())
someData = {"process":"p2","class":"Unload()","method":"doSomething()","timestamp":timestamp}
self.someData = someData
print "#68 process2: do something =>", someData
self.put(someData)
# Sleep 3 s, then put a freshly built dict on queue2 (`data` is not used).
def put(self,data):
time.sleep(3)
timestamp = str(datetime.now())
self.someData = {"process":"p2","class":"Unload()","method":"put()","timestamp":timestamp}
someData = self.someData
#print "#81 process 2: put back in queue1 =>", self.someData
#queue1.put(res)
print "#78 process2: put back in queue2 =>", someData
queue2.put(someData)

# main 
# Script entry point: creates both queues and the two Process objects.
if __name__ == '__main__':
queue1 = multiprocessing.Queue()
queue2 = multiprocessing.Queue()
# NOTE: a `global` statement at module level has no effect.
global p1, p2
#p1 = Process(target=Load().post(), args=(queue1,))     # process p1
# NOTE(review): `Load().post()` is called here in the parent process; Process
# only receives its return value (None) as target. post() retries
# queue2.get() until an item arrives, and nothing has written to queue2 yet,
# so execution never gets past this line and Unload().get() below is never
# reached. Passing the bound method (`target=Load().post`) would defer the
# call to the child, but post() would then have to accept the two queues.
p1 = Process(target=Load().post(), args=(queue1,queue2,))
p1.daemon = True
p1.start() 
#p2 = Process(target=Unload().get(), args=(queue1,))        # process p2
p2 = Process(target=Unload().get(), args=(queue1,queue2,))
p2.start()
p2.join()

尝试结果6:

#24 process 1: put in queue1 => {'process': 'p1', 'timestamp': '2020-02-24 18:14:46.435661', 'class': 'Load()', 'method': 'post()'}
#46 process1: queue2 empty

注意:当我在没有类的情况下使用它时,建议的方法有效。代码如下:

import uuid
import multiprocessing
from multiprocessing import Process
import Queue

def load(que_in, que_out):
    """Send one request on ``que_in`` and wait for its result on ``que_out``.

    Args:
        que_in: queue the request dict (``id``, ``workload``) is put on.
        que_out: queue polled for the result dict (``id``, ``result``).

    Returns:
        The first dict received on ``que_out``.
    """
    request = {"id": uuid.uuid4(), "workload": "do_stuff", }
    que_in.put(request)
    print("load: sent request {}: {}".format(request["id"], request["workload"]))
    while True:
        try:
            # Block for at most 1 s per attempt; Queue.Empty signals a timeout.
            result = que_out.get(True, 1)
        except Queue.Empty:
            continue
        print("load: got result {}: {}".format(result["id"], result["result"]))
        # Only one request was sent, so stop after its result instead of
        # polling the queue forever.
        return result

def unload(que_in, que_out):
    """Worker loop: answer every request from ``que_in`` on ``que_out``.

    The loop has no exit condition; it runs until the process is stopped
    from outside.

    Args:
        que_in: queue delivering request dicts (``id``, ``workload``).
        que_out: queue the result dicts (``id``, ``result``) are put on.
    """
    def processed(request):
        # Build the result for one request; the id is carried over unchanged.
        return {"id": request["id"], "result": request["workload"] + " processed", }

    while True:
        try:
            # Block for at most 1 s per attempt; Queue.Empty signals a timeout.
            request = que_in.get(True, 1)
        except Queue.Empty:
            continue
        print("unload: got request {}: {}".format(request["id"], request["workload"]))
        result = processed(request)
        que_out.put(result)
        print("unload: sent result {}: {}".format(result["id"], result["result"]))

# main
if __name__ == '__main__':
    # One queue per direction: requests travel on que_in, results on que_out.
    que_in = multiprocessing.Queue()
    que_out = multiprocessing.Queue()
    # The function objects themselves are passed as targets (no call
    # parentheses), so each one runs inside its own child process.
    p1 = Process(target=load, args=(que_in, que_out))      # process p1
    p1.daemon = True
    p1.start()
    p2 = Process(target=unload, args=(que_in, que_out))     # process p2
    p2.start()
    p2.join()

-----更新20200225:尝试 7 ------------------------------------------------------------------------------

尝试 7:在不同的类之间使用一个带超时读取的共享队列(可行)

在这个尝试中,我在不同类的方法之间使用一个共享队列,并改用带超时的读取。数据通过 shared_queue 从进程 1 传到进程 2,再从进程 2 返回进程 1。在此尝试中,数据传输正确。代码和结果如下。

尝试的代码 7:

import uuid
import multiprocessing
from multiprocessing import Process
import Queue
class Input():
    """Requesting side of the round trip over one shared queue."""

    def load(self, shared_queue):
        """Put one request on ``shared_queue`` and wait for its result.

        Args:
            shared_queue: queue used in both directions; requests carry a
                ``workload`` key, results carry a ``result`` key.
        """
        # Local import: the script's import block does not provide ``time``.
        import time

        request = {"id": uuid.uuid4(), "workload": "do_stuff", }
        shared_queue.put(request)
        print("load: sent request {}: {}".format(request["id"], request["workload"]))
        while True:
            try:
                # Block for at most 1 s per attempt; Queue.Empty signals a timeout.
                result = shared_queue.get(True, 1)
            except Queue.Empty:
                continue
            if "result" not in result:
                # Both directions share one queue, so this may be a request
                # that has not been processed yet (possibly our own). Hand it
                # back and yield briefly so the worker can pick it up.
                shared_queue.put(result)
                time.sleep(0.01)
                continue
            print("load: got result {}: {}".format(result["id"], result["result"]))
            break
class Output():
    """Worker side of the round trip over one shared queue."""

    def unload(self, shared_queue):
        """Worker loop: answer every request found on ``shared_queue``.

        The loop has no exit condition; it runs until the process is stopped
        from outside.

        Args:
            shared_queue: queue used in both directions; requests carry a
                ``workload`` key, results carry a ``result`` key.
        """
        # Local import: the script's import block does not provide ``time``.
        import time

        def processed(request):
            # Build the result for one request; the id is carried over unchanged.
            return {"id": request["id"], "result": request["workload"] + " processed", }

        while True:
            try:
                # Block for at most 1 s per attempt; Queue.Empty signals a timeout.
                request = shared_queue.get(True, 1)
            except Queue.Empty:
                continue
            if "workload" not in request:
                # Results travel on the same queue, so this is an answer that
                # the requester has not collected yet. Hand it back and yield
                # briefly instead of failing on the missing key.
                shared_queue.put(request)
                time.sleep(0.01)
                continue
            print("unload: got request {}: {}".format(request["id"], request["workload"]))
            result = processed(request)
            shared_queue.put(result)
            print("unload: sent result {}: {}".format(result["id"], result["result"]))

# main
if __name__ == '__main__':
    # A single queue carries requests and results in both directions.
    shared_queue = multiprocessing.Queue()
    up = Input()
    down = Output()
    # Bound methods are passed as targets (no call parentheses), so each one
    # runs inside its own child process and receives the queue via args.
    p1 = Process(target=up.load, args=(shared_queue,))      # process p1
    p1.daemon = True
    p1.start()

    p2 = Process(target=down.unload, args=(shared_queue,))     # process p2
    p2.start()
    p1.join()
    p2.join()

尝试的结果 7:

load: sent request a461357a-b39a-43c4-89a8-a77486a5bf45: do_stuff
unload: got request a461357a-b39a-43c4-89a8-a77486a5bf45: do_stuff
unload: sent result a461357a-b39a-43c4-89a8-a77486a5bf45: do_stuff processed
load: got result a461357a-b39a-43c4-89a8-a77486a5bf45: do_stuff processed

解决方案:使用一个共享队列

在遵循建议并做了一些调整、让不同类的方法都能正确访问队列之后,我解决了这个问题。数据现在可以在两个独立进程之间正确地来回传递。对我来说,一个重要的注意事项是要特别留意两个独立进程之间交换的 someData 包:它必须始终是来回传递的同一个包。因此,用标识符条目 "id": uuid.uuid4() 来检查每一步传递的是否是同一个包。

#!/usr/bin/python2.7 python2.7
# -*- coding: utf-8 -*-
# script for back and forth communication between two separate processes using a shared queue
# common modules
import os
import sys
import time
import uuid
import Queue
import multiprocessing
from multiprocessing import Process
from datetime import datetime

someData = {}

class Load():
    """Sending side (process p1): posts one package and waits for it to return."""

    # Seconds slept between steps "for the time flow"; set to 0 to skip pauses.
    stepDelay = 1

    def post(self, sharedQueue):
        """Put one package on ``sharedQueue`` and read one package back.

        The package sent is kept in ``self.someData``. The package read back
        gets a fresh ``timestamp`` before it is printed.

        Args:
            sharedQueue: queue used in both directions.
        """
        timestamp = str(datetime.now()) # for timing checking
        someData = {"timestamp":timestamp, "id": uuid.uuid4(), "workload": "do_stuff",}
        self.someData = someData
        sharedQueue.put(someData)       # put into the shared queue
        # Three placeholders for three values: with only two, str.format
        # silently dropped the workload.
        print("#25 p1 load: sent someData {}: {} {}".format(someData["id"], someData["timestamp"], someData["workload"]))
        # NOTE(review): nothing in the package marks it as processed, so these
        # pauses appear to be what gives the other process time to take it
        # first; without them this method can read back its own package.
        time.sleep(self.stepDelay)   # for the time flow
        while True:     # sharedQueue checking loop
            try:
                time.sleep(self.stepDelay)   # for the time flow
                timestamp = str(datetime.now())
                # Block for at most 1 s; Queue.Empty signals the timeout.
                someData = sharedQueue.get(True,1)
            except Queue.Empty:
                print("#37 p1: sharedQueue empty")
                continue
            someData["timestamp"] = timestamp
            print("#37 p1 load: got back someData {}: {} {}".format(someData["id"], someData["timestamp"], someData["workload"]))
            break

class Unload():
    """Receiving side (process p2): takes a package, works on it, sends it back."""

    # Seconds slept between steps "for the time flow"; set to 0 to skip pauses.
    stepDelay = 1
    # Queue used by put(); assigned by get() so no module-level global is needed.
    sharedQueue = None

    def get(self, sharedQueue):
        """Wait for one package on ``sharedQueue`` and pass it to doSomething().

        Args:
            sharedQueue: queue used in both directions; remembered on the
                instance so that put() answers on the same queue.
        """
        self.sharedQueue = sharedQueue
        while True:     # sharedQueue checking loop
            try:
                # Block for at most 1 s; Queue.Empty signals the timeout.
                someData = sharedQueue.get(True,1)
            except Queue.Empty:
                print("#53 p2: sharedQueue empty")
                continue
            self.someData = someData
            timestamp = str(datetime.now())
            someData["timestamp"] = timestamp
            # Three placeholders for three values: with only two, str.format
            # silently dropped the workload.
            print("#50 p2 unload: got someData {}: {} {}".format(someData["id"], someData["timestamp"], someData["workload"]))
            break
        time.sleep(self.stepDelay)               # for the time flow
        self.doSomething(someData)  # pass the data to the method

    def doSomething(self, someData):    # execute some code here
        """Refresh the package timestamp, then send the package back via put()."""
        timestamp = str(datetime.now())
        someData["timestamp"] = timestamp
        print("#62 p2 unload: doSomething {}: {} {}".format(someData["id"], someData["timestamp"], someData["workload"]))
        self.put(someData)
        time.sleep(self.stepDelay)   # for the time flow

    def put(self,someData):
        """Refresh the timestamp and put the package on ``self.sharedQueue``.

        ``self.sharedQueue`` is set by get(); assign it first when calling
        this method directly.
        """
        timestamp = str(datetime.now())
        someData["timestamp"] = timestamp
        # Use the queue stored by get(): a bare ``sharedQueue`` name here is
        # a module global that exists only if the process inherited it.
        self.sharedQueue.put(someData)
        print("#71 p2 unload: put someData {}: {} {}".format(someData["id"], someData["timestamp"], someData["workload"]))
        time.sleep(self.stepDelay)   # for the time flow

# main 
if __name__ == '__main__':
    # A single queue carries the package in both directions.
    sharedQueue = multiprocessing.Queue()
    trx = Load()
    rcx = Unload()
    # Bound methods are passed as targets (no call parentheses), so each one
    # runs inside its own child process and receives the queue via args.
    p1 = Process(target=trx.post, args=(sharedQueue,))      # process p1
    p1.daemon = True
    p1.start()
    p2 = Process(target=rcx.get, args=(sharedQueue,))       # process p2
    p2.start()
    p1.join()
    p2.join()

我想你只是错过了队列超时的使用

try:
result = que_out.get(True, 1)
except queue.Empty:
continue

这个简化的示例可以帮助您:

import uuid
from multiprocessing import Process
from multiprocessing import Queue
import queue

def load(que_in, que_out):
    """Send one request on ``que_in`` and wait for its result on ``que_out``.

    Args:
        que_in: queue the request dict (``id``, ``workload``) is put on.
        que_out: queue polled for the result dict (``id``, ``result``).

    Returns:
        The first dict received on ``que_out``.
    """
    request = {"id": uuid.uuid4(), "workload": "do_stuff", }
    que_in.put(request)
    print("load: sent request {}: {}".format(request["id"], request["workload"]))
    while True:
        try:
            # Block for at most 1 s per attempt; queue.Empty signals a timeout.
            result = que_out.get(True, 1)
        except queue.Empty:
            continue
        print("load: got result {}: {}".format(result["id"], result["result"]))
        # Only one request was sent, so stop after its result instead of
        # polling the queue forever.
        return result

def unload(que_in, que_out):
    """Worker loop: answer every request from ``que_in`` on ``que_out``.

    The loop has no exit condition; it runs until the process is stopped
    from outside.

    Args:
        que_in: queue delivering request dicts (``id``, ``workload``).
        que_out: queue the result dicts (``id``, ``result``) are put on.
    """
    def processed(request):
        # Build the result for one request; the id is carried over unchanged.
        return {"id": request["id"], "result": request["workload"] + " processed", }

    while True:
        try:
            # Block for at most 1 s per attempt; queue.Empty signals a timeout.
            request = que_in.get(True, 1)
        except queue.Empty:
            continue
        print("unload: got request {}: {}".format(request["id"], request["workload"]))
        result = processed(request)
        que_out.put(result)
        print("unload: sent result {}: {}".format(result["id"], result["result"]))
# main
if __name__ == '__main__':
    # One queue per direction: requests travel on que_in, results on que_out.
    que_in = Queue()
    que_out = Queue()
    # The function objects themselves are passed as targets (no call
    # parentheses), so each one runs inside its own child process.
    p1 = Process(target=load, args=(que_in, que_out))      # process p1
    p1.daemon = True
    p1.start()
    p2 = Process(target=unload, args=(que_in, que_out))     # process p2
    p2.start()
    p2.join()

输出

load: sent request d9894e41-3e8a-4474-9563-1a99797bc722: do_stuff
unload: got request d9894e41-3e8a-4474-9563-1a99797bc722: do_stuff
unload: sent result d9894e41-3e8a-4474-9563-1a99797bc722: do_stuff processed
load: got result d9894e41-3e8a-4474-9563-1a99797bc722: do_stuff processed

您必须使用管理器包装的队列在进程之间传播更改,否则每个进程都有其单独的队列对象,并且看不到其他进程。管理器为所有子进程创建队列的共享实例。

因此,需要把 queue1 = Queue() 改为 queue1 = manager.Queue(),并在文件顶部加上 from multiprocessing import Manager。如果要使用两个队列的方法,显然必须以相同的方式包装第二个队列。

相关资源:

来自一个多处理管理器的多个队列

Python 文档

最新更新