下面的脚本执行以下操作:
1( 制作一个包含 200 行的数据框
2(将DF排序为对象列表,多处理,使每个内核将四分之一的DF放入自己的列表中
3(将列表粘成一个大列表并打印
问题 = 列表为空,几乎就像每个进程中都没有调用 get_car_terms 函数而没有错误消息
import random
import psutil
import pandas as pd
import multiprocessing as mp
class car_term(): #object to go into list
def __init__(self, capcode, miles,months , cmprice, fmprice ):
self.capcode = capcode
self.months = months
self.miles = miles
self.cmprice = cmprice
self.fmprice = fmprice
df_final = pd.DataFrame({'capcode':[],'months':[],'mileage':[],'cm':[],'fm':[]})
for i in range (200): # making dataframe to get data from
df_final.append(pd.DataFrame({'capcode':[i],'months':[random.randint(1, 12)],'mileage':[random.randint(0, 10000)],'cm':[random.randint(5, 700)],'fm':[random.randint(15, 710)]}))
all_deals=[] # this is the list i want to put my objects into
def get_car_terms(data,mdb1,all_deals1):
all_deals1.append(car_term(mdb1['capcode'][data],mdb1['mileage'][data],mdb1['months'][data],mdb1['cm'][data],mdb1['fm'][data])) # i make the objects with the dataframe like this
all_deals1a=[] # individual lists for each proccessor
all_deals2a=[]
all_deals3a=[]
all_deals4a=[]
print("yo1")
if __name__ == "__main__":
n_cpus = psutil.cpu_count() # number of cpus
print(n_cpus) # i have 4 cpus
if df_final.shape[0]%n_cpus == 0:
for i in range(int(df_final.shape[0]/n_cpus)):
############# the problem is the get_car_terms function doesnt run below
p1 = mp.Proccess(target = get_car_terms,args = (i+((df_final.shape[0]/n_cpus)*1), df_final,all_deals1a)) # each cpu sorts a quater of the dataframe into my objects list
p2 = mp.Proccess(target = get_car_terms,args = (i+((df_final.shape[0]/n_cpus)*2), df_final,all_deals2a))
p3 = mp.Proccess(target = get_car_terms,args = (i+((df_final.shape[0]/n_cpus)*3), df_final,all_deals3a))
p4 = mp.Proccess(target = get_car_terms,args = (i+((df_final.shape[0]/n_cpus)*4), df_final,all_deals4a))
p1.start()
p2.start()
p3.start()
p4.start()
p1.end()
p2.end()
p3.end()
p4.end()
all_deals.append(all_deals1a) # group lists together
all_deals.append(all_deals2a)
all_deals.append(all_deals3a)
all_deals.append(all_deals4a)
print("we did it")
print(len(all_deals)) # this should have 200 of my objects in it... it doesnt
for i in all_deals:
print(i.capcode)
你在.start()
后立即调用了.end()
,所以多处理没有获得工作所需的时间。我建议在开始和结束之间运行time.sleep(1)
,以给他们所需的时间。