我正试图解决一个涉及最小化某个函数的问题。函数包含一个numpy数组,其元素通过调用函数来填充。数组的生成需要花费大量的时间,我的意思是。我正在生成的数组定义在下面
def cov_p(Phi, l_mat, s_f, sigma_11, sigma_22, N_p):
ans = np.zeros([N_p, N_p, 2, 2], dtype=complex)
for i in range(N_p):
for j in range(N_p):
ans[i][j] = np.linalg.multi_dot([L_0, R_final(
Phi, l_mat, s_f, (i-j)*tstep), L_0.T])+np.array([[sigma_11, 0], [0, sigma_22]])*I(i, j)
return ans.transpose(0, 2, 1, 3).reshape(2*N_p, -1)
其中L_0和I定义在之下
L_0 = np.eye(2)
def I(m, p):
if m == p:
return 1
return 0
可以看出,每个元素都引用函数R_final,该函数基本上返回一个2乘2的复矩阵。然后,我将所有这些2乘2的矩阵连接起来,形成大小为2N_p乘2N_p的矩阵。R_final定义如下。
def R_x(Phi, l_mat, s_f, t, i, j):
real_integral = quad(S_xr, -np.inf, np.inf,
args=(Phi, l_mat, s_f, t, i, j), limit=10000)
imag_integral = quad(S_xc, -np.inf, np.inf,
args=(Phi, l_mat, s_f, t, i, j), limit=10000)
return real_integral[0]+1j*imag_integral[0]
def R_final(Phi, l_mat, s_f, t):
return np.array([[R_x(Phi, l_mat, s_f, t, i, j) for j in range(2)] for i in range(2)])
其中S_xr、S_xc显示在下方
def S_xr(omega, Phi, l_mat, s_f, t, i, j):
h_x = np.real(np.linalg.multi_dot(
[Phi, np.linalg.inv(-1j*omega*np.linalg.inv(l_mat)+np.eye(np.shape(l_mat)[0])), Phi.T])) #Taking only the real value. Check again!!!
ans = np.exp(1j*omega*t)*np.linalg.multi_dot([np.conjugate(h_x), s_f, h_x.T])
return np.real(ans[i][j])
def S_xc(omega, Phi, l_mat, s_f, t, i, j):
h_x = np.real(np.linalg.multi_dot(
[Phi, np.linalg.inv(-1j*omega*np.linalg.inv(l_mat)+np.eye(np.shape(l_mat)[0])), Phi.T])) #Taking only the real value. Check again!!!
ans = np.exp(1j*omega*t)*np.linalg.multi_dot([np.conjugate(h_x), s_f, h_x.T])
return np.imag(ans[i][j])
尝试为以下列出的参数值计算cov_p。
phi = np.array([[-0.0529255 +0.00662948j, -0.0529255 -0.00662948j,
-0.03050694-0.00190298j, -0.03050694+0.00190298j],
[-0.04149906+0.00171591j, -0.04149906-0.00171591j,
0.01974404-0.00194719j, 0.01974404+0.00194719j]])
lamb_mat = np.array([[-1.00390867 +6.28783994j, 0. +0.j ,
0. +0.j , 0. +0.j ],
[ 0. +0.j , -1.00390867 -6.28783994j,
0. +0.j , 0. +0.j ],
[ 0. +0.j , 0. +0.j ,
-0.25859133+12.09860357j, 0. +0.j ],
[ 0. +0.j , 0. +0.j ,
0. +0.j , -0.25859133-12.09860357j]])
S_f = np.array([[100,0],[0,100]])
tstep = 0.1
sigma_11 = 0.3
sigma_22 = 0.4
#Try finding cov_p for the following set of inputs
cov_p(phi, lamb_mat, S_f, sigma_11, sigma_22, 10)
我想问的是,如何通过将操作划分为多个过程来加快cov_p的填充。实际上,cov_p右边和底部的R_final矩阵需要时间。任何帮助都将不胜感激。
您可以使用多处理的Pool
轻松地并行化最外层的循环。其想法是在最后一个维度中拆分ans
数组,并在计算完每个部分后将其合并。请注意,tstep
变量需要发送到进程。以下是主要包含修改部分的结果代码:
import numpy as np
from scipy.integrate import quad
from multiprocessing import Pool
# [...] -- Code of the following unchanged function here
# I, R_x, R_final, S_xr, S_xc
def cov_p_work(params):
Phi, l_mat, s_f, sigma_11, sigma_22, N_p, tstep, i = params
ans = np.zeros([N_p, 2, 2], dtype=complex)
L_0 = np.eye(2)
for j in range(N_p):
ans[j] = np.linalg.multi_dot([L_0, R_final(
Phi, l_mat, s_f, (i-j)*tstep), L_0.T])+np.array([[sigma_11, 0], [0, sigma_22]])*I(i, j)
return ans
def cov_p(Phi, l_mat, s_f, sigma_11, sigma_22, N_p, tstep):
with Pool() as p:
params = [(Phi, l_mat, s_f, sigma_11, sigma_22, N_p, tstep, i) for i in range(N_p)]
result = p.map(cov_p_work, params)
ans = np.array(result)
return ans.transpose(0, 2, 1, 3).reshape(2*N_p, -1)
if __name__ == '__main__':
# [...] -- Unchanged parameters here
#Try finding cov_p for the following set of inputs
cov_p(phi, lamb_mat, S_f, sigma_11, sigma_22, 10, tstep)
在我的6核机器上,这大约快了5倍(96.9秒-->20.9秒(。请注意,由于N_p
在这里非常小(这可能会导致一些不完美的负载平衡(,所以它几乎不会随着更多的核心而扩展。
我在这里看到的唯一潜在优化是将quad作为一个离散过程运行包装器。这是因为quad调用S_xr和S_xc,所以您无法真正优化这些函数。
使用您显示的值,整个程序在我的机器上运行不到50秒。它是如何在你的机器上运行6天的,这是个谜。也许您使用的参数与问题中显示的参数不同。不管怎样,这是整个修改版本:
import numpy as np
from scipy.integrate import quad
from multiprocessing import Pool
phi = np.array([[-0.0529255 + 0.00662948j, -0.0529255 - 0.00662948j,
-0.03050694-0.00190298j, -0.03050694+0.00190298j],
[-0.04149906+0.00171591j, -0.04149906-0.00171591j,
0.01974404-0.00194719j, 0.01974404+0.00194719j]])
lamb_mat = np.array([[-1.00390867 + 6.28783994j, 0. + 0.j,
0. + 0.j, 0. + 0.j],
[0. + 0.j, -1.00390867 - 6.28783994j,
0. + 0.j, 0. + 0.j],
[0. + 0.j, 0. + 0.j,
-0.25859133+12.09860357j, 0. + 0.j],
[0. + 0.j, 0. + 0.j,
0. + 0.j, -0.25859133-12.09860357j]])
L_0 = np.eye(2)
S_f = np.array([[100, 0], [0, 100]])
tstep = 0.1
sigma_11 = 0.3
sigma_22 = 0.4
quadProc = None
def S_xr(omega, Phi, l_mat, s_f, t, i, j):
h_x = np.real(np.linalg.multi_dot(
[Phi, np.linalg.inv(-1j*omega*np.linalg.inv(l_mat)+np.eye(np.shape(l_mat)[0])), Phi.T])) # Taking only the real value. Check again!!!
ans = np.exp(1j*omega*t) * np.linalg.multi_dot([np.conjugate(h_x), s_f, h_x.T])
return np.real(ans[i][j])
def S_xc(omega, Phi, l_mat, s_f, t, i, j):
h_x = np.real(np.linalg.multi_dot(
[Phi, np.linalg.inv(-1j*omega*np.linalg.inv(l_mat)+np.eye(np.shape(l_mat)[0])), Phi.T])) # Taking only the real value. Check again!!!
ans = np.exp(1j*omega*t) * np.linalg.multi_dot([np.conjugate(h_x), s_f, h_x.T])
return np.imag(ans[i][j])
def quadWrapper(S_x, Phi, l_mat, s_f, t, i, j):
return S_x, quad(S_x, -np.inf, np.inf, args=(Phi, l_mat, s_f, t, i, j), limit=10_000)
def R_x(Phi, l_mat, s_f, t, i, j):
args = [(S_xr, Phi, l_mat, s_f, t, i, j), (S_xc, Phi, l_mat, s_f, t, i, j)]
results = None
def callback(n):
nonlocal results
results = n
quadProc.starmap_async(func=quadWrapper, iterable=args, callback=callback).wait()
for f, i in results:
if f == S_xr:
real_integral = i
else:
imag_integral = i
return real_integral[0]+1j*imag_integral[0]
def R_final(Phi, l_mat, s_f, t):
return np.array([[R_x(Phi, l_mat, s_f, t, i, j) for j in range(2)] for i in range(2)])
def cov_p(Phi, l_mat, s_f, sigma_11, sigma_22, N_p):
global quadProc
with Pool() as quadProc:
ans = np.zeros([N_p, N_p, 2, 2], dtype=complex)
for i in range(N_p):
for j in range(N_p):
ans[i][j] = np.linalg.multi_dot([L_0, R_final(
Phi, l_mat, s_f, (i-j)*tstep), L_0.T])+np.array([[sigma_11, 0], [0, sigma_22]])*(i==j)
return ans.transpose(0, 2, 1, 3).reshape(2*N_p, -1)
import time
if __name__ == '__main__':
start = time.perf_counter()
print(cov_p(phi, lamb_mat, S_f, sigma_11, sigma_22, 10))
end = time.perf_counter()
print(f'Duration={end-start:.2f}s')