我正在尝试将大型数据集的拟合并行化。我先在单个数组上做了独立测试,运行正常。但引入多进程(multiprocessing)之后,模型开始出现溢出错误。
import math
import multiprocessing as mp
from functools import partial

import matplotlib.pyplot as plt
import numpy as np
from scipy import optimize
def math_model(n):
    """Build a pulse-shape model function with shaping exponent ``n``.

    The returned callable evaluates, element-wise over ``x``::

        offset                                          where x < t0
        offset + amp * (e**n / n**n) * z**n * e**(-z)   otherwise

    with ``z = (x - t0) / tau``.  The factor ``e**n / n**n`` is the
    reciprocal of ``z**n * e**(-z)`` at ``z = n``, so at that point the
    model value is ``offset + amp``.

    Args:
        n: Exponent of the pulse shape.

    Returns:
        A function ``crrn(x, t0, amp, tau, offset)`` returning a float
        NumPy array with the same shape as ``x``.
    """
    def crrn(x, t0, amp, tau, offset):
        """Evaluate the pulse model at the sample positions ``x``."""
        x = np.asarray(x, dtype=float)
        out = np.full(x.shape, offset, dtype=float)

        # ``~(x < t0)`` instead of ``x >= t0``: a NaN ``t0`` then still
        # selects the pulse branch, like a scalar ``if a < t0 ... else``.
        active = ~(x < t0)
        if not active.any():
            return out

        z = (x[active] - t0) / tau
        norm = math.exp(n) / (n ** n)
        # np.exp saturates to inf (with a RuntimeWarning) where math.exp
        # would raise OverflowError, so an optimiser probing extreme
        # parameters gets a huge residual instead of an exception.
        out[active] = offset + amp * norm * z ** n * np.exp(-z)
        return out

    return crrn
def fitting_model(d, X, p):
    """Least-squares fit of the ``n = 8`` pulse model to a single trace.

    Args:
        d: Measured samples of the trace.
        X: Sample positions, one per entry of ``d``.
        p: Initial guess, unpacked into the model as
            ``(t0, amp, tau, offset)``.

    Returns:
        Tuple ``(p1, residuo)``: the fitted parameters and the residuals
        ``model(X, *p1) - d``.
    """
    model = math_model(8)

    def errfunc(params, x, y):
        # Residual vector minimised by leastsq.
        return model(x, *params) - y

    # The second return value is leastsq's integer status flag; unused here.
    p1, _ = optimize.leastsq(errfunc, p, args=(X, d))
    # Residuals are taken against the trace that was passed in, not against
    # a module-level ``y`` that merely happens to hold the same data.
    residuo = model(X, *p1) - d
    return p1, residuo
# Sample trace: a baseline around 48-50 with one pulse peaking near 62.
y = [
    48.9375, 50.0, 49.9375, 50.0, 49.9375, 49.0, 50.9375, 52.0,
    53.9375, 56.0, 56.9375, 58.0, 59.9375, 61.0, 61.9375, 60.0,
    59.9375, 58.0, 55.9375, 55.0, 52.9375, 53.0, 50.9375, 51.0,
    49.9375, 49.0, 48.9375, 49.0, 47.9375, 49.0, 48.9375, 48.0,
]
# Sample positions are the indices 0 .. len(y) - 1.
x = [*range(len(y))]
# Initial guess, in the order the model takes it: t0, amp, tau, offset.
p = [4, 12, 2, 47]
# Run the fit; the guess is overwritten by the fitted parameters.
p, resudio = fitting_model(y, x, p)
以上是可以正常运行的示例。下面是加入多进程之后的版本。
def fitting_worker(data, params):
    """Fit the pulse model to every trace in one chunk of data.

    Args:
        data: Sequence of traces; each trace is a sequence of samples.
            The sample positions are derived from the length of the first
            trace, so all traces are assumed to be equally long
            (NOTE(review): confirm against the caller).
        params: ``params[0]`` is the model exponent ``n``; ``params[1:]``
            is the initial guess used for every trace.

    Returns:
        ``[fit, residuals]``: two lists with one entry per trace, holding
        the fitted parameters and the residuals ``model(X, *p) - d``.
        An empty ``data`` yields ``[[], []]``.
    """
    # len() rather than truthiness so a NumPy array chunk works as well.
    if len(data) == 0:
        return [[], []]

    model = math_model(params[0])
    X = list(range(len(data[0])))
    start = params[1:]

    def errfunc(p, x, y):
        # Residual vector minimised by leastsq.
        return model(x, *p) - y

    fit = []
    residuals = []
    for d in data:
        # The second return value is leastsq's integer status flag; unused.
        p, _ = optimize.leastsq(errfunc, start, args=(X, d))
        fit.append(p)
        residuals.append(model(X, *p) - d)
    return [fit, residuals]
def fitting(data, params):
    """Fit all traces in ``data`` in parallel using a process pool.

    ``data`` is cut into consecutive chunks and each chunk is handed to
    ``fitting_worker`` in a worker process.

    Args:
        data: Sliceable sequence of traces.
        params: Passed through to ``fitting_worker`` unchanged.

    Returns:
        A flat list that interleaves the per-chunk results:
        ``[fit_chunk0, residuals_chunk0, fit_chunk1, residuals_chunk1, ...]``.
        Empty ``data`` yields ``[]``.
    """
    total = len(data)
    if total == 0:
        # Nothing to fit; do not spin up a pool for it.
        return []

    processors = mp.cpu_count()
    # max(1, ...) keeps the chunk size non-zero when there are fewer traces
    # than cores (a zero step would make range() raise ValueError).
    # NOTE(review): the factor 3 makes every chunk three per-core shares, so
    # only about a third of the workers receive work - confirm it is intended.
    numlines = max(1, total // processors) * 3
    chunks = [data[start:start + numlines]
              for start in range(0, total, numlines)]

    # The context manager tears the pool down even if a worker raises;
    # map() blocks until every chunk has been processed.
    with mp.Pool(processors) as pool:
        fitted = pool.map(partial(fitting_worker, params=params), chunks)

    return [item for sublist in fitted for item in sublist]
我加了 try/except 来检查是否是数据本身导致了这个问题,但把在多进程中触发错误的那些数据放到单进程版本里运行时,却一切正常。如果您对如何解决此问题有任何建议,那就太好了。谢谢。
不建议直接使用 leastsq。应当使用 curve_fit:它基于 leastsq 实现,但修复了 leastsq 的大多数问题。