我发现尝试使用CUDA :: Thurst Iterator在GPU上实现ODES求解器例程,以求解GPU中的一堆耦合的一阶方程。我想在以前的问题中解决该方法,使用户能够使用在矢量元素上工作的任意函数来编写方程系统。详细说明,这是一小部分代码:
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
#include <thrust/for_each.h>
#include <thrust/iterator/zip_iterator.h>
#include <iostream>
#include <math.h>
__host__ __device__ float f1(float x)
{
return sinf(x);
}
__host__ __device__ float f2(float x)
{
return cosf(x);
}
__host__ __device__ float Vx(float x)
{
return sinf(x);
}
struct q_dot
{
float x;
float delta;
q_dot(float _x,float _delta): x(_x),delta(_delta){};
template <typename Tuple>
__host__ __device__
float operator()(Tuple t)
{
float p = thrust::get<1>(t) + delta;
return p/MASS;
}
};
struct p_dot
{
float x;
float delta;
p_dot(float _x,float _delta): x(_x),delta(_delta){};
template <typename Tuple>
__host__ __device__
float operator()(Tuple t)
{
float q = thrust::get<0>(t) + delta;
return -Vx(q);
}
};
struct euler_functor
{
unsigned fn;
float h;
float er;
float x0;
euler_functor(unsigned _fn,float _x0,float _h, float _er) : fn(_fn),h(_h),er(_er),x0(_x0) {};
template <typename Tuple>
__host__ __device__
void operator()(Tuple t) const {
// if (fn == 1) y = h*f1(y);
//else if (fn == 2) y = h*f2(y); This can be handled in this way?
q = h*p_dot(x0,h/2)(t);
p = h*p_dot(x0,h/2)(t);
float er_p,er_q;
er_p=0.5*h*p_dot(x0,h/2)(t);
er_q=0.5*h*q_dot(x0,h/2)(t);
er = er_p;
}
};
int main(void)
{
float t=0;
float t_step=0.1;
float error;
const unsigned N = 8;
// allocate three device_vectors with 10 elements
thrust::device_vector<float> Q(N),P(N);
// initilaize to some values
thrust::sequence(Q.begin(), Q.end(), 0.0f, (float)(6.283/(float)N));
// initilaize to some values
thrust::sequence(P.begin(), P.end(), 0.0f, (float)(10.283/(float)N));
// apply euler for each element of Q and P
//thrust::for_each(X.begin(),X.end(),euler_functor(1,t,t_step,error)); this becomes:
thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(Q.begin(),P.begin())),
thrust::make_zip_iterator(thrust::make_tuple( Q.end(), P.end())),euler_functor(1,t,t_step,error));
// print the values
for(int i = 0; i < N; i++) std::cout<< Q[i]<<" "<<P[i]<< std::endl;
}
但是,当我编译以前的代码时,我会遇到很多错误。同样,我不确定这是否是最好的方法。我该如何使其工作?我的错误在哪里?有更好的方法吗?因为它是ER变量,因此携带有关数值误差的信息始终返回零。如何获取此信息?它可以用来实现一些自适应技巧。
您的代码存在许多问题。我不确定我会在这里捕获所有这些,但是:
-
MASS
未定义。 - 您的
p_dot
和q_dot
函子需要其他__device__
装饰 - 您对Euler函数内
p
和q
变量的使用没有任何意义。它们在任何地方都没有定义,这也不是将值返回P
和Q
矢量的正确方法,如果这是您的意图。 - 我们没有通过在实例化时传递给函子的变量返回数据。因此,要返回每个时间步中的
er
变量,我创建一个单独的向量(ERP
和ERQ
)。
这是一个修改的代码,它具有以上问题和其他各种问题。尽管我没有详细检查算术,但它似乎返回了明智的结果。
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
#include <thrust/for_each.h>
#include <thrust/iterator/zip_iterator.h>
#include <iostream>
#include <math.h>
#define MASS 1.0f
__host__ __device__ float f1(float x)
{
return sinf(x);
}
__host__ __device__ float f2(float x)
{
return cosf(x);
}
__host__ __device__ float Vx(float x)
{
return sinf(x);
}
struct q_dot
{
float x;
float delta;
__host__ __device__
q_dot(float _x,float _delta): x(_x),delta(_delta){};
template <typename Tuple>
__host__ __device__
float operator()(Tuple t)
{
float p = thrust::get<1>(t) + delta;
return p/MASS;
}
};
struct p_dot
{
float x;
float delta;
__host__ __device__
p_dot(float _x,float _delta): x(_x),delta(_delta){};
template <typename Tuple>
__host__ __device__
float operator()(Tuple t)
{
float q = thrust::get<0>(t) + delta;
return -Vx(q);
}
};
struct euler_functor
{
unsigned fn;
float h;
float x0;
euler_functor(unsigned _fn,float _x0,float _h) : fn(_fn),h(_h),x0(_x0) {};
template <typename Tuple>
__host__ __device__
void operator()(const Tuple &t) {
// if (fn == 1) y = h*f1(y);
//else if (fn == 2) y = h*f2(y);
float t0, t1, t2, t3;
t0 = h*p_dot(x0,h/2.0f)(t);
t1 = h*q_dot(x0,h/2.0f)(t);
t2=0.5*h*p_dot(x0,h/2.0f)(t);
t3=0.5*h*q_dot(x0,h/2.0f)(t);
thrust::get<0>(t) = t0;
thrust::get<1>(t) = t1;
thrust::get<2>(t) = t2;
thrust::get<3>(t) = t3;
}
};
int main(void)
{
float t=0;
float t_step=0.1;
const unsigned N = 8;
// allocate three device_vectors with 10 elements
thrust::device_vector<float> Q(N),P(N), ERP(N), ERQ(N);
// initilaize to some values
thrust::sequence(Q.begin(), Q.end(), 0.0f, (float)(6.283/(float)N));
// initilaize to some values
thrust::sequence(P.begin(), P.end(), 0.0f, (float)(10.283/(float)N));
for(int i = 0; i < N; i++) std::cout<< Q[i]<<" "<<P[i]<< " "<< ERP[i] << " " << ERQ[i] << std::endl;
std::cout<< "*****" << std::endl;
// apply euler for each element of Q and P
//thrust::for_each(X.begin(),X.end(),euler_functor(1,t,t_step,error)); this becomes:
thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(Q.begin(),P.begin(),ERP.begin(), ERQ.begin())),thrust::make_zip_iterator(thrust::make_tuple(Q.end(),P.end(),ERP.end(), ERQ.end())),euler_functor(1,t,t_step));
// print the values
for(int i = 0; i < N; i++) std::cout<< Q[i]<<" "<<P[i]<< " "<< ERP[i] << " " << ERQ[i] << std::endl;
}