错误：内核参数具有非平凡可复制构造的类/结构体类型（SYCL + TBB）



我试图提供一种“Map”骨架来封装 oneAPI 调用：通过指定目标类型（CPU 或 GPU/加速器）等参数，把与硬件目标相关的问题隐藏起来。我的 Map 骨架接收一个函数及其导数，并从给定的初始点出发执行牛顿法。但编译时出现了如下错误：

kernel parameter has non-trivially copy constructible class/struct type 'std::function<double (double)>'

我的代码是:

#include <chrono>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <functional>
#include <iostream>
#include <numeric>
#include <queue>
#include <random>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include <CL/sycl.hpp>
#include <tbb/tbb.h>
#include <tbb/blocked_range.h>
#include <tbb/global_control.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_reduce.h>

#include "uTimer.cpp"
//#include <dos.h> //for delay
//#include <conio.h> //for getch()
//#include <complex>
#define EPSILON 0.000001 // The step size across the X and Y axis
using namespace tbb;
/// Small stopwatch based on std::chrono::high_resolution_clock.
///
/// Timing begins when the object is constructed; start() moves the reference
/// point to "now", and stop() reports the seconds elapsed since that point
/// without modifying the stopwatch.
class Clock {
private:
    using timer_type = std::chrono::high_resolution_clock;
    std::chrono::time_point<timer_type> origin_;

public:
    /// Captures the current time as the reference point.
    Clock() : origin_(timer_type::now()) {}

    /// Restarts the measurement from the current time.
    void start() { origin_ = timer_type::now(); }

    /// @return seconds (as a double) elapsed since construction or the last start().
    double stop() const {
        const std::chrono::duration<double> elapsed = timer_type::now() - origin_;
        return elapsed.count();
    }
};

//std::complex<double> mycomplex(10.0, 2.0);
template<class Tin, class Tout>
class Map {
private:
std::function<Tout(Tin)> fun;
std::function<Tout(Tin)> dfun;
public:
Map() {};
Map(std::function<Tout(Tin)> f, std::function<Tout(Tin)> df) {
fun = f;
dfun = df;
};

void operator()(bool use_tbb, Tin &x1) {
int iter=100;
Tout x;
if (use_tbb) {
uTimer *timer = new uTimer("Executing Code On CPU");
tbb::parallel_for(tbb::blocked_range < int > (0, iter),
[&](tbb::blocked_range<int> t) {
for (int index = t.begin(); index < t.end(); ++index) {
do
{
x = x1;
x1 = x - (fun(x) / dfun(x));
}while (std::abs(x1 - x) >= EPSILON);
}
});
timer->~uTimer();
}else {
sycl::buffer<Tin, 1> x1_buffer(&x1, iter);
sycl::buffer<Tout, 1> x_buffer(&x, iter);
//Profiling GPU
// Initialize property list with profiling information
sycl::property_list propList {
sycl::property::queue::enable_profiling() };
// Build the command queue (constructed to handle event profling)
sycl::queue gpuQueue = cl::sycl::queue(sycl::gpu_selector(),
propList);
// print out the device information used for the kernel code
std::cout << "Device: "
<< gpuQueue.get_device().get_info<sycl::info::device::name>()
<< std::endl;
std::cout << "Compute Units: "
<< gpuQueue.get_device().get_info<
sycl::info::device::max_compute_units>()
<< std::endl;
auto start_overall = std::chrono::system_clock::now();
auto event = gpuQueue.submit([&](sycl::handler &h) {
//local copy of fun
auto f = fun;
auto df = dfun;
sycl::accessor x1_accessor(x1_buffer, h, sycl::read_write);
sycl::accessor x_accessor(x_buffer, h, sycl::read_write);
h.parallel_for(iter, [=](sycl::id<1> index) {
do
{
x_accessor[index] = x1_accessor[index];
x1_accessor[index] = x_accessor[index] - (f(x_accessor[index]) / df(x_accessor[index]));
}while (sycl::fabs(f(x1_accessor[index]))>= EPSILON);
});
});
event.wait();
auto end_overall = std::chrono::system_clock::now();
cl_ulong submit_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_submit>();
cl_ulong start_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_start>();
cl_ulong end_time = event.template get_profiling_info<
cl::sycl::info::event_profiling::command_end>();
auto submission_time = (start_time - submit_time) / 1000000.0f;
std::cout << "Submit Time: " << submission_time << " ms"
<< std::endl;
auto execution_time = (end_time - start_time) / 1000000.0f;
std::cout << "Execution Time: " << execution_time << " ms"
<< std::endl;
auto execution_overall = std::chrono::duration_cast
< std::chrono::milliseconds > (end_overall - start_overall);
std::cout << "Overall Execution Time: " << execution_overall.count()
<< " ms" << std::endl;
};
};
};


/// Entry point: applies the Map skeleton to f(x) = x^3 on the CPU path,
/// starting from x = 3, and prints the resulting root estimate.
int main(int argc, char *argv[]) {
    // Command-line arguments are not used.
    (void)argc;
    (void)argv;
    // The function whose root is sought; plain multiplication avoids the
    // general-purpose pow() for a small integer power.
    const auto f = [](double x) { return x * x * x; };
    // Its derivative.
    const auto df = [](double x) { return 3.0 * x * x; };
    // Define an instance of the Map class
    auto m1 = Map<double, double>(f, df);
    double x1 = 3;
    m1(true, x1);
    // Print the result: the skeleton updates x1 in place. The previous
    // (commented-out) loop iterated over a variable `r` that never existed.
    std::cout << "Root estimate: " << x1 << std::endl;
    return 0;
}

此外，即使不考虑这个编译错误，我也觉得代码中还有其他不正确的地方，但我找不出问题出在哪里。

你想做的事情无法实现。即使去掉 std::function 改用函数指针，仍然不行（尽管函数指针是平凡可复制的）。在 SYCL 中，和其他同类语言（CUDA、HIP、OpenCL……）一样，设备编译器必须能够编译内核所执行/调用的全部函数。所以答案是否定的：你不能把一个函数“传入”内核。这归根结底和你之前在这里得到解答的那个问题是同一回事。

你可以尝试把这些 lambda 改为在别处定义的普通函数，然后在内核中调用它们。如果想在运行时从多个函数中进行选择，可以编写一个模板化的内核（例如以枚举值作为模板参数），并在内核中通过 if constexpr 分派调用，从而避免运行时开销（以及代码重复）。最终会实例化 n 个 SYCL 内核，每个内核调用其中一个函数，它们都会被设备编译器正确编译。

最新更新