ArrayFire:具有从主函数调用的OpenCL内核的函数

函数如下（摘自 http://arrayfire.org/docs/interop_opencl.htm）：

唯一的`main`函数

// Single-function variant: creates two ArrayFire arrays, then runs a
// hand-written OpenCL kernel on ArrayFire's own context/queue that copies
// every element of A into B, and finally hands the buffers back to ArrayFire.
int main() {
size_t length = 10;
// 1. Create ArrayFire array objects: A is filled by af::randu, B with zeros.
af::array A = af::randu(length, f32);
af::array B = af::constant(0, length, f32);
// ... additional ArrayFire operations here
// 2. Obtain the device, context, and queue used by ArrayFire
//    (function-local statics: initialized once, on first execution).
static cl_context af_context = afcl::getContext();
static cl_device_id af_device_id = afcl::getDeviceId();
static cl_command_queue af_queue = afcl::getQueue();
// 3. Obtain cl_mem references to af::array objects.
//    Control of the memory is returned to ArrayFire by unlock() in step 5.
cl_mem * d_A = A.device<cl_mem>();
cl_mem * d_B = B.device<cl_mem>();
// 4. Load, build, and use your kernels.
//    For the sake of readability, we have omitted error checking:
//    `status` is overwritten by each call below and never inspected.
int status = CL_SUCCESS;
// A simple copy kernel, uses C++11 syntax for multi-line strings.
// Each work-item copies the single element at its global id from gA to gB.
const char * kernel_name = "copy_kernel";
const char * source = R"(
void __kernel
copy_kernel(__global float * gA, __global float * gB)
{
int id = get_global_id(0);
gB[id] = gA[id];
}
)";
// Create the program, build the executable, and extract the entry point
// for the kernel.
// NOTE(review): neither `program` nor `kernel` is released in this snippet.
cl_program program = clCreateProgramWithSource(af_context, 1, &source, NULL, &status);
status = clBuildProgram(program, 1, &af_device_id, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, kernel_name, &status);
// Set arguments and launch your kernels:
// a 1-D range of `length` work-items; the NULL local size leaves the
// work-group size to the OpenCL implementation.
clSetKernelArg(kernel, 0, sizeof(cl_mem), d_A);
clSetKernelArg(kernel, 1, sizeof(cl_mem), d_B);
clEnqueueNDRangeKernel(af_queue, kernel, 1, NULL, &length, NULL, 0, NULL, NULL);
// 5. Return control of af::array memory to ArrayFire
A.unlock();
B.unlock();
// ... resume ArrayFire operations
// Because the device pointers, d_A and d_B, were returned to ArrayFire's
// control by the unlock function, there is no need to free them using
// clReleaseMemObject()
return 0;
}

运行良好：B 的最终值与 A 一致，即 af_print(B); 的输出与 A 相同。但当我把它单独写成一个函数时，如下所示：

单独的`main`函数

arraycopy 函数

// Copies `length` float elements from A into B on the device by launching a
// hand-written OpenCL kernel on ArrayFire's context and queue.
//
// Parameters:
//   A      - source array (taken by value).
//   B      - destination array (taken by value).
//   length - number of elements, used as the 1-D global work size.
//
// NOTE(review): A and B are af::array copies, not references. According to
// the explanation that accompanies this snippet, calling device() on an
// array whose buffer is shared with another af::array returns a pointer to a
// fresh copy of the data, so the kernel's writes land in this function's B
// only and the caller's B is left unchanged. Confirm against the ArrayFire
// documentation before relying on this function to modify the caller's array.
void arraycopy(af::array A, af::array B,size_t length) {
// 2. Obtain the device, context, and queue used by ArrayFire
//    (function-local statics: initialized on the first call and reused).
static cl_context af_context = afcl::getContext();
static cl_device_id af_device_id = afcl::getDeviceId();
static cl_command_queue af_queue = afcl::getQueue();
// 3. Obtain cl_mem references to af::array objects.
//    Control of the memory is returned to ArrayFire by unlock() in step 5.
cl_mem * d_A = A.device<cl_mem>();
cl_mem * d_B = B.device<cl_mem>();
// 4. Load, build, and use your kernels.
//    For the sake of readability, we have omitted error checking:
//    `status` is overwritten by each call below and never inspected.
int status = CL_SUCCESS;
// A simple copy kernel, uses C++11 syntax for multi-line strings.
// Each work-item copies the single element at its global id from gA to gB.
const char * kernel_name = "copy_kernel";
const char * source = R"(
void __kernel
copy_kernel(__global float * gA, __global float * gB)
{
int id = get_global_id(0);
gB[id] = gA[id];
}
)";
// Create the program, build the executable, and extract the entry point
// for the kernel. This happens on every call.
// NOTE(review): neither `program` nor `kernel` is released in this function.
cl_program program = clCreateProgramWithSource(af_context, 1, &source, NULL, &status);
status = clBuildProgram(program, 1, &af_device_id, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, kernel_name, &status);
// Set arguments and launch your kernels:
// a 1-D range of `length` work-items; the NULL local size leaves the
// work-group size to the OpenCL implementation.
clSetKernelArg(kernel, 0, sizeof(cl_mem), d_A);
clSetKernelArg(kernel, 1, sizeof(cl_mem), d_B);
clEnqueueNDRangeKernel(af_queue, kernel, 1, NULL, &length, NULL, 0, NULL, NULL);
// 5. Return control of af::array memory to ArrayFire
A.unlock();
B.unlock();
// ... resume ArrayFire operations
// Because the device pointers, d_A and d_B, were returned to ArrayFire's
// control by the unlock function, there is no need to free them using
// clReleaseMemObject()
}

main 函数

// Driver for the standalone-function variant: builds A (filled by af::randu)
// and B (zeros), asks arraycopy to copy A into B, then prints B.
int main()
{
size_t length = 10;
af::array A = af::randu(length, f32);
af::array B = af::constant(0, length, f32);
// A and B are handed to arraycopy by value (see its parameter list).
arraycopy(A, B, length);
// Reported outcome: the printed B still differs from A.
af_print(B);//does not match A
}

B 的最终值没有改变，为什么会发生这种情况？我该怎么做才能让它正常工作？提前感谢。

您是按值而不是按引用将 af::array 传递给 arraycopy 的，因此无论您在 arraycopy 中做什么，main 中的 A 和 B 都保持不变。可以在参数列表中写成 af::array &B，从而按引用传递 B。我还建议养成习惯，将 A 按 const 引用传递（const af::array &A），以避免不必要的拷贝。

您看到的这种行为背后的原因是引用计数。但这肯定不是 bug，它与 C++ 语言的行为一致。

af::array 对象在通过赋值或等效操作创建时，只会复制元数据并保留一个共享指针。

在您的这版代码中，B 是按值传递的，因此在内部，arraycopy 函数中的 B 只是 main 函数中 B 的元数据副本，并与 main 中的数组 B 共享指向数据的指针。此时，如果用户调用 device 来获取指针，我们会假定该指针将被用于写入其所指向的内存。因此，当在引用计数 > 1 的数组对象上调用 device 时，我们会复制原始数组（即 main 中的 B），并返回指向这份新内存的指针。所以，如果您在 arraycopy 函数内部执行 af_print(B)，就会看到正确的值。这本质上是写时复制——由于 B 是按值传递的，您在 main 中看不到 arraycopy 函数对 B 所做的修改。

我在第一段中说它符合 C++ 的行为，是因为如果需要在函数内修改对象 B，就必须通过引用传递它。按值传递只会使函数内部的那份值发生变化——ArrayFire 对 af::array 对象的处理方式正是如此。

希望这能消除混乱。

普拉迪普。ArrayFire开发团队。

唯一的`main`函数

单独的`main`函数

相关内容

最新更新

热门标签：

ArrayFire:具有从主函数调用的OpenCL内核的函数

唯一的main函数

单独的main函数

相关内容

最新更新

热门标签：

唯一的`main`函数

单独的`main`函数