代码如下(取自 http://arrayfire.org/docs/interop_opencl.htm),全部逻辑都写在唯一的 main 函数中:
// Single-function ArrayFire/OpenCL interop example: builds a custom OpenCL
// copy kernel and runs it on the device buffers behind two af::array objects,
// so that B receives the contents of A.
int main() {
size_t length = 10;
// 1. Create ArrayFire array objects: A is filled by af::randu, B by
//    af::constant(0, ...). Both hold `length` f32 elements; B is the
//    destination of the copy.
af::array A = af::randu(length, f32);
af::array B = af::constant(0, length, f32);
// ... additional ArrayFire operations here
// 2. Obtain the device, context, and queue used by ArrayFire, so the custom
//    kernel is created and enqueued on the same OpenCL objects.
static cl_context af_context = afcl::getContext();
static cl_device_id af_device_id = afcl::getDeviceId();
static cl_command_queue af_queue = afcl::getQueue();
// 3. Obtain cl_mem references to af::array objects. Each device<cl_mem>()
//    call is paired with an unlock() call in step 5.
cl_mem * d_A = A.device<cl_mem>();
cl_mem * d_B = B.device<cl_mem>();
// 4. Load, build, and use your kernels.
// For the sake of readability, we have omitted error checking.
// NOTE(review): `status` is assigned below but never inspected.
int status = CL_SUCCESS;
// A simple copy kernel, uses C++11 syntax for multi-line strings.
// Each work-item copies one element: gB[id] = gA[id].
const char * kernel_name = "copy_kernel";
const char * source = R"(
void __kernel
copy_kernel(__global float * gA, __global float * gB)
{
int id = get_global_id(0);
gB[id] = gA[id];
}
)";
// Create the program, build the executable, and extract the entry point
// for the kernel.
// NOTE(review): no clReleaseKernel()/clReleaseProgram() call is visible in
// this snippet — confirm whether cleanup is intentionally omitted.
cl_program program = clCreateProgramWithSource(af_context, 1, &source, NULL, &status);
status = clBuildProgram(program, 1, &af_device_id, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, kernel_name, &status);
// Set arguments and launch your kernels: argument 0 is the source buffer,
// argument 1 the destination buffer; the 1-D global work size is `length`,
// i.e. one work-item per element.
clSetKernelArg(kernel, 0, sizeof(cl_mem), d_A);
clSetKernelArg(kernel, 1, sizeof(cl_mem), d_B);
clEnqueueNDRangeKernel(af_queue, kernel, 1, NULL, &length, NULL, 0, NULL, NULL);
// 5. Return control of af::array memory to ArrayFire
A.unlock();
B.unlock();
// ... resume ArrayFire operations
// Because the device pointers, d_A and d_B, were returned to ArrayFire's
// control by the unlock function, there is no need to free them using
// clReleaseMemObject()
return 0;
}
上面的代码运行良好:B 的最终值与 A 一致,即 af_print(B); 的输出与 A 相同。但是,当我把这段逻辑拆分成单独的函数时,如下所示:
拆分后的版本(arraycopy 函数与 main 函数分开)
arraycopy 函数:
// Runs a custom OpenCL copy kernel intended to copy the first `length`
// float elements of A into B on the device.
//
// Parameters:
//   A      - source array (received by value).
//   B      - destination array (received by value).
//   length - number of elements, used as the 1-D global work size.
//
// NOTE(review): A and B are taken by value. The caller in this file reports
// that its own B is unchanged after the call; if the caller must observe the
// result, B has to be passed by reference (af::array &B), and A can be passed
// as const af::array &A to avoid an extra copy.
void arraycopy(af::array A, af::array B,size_t length) {
// 2. Obtain the device, context, and queue used by ArrayFire
// These are function-local statics: they are initialized on the first call
// only and the same handles are reused by every later call.
static cl_context af_context = afcl::getContext();
static cl_device_id af_device_id = afcl::getDeviceId();
static cl_command_queue af_queue = afcl::getQueue();
// 3. Obtain cl_mem references to af::array objects. Each device<cl_mem>()
//    call is paired with an unlock() call in step 5.
cl_mem * d_A = A.device<cl_mem>();
cl_mem * d_B = B.device<cl_mem>();
// 4. Load, build, and use your kernels.
// For the sake of readability, we have omitted error checking.
// NOTE(review): `status` is assigned below but never inspected.
int status = CL_SUCCESS;
// A simple copy kernel, uses C++11 syntax for multi-line strings.
// Each work-item copies one element: gB[id] = gA[id].
const char * kernel_name = "copy_kernel";
const char * source = R"(
void __kernel
copy_kernel(__global float * gA, __global float * gB)
{
int id = get_global_id(0);
gB[id] = gA[id];
}
)";
// Create the program, build the executable, and extract the entry point
// for the kernel. This happens on every call to arraycopy.
// NOTE(review): no clReleaseKernel()/clReleaseProgram() call is visible in
// this function — confirm whether cleanup is intentionally omitted.
cl_program program = clCreateProgramWithSource(af_context, 1, &source, NULL, &status);
status = clBuildProgram(program, 1, &af_device_id, NULL, NULL, NULL);
cl_kernel kernel = clCreateKernel(program, kernel_name, &status);
// Set arguments and launch your kernels: argument 0 is the source buffer,
// argument 1 the destination buffer; one work-item per element.
clSetKernelArg(kernel, 0, sizeof(cl_mem), d_A);
clSetKernelArg(kernel, 1, sizeof(cl_mem), d_B);
clEnqueueNDRangeKernel(af_queue, kernel, 1, NULL, &length, NULL, 0, NULL, NULL);
// 5. Return control of af::array memory to ArrayFire
A.unlock();
B.unlock();
// ... resume ArrayFire operations
// Because the device pointers, d_A and d_B, were returned to ArrayFire's
// control by the unlock function, there is no need to free them using
// clReleaseMemObject()
}
main 函数:
// Driver for the split version: creates the two arrays, hands them to
// arraycopy(), and prints B to check whether the copy took effect.
int main()
{
size_t length = 10;
// A is filled by af::randu, B by af::constant(0, ...); both hold `length`
// f32 elements.
af::array A = af::randu(length, f32);
af::array B = af::constant(0, length, f32);
// arraycopy() declares its array parameters by value, so it receives
// copies of these af::array objects rather than references to them.
arraycopy(A, B, length);
af_print(B);//does not match A
}
B 的最终值没有改变。为什么会发生这种情况?我该怎么做才能让它正常工作?提前感谢。
您是按值而不是按引用把 af::array 传递给 arraycopy 的,因此无论您在 arraycopy 中做什么,main 中的 A 和 B 都保持不变。您可以在参数列表中写成 af::array &B,以引用方式传递 B。我还建议按惯例以 const 引用方式传递 A(const af::array &A),以避免不必要的拷贝。
您看到的行为背后的原因是引用计数。不过,这严格来说并不是一个 bug,它与 C++ 语言本身的行为是一致的。
通过赋值或等效操作创建 af::array 对象时,只会复制元数据,并保留一个指向同一份数据的共享指针。
在您的代码版本中,B 是按值传递的,因此在内部,arraycopy 函数中的 B 只是 main 函数中 B 的元数据副本,并与 main 中的数组 B 共享指向数据的指针。此时,如果用户调用 device 来获取指针,我们会假定该指针将被用于写入。因此,当在引用计数大于 1 的数组对象上调用 device 时,我们会复制原始数组(即 main 中的 B),并返回指向这份新内存的指针。所以,如果您在 arraycopy 函数内部执行 af_print(B),将会看到正确的值。这本质上就是写时复制:由于 B 是按值传递的,您在 main 中看不到 arraycopy 函数对 B 所做的修改。
我在第一段中之所以说它符合 C++ 的行为,是因为如果需要在函数内修改对象 B,就必须按引用传递它。按值传递只会让修改停留在函数内部,而 ArrayFire 对 af::array 对象的处理方式正是如此。
希望这能消除混乱。
普拉迪普。ArrayFire开发团队。