为什么即使没有错误,OpenCL 内核也不会执行?(c, nvidia, kubuntu)



我正在学习opencl,由于某种原因内核什么也不做:

#include <stdlib.h>
#include <stdio.h>
#define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h>
int err = 0;
#define PRINTERR() fprintf(stderr, "Error at line %u.n", __LINE__)
#define CHECKERR(x) if(x){PRINTERR();return __LINE__;}
#define CHECKNOTERR(x) if(!x){PRINTERR();return __LINE__;}
const char *KernelSource =
"__kernel void square(                                                  n" 
"   __global float* input,                                              n" 
"   __global float* output,                                             n" 
"   const unsigned int count)                                           n" 
"{                                                                      n" 
"   int i = get_global_id(0);                                           n" 
"   if(i == 0) printf("test\n");                                     n" 
"   if(i < count)                                                       n" 
"       output[i] = input[i] * input[i];                                n" 
"}                                                                      n" ;

#define DATA_SIZE 1024
int main(){
float data[DATA_SIZE];
float results[DATA_SIZE];
size_t global;
size_t local;
cl_platform_id platform_id;
cl_device_id device_id;
cl_context context;
cl_command_queue commands;
cl_program program;
cl_kernel kernel;
cl_mem input;
cl_mem output;
unsigned int i = 0;
unsigned int count = DATA_SIZE;
for(i = 0; i < count; ++i)
//data[i] = rand() / (float)RAND_MAX;
data[i] = 2.f;

int gpu = 1;
err = clGetPlatformIDs (1, &platform_id, NULL); CHECKERR(err)
err = clGetDeviceIDs(platform_id, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); CHECKERR(err)
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err); CHECKERR(!context)
commands = clCreateCommandQueueWithProperties(context, device_id, NULL, &err); CHECKERR(err)

input = clCreateBuffer(context,  CL_MEM_READ_ONLY,  sizeof(float) * count, NULL, &err); CHECKERR(err)
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, &err); CHECKERR(err)
CHECKERR(!input || !output)
err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL); CHECKERR(err)

program = clCreateProgramWithSource(context, 1, &KernelSource, NULL, &err); CHECKERR(err)
err = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); CHECKERR(err)
kernel = clCreateKernel(program, "square", &err); CHECKERR(err)
err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
CHECKERR(err)

err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); CHECKERR(err)
err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); CHECKERR(err)
err = clEnqueueReadBuffer(commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL ); CHECKERR(err)
clFlush(commands);
clFinish(commands);

unsigned int correct = 0;
for(i = 0; i < count; ++i)
printf("%fn",results[i]);

printf("Computed '%d/%d' correct values!n", correct, count);

// free
clReleaseMemObject(input);
clReleaseMemObject(output);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(commands);
clReleaseContext(context);
return 0;
}

我想让它做一些事情,但是它没有。

我试着读取输入而不是输出,它很好。内核中的printf什么也不做,如果我运行它,clEnqueueReadBuffer给出的只是0。我有amd,所以我不能在cpu上测试它。我试了另一个例子,它起作用了。(这里的那个)帮助感激。

global为0,因此程序运行0次。

最新更新