我正在学习opencl,由于某种原因内核什么也不做:
#include <stdlib.h>
#include <stdio.h>
#define CL_TARGET_OPENCL_VERSION 300
#include <CL/cl.h>
int err = 0;
#define PRINTERR() fprintf(stderr, "Error at line %u.n", __LINE__)
#define CHECKERR(x) if(x){PRINTERR();return __LINE__;}
#define CHECKNOTERR(x) if(!x){PRINTERR();return __LINE__;}
const char *KernelSource =
"__kernel void square( n"
" __global float* input, n"
" __global float* output, n"
" const unsigned int count) n"
"{ n"
" int i = get_global_id(0); n"
" if(i == 0) printf("test\n"); n"
" if(i < count) n"
" output[i] = input[i] * input[i]; n"
"} n" ;
#define DATA_SIZE 1024
int main(){
float data[DATA_SIZE];
float results[DATA_SIZE];
size_t global;
size_t local;
cl_platform_id platform_id;
cl_device_id device_id;
cl_context context;
cl_command_queue commands;
cl_program program;
cl_kernel kernel;
cl_mem input;
cl_mem output;
unsigned int i = 0;
unsigned int count = DATA_SIZE;
for(i = 0; i < count; ++i)
//data[i] = rand() / (float)RAND_MAX;
data[i] = 2.f;
int gpu = 1;
err = clGetPlatformIDs (1, &platform_id, NULL); CHECKERR(err)
err = clGetDeviceIDs(platform_id, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &device_id, NULL); CHECKERR(err)
context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err); CHECKERR(!context)
commands = clCreateCommandQueueWithProperties(context, device_id, NULL, &err); CHECKERR(err)
input = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * count, NULL, &err); CHECKERR(err)
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * count, NULL, &err); CHECKERR(err)
CHECKERR(!input || !output)
err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, sizeof(float) * count, data, 0, NULL, NULL); CHECKERR(err)
program = clCreateProgramWithSource(context, 1, &KernelSource, NULL, &err); CHECKERR(err)
err = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); CHECKERR(err)
kernel = clCreateKernel(program, "square", &err); CHECKERR(err)
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output);
err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count);
CHECKERR(err)
err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); CHECKERR(err)
err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); CHECKERR(err)
err = clEnqueueReadBuffer(commands, output, CL_TRUE, 0, sizeof(float) * count, results, 0, NULL, NULL ); CHECKERR(err)
clFlush(commands);
clFinish(commands);
unsigned int correct = 0;
for(i = 0; i < count; ++i)
printf("%fn",results[i]);
printf("Computed '%d/%d' correct values!n", correct, count);
// free
clReleaseMemObject(input);
clReleaseMemObject(output);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(commands);
clReleaseContext(context);
return 0;
}
我想让它做一些事情,但是它没有。
我试着读取输入而不是输出,它很好。内核中的printf什么也不做,如果我运行它,clEnqueueReadBuffer给出的只是0。我有amd,所以我不能在cpu上测试它。我试了另一个例子,它起作用了。(这里的那个)帮助感激。
global为0,因此程序运行0次。