OpenCL 如何与 OpenCV 一起工作



我正在尝试使用 OpenCL 实现区域生长(region growing)算法,为此我将使用 OpenCV 打开图像。问题是如何把图像数据传递给 OpenCL。

我在 Visual Studio 中使用 OpenCV 2.4.9 和 OpenCL(AMD APP SDK 2.9-1)。

有人能告诉我,用 OpenCV 打开图像之后该怎么做吗?

通常,在 OpenCL 应用程序中,有两种方法可以将图像(或任何其他数据)从主机程序传输到设备程序: 1-使用缓冲区 2-使用Image2d。 它们都使用cl_mem类型。由于使用缓冲区比使用 image2d 更简单(尤其是在灰度图像中),因此我将解释如何使用 OpenCL 中的缓冲区将图像从主机程序传输到设备。

用 OpenCV 的 Mat 对象读取输入图像后,将其转换为灰度图像。然后使用 clCreateBuffer 方法,它返回一个 cl_mem 缓冲区。我们可以直接把 data(Mat 对象的属性)传给 clCreateBuffer,用输入图像数据初始化内核的输入缓冲区。然后可以用 clSetKernelArg 方法把创建好的缓冲区传给内核。最后,当内核完成工作后,可以通过 clEnqueueReadBuffer 读取结果。

阅读注释以了解此代码,并随时提出问题。

主机代码:


// Make Contex, Kerenl and other requirements for OpenCL before this section....
Mat image = imread("logo.bmp", CV_LOAD_IMAGE_COLOR); // reading input image by opencv to Mat type
Mat input_;
cvtColor(image, input_, CV_BGR2GRAY); // convert input image to gray scale
cl_mem inputSignalBuffer = clCreateBuffer(
context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, input_.rows *input_.cols *input_.elemSize(),
static_cast<void *>(
input_.data), // inputSignalBuffers will be initialized by input_.data which contains input image data
&errNum);
cl_mem outputSignalBuffer =
clCreateBuffer( // make and preparing an empty output buffer to use after opencl kernel call back
context, CL_MEM_WRITE_ONLY, input_.rows *input_.cols *input_.elemSize(), NULL, &errNum);
checkErr(errNum, "clCreateBuffer(outputSignal)");
errNum = clSetKernelArg(
kernel, 0, sizeof(cl_mem),
&inputSignalBuffer); // passing input buffer and output buffer to kernel in order to be used on device
errNum |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &maskBuffer);
errNum |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &outputSignalBuffer);
errNum |= clSetKernelArg(kernel, 3, sizeof(cl_uint), &input_.rows);
errNum |= clSetKernelArg(kernel, 4, sizeof(cl_uint), &input_.cols);
errNum |= clSetKernelArg(kernel, 5, sizeof(cl_uint), &maskWidth);
size_t localWorkSize[2] = {16, 16}; // Using 2 dimensional range  with size of work group 16
size_t globalWorkSize[2] = {
input_
.rows, // Note: Global work size (input image rows and cols) should be multiple of size of work group.
input_.cols};
// Queue the kernel up for execution across the array
errNum =
clEnqueueNDRangeKernel( // enqueue kernel with enabling host blocking until finishing kernel execution
queue, kernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, NULL);
checkErr(errNum, "clEnqueueNDRangeKernel");
Mat output_ = cv::Mat(input_.rows, input_.cols, CV_8UC1);
errNum = clEnqueueReadBuffer( //  reading from ourput parameter of kernel
queue, outputSignalBuffer, CL_TRUE, 0, input_.rows *input_.cols *input_.elemSize(),
output_.data, // initialize OpenCV Mat by output_.data which contains output results of kernel
0, NULL, NULL);
checkErr(errNum, "clEnqueueReadBuffer");
// cut the extra border spaces which has been added in the first part of the code in order to adjust image
// size with Work Group Size;
cv::imwrite("output.bmp", output_); // saving output in image file

内核代码:

/**
 * 2D convolution of an 8-bit single-channel image with a square mask.
 *
 * Each work-item produces one output pixel: global id 0 is the row, global id 1 the column.
 * The weighted sum is divided by maskWidth * maskWidth (not by the sum of the mask weights)
 * and clamped to [0, 255]. Pixels whose mask window would extend past the image edge are
 * written as 0.
 *
 * @param input        row-major source image, inputHeight * inputWidth bytes
 * @param mask         row-major mask weights, maskWidth * maskWidth entries
 * @param output       row-major destination image, same size as input
 * @param inputHeight  number of image rows
 * @param inputWidth   number of image columns
 * @param maskWidth    side length of the square mask
 */
__kernel void convolve(const __global uchar *const input, __constant uint *const mask,
                       __global uchar *const output, const int inputHeight, const int inputWidth,
                       const int maskWidth) {
    const int curr_x = get_global_id(0); // current row
    const int curr_y = get_global_id(1); // current column

    // When the global size is rounded up to a multiple of the work-group size, some
    // work-items fall outside the image; they must not write to the output buffer.
    if (curr_x >= inputHeight || curr_y >= inputWidth) {
        return;
    }

    uint sum = 0;
    const int d = maskWidth / 2; // mask radius

    // Accumulate only where the whole mask window lies inside the image.
    if (curr_x > d - 1 && curr_y > d - 1 && curr_x < inputHeight - d && curr_y < inputWidth - d) {
        for (int i = -d; i <= d; i++) {
            for (int j = -d; j <= d; j++) {
                // Row-major index into the mask; mad24(maskWidth, i + d, j + d) is a faster equivalent.
                int mask_ptr = maskWidth * (i + d) + (j + d);
                sum += input[(curr_x + i) * inputWidth + curr_y + j] * mask[mask_ptr];
            }
        }
    }

    sum /= (maskWidth * maskWidth);       // take the average over the mask area
    sum = clamp(sum, (uint)0, (uint)255); // clamp == min(max(x, minval), maxval)
    output[curr_x * inputWidth + curr_y] = sum;
}

最新更新