如何找到CUDA/OpenCV程序返回纯灰色图像的原因



我正在编写这个CUDA代码,以使用CUDA将RGB图像转换为灰度。我目前正在学习CUDA和OpenCV,所以大部分内容都是在其他代码的帮助下编写的,尤其是"并行程序简介"Psets。

我得到的输出是一个普通的灰色图像。如何查找此代码中的问题?

#include <iostream>
#include <cuda.h>
#include <cuda_runtime.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
using namespace cv;
using namespace std;
__global__ void rgba_to_greyscale(const uchar4* const rgbaImage,
unsigned char* greyImage,
int numRows, int numCols)
{
int col = blockIdx.x * blockDim.x + threadIdx.x;
int row = blockIdx.y * blockDim.y + threadIdx.y;        
if (col >= numCols || row >= numRows) {
return;
}
int offset = row * numCols + col;
uchar4 rgba_pixel = rgbaImage[offset];
float greyness = .299f * rgba_pixel.x + .587f * rgba_pixel.y +
.114f * rgba_pixel.z;
greyImage[offset] = static_cast<unsigned char>(greyness);
}
int main()
{
Mat imageRGBA;
Mat imageGrey;
uchar4        *h_rgbaImage;
uchar4 *d_rgbaImage = NULL;
unsigned char *h_greyImage;
unsigned char *d_greyImage = NULL;
///////////////////////////////////
Mat image;
image = cv::imread("IMG.jpg");
if (image.empty()) {
cerr << "Couldn't open file: " << endl;
exit(1);
}
///////////////////////////////////
int numRows = image.rows;
int numCols = image.cols;
///////////////////////////////////////
cvtColor(image, imageRGBA, COLOR_BGR2RGBA);    
//Allocate Memory for output
imageGrey.create(image.rows, image.cols, CV_8UC1);
h_rgbaImage = (uchar4 *)imageRGBA.data;
h_greyImage = (unsigned char *)imageGrey.data;
const size_t numPixels = numRows * numCols;
//Allocate memory on the device for both input and output
cudaMalloc((void**)d_rgbaImage, sizeof(uchar4) * numPixels);
cudaMalloc((void**)d_greyImage, sizeof(unsigned char) * numPixels);
cudaMemset((void *)d_greyImage, 0, numPixels * sizeof(unsigned char));
//Copy input array to the GPU
cudaMemcpy(d_rgbaImage, h_rgbaImage, sizeof(uchar4)*numPixels,         
cudaMemcpyHostToDevice);
//Calling the Kernel - 
const dim3 blockSize(32, 16, 1);
const dim3 gridSize(1 + (numCols / blockSize.x), 1 + (numRows /     
blockSize.y), 1);
rgba_to_greyscale <<<gridSize, blockSize >>> (d_rgbaImage, d_greyImage, 
numRows, numCols);
//Copy Output array to Host
cudaMemcpy(h_greyImage, d_greyImage, sizeof(unsigned char) * numPixels,     
cudaMemcpyDeviceToHost);
//Check Output
Mat output;
output = Mat(numRows, numCols, CV_8UC1, (void*)h_greyImage);
imwrite("result.jpg", output);  
}

代码中的设备内存分配调用无效。

cudaMalloc((void**)d_rgbaImage, sizeof(uchar4) * numPixels);
cudaMalloc((void**)d_greyImage, sizeof(unsigned char) * numPixels);

实际上,上述呼吁毫无作用。请按以下方式更正调用,以便实际修改指针。

cudaMalloc((void**)&d_rgbaImage, sizeof(uchar4) * numPixels);
^
cudaMalloc((void**)&d_greyImage, sizeof(unsigned char) * numPixels);
^

此外,请确保检查代码中的CUDA错误,以便轻松跟踪此类问题。