使用CUDA仅将1/3的RGB图像转换为灰度



我正在尝试使用CUDA将RGB图像转换为灰度。我想用stbi_load读取图像,将其传递给convertToGreyscale(),在那里我调用内核并将图像保存为unsigned char* -从那里我可以使用它并应用变量阈值,sobel,多个阈值等。问题是只有1/3的图像被处理,并且实际上受到内核(灰度内核)的影响。下面是我的内核:

__global__ void greyscale(unsigned char* originalImg, unsigned char* d_greyImg, int width, int height, int channels) {
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
unsigned int id = x + y * width;
if (x < width && y < height) {
unsigned char r = originalImg[id];
unsigned char g = originalImg[id + 1];
unsigned char b = originalImg[id + 2];
int offset = (r+g+b) / channels;
for (int i = 0; i < channels; i++) {
d_greyImg[id + i] = offset;
}
}
}

这是代码的另一部分:

#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
#include "cuda_runtime.h"
#include "cuda_runtime_api.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#define THREADS 8
void convertToGreyscale(unsigned char* originalImg, unsigned char* greyImg, int width, int height, int channels)
{
unsigned char* d_originalImg = NULL;
unsigned char* d_greyImg = NULL;
int size = width * height * channels * sizeof(unsigned char);
cudaMalloc(&d_originalImg, size);
cudaMalloc(&d_greyImg, size);
cudaMemcpy(d_originalImg, originalImg, size, cudaMemcpyHostToDevice);
cudaMemcpy(d_greyImg, greyImg, size, cudaMemcpyHostToDevice);
dim3 dimBlock(THREADS, THREADS);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y);
greyscale << <dimGrid, dimBlock >> > (d_originalImg, d_greyImg, width, height, channels);
cudaMemcpy(greyImg, d_greyImg, size, cudaMemcpyDeviceToHost);
cudaFree(d_originalImg);
}
void sobelFilter(unsigned char* originalImg, unsigned char* sobelImg, int width, int height, int channels)
{
dim3 dimBlock(THREADS, THREADS, 1);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y);
unsigned char* d_originalImg = NULL;
int size = width * height;
cudaMalloc(&d_originalImg, size * channels * sizeof(unsigned char));
cudaMemcpy(d_originalImg, originalImg, size * channels * sizeof(unsigned char), cudaMemcpyHostToDevice);
sobel << <dimGrid, dimBlock >> > (d_originalImg, width, height);
cudaMemcpy(sobelImg, d_originalImg, size * channels , cudaMemcpyDeviceToHost);
cudaFree(d_originalImg);
}
int main()
{
// read the image
int width, height, channels;
unsigned char* originalImg = stbi_load("lenna.png", &width, &height, &channels, 0);
size_t img_size = width * height * channels;
unsigned char* greyImg = (unsigned char*) malloc(img_size);
unsigned char* sobelImg = (unsigned char*) malloc(img_size);
convertToGreyscale(originalImg, greyImg, width, height, channels);
stbi_write_jpg("greyscale.png", width, height, channels, greyImg, 100);
/*sobelFilter(originalImg, sobelImg, width, height, channels);
stbi_write_jpg("sobel.png", width, height, channels, sobelImg, 100);*/
return 0;
}

你可能想做dimGrid((width+dimBlock.x-1) / dimBlock.x, (height+dimBlock.y-1) / dimBlock.y);,以确保你不会错过角落。

我认为主要的问题是你的输入图像被读取没有考虑通道。您的线程将具有值x,但也值x+1(和相同的y)在某些点。但是,如果您看到如何定义id,您会注意到不是读取3乘3,而是读取1乘1,因为这两个线程将idid+1作为值。你需要确保id的计算已经考虑了channels,但请注意,它将不适用于灰度,你需要一个不同的灰度(或除以3)。

最新更新