使用CUDA获取图像中白色像素的索引列表



给定一幅二值图像,我想使用GPU(CUDA,统一计算设备架构)返回其中白色像素的索引列表。在内核中向points数组写入时,应如何确定写入位置的索引?下面是我的CUDA内核。

// Draft kernel from the question: meant to copy only the locations of active
// (value 255) pixels of a binary image into `points`.
// Uses a 2D launch: each thread derives one (row, column) pair from its block
// and thread indices and handles that single pixel.
// NOTE(review): this draft does not compile as written:
//   - the body reads `input`, but the image parameter is named `bin_image`;
//   - the subscript of `points` is a placeholder (it is the open question);
//   - `points` is declared `float *` while a `Point2f` is stored into it.
__global__ void get_white_pixels_kernel(unsigned char* bin_image,
float * points,
int width,
int height,
int grayWidthStep)
{
// Row and column of the pixel handled by this thread.
int row_index = threadIdx.y+ blockIdx.y*blockDim.y;
int col_index = threadIdx.x+blockIdx.x*blockDim.x;
// Skip threads that fall outside the width x height image.
if ((col_index < width) && (row_index < height))
{
    // Offset of this pixel in the source image; consecutive rows are
    // grayWidthStep elements apart.
    const int gray_tid = row_index * grayWidthStep + col_index;
    // Only pixels whose value is exactly 255 are meant to be recorded.
    if(input[gray_tid]==255)
       // Placeholder subscript: the output position is what the question asks for.
       points[--here is the index]= Point2f(row_index,col_index);
}
}

以下是实现所需功能的一种朴素方法:

  • 生成一个像素索引掩码:非零像素处写入其真实索引,零值像素处写入虚拟(占位)索引。
  • 计算非零像素的数量
  • 创建一个长度等于非零计数的输出向量。
  • 将非零像素的索引从生成的掩码复制到输出向量(这一过程称为流压缩,stream compaction)

以下是上述过程的示例代码。

代码

#include <cstdio>
#include <vector>
#include <cuda_runtime.h>
#include <thrust/count.h>
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <opencv2/opencv.hpp>

/**
 * Terminates the program with a diagnostic when a CUDA runtime call failed.
 *
 * @param err   Status code returned by a CUDA runtime API call.
 * @param file  Source file of the call site (normally __FILE__).
 * @param line  Source line of the call site (normally __LINE__).
 *
 * Does nothing when err is cudaSuccess. Otherwise prints the error string and
 * the call site, then exits with EXIT_FAILURE.
 */
static void _check_err(cudaError_t err, const char* file, int line)
{
    if(err != cudaSuccess)
    {
        const char* err_str = cudaGetErrorString(err);
        // Each field goes on its own line; diagnostics belong on stderr so they
        // are not mixed into the program's regular output.
        fprintf(stderr, "CUDA Error: %s\nFile: %s\nLine: %d\n", err_str, file, line);
        exit(EXIT_FAILURE);
    }
}
// Checks the result of a CUDA runtime call and records the call site automatically.
#define CHECK_ERR(err) _check_err((err), __FILE__, __LINE__)

/**
 * Builds a per-pixel index mask for a single-byte-per-pixel image.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; threads that land
 * outside the width x height area do nothing.
 *
 * @param input    Image bytes; pixel (x, y) is read from input[y * step + x].
 * @param width    Number of pixels per row.
 * @param height   Number of rows.
 * @param step     Distance between the starts of consecutive rows, in elements of input.
 * @param indices  Output with width * height entries; entry y * width + x receives
 *                 (x, y) for a non-zero pixel and (-1, -1) for a zero pixel.
 */
__global__ void kernel_find_indices(const unsigned char* input, int width, int height, int step, int2* indices)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    //Threads past the image border have no pixel to process
    if(col >= width || row >= height)
        return;

    //The image is addressed with the row step, the mask is tightly packed
    const unsigned char pixel = input[row * step + col];

    //Real coordinates for non-zero pixels, the (-1, -1) placeholder otherwise
    indices[row * width + col] = pixel ? make_int2(col, row) : make_int2(-1, -1);
}

//Predicate separating real pixel indices from the (-1, -1) placeholder:
//yields true only when neither component of the index equals -1
struct isNonZeroIndex
{
  __host__ __device__ bool operator()(const int2 &idx)
  {
    const bool hasPlaceholderComponent = (idx.x == -1) || (idx.y == -1);
    return !hasPlaceholderComponent;
  }
};

/**
 * Returns the (x, y) coordinates of every non-zero pixel of an 8-bit,
 * single-channel image, computed on the GPU.
 *
 * Steps: upload the image, write a per-pixel index mask with a placeholder
 * for zero pixels, count the real indices, compact them with
 * thrust::copy_if and copy the result back to the host.
 *
 * @param input  Image of type CV_8UC1. An empty image yields an empty result.
 *               Any other type fails the CV_Assert below.
 * @return       Coordinates of all non-zero pixels as cv::Point(x, y).
 *
 * CUDA runtime failures terminate the program through CHECK_ERR.
 */
std::vector<cv::Point> getIndicesOfNonZeroPixels(cv::Mat input)
{
    std::vector<cv::Point> output;

    //An empty image would produce a zero-sized grid, which is an invalid launch
    if(input.empty())
        return output;

    //The kernel reads exactly one byte per pixel
    CV_Assert(input.type() == CV_8UC1);

    //A non-continuous matrix (e.g. a ROI) does not own step * rows bytes from
    //its first pixel, so make a packed copy before the flat upload below
    if(!input.isContinuous())
        input = input.clone();

    const size_t stepBytes = input.step;
    const size_t pixelCount = static_cast<size_t>(input.cols) * input.rows;
    const size_t imageBytes = stepBytes * input.rows;

    //Device buffers are owned by device_vector so they are released on every
    //exit path, including exceptions thrown by Thrust
    thrust::device_vector<unsigned char> image_d(imageBytes);
    thrust::device_vector<int2> index_buffer_d(pixelCount);

    //Copy input image to device
    CHECK_ERR(cudaMemcpy(thrust::raw_pointer_cast(image_d.data()), input.ptr(), imageBytes, cudaMemcpyHostToDevice));

    dim3 block(16,16);
    dim3 grid;
    grid.x = (input.cols + block.x - 1) / block.x;
    grid.y = (input.rows + block.y - 1) / block.y;

    //Generate an index mask with dummy values for indices with zero pixel value
    kernel_find_indices<<<grid, block>>>(thrust::raw_pointer_cast(image_d.data()), input.cols, input.rows, static_cast<int>(stepBytes), thrust::raw_pointer_cast(index_buffer_d.data()));
    //Launch-configuration errors are only reported through cudaGetLastError
    CHECK_ERR(cudaGetLastError());
    //Errors raised while the kernel runs surface at the next synchronization
    CHECK_ERR(cudaDeviceSynchronize());

    const size_t nonZeroCount = thrust::count_if(index_buffer_d.begin(), index_buffer_d.end(), isNonZeroIndex());

    //Keep only those indices whose pixel value is non-zero (stream compaction)
    thrust::device_vector<int2> compacted(nonZeroCount);
    thrust::copy_if(index_buffer_d.begin(), index_buffer_d.end(), compacted.begin(), isNonZeroIndex());

    //Copy non-zero pixel indices to host
    std::vector<int2> output_int2(nonZeroCount);
    thrust::copy(compacted.begin(), compacted.end(), output_int2.begin());

    //Convert vector<int2> to vector<cv::Point>
    output.reserve(nonZeroCount);
    for(size_t i = 0; i < nonZeroCount; i++)
        output.push_back(cv::Point(output_int2[i].x, output_int2[i].y));
    return output;
}
/**
 * Demo driver: builds a 100x100 black image with a filled white 20x20
 * rectangle at (5,5), prints the coordinates of its non-zero pixels one per
 * line as "x, y", then shows the image until a key is pressed.
 */
void run_test()
{
    //Generate a sample test image
    cv::Mat test = cv::Mat::zeros(100,100, CV_8UC1);
    cv::rectangle(test, cv::Rect(5,5,20,20), cv::Scalar::all(255), CV_FILLED);
    //Get pixel indices of non-zero pixels
    std::vector<cv::Point> indices = getIndicesOfNonZeroPixels(test);
    //Display those indices, one coordinate pair per line
    for(size_t i=0; i<indices.size(); i++)
    {
        printf("%d, %d\n", indices[i].x, indices[i].y);
    }
    //Show image
    cv::imshow("Sample", test);
    cv::waitKey();
}
//Program entry point: runs the demo once; command-line arguments are ignored
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    run_test();

    return 0;
}

编译命令

nvcc -o nz nz.cu -arch=sm_61 -L/usr/local/lib -lopencv_core -lopencv_highgui -lopencv_imgproc

请记住,此代码仅用于8UC1类型的图像(8位,单个通道)。您可以根据需要轻松将其扩展到其他数据类型。

最新更新