非法内存访问:在单个 GPU 上使用多线程(OpenCV3.1 + CUDA 8.0)(完整代码)



我只是想在一块 GTX1060 显卡上同时运行 5 个线程,但失败了。【运行环境】:Linux Ubuntu 16.04 + CUDA 8.0 + OpenCV3.1 + GTX1060

[错误]:

OpenCV 错误:GPU API 调用出错(遇到非法内存访问),位于 linesAccum_gpu,文件 /home/weiran/DB_OpenCV/opencv-3.1.0/modules/cudaimgproc/src/cuda/hough_lines.cu,第 143 行。OpenCV 错误:GPU API 调用出错(遇到非法内存访问),位于 linesAccum_gpu,文件 /home/weiran/DB_OpenCV/opencv-3.1.0/modules/cudaimgproc/src/cuda/hough_lines.cu,第 143 行。terminate 被递归调用(terminate called recursively)

[代码]:

#include <cmath>
#include <iostream>
#include "opencv2/core.hpp"
#include <opencv2/core/utility.hpp>
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"
#include "pthread.h"
using namespace std;
using namespace cv;
using namespace cv::cuda;
#define PI 3.1415926
#define NUM_LOOP 500
namespace {

// Process-wide lock that serialises the CUDA section of carReviseInterface_gpu.
// Running that section from several threads at once is what produces the
// "illegal memory access" failure in linesAccum_gpu.
// NOTE(review): the OpenCV 3.1 CUDA Canny/Hough implementations appear to keep
// global device-side state and offer no per-call stream here — confirm against
// the OpenCV sources before relaxing this lock.
pthread_mutex_t g_gpuPipelineMutex = PTHREAD_MUTEX_INITIALIZER;

// Minimal RAII guard: OpenCV reports CUDA failures by throwing, so the mutex
// must be released during stack unwinding as well as on normal return.
class ScopedGpuLock
{
public:
    explicit ScopedGpuLock(pthread_mutex_t &mutex) : mutex_(mutex)
    {
        pthread_mutex_lock(&mutex_);
    }
    ~ScopedGpuLock()
    {
        pthread_mutex_unlock(&mutex_);
    }

private:
    ScopedGpuLock(const ScopedGpuLock &);            // non-copyable
    ScopedGpuLock &operator=(const ScopedGpuLock &); // non-assignable
    pthread_mutex_t &mutex_;
};

} // namespace

/// Runs the GPU edge + line-segment detection pipeline on one image.
///
/// Steps: upload `src`, convert BGR -> gray, run Canny (thresholds 74 / 147),
/// run the Hough segment detector (rho 1.0, theta 1 degree, min line length
/// 129, max line gap 20) and download the detected segments to host memory.
///
/// @param src  Input image; converted with COLOR_BGR2GRAY, so it is assumed
///             to be a non-empty 3-channel BGR image (OpenCV throws otherwise).
/// @param dst  Currently NOT written by this function; kept for interface
///             compatibility with existing callers.
///
/// The whole GPU section is serialised across threads (see g_gpuPipelineMutex).
void carReviseInterface_gpu(Mat src, Mat &dst)
{
    (void)dst; // output image is not produced yet

    ScopedGpuLock gpuLock(g_gpuPipelineMutex);

    // Upload and convert to a single-channel image for edge detection.
    cuda::GpuMat d_src(src);
    cuda::GpuMat d_gray;
    cv::cuda::cvtColor(d_src, d_gray, COLOR_BGR2GRAY);

    // Canny edge map.
    cuda::GpuMat d_contours;
    Ptr<cuda::CannyEdgeDetector> canny = cuda::createCannyEdgeDetector(74, 147);
    canny->detect(d_gray, d_contours);

    // Probabilistic Hough transform on the edge map.
    cuda::GpuMat d_lines;
    Ptr<cuda::HoughSegmentDetector> hough =
        cuda::createHoughSegmentDetector(1.0f, static_cast<float>(CV_PI / 180.0f), 129, 20);
    hough->detect(d_contours, d_lines);

    // Download the segments; h_lines is a header over lines_gpu's storage, so
    // the download fills the vector directly without an extra copy.
    vector<Vec4i> lines_gpu;
    if (!d_lines.empty())
    {
        lines_gpu.resize(d_lines.cols);
        Mat h_lines(1, d_lines.cols, CV_32SC4, &lines_gpu[0]);
        d_lines.download(h_lines);
    }

    // To visualise the result, draw each segment onto src:
    //   line(src, Point(l[0], l[1]), Point(l[2], l[3]), Scalar(0, 0, 255), 3, LINE_AA);
}

void *threadFun(void *arg)
{
Mat src_gpu = imread("/home/weiran/DB_Multimedia/plate2.png");//plate2.png
Mat res, res_gpu;
for (int i = 0; i < NUM_LOOP; ++i) {
carReviseInterface_gpu(src_gpu, res_gpu);
}
}
int main()
{
pthread_t pth[5];
memset(&pth, 0, sizeof(pth));
Mat src = imread("/home/weiran/DB_Multimedia/plate2.png");//plate2.png
Mat res, res_gpu;
Mat src_gpu = src.clone();
imshow("0 src", src);
cv::Size size;
size.width = 320;
size.height = 240;

const int64 start2 = getTickCount();
for (int i = 0; i < 2; ++i)
{
pthread_create(&pth[i], NULL, threadFun, NULL);
}
//        for (int i = 0; i < NUM_LOOP; ++i)
//        {
//                carReviseInterface_gpu(src_gpu, res_gpu);
//                //cuResize(src_gpu, res_gpu, size);
//        }
const double timeSec2 = (getTickCount() - start2) / getTickFrequency();
cout << "GPU Time : " << (timeSec2 * 1000) / NUM_LOOP << " ms" << endl;

imshow("8 cpu src", src);
//        imshow("9 cpu res", res);
imshow("8 gpu src", src_gpu);
//        imshow("9 gpu res", res_gpu);

waitKey(0);
return 0;
}

我该怎么办? 提前感谢任何帮助~

在 OpenCV 中使用多线程的关键是:只使用接受流(cv::cuda::Stream)参数的 GPU 函数,并让每个线程使用各自的流。

cv::cuda::Stream myStream;
gpuImage.download(cpuImage, myStream);

不接受流参数的 OpenCV CUDA 函数在被多个线程同时调用时,可能会出现缓冲区被相互覆盖的问题。

最新更新