使用OpenCL和GPU并不能提高相机的fps性能



我使用OpenCV和Visual Studio 2012编译了这个简单的颜色跟踪图像处理程序。首先我用CPU编译它。程序:

#include <iostream>
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <time.h>
using namespace cv;
using namespace std;
 int main( int argc, char** argv )
 {
    time_t t= time(0);
    VideoCapture cap(0); //capture the video from web cam
    if ( !cap.isOpened() )  // if not success, exit program
    {
         cout << "Cannot open the web cam" << endl;
         return -1;
    }
     double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
   double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
    cout << "Frame size : " << dWidth << " x " << dHeight << endl;
    namedWindow("Control", CV_WINDOW_AUTOSIZE); //create a window called "Control"
  int iLowH = 0;
 int iHighH = 179;
  int iLowS = 0; 
 int iHighS = 255;
  int iLowV = 0;
 int iHighV = 255;
  //Create track bars in "Control" window
 cvCreateTrackbar("LowH", "Control", &iLowH, 179); //Hue (0 - 179)
 cvCreateTrackbar("HighH", "Control", &iHighH, 179);
  cvCreateTrackbar("LowS", "Control", &iLowS, 255); //Saturation (0 - 255)
 cvCreateTrackbar("HighS", "Control", &iHighS, 255);
  cvCreateTrackbar("LowV", "Control", &iLowV, 255); //Value (0 - 255)
 cvCreateTrackbar("HighV", "Control", &iHighV, 255);
    int fps=0;
    int cur=0;
    while (true)
    {
    fps++;
        t=time(0);
        struct tm *tmp = gmtime(&t);
        int h= (t/360) %24;
        int m= (t/60) %60;
        int s = t%60;
        if(cur !=s)
        {
            cout<<fps<<endl;
            fps=0;
            cur=s;
        }
        Mat imgOriginal;
        bool bSuccess = cap.read(imgOriginal); // read a new frame from video
         if (!bSuccess) //if not success, break loop
        {
             cout << "Cannot read a frame from video stream" << endl;
             break;
        }
    Mat imgHSV;
   cvtColor(imgOriginal, imgHSV, COLOR_BGR2HSV); //Convert the captured frame from BGR to HSV
  Mat imgThresholded;
   inRange(imgHSV, Scalar(iLowH, iLowS, iLowV), Scalar(iHighH, iHighS, iHighV), imgThresholded); //Threshold the image
  //morphological opening (remove small objects from the foreground)
  erode(imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
  dilate( imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) ); 
   //morphological closing (fill small holes in the foreground)
  dilate( imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) ); 
  erode(imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
   imshow("Thresholded Image", imgThresholded); //show the thresholded image
  imshow("Original", imgOriginal); //show the original image
        if (waitKey(30) == 27) //wait for 'esc' key press for 30ms. If 'esc' key is pressed, break loop
       {
            cout << "esc key is pressed by user" << endl;
            break; 
       }
    }
   return 0;
}

"我的相机"的帧速率为16。然后我使用OpenCL(GPU支持)编译了这个程序。程序:

#include <iostream>
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <opencv2/ocl/ocl.hpp>
#include <time.h>
using namespace cv;
using namespace std;
 int main( int argc, char** argv )
 {
    time_t t= time(0);
    VideoCapture cap(0); //capture the video from web cam
    if ( !cap.isOpened() )  // if not success, exit program
    {
         cout << "Cannot open the web cam" << endl;
         return -1;
    }
     double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
   double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
    cout << "Frame size : " << dWidth << " x " << dHeight << endl;
    namedWindow("Control", CV_WINDOW_AUTOSIZE); //create a window called "Control"
  int iLowH = 0;
 int iHighH = 179;
  int iLowS = 0; 
 int iHighS = 255;
  int iLowV = 0;
 int iHighV = 255;
  //Create track bars in "Control" window
 cvCreateTrackbar("LowH", "Control", &iLowH, 179); //Hue (0 - 179)
 cvCreateTrackbar("HighH", "Control", &iHighH, 179);
  cvCreateTrackbar("LowS", "Control", &iLowS, 255); //Saturation (0 - 255)
 cvCreateTrackbar("HighS", "Control", &iHighS, 255);
  cvCreateTrackbar("LowV", "Control", &iLowV, 255); //Value (0 - 255)
 cvCreateTrackbar("HighV", "Control", &iHighV, 255);
    int fps=0;
    int cur=0;
    while (true)
    {
    fps++;
        t=time(0);
        struct tm *tmp = gmtime(&t);
        int h= (t/360) %24;
        int m= (t/60) %60;
        int s = t%60;
        if(cur !=s)
        {
            cout<<fps<<endl;
            fps=0;
            cur=s;
        }
        Mat imgOriginal;
        bool bSuccess = cap.read(imgOriginal); // read a new frame from video
         if (!bSuccess) //if not success, break loop
        {
             cout << "Cannot read a frame from video stream" << endl;
             break;
        }
    Mat imgHSV;
   cvtColor(imgOriginal, imgHSV, COLOR_BGR2HSV); //Convert the captured frame from BGR to HSV
  Mat imgThresholded;
   inRange(imgHSV, Scalar(iLowH, iLowS, iLowV), Scalar(iHighH, iHighS, iHighV), imgThresholded); //Threshold the image
  //morphological opening (remove small objects from the foreground)
    ocl::oclMat alpha(imgThresholded);
    ocl::erode(alpha,alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
    ocl::dilate( alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) ); 
   //morphological closing (fill small holes in the foreground)
    ocl::dilate( alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) ); 
    ocl::erode(alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
    imgThresholded = Mat(alpha);
   imshow("Thresholded Image", imgThresholded); //show the thresholded image
  imshow("Original", imgOriginal); //show the original image
        if (waitKey(30) == 27) //wait for 'esc' key press for 30ms. If 'esc' key is pressed, break loop
       {
            cout << "esc key is pressed by user" << endl;
            break; 
       }
    }
   return 0;
}

但现在我的fps=10。请问有人能告诉我为什么会这样吗?我在某处读到GPU支持可以提高fps性能。我使用的显卡是AMD Radeon。

GPU是为大吞吐量而设计的,但将数据从CPU内存移动到GPU内存需要花费大量时间。你不应该认为GPU总是能提高fps。这完全取决于GPU的计算能力被利用得有多充分。

在你的情况下,你似乎对每一帧只做了很少的计算。所以我的猜测是,你的系统大部分时间都花在把帧传到GPU并把结果传回来上。

(正如maZZZu所评论的)

您正在进行串行计算。添加流水线。然后,当一帧被捕获时,opencl同时计算最后一帧。你可以重叠更多的步骤,比如:

  • 获取视频数据
  • 复制到gpu
  • 计算
  • 复制回cpu
  • 可视化

这样,FPS只取决于耗时最长的那个步骤。如果复制到gpu需要20ms,那么其他步骤的耗时都会被隐藏,程序将显示50FPS。

- Time 1: get video data 1
- (Time 2: get video data 2) and (copy data 1 to gpu)
- (Time 3: get video data 3) and (copy data 2 to gpu) and (compute data 1)
- (Time 4: get video data 4) and (copy data 3 to gpu) and (compute data 2) and ..
- (Time 5: get video data 5) and (copy data 4 to gpu) and (compute data 3) and ..
- (Time 6: get video data 6) and (copy data 5 to gpu) and (compute data 4) and ..
- (Time 7: get video data 7) and (copy data 6 to gpu) and (compute data 5) and ..

所以,如果复制到gpu需要花费45%的时间,而返回结果也需要花费45%的时间,那么只需将其中一个隐藏在另一个之后,FPS就应该会增加90%。

最新更新