我使用OpenCV和Visual Studio 2012编译了这个简单的颜色跟踪图像处理程序。首先我编译了只使用CPU的版本。程序:
#include <iostream>
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <time.h>
using namespace cv;
using namespace std;
int main( int argc, char** argv )
{
time_t t= time(0);
VideoCapture cap(0); //capture the video from web cam
if ( !cap.isOpened() ) // if not success, exit program
{
cout << "Cannot open the web cam" << endl;
return -1;
}
double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
cout << "Frame size : " << dWidth << " x " << dHeight << endl;
namedWindow("Control", CV_WINDOW_AUTOSIZE); //create a window called "Control"
int iLowH = 0;
int iHighH = 179;
int iLowS = 0;
int iHighS = 255;
int iLowV = 0;
int iHighV = 255;
//Create track bars in "Control" window
cvCreateTrackbar("LowH", "Control", &iLowH, 179); //Hue (0 - 179)
cvCreateTrackbar("HighH", "Control", &iHighH, 179);
cvCreateTrackbar("LowS", "Control", &iLowS, 255); //Saturation (0 - 255)
cvCreateTrackbar("HighS", "Control", &iHighS, 255);
cvCreateTrackbar("LowV", "Control", &iLowV, 255); //Value (0 - 255)
cvCreateTrackbar("HighV", "Control", &iHighV, 255);
int fps=0;
int cur=0;
while (true)
{
fps++;
t=time(0);
struct tm *tmp = gmtime(&t);
int h= (t/360) %24;
int m= (t/60) %60;
int s = t%60;
if(cur !=s)
{
cout<<fps<<endl;
fps=0;
cur=s;
}
Mat imgOriginal;
bool bSuccess = cap.read(imgOriginal); // read a new frame from video
if (!bSuccess) //if not success, break loop
{
cout << "Cannot read a frame from video stream" << endl;
break;
}
Mat imgHSV;
cvtColor(imgOriginal, imgHSV, COLOR_BGR2HSV); //Convert the captured frame from BGR to HSV
Mat imgThresholded;
inRange(imgHSV, Scalar(iLowH, iLowS, iLowV), Scalar(iHighH, iHighS, iHighV), imgThresholded); //Threshold the image
//morphological opening (remove small objects from the foreground)
erode(imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
dilate( imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
//morphological closing (fill small holes in the foreground)
dilate( imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
erode(imgThresholded, imgThresholded, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
imshow("Thresholded Image", imgThresholded); //show the thresholded image
imshow("Original", imgOriginal); //show the original image
if (waitKey(30) == 27) //wait for 'esc' key press for 30ms. If 'esc' key is pressed, break loop
{
cout << "esc key is pressed by user" << endl;
break;
}
}
return 0;
}
"我的相机"的帧速率为16。然后我使用OpenCL(GPU支持)编译了这个程序。程序:
#include <iostream>
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include <opencv2/ocl/ocl.hpp>
#include <time.h>
using namespace cv;
using namespace std;
int main( int argc, char** argv )
{
time_t t= time(0);
VideoCapture cap(0); //capture the video from web cam
if ( !cap.isOpened() ) // if not success, exit program
{
cout << "Cannot open the web cam" << endl;
return -1;
}
double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
cout << "Frame size : " << dWidth << " x " << dHeight << endl;
namedWindow("Control", CV_WINDOW_AUTOSIZE); //create a window called "Control"
int iLowH = 0;
int iHighH = 179;
int iLowS = 0;
int iHighS = 255;
int iLowV = 0;
int iHighV = 255;
//Create track bars in "Control" window
cvCreateTrackbar("LowH", "Control", &iLowH, 179); //Hue (0 - 179)
cvCreateTrackbar("HighH", "Control", &iHighH, 179);
cvCreateTrackbar("LowS", "Control", &iLowS, 255); //Saturation (0 - 255)
cvCreateTrackbar("HighS", "Control", &iHighS, 255);
cvCreateTrackbar("LowV", "Control", &iLowV, 255); //Value (0 - 255)
cvCreateTrackbar("HighV", "Control", &iHighV, 255);
int fps=0;
int cur=0;
while (true)
{
fps++;
t=time(0);
struct tm *tmp = gmtime(&t);
int h= (t/360) %24;
int m= (t/60) %60;
int s = t%60;
if(cur !=s)
{
cout<<fps<<endl;
fps=0;
cur=s;
}
Mat imgOriginal;
bool bSuccess = cap.read(imgOriginal); // read a new frame from video
if (!bSuccess) //if not success, break loop
{
cout << "Cannot read a frame from video stream" << endl;
break;
}
Mat imgHSV;
cvtColor(imgOriginal, imgHSV, COLOR_BGR2HSV); //Convert the captured frame from BGR to HSV
Mat imgThresholded;
inRange(imgHSV, Scalar(iLowH, iLowS, iLowV), Scalar(iHighH, iHighS, iHighV), imgThresholded); //Threshold the image
//morphological opening (remove small objects from the foreground)
ocl::oclMat alpha(imgThresholded);
ocl::erode(alpha,alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
ocl::dilate( alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
//morphological closing (fill small holes in the foreground)
ocl::dilate( alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
ocl::erode(alpha, alpha, getStructuringElement(MORPH_ELLIPSE, Size(5, 5)) );
imgThresholded = Mat(alpha);
imshow("Thresholded Image", imgThresholded); //show the thresholded image
imshow("Original", imgOriginal); //show the original image
if (waitKey(30) == 27) //wait for 'esc' key press for 30ms. If 'esc' key is pressed, break loop
{
cout << "esc key is pressed by user" << endl;
break;
}
}
return 0;
}
但现在我的fps=10。请问有人能告诉我为什么会这样吗?我在某处读到过,GPU支持可以提高fps性能。我使用的显卡是AMD Radeon。
GPU是为大吞吐量而设计的,但将数据从CPU内存移动到GPU内存需要花费大量时间。你不应该认为使用GPU总能提高fps。这一切都取决于GPU的算力被利用得有多充分。
在你的情况下,你似乎对每一帧只做了很少的计算。所以我的猜测是,你的系统把大部分时间都花在了将帧传到GPU并将结果传回来上。
(正如maZZZu所评论的)
您目前是串行执行各个步骤的。请加入流水线:这样在捕获当前帧的同时,OpenCL可以并行计算上一帧。你还可以让更多的步骤相互重叠,比如:
- 获取视频数据
- 复制到gpu
- 计算
- 将结果取回cpu
- 可视化
这样,FPS只取决于耗时最长的那个步骤。如果复制到gpu需要20ms(并且它是最慢的步骤),那么其他步骤的耗时将被隐藏,程序将显示50FPS。
- Time 1: get video data 1
- (Time 2: get video data 2) and (copy data 1 to gpu)
- (Time 3: get video data 3) and (copy data 2 to gpu) and (compute data 1)
- (Time 4: get video data 4) and (copy data 3 to gpu) and (compute data 2) and ..
- (Time 5: get video data 5) and (copy data 4 to gpu) and (compute data 3) and ..
- (Time 6: get video data 6) and (copy data 5 to gpu) and (compute data 4) and ..
- (Time 7: get video data 7) and (copy data 6 to gpu) and (compute data 5) and ..
所以,如果复制到gpu需要花费45%的时间,而返回结果需要花费45%的时间,那么只需将其中一个隐藏在另一个之后,FPS就应该会增加90%。