MOSSE Tracking Algorithm Source Code Walkthrough
A brief walkthrough of the MOSSE algorithm's source code.

Source code
The full implementation (from the OpenCV contrib tracking module) is as follows:
// This file is part of the OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// [1] David S. Bolme et al. "Visual Object Tracking using Adaptive Correlation Filters"
//     http://www.cs.colostate.edu/~draper/papers/bolme_cvpr10.pdf
//
// credits:
// Kun-Hsin Chen: for initial c++ code
// Cracki: for the idea of only converting the used patch to gray
//

#include "opencv2/tracking.hpp"

namespace cv {
namespace tracking {

struct DummyModel : TrackerModel
{
    virtual void modelUpdateImpl() {}
    virtual void modelEstimationImpl( const std::vector<Mat>& ) {}
};

const double eps = 0.00001;      // for normalization
const double rate = 0.2;         // learning rate
const double psrThreshold = 5.7; // no detection, if PSR is smaller than this

struct MosseImpl : TrackerMOSSE
{
protected:
    Point2d center; // center of the bounding box
    Size size;      // size of the bounding box
    Mat hanWin;
    Mat G;          // goal
    Mat H, A, B;    // state

    // Element-wise division of complex numbers in src1 and src2
    Mat divDFTs( const Mat &src1, const Mat &src2 ) const
    {
        Mat c1[2], c2[2], a1, a2, s1, s2, denom, re, im;

        // split into re and im per src
        cv::split(src1, c1);
        cv::split(src2, c2);

        // (Re2*Re2 + Im2*Im2) = denom
        // denom is same for both channels
        cv::multiply(c2[0], c2[0], s1);
        cv::multiply(c2[1], c2[1], s2);
        cv::add(s1, s2, denom);

        // (Re1*Re2 + Im1*Im2)/(Re2*Re2 + Im2*Im2) = Re
        cv::multiply(c1[0], c2[0], a1);
        cv::multiply(c1[1], c2[1], a2);
        cv::divide(a1+a2, denom, re, 1.0);

        // -(Im1*Re2 + Re1*Im2)/(Re2*Re2 + Im2*Im2) = Im (note the sign; see below)
        cv::multiply(c1[1], c2[0], a1);
        cv::multiply(c1[0], c2[1], a2);
        cv::divide(a1+a2, denom, im, -1.0);

        // Merge Re and Im back into a complex matrix
        Mat dst, chn[] = {re, im};
        cv::merge(chn, 2, dst);
        return dst;
    }

    void preProcess( Mat &window ) const
    {
        window.convertTo(window, CV_32F);
        log(window + 1.0f, window);

        // normalize
        Scalar mean, StdDev;
        meanStdDev(window, mean, StdDev);
        window = (window - mean[0]) / (StdDev[0] + eps);

        // cosine (Hanning) window weighting
        window = window.mul(hanWin);
    }

    double correlate( const Mat &image_sub, Point &delta_xy ) const // compute the relative displacement
    {
        Mat IMAGE_SUB, RESPONSE, response;

        // filter in dft space
        dft(image_sub, IMAGE_SUB, DFT_COMPLEX_OUTPUT);
        mulSpectrums(IMAGE_SUB, H, RESPONSE, 0, true);
        idft(RESPONSE, response, DFT_SCALE|DFT_REAL_OUTPUT);

        // update center position
        double maxVal; Point maxLoc;
        minMaxLoc(response, 0, &maxVal, 0, &maxLoc);
        delta_xy.x = maxLoc.x - int(response.size().width/2);
        delta_xy.y = maxLoc.y - int(response.size().height/2);

        // normalize response
        Scalar mean, std;
        meanStdDev(response, mean, std);
        return (maxVal - mean[0]) / (std[0] + eps); // PSR
    }

    Mat randWarp( const Mat& a ) const
    {
        cv::RNG rng(8031965);

        // random rotation
        double C = 0.1;
        double ang = rng.uniform(-C, C);
        double c = cos(ang), s = sin(ang);

        // affine warp matrix
        Mat_<float> W(2, 3);
        W << c + rng.uniform(-C, C), -s + rng.uniform(-C, C), 0,
             s + rng.uniform(-C, C),  c + rng.uniform(-C, C), 0;

        // random translation
        Mat_<float> center_warp(2, 1);
        center_warp << a.cols/2, a.rows/2;
        W.col(2) = center_warp - (W.colRange(0, 2)) * center_warp;

        Mat warped;
        warpAffine(a, warped, W, a.size(), BORDER_REFLECT);
        return warped;
    }

    virtual bool initImpl( const Mat& image, const Rect2d& boundingBox )
    {
        model = makePtr<DummyModel>();

        Mat img;
        if (image.channels() == 1)
            img = image;
        else
            cvtColor(image, img, COLOR_BGR2GRAY);

        int w = getOptimalDFTSize(int(boundingBox.width));
        int h = getOptimalDFTSize(int(boundingBox.height));

        // Get the center position
        int x1 = int(floor((2*boundingBox.x + boundingBox.width - w)/2));
        int y1 = int(floor((2*boundingBox.y + boundingBox.height - h)/2));
        center.x = x1 + (w)/2;
        center.y = y1 + (h)/2;
        size.width = w;
        size.height = h;

        Mat window;
        getRectSubPix(img, size, center, window);
        createHanningWindow(hanWin, size, CV_32F);

        // goal
        Mat g = Mat::zeros(size, CV_32F);
        g.at<float>(h/2, w/2) = 1;
        GaussianBlur(g, g, Size(-1, -1), 2.0);
        double maxVal;
        minMaxLoc(g, 0, &maxVal);
        g = g / maxVal;
        dft(g, G, DFT_COMPLEX_OUTPUT);

        // initial A, B and H
        A = Mat::zeros(G.size(), G.type());
        B = Mat::zeros(G.size(), G.type());
        for (int i = 0; i < 8; i++)
        {
            Mat window_warp = randWarp(window);
            preProcess(window_warp);

            Mat WINDOW_WARP, A_i, B_i;
            dft(window_warp, WINDOW_WARP, DFT_COMPLEX_OUTPUT);
            mulSpectrums(G, WINDOW_WARP, A_i, 0, true);
            mulSpectrums(WINDOW_WARP, WINDOW_WARP, B_i, 0, true);
            A += A_i;
            B += B_i;
        }
        H = divDFTs(A, B);
        return true;
    }

    virtual bool updateImpl( const Mat& image, Rect2d& boundingBox )
    {
        if (H.empty()) // not initialized
            return false;

        Mat image_sub;
        getRectSubPix(image, size, center, image_sub);

        if (image_sub.channels() != 1)
            cvtColor(image_sub, image_sub, COLOR_BGR2GRAY);
        preProcess(image_sub);

        Point delta_xy;
        double PSR = correlate(image_sub, delta_xy);
        if (PSR < psrThreshold)
            return false;

        // update location
        center.x += delta_xy.x;
        center.y += delta_xy.y;

        Mat img_sub_new;
        getRectSubPix(image, size, center, img_sub_new);
        if (img_sub_new.channels() != 1)
            cvtColor(img_sub_new, img_sub_new, COLOR_BGR2GRAY);
        preProcess(img_sub_new);

        // new state for A and B
        Mat F, A_new, B_new;
        dft(img_sub_new, F, DFT_COMPLEX_OUTPUT);
        mulSpectrums(G, F, A_new, 0, true);
        mulSpectrums(F, F, B_new, 0, true);

        // update A, B, and H
        A = A*(1-rate) + A_new*rate;
        B = B*(1-rate) + B_new*rate;
        H = divDFTs(A, B);

        // return tracked rect
        double x = center.x, y = center.y;
        int w = size.width, h = size.height;
        boundingBox = Rect2d(Point2d(x - 0.5*w, y - 0.5*h), Point2d(x + 0.5*w, y + 0.5*h));
        return true;
    }

public:
    MosseImpl() { isInit = 0; }

    // dummy implementation.
    virtual void read( const FileNode& ) {}
    virtual void write( FileStorage& ) const {}

}; // MosseImpl

} // tracking

Ptr<TrackerMOSSE> TrackerMOSSE::create()
{
    return makePtr<tracking::MosseImpl>();
}

} // cv

What each sub-function does
1: divDFTs
This function performs element-wise division of two complex (two-channel) Mats, computing:

$$\operatorname{Re}\!\left(\frac{src1}{src2}\right)=\frac{src1.re\cdot src2.re+src1.im\cdot src2.im}{src2.re^2+src2.im^2},\qquad\operatorname{Im}\!\left(\frac{src1}{src2}\right)=-\,\frac{src1.im\cdot src2.re+src1.re\cdot src2.im}{src2.re^2+src2.im^2}$$

Note the sign of the imaginary part: this is not textbook complex division. In this tracker it does not matter, because the denominator B = F ⊙ F* is real-valued, so divDFTs(A, B) effectively returns conj(A)/B; the conjB=true flag in correlate()'s mulSpectrums call undoes the extra conjugation, and the response spectrum comes out as F_new ⊙ A/B, exactly what the MOSSE paper prescribes.
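A quick way to see what this routine computes is to apply the same arithmetic to a single complex number. The sketch below is a hypothetical standalone mirror of divDFTs (divComplex is not an OpenCV function), shown dividing by a real number, as happens in the tracker:

#include <opencv2/core.hpp>
#include <cstdio>

// Mirrors the arithmetic of divDFTs on two-channel (complex) Mats.
static cv::Mat divComplex(const cv::Mat& src1, const cv::Mat& src2)
{
    cv::Mat c1[2], c2[2], a1, a2, s1, s2, denom, re, im;
    cv::split(src1, c1);
    cv::split(src2, c2);
    cv::multiply(c2[0], c2[0], s1);
    cv::multiply(c2[1], c2[1], s2);
    cv::add(s1, s2, denom);                  // Re2^2 + Im2^2
    cv::multiply(c1[0], c2[0], a1);
    cv::multiply(c1[1], c2[1], a2);
    cv::divide(a1 + a2, denom, re, 1.0);     //  (Re1*Re2 + Im1*Im2) / denom
    cv::multiply(c1[1], c2[0], a1);
    cv::multiply(c1[0], c2[1], a2);
    cv::divide(a1 + a2, denom, im, -1.0);    // -(Im1*Re2 + Re1*Im2) / denom
    cv::Mat dst, chn[] = {re, im};
    cv::merge(chn, 2, dst);
    return dst;
}

int main()
{
    // src1 = 1 + 2i, src2 = 3 + 0i (real, like B = F.F* in MOSSE)
    cv::Mat a(1, 1, CV_32FC2, cv::Scalar(1, 2));
    cv::Mat b(1, 1, CV_32FC2, cv::Scalar(3, 0));
    cv::Vec2f q = divComplex(a, b).at<cv::Vec2f>(0, 0);
    std::printf("%f + %fi\n", q[0], q[1]);   // 0.333333 + -0.666667i, i.e. conj(src1)/src2
    return 0;
}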
2: preProcess
This function preprocesses the image patch. The MOSSE paper explains why:

"One issue with the FFT convolution algorithm is that the image and the filter are mapped to the topological structure of a torus. In other words, it connects the left edge of the image to the right edge, and the top to the bottom. During convolution, the images rotate through the toroidal space instead of translating as they would in the spatial domain. Artificially connecting the boundaries of the image introduces an artifact which effects the correlation output.

This effect is reduced by following the preprocessing steps outlined in [3]. First, the pixel values are transformed using a log function which helps with low contrast lighting situations. The pixel values are normalized to have a mean value of 0.0 and a norm of 1.0. Finally, the image is multiplied by a cosine window which gradually reduces the pixel values near the edge to zero. This also has the benefit that it puts more emphasis near the center of the target."

In short: the log transform helps in low-contrast lighting; the patch is normalized (in this implementation, to zero mean and unit standard deviation rather than unit norm); and the cosine (Hanning) window tapers pixel values toward zero at the borders, which also puts more weight on the center of the target.
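As a standalone routine, the same steps look like the sketch below (preprocessPatch is hypothetical; note the tracker builds hanWin once in initImpl and reuses it, whereas this sketch recreates it per call):

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

// Preprocess a grayscale patch the way MOSSE's preProcess does.
void preprocessPatch(cv::Mat& patch)
{
    patch.convertTo(patch, CV_32F);
    cv::log(patch + 1.0f, patch);                    // log transform: helps low-contrast lighting

    cv::Scalar mean, stddev;                         // zero mean, unit standard deviation
    cv::meanStdDev(patch, mean, stddev);
    patch = (patch - mean[0]) / (stddev[0] + 1e-5);

    cv::Mat hanWin;                                  // cosine taper toward the borders
    cv::createHanningWindow(hanWin, patch.size(), CV_32F);
    patch = patch.mul(hanWin);
}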
3: correlate
This function takes the DFT of the current (preprocessed) patch, multiplies it by the filter template H learned from previous frames, and applies the inverse DFT to obtain the spatial response map. The peak of the response gives the target's displacement delta_xy relative to the patch center, and the function returns the peak-to-sidelobe ratio (PSR); a small PSR means the target could not be reliably detected.
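The returned score is the PSR from the MOSSE paper:

$$\mathrm{PSR}=\frac{g_{\max}-\mu_{sl}}{\sigma_{sl}}$$

where g_max is the response peak and μ_sl, σ_sl are the mean and standard deviation of the sidelobe. In the paper the sidelobe excludes an 11x11 window around the peak; this implementation simply uses the mean and standard deviation of the entire response map (plus eps in the denominator for stability), which is cheaper and close enough in practice.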
4: randWarp
This function applies a small random affine warp (a slight rotation with per-element perturbations) to the patch; initImpl uses eight such warps of the initial patch as the training set for the filter.
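Concretely, with the rotation angle θ and each perturbation εᵢ drawn uniformly from (−0.1, 0.1), the code builds

$$W=\begin{bmatrix}\cos\theta+\varepsilon_1 & -\sin\theta+\varepsilon_2\\ \sin\theta+\varepsilon_3 & \cos\theta+\varepsilon_4\end{bmatrix},\qquad t=c-Wc$$

where c is the patch center, so the translation column t keeps the center of the patch fixed under the warp.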
5: initImpl
This function initializes A, B, and H. It first converts the frame to grayscale, pads the bounding box to the optimal DFT size, and extracts the patch around its center. The desired output G is the DFT of a Gaussian peak: a delta at the patch center, blurred with a Gaussian (σ = 2) and normalized to a maximum of 1. Eight randomly warped copies of the initial patch are then preprocessed and accumulated into A and B by plain summation, which is the batch form of the update equations below with learning rate η = 1:

$$A_i=\eta\,G_i\odot F_i^{*}+(1-\eta)A_{i-1},\qquad B_i=\eta\,F_i\odot F_i^{*}+(1-\eta)B_{i-1},\qquad H_i^{*}=\frac{A_i}{B_i}$$
6: updateImpl
This function first correlates the patch at the previous center with H and moves the center to the detected peak, returning false if the PSR falls below psrThreshold = 5.7. It then extracts a new patch at the updated center and updates A, B, and H with the formulas above, this time with learning rate η = rate = 0.2.
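Putting it together, a minimal tracking loop might look like the sketch below, assuming the OpenCV 3.x contrib API (in OpenCV 4.5.1+ this class moved to cv::legacy::TrackerMOSSE); the video path and initial box are placeholders:

#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/tracking.hpp>

int main()
{
    cv::VideoCapture cap("video.avi");            // placeholder video path
    cv::Mat frame;
    if (!cap.read(frame)) return 1;

    cv::Rect2d box(100, 100, 80, 60);             // placeholder initial bounding box
    cv::Ptr<cv::TrackerMOSSE> tracker = cv::TrackerMOSSE::create();
    tracker->init(frame, box);                    // runs initImpl

    while (cap.read(frame))
    {
        bool ok = tracker->update(frame, box);    // runs updateImpl
        if (ok)
            cv::rectangle(frame, cv::Rect(box), cv::Scalar(0, 255, 0), 2);
        // !ok means the PSR fell below psrThreshold: target lost in this frame

        cv::imshow("MOSSE", frame);
        if (cv::waitKey(30) == 27) break;         // Esc quits
    }
    return 0;
}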
Original paper
http://www.cs.colostate.edu/~draper/papers/bolme_cvpr10.pdf