/* Original code has been submitted by Liu Liu. Here is the copyright.
----------------------------------------------------------------------------------
 * An OpenCV Implementation of SURF
 * Further Information Refer to "SURF: Speed-Up Robust Feature"
 * Author: Liu Liu
 * liuliu.1987+opencv@gmail.com
 *
 * There are still serveral lacks for this experimental implementation:
 * 1.The interpolation of sub-pixel mentioned in article was not implemented yet;
 * 2.A comparision with original libSurf.so shows that the hessian detector is not a 100% match to their implementation;
 * 3.Due to above reasons, I recommanded the original one for study and reuse;
 *
 * However, the speed of this implementation is something comparable to original one.
 *
 * Copyright© 2008, Liu Liu All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *  Redistributions of source code must retain the above
 *  copyright notice, this list of conditions and the following
 *  disclaimer.
 *  Redistributions in binary form must reproduce the above
 *  copyright notice, this list of conditions and the following
 *  disclaimer in the documentation and/or other materials
 *  provided with the distribution.
 *  The name of Contributor may not be used to endorse or
 *  promote products derived from this software without
 *  specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
 * OF SUCH DAMAGE.
 */

/*
   The following changes have been made, comparing to the original contribution:

   1. A lot of small optimizations, less memory allocations, got rid of global buffers
   2. Reversed order of cvGetQuadrangleSubPix and cvResize calls; probably less accurate, but much faster
   3. The descriptor computing part (which is most expensive) is threaded using OpenMP
   (subpixel-accurate keypoint localization and scale estimation are still TBD)
*/

#include "_cv.h"

/* Builds a CvSURFParams structure.

   threshold - stored as hessianThreshold (candidates must exceed it in the detector)
   extended  - stored as-is; non-zero selects the 128-element descriptor in cvExtractSURF

   nOctaves and nOctaveLayers are fixed to 3 and 4 respectively. */
CvSURFParams cvSURFParams(double threshold, int extended)
{
    CvSURFParams params;
    params.hessianThreshold = threshold;
    params.extended = extended;
    params.nOctaves = 3;
    params.nOctaveLayers = 4;

    return params;
}

/* One weighted box of a Haar-like pattern, expressed for direct lookup in an
   integral image: p0..p3 are element offsets of the box corners
   (p0: top-left, p1: bottom-left, p2: top-right, p3: bottom-right)
   relative to the pattern origin, and w is the weight applied to the box sum. */
struct CvSurfHF
{
    int p0, p1, p2, p3;
    float w;
};

/* Evaluates an n-box Haar-like pattern f at the integral-image position origin:
   returns sum over boxes of (box sum) * (box weight). */
CV_INLINE float
icvCalcHaarPattern( const int* origin, const CvSurfHF* f, int n )
{
    double d = 0;
    for( int k = 0; k < n; k++ )
        d += (origin[f[k].p0] + origin[f[k].p3] - origin[f[k].p1] - origin[f[k].p2])*f[k].w;
    return (float)d;
}

/* Rescales a pattern description to a concrete filter size.

   src       - n rows of {x1, y1, x2, y2, weight} given on an oldSize grid
   dst       - receives n boxes with corner offsets for an integral image
               whose row stride (in elements) is widthStep
   newSize   - target filter size; coordinates are scaled by newSize/oldSize
               using integer arithmetic

   Each weight is divided by the scaled box area, so a box response is the
   (signed) mean over the box rather than the raw sum. */
static void
icvResizeHaarPattern( const int src[][5], CvSurfHF* dst, int n, int oldSize, int newSize, int widthStep )
{
    for( int k = 0; k < n; k++ )
    {
        int dx1 = src[k][0]*newSize/oldSize;
        int dy1 = src[k][1]*newSize/oldSize;
        int dx2 = src[k][2]*newSize/oldSize;
        int dy2 = src[k][3]*newSize/oldSize;
        dst[k].p0 = dy1*widthStep + dx1;
        dst[k].p1 = dy2*widthStep + dx1;
        dst[k].p2 = dy1*widthStep + dx2;
        dst[k].p3 = dy2*widthStep + dx2;
        dst[k].w = src[k][4]/((float)(dx2-dx1)*(dy2-dy1));
    }
}

/* Fast-Hessian keypoint detector.

   sum      - integral image (32-bit integers); rows are addressed with a stride
              of sum->cols elements
   mask_sum - optional integral image of a 0/1 mask (may be NULL)
   storage  - memory storage that owns the returned sequence
   params   - hessianThreshold, nOctaves and nOctaveLayers are used

   For every octave, nOctaveLayers+2 response layers are computed (box-filter
   approximations of the Hessian determinant and trace). A point of one of the
   nOctaveLayers inner layers becomes a keypoint when its determinant exceeds
   hessianThreshold, the mask (if any) covers at least half of its filter
   window, and no value in the 3x3 neighbourhoods of the layer itself and of
   the two adjacent layers is larger.

   Returns a sequence of CvSURFPoint allocated in storage. */
static CvSeq* icvFastHessianDetector( const CvMat* sum, const CvMat* mask_sum,
    CvMemStorage* storage, const CvSURFParams* params )
{
    CvSeq* points = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvSURFPoint), storage );

    int totalLayers = params->nOctaves*(params->nOctaveLayers+2);
    CvMat** hessians = (CvMat**)cvStackAlloc(totalLayers*sizeof(hessians[0]));
    CvMat** traces = (CvMat**)cvStackAlloc(totalLayers*sizeof(traces[0]));
    int size, *sizeCache = (int*)cvStackAlloc(totalLayers*sizeof(sizeCache[0]));
    int scale, *scaleCache = (int*)cvStackAlloc(totalLayers*sizeof(scaleCache[0]));

    /* box layouts of the second-derivative filters on a 9x9 grid:
       {x1, y1, x2, y2, weight} */
    const int NX=3, NY=3, NXY=4, SIZE0=9;
    int dx_s[NX][5] = { {0, 2, 3, 7, 1}, {3, 2, 6, 7, -2}, {6, 2, 9, 7, 1} };
    int dy_s[NY][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} };
    int dxy_s[NXY][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} };
    int dm[1][5] = { {0, 0, 9, 9, 1} };
    CvSurfHF Dx[NX], Dy[NY], Dxy[NXY], Dm;

    double dx = 0, dy = 0, dxy = 0;
    int hessian_rows, hessian_cols;
    int octave, sc;
    int i, j, k, z;
    int* xofs = (int*)cvStackAlloc(sum->cols*sizeof(xofs[0]));

    /* hessian detector: build all response layers */
    for( octave = k = 0; octave < params->nOctaves; octave++ )
    {
        for( sc = -1; sc <= params->nOctaveLayers; sc++, k++ )
        {
            if ( sc < 0 )
                sizeCache[k] = size = 7 << octave; // gaussian scale 1.0;
            else
                sizeCache[k] = size = (sc*6 + 9) << octave; // gaussian scale size*1.2/9.;
            scaleCache[k] = scale = MAX(size, SIZE0);

            /* the layer is sampled with a step of scale/SIZE0 pixels */
            hessian_rows = (sum->rows)*SIZE0/scale;
            hessian_cols = (sum->cols)*SIZE0/scale;
            hessians[k] = cvCreateMat( hessian_rows, hessian_cols, CV_32FC1 );
            traces[k] = cvCreateMat( hessian_rows, hessian_cols, CV_32FC1 );

            /* cvCreateMat does not clear the data. Zero the whole layer so that
               every border cell that is not computed below has a defined value
               (the non-maxima suppression reads such cells of adjacent layers),
               and so that layers too small to hold a single response are
               handled without writing outside of the buffer. */
            cvZero( hessians[k] );
            cvZero( traces[k] );

            icvResizeHaarPattern( dx_s, Dx, NX, SIZE0, size, sum->cols );
            icvResizeHaarPattern( dy_s, Dy, NY, SIZE0, size, sum->cols );
            icvResizeHaarPattern( dxy_s, Dxy, NXY, SIZE0, size, sum->cols );
            /* 0.9 is the relative weight of Dxy in the determinant
               approximation: det = Dxx*Dyy - (0.9*Dxy)^2 */
            for( i = 0; i < NXY; i++ )
                Dxy[i].w *= 0.9f;

            for( j = 0; j <= hessian_cols - SIZE0; j++ )
                xofs[j] = j*scale/SIZE0;

            /* NOTE(review): for j == hessian_cols - SIZE0 the right edge of the
               last Dx box can land on column sum->cols, i.e. on the first
               element of the next row of sum (e.g. when size == scale == 9).
               Left unchanged to keep the detector output stable; confirm
               whether the column bound should be strict like the row bound. */
            for( i = 0; i < hessian_rows - SIZE0; i++ )
            {
                const int* sum_ptr = sum->data.i + sum->cols*(i*scale/SIZE0);
                /* responses are stored at the centre of the filter window,
                   hence the SIZE0/2 shift in both directions */
                float* hessian = hessians[k]->data.fl + (i + SIZE0/2)*hessian_cols + SIZE0/2;
                float* trace = traces[k]->data.fl + (i + SIZE0/2)*hessian_cols + SIZE0/2;

                for( j = 0; j <= hessian_cols - SIZE0; j++ )
                {
                    const int* s = sum_ptr + xofs[j];
                    dx = (s[Dx[0].p0] + s[Dx[0].p3] - s[Dx[0].p1] - s[Dx[0].p2])*Dx[0].w +
                         (s[Dx[1].p0] + s[Dx[1].p3] - s[Dx[1].p1] - s[Dx[1].p2])*Dx[1].w +
                         (s[Dx[2].p0] + s[Dx[2].p3] - s[Dx[2].p1] - s[Dx[2].p2])*Dx[2].w;
                    dy = (s[Dy[0].p0] + s[Dy[0].p3] - s[Dy[0].p1] - s[Dy[0].p2])*Dy[0].w +
                         (s[Dy[1].p0] + s[Dy[1].p3] - s[Dy[1].p1] - s[Dy[1].p2])*Dy[1].w +
                         (s[Dy[2].p0] + s[Dy[2].p3] - s[Dy[2].p1] - s[Dy[2].p2])*Dy[2].w;
                    dxy = (s[Dxy[0].p0] + s[Dxy[0].p3] - s[Dxy[0].p1] - s[Dxy[0].p2])*Dxy[0].w +
                          (s[Dxy[1].p0] + s[Dxy[1].p3] - s[Dxy[1].p1] - s[Dxy[1].p2])*Dxy[1].w +
                          (s[Dxy[2].p0] + s[Dxy[2].p3] - s[Dxy[2].p1] - s[Dxy[2].p2])*Dxy[2].w +
                          (s[Dxy[3].p0] + s[Dxy[3].p3] - s[Dxy[3].p1] - s[Dxy[3].p2])*Dxy[3].w;
                    hessian[j] = (float)(dx*dy - dxy*dxy);
                    trace[j] = (float)(dx + dy);
                }
            }
        }
    }

    /* search the inner layers of every octave (the first and the last layer
       of an octave only serve as neighbours, hence k starts at 1 and skips 2
       layers between octaves) */
    for( octave = 0, k = 1; octave < params->nOctaves; octave++, k+=2 )
    {
        for( sc = 0; sc < params->nOctaveLayers; sc++, k++ )
        {
            size = sizeCache[k];
            scale = scaleCache[k];
            hessian_rows = hessians[k]->rows;
            hessian_cols = hessians[k]->cols;
            icvResizeHaarPattern( dm, &Dm, 1, SIZE0, size, mask_sum ? mask_sum->cols : sum->cols );

            /* border to skip, derived from the scale of the next layer */
            int margin = 5*scaleCache[k+1]/scale;
            for( i = margin; i < hessian_rows-margin; i++ )
            {
                const float* hessian = hessians[k]->data.fl + i*hessian_cols;
                const float* trace = traces[k]->data.fl + i*hessian_cols;
                for( j = margin; j < hessian_cols-margin; j++ )
                {
                    float val0 = hessian[j];
                    if( val0 > params->hessianThreshold )
                    {
                        bool suppressed = false;
                        if( mask_sum )
                        {
                            /* Dm has weight 1/area, so mval is the fraction of
                               the filter window covered by the mask */
                            const int* mask_ptr = mask_sum->data.i +
                                mask_sum->cols*((i-SIZE0/2)*scale/SIZE0) +
                                (j - SIZE0/2)*scale/SIZE0;
                            float mval = icvCalcHaarPattern( mask_ptr, &Dm, 1 );
                            if( mval < 0.5 )
                                continue;
                        }

                        /* non-maxima suppression: compare with the 3x3
                           neighbourhood at the corresponding position of the
                           layers k-1, k and k+1 */
                        for( z = k-1; z < k+2; z++ )
                        {
                            int hcols_z = hessians[z]->cols;
                            const float* nb = hessians[z]->data.fl +
                                (j*scale+scaleCache[z]/2)/scaleCache[z]-1 +
                                ((i*scale + scaleCache[z]/2)/scaleCache[z]-1)*hcols_z;
                            if( val0 < nb[0] || val0 < nb[1] || val0 < nb[2] ||
                                val0 < nb[hcols_z] || val0 < nb[hcols_z+1] || val0 < nb[hcols_z+2] ||
                                val0 < nb[hcols_z*2] || val0 < nb[hcols_z*2+1] || val0 < nb[hcols_z*2+2] )
                            {
                                suppressed = true;
                                break;
                            }
                        }

                        if( !suppressed )
                        {
                            double trace_val = trace[j];
                            /* position in image pixels, sign of the trace,
                               filter size, direction 0 (filled in later by
                               cvExtractSURF), determinant value */
                            CvSURFPoint point = cvSURFPoint(
                                cvPoint2D32f(j*scale/9.f, i*scale/9.f),
                                CV_SIGN(trace_val), sizeCache[k], 0, val0 );
                            cvSeqPush( points, &point );
                        }
                    }
                }
            }
        }
    }

    for( octave = k = 0; octave < params->nOctaves; octave++ )
        for( sc = -1; sc <= params->nOctaveLayers; sc++, k++ )
        {
            cvReleaseMat( &hessians[k] );
            cvReleaseMat( &traces[k] );
        }

    return points;
}

/* Detects SURF keypoints and, optionally, computes their descriptors.

   _img         - input image; must be 8-bit single-channel
   _mask        - optional 8-bit single-channel mask of the same size (may be 0);
                  non-zero pixels mark the region where keypoints are allowed
   _keypoints   - optional output; receives a sequence of CvSURFPoint
   _descriptors - optional output; receives a sequence with one descriptor per
                  keypoint (64 floats, or 128 floats when params.extended is
                  non-zero). When it is 0 only the keypoint direction is computed.
   storage      - memory storage for the output sequences; must not be 0
   params       - hessianThreshold >= 0, nOctaves > 0, nOctaveLayers > 0

   Both outputs are reset to 0 on entry and are assigned only when the function
   reaches its end without an error. */
CV_IMPL void
cvExtractSURF( const CvArr* _img, const CvArr* _mask,
               CvSeq** _keypoints, CvSeq** _descriptors,
               CvMemStorage* storage, CvSURFParams params )
{
    CvMat *sum = 0, *mask1 = 0, *mask_sum = 0;

    if( _keypoints )
        *_keypoints = 0;
    if( _descriptors )
        *_descriptors = 0;

    CV_FUNCNAME( "cvExtractSURF" );

    __BEGIN__;

    CvSeq *keypoints, *descriptors = 0;
    CvMat imghdr, *img = cvGetMat(_img, &imghdr);
    CvMat maskhdr, *mask = _mask ? cvGetMat(_mask, &maskhdr) : 0;

    int descriptor_size = params.extended ? 128 : 64;
    const int descriptor_data_type = CV_32F;
    const int NX=2, NY=2;
    const float sqrt_2 = 1.4142135623730950488016887242097f;
    const int PATCH_SZ = 20;
    const int RS_PATCH_SZ = 30; // ceil((PATCH_SZ+1)*sqrt_2);
    /* first-order Haar wavelets: {x1, y1, x2, y2, weight} */
    int dx_s[NX][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
    int dy_s[NY][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
    float G[9] = {0,0,0,0,0,0,0,0,0};
    CvMat _G = cvMat(1, 9, CV_32F, G);
    float DW[PATCH_SZ][PATCH_SZ];
    CvMat _DW = cvMat(PATCH_SZ, PATCH_SZ, CV_32F, DW);
    CvPoint apt[81];
    int i, j, k, nangle0 = 0, N;

    CV_ASSERT( img != 0 && CV_MAT_TYPE(img->type) == CV_8UC1 &&
        (mask == 0 || (CV_ARE_SIZES_EQ(img,mask) && CV_MAT_TYPE(mask->type) == CV_8UC1)) &&
        storage != 0 && params.hessianThreshold >= 0 &&
        params.nOctaves > 0 && params.nOctaveLayers > 0 );

    sum = cvCreateMat( img->height+1, img->width+1, CV_32SC1 );
    cvIntegral( img, sum );
    if( mask )
    {
        mask1 = cvCreateMat( img->height, img->width, CV_8UC1 );
        mask_sum = cvCreateMat( img->height+1, img->width+1, CV_32SC1 );
        /* clamp the mask to 0/1 so that its integral counts masked pixels */
        cvMinS( mask, 1, mask1 );
        cvIntegral( mask1, mask_sum );
    }
    keypoints = icvFastHessianDetector( sum, mask_sum, storage, &params );
    N = keypoints->total;
    if( _descriptors )
    {
        descriptors = cvCreateSeq( 0, sizeof(CvSeq),
            descriptor_size*CV_ELEM_SIZE(descriptor_data_type), storage );
        /* reserve N (uninitialized) descriptors; each one is cleared and
           filled in the loop below */
        cvSeqPushMulti( descriptors, 0, N );
    }

    /* 9-tap weights for the orientation samples
       (presumably a Gaussian with sigma = 2.5 - see CvSepFilter) */
    CvSepFilter::init_gaussian_kernel( &_G, 2.5 );

    /* Gaussian window (sigma = 3.3) over the descriptor patch,
       normalized to unit sum */
    {
    const double sigma = 3.3;
    double c2 = 1./(sigma*sigma*2), gs = 0;
    for( i = 0; i < PATCH_SZ; i++ )
    {
        for( j = 0; j < PATCH_SZ; j++ )
        {
            double x = j - PATCH_SZ*0.5, y = i - PATCH_SZ*0.5;
            double val = exp(-(x*x+y*y)*c2);
            DW[i][j] = (float)val;
            gs += val;
        }
    }
    cvScale( &_DW, &_DW, 1./gs );
    }

    /* sample offsets inside a circle of radius 4 used for orientation estimation */
    for( i = -4; i <= 4; i++ )
        for( j = -4; j <= 4; j++ )
        {
            if( i*i + j*j <= 16 )
                apt[nangle0++] = cvPoint(j,i);
        }

    {
#ifdef _OPENMP
    int nthreads = cvGetNumThreads();
    #pragma omp parallel for num_threads(nthreads) schedule(dynamic)
#endif
    for( k = 0; k < N; k++ )
    {
        /* all working buffers are local to the iteration, so iterations are
           independent of each other */
        const int* sum_ptr = sum->data.i;
        int sum_cols = sum->cols;
        int i, j, kk, x, y, nangle;
        CvSurfHF dx_t[NX], dy_t[NY];
        float X[81], Y[81], angle[81];
        uchar PATCH[PATCH_SZ+1][PATCH_SZ+1], RS_PATCH[RS_PATCH_SZ][RS_PATCH_SZ];
        float DX[PATCH_SZ][PATCH_SZ], DY[PATCH_SZ][PATCH_SZ];
        CvMat _X = cvMat(1, 81, CV_32F, X);
        CvMat _Y = cvMat(1, 81, CV_32F, Y);
        CvMat _angle = cvMat(1, 81, CV_32F, angle);
        CvMat _patch = cvMat(PATCH_SZ+1, PATCH_SZ+1, CV_8U, PATCH);
        CvMat _rs_patch = cvMat(RS_PATCH_SZ, RS_PATCH_SZ, CV_8U, RS_PATCH);
        CvMat _src, *src = img;

        CvSURFPoint* kp = (CvSURFPoint*)cvGetSeqElem( keypoints, k );
        CvPoint2D32f center = kp->pt;
        int size = kp->size;
        icvResizeHaarPattern( dx_s, dx_t, NX, 9, size, sum->cols );
        icvResizeHaarPattern( dy_s, dy_t, NY, 9, size, sum->cols );
        CvPoint pt = cvPointFrom32f(center);
        float* vec;
        float alpha0, beta0, sz0, scale0;

        /* collect weighted Haar responses around the keypoint */
        for( kk = 0, nangle = 0; kk < nangle0; kk++ )
        {
            const int* ptr;
            float vx, vy, w;
            j = apt[kk].x; i = apt[kk].y;
            int sx = pt.x + (j-2)*size/9;
            int sy = pt.y + (i-2)*size/9;
            /* skip samples whose window would leave the integral image
               (the unsigned casts also reject negative coordinates) */
            if( (unsigned)sy >= (unsigned)sum->rows - size ||
                (unsigned)sx >= (unsigned)sum->cols - size )
                continue;
            ptr = sum_ptr + sx + sy*sum_cols;
            w = G[i+4]*G[j+4];
            vx = icvCalcHaarPattern( ptr, dx_t, NX )*w;
            vy = icvCalcHaarPattern( ptr, dy_t, NY )*w;
            X[nangle] = vx; Y[nangle] = vy;
            nangle++;
        }
        _X.cols = _Y.cols = _angle.cols = nangle;
        /* last argument 1: angles are returned in degrees */
        cvCartToPolar( &_X, &_Y, 0, &_angle, 1 );

        /* dominant orientation: slide a 120-degree window in 5-degree steps
           and keep the window with the longest summed response vector */
        float bestx = 0, besty = 0, descriptor_mod = 0;
        for( i = 0; i < 360; i += 5 )
        {
            float sumx = 0, sumy = 0, temp_mod;
            for( j = 0; j < nangle; j++ )
            {
                int d = abs(cvRound(angle[j]) - i);
                if( d < 60 || d > 300 )
                {
                    sumx += X[j];
                    sumy += Y[j];
                }
            }
            temp_mod = sumx*sumx + sumy*sumy;
            if( temp_mod > descriptor_mod )
            {
                descriptor_mod = temp_mod;
                bestx = sumx;
                besty = sumy;
            }
        }

        float descriptor_dir = cvFastArctan( besty, bestx );
        kp->dir = descriptor_dir;

        if( !_descriptors )
            continue;
        descriptor_dir *= (float)(CV_PI/180);

        alpha0 = (float)cos(descriptor_dir);
        beta0 = (float)sin(descriptor_dir);

        /* side of the (unrotated) sampling window in image pixels and the
           corresponding pixel step of the (PATCH_SZ+1)^2 patch */
        sz0 = (float)((PATCH_SZ+1)*size*1.2/9.);
        scale0 = sz0/(PATCH_SZ+1);

        if( sz0 > (PATCH_SZ+1)*1.5f )
        {
            /* large window: shrink the bounding rectangle of the rotated
               window into RS_PATCH first, then extract the rotated patch
               from the shrunk image */
            float rd = (float)(sz0*sqrt_2*0.5);
            float alpha1 = (alpha0 - beta0)*sqrt_2*0.5f, beta1 = (alpha0 + beta0)*sqrt_2*0.5f;
            /* patch_rect0 temporarily holds {min x, min y, max x, max y} */
            CvRect patch_rect0 = { INT_MAX, INT_MAX, INT_MIN, INT_MIN }, patch_rect, sr_patch_rect;

            for( i = 0; i < 4; i++ )
            {
                float a, b, r = i < 2 ? rd : -rd;
                if( i % 2 == 0 )
                    a = alpha1, b = beta1;
                else
                    a = -beta1, b = alpha1;
                float xf = center.x + r*a;
                float yf = center.y - r*b;
                x = cvFloor(xf); patch_rect0.x = MIN(patch_rect0.x, x);
                y = cvFloor(yf); patch_rect0.y = MIN(patch_rect0.y, y);
                x = cvCeil(xf)+1; patch_rect0.width = MAX(patch_rect0.width, x);
                y = cvCeil(yf)+1; patch_rect0.height = MAX(patch_rect0.height, y);
            }

            /* patch_rect: the part of the bounding rectangle inside the image */
            patch_rect = patch_rect0;
            patch_rect.x = MAX(patch_rect.x, 0);
            patch_rect.y = MAX(patch_rect.y, 0);
            patch_rect.width = MIN(patch_rect.width, img->width) - patch_rect.x;
            patch_rect.height = MIN(patch_rect.height, img->height) - patch_rect.y;
            patch_rect0.width -= patch_rect0.x;
            patch_rect0.height -= patch_rect0.y;

            CvMat _src0;
            float scale = MIN(1.f,MIN((float)RS_PATCH_SZ/patch_rect0.width,
                                      (float)RS_PATCH_SZ/patch_rect0.height));
            cvGetSubArr( img, &_src0, patch_rect );
            sr_patch_rect = cvRect(0,0, RS_PATCH_SZ, RS_PATCH_SZ);
            sr_patch_rect.width = cvRound(patch_rect.width*scale);
            sr_patch_rect.height = cvRound(patch_rect.height*scale);
            src = cvGetSubArr( &_rs_patch, &_src, sr_patch_rect );
            cvResize( &_src0, &_src, CV_INTER_AREA );
            /* keypoint position and sampling step in the shrunk image */
            center.x = RS_PATCH_SZ*0.5f - (patch_rect.x - patch_rect0.x)*scale;
            center.y = RS_PATCH_SZ*0.5f - (patch_rect.y - patch_rect0.y)*scale;
            scale0 *= scale;
        }

        /* extract the rotated and scaled patch around the keypoint */
        {
        float w[] =
        {
            alpha0*scale0, beta0*scale0, center.x,
            -beta0*scale0, alpha0*scale0, center.y
        };
        CvMat W = cvMat(2, 3, CV_32F, w);
        cvGetQuadrangleSubPix( src, &_patch, &W );
        }

        /* weighted 2x2 gradients of the patch */
        for( i = 0; i < PATCH_SZ; i++ )
            for( j = 0; j < PATCH_SZ; j++ )
            {
                float dw = DW[i][j];
                float vx = (PATCH[i][j+1] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i+1][j])*dw;
                float vy = (PATCH[i+1][j] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i][j+1])*dw;
                DX[i][j] = vx;
                DY[i][j] = vy;
            }

        vec = (float*)cvGetSeqElem( descriptors, k );
        for( kk = 0; kk < (int)(descriptors->elem_size/sizeof(vec[0])); kk++ )
            vec[kk] = 0;

        /* the patch is split into 4x4 cells of 5x5 samples; every cell
           contributes 8 (extended) or 4 values, normalized per cell */
        if( params.extended )
        {
            /* 128-bin descriptor */
            for( i = 0; i < 4; i++ )
                for( j = 0; j < 4; j++ )
                {
                    for( y = i*5; y < i*5+5; y++ )
                    {
                        for( x = j*5; x < j*5+5; x++ )
                        {
                            float tx = DX[y][x], ty = DY[y][x];
                            if( ty >= 0 )
                            {
                                vec[0] += tx;
                                vec[1] += (float)fabs(tx);
                            }
                            else
                            {
                                vec[2] += tx;
                                vec[3] += (float)fabs(tx);
                            }
                            if ( tx >= 0 )
                            {
                                vec[4] += ty;
                                vec[5] += (float)fabs(ty);
                            }
                            else
                            {
                                vec[6] += ty;
                                vec[7] += (float)fabs(ty);
                            }
                        }
                    }
                    /* unit vector is essential for contrast invariance */
                    double normalize = 0;
                    for( kk = 0; kk < 8; kk++ )
                        normalize += vec[kk]*vec[kk];
                    normalize = 1./(sqrt(normalize) + DBL_EPSILON);
                    for( kk = 0; kk < 8; kk++ )
                        vec[kk] = (float)(vec[kk]*normalize);
                    vec += 8;
                }
        }
        else
        {
            /* 64-bin descriptor */
            for( i = 0; i < 4; i++ )
                for( j = 0; j < 4; j++ )
                {
                    for( y = i*5; y < i*5+5; y++ )
                    {
                        for( x = j*5; x < j*5+5; x++ )
                        {
                            float tx = DX[y][x], ty = DY[y][x];
                            vec[0] += tx;
                            vec[1] += ty;
                            vec[2] += (float)fabs(tx);
                            vec[3] += (float)fabs(ty);
                        }
                    }
                    double normalize = 0;
                    for( kk = 0; kk < 4; kk++ )
                        normalize += vec[kk]*vec[kk];
                    normalize = 1./(sqrt(normalize) + DBL_EPSILON);
                    for( kk = 0; kk < 4; kk++ )
                        vec[kk] = (float)(vec[kk]*normalize);
                    vec+=4;
                }
        }
    }
    }

    if( _keypoints )
        *_keypoints = keypoints;
    if( _descriptors )
        *_descriptors = descriptors;

    __END__;

    cvReleaseMat( &sum );
    cvReleaseMat( &mask1 );
    cvReleaseMat( &mask_sum );
}