1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4 //
5 // Copyright (c) 2006-2010, Rob Hess <hess@eecs.oregonstate.edu>
6 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
7 // Copyright (C) 2020, Intel Corporation, all rights reserved.
8
9 /**********************************************************************************************\
10 Implementation of SIFT is based on the code from http://blogs.oregonstate.edu/hess/code/sift/
11 Below is the original copyright.
12 Patent US6711293 expired in March 2020.
13
14 // Copyright (c) 2006-2010, Rob Hess <hess@eecs.oregonstate.edu>
15 // All rights reserved.
16
17 // The following patent has been issued for methods embodied in this
18 // software: "Method and apparatus for identifying scale invariant features
19 // in an image and use of same for locating an object in an image," David
20 // G. Lowe, US Patent 6,711,293 (March 23, 2004). Provisional application
21 // filed March 8, 1999. Asignee: The University of British Columbia. For
22 // further details, contact David Lowe (lowe@cs.ubc.ca) or the
23 // University-Industry Liaison Office of the University of British
24 // Columbia.
25
26 // Note that restrictions imposed by this patent (and possibly others)
27 // exist independently of and may be in conflict with the freedoms granted
28 // in this license, which refers to copyright of the program, not patents
29 // for any methods that it implements. Both copyright and patent law must
30 // be obeyed to legally use and redistribute this program and it is not the
31 // purpose of this license to induce you to infringe any patents or other
32 // property right claims or to contest validity of any such claims. If you
33 // redistribute or use the program, then this license merely protects you
34 // from committing copyright infringement. It does not protect you from
35 // committing patent infringement. So, before you do anything with this
36 // program, make sure that you have permission to do so not merely in terms
37 // of copyright, but also in terms of patent law.
38
39 // Please note that this license is not to be understood as a guarantee
40 // either. If you use the program according to this license, but in
41 // conflict with patent law, it does not mean that the licensor will refund
42 // you for any losses that you incur if you are sued for your patent
43 // infringement.
44
45 // Redistribution and use in source and binary forms, with or without
46 // modification, are permitted provided that the following conditions are
47 // met:
48 // * Redistributions of source code must retain the above copyright and
49 // patent notices, this list of conditions and the following
50 // disclaimer.
51 // * Redistributions in binary form must reproduce the above copyright
52 // notice, this list of conditions and the following disclaimer in
53 // the documentation and/or other materials provided with the
54 // distribution.
55 // * Neither the name of Oregon State University nor the names of its
56 // contributors may be used to endorse or promote products derived
57 // from this software without specific prior written permission.
58
59 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
60 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
62 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
63 // HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
64 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
65 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
66 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
67 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
68 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
69 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
70 \**********************************************************************************************/
71
72 #include "precomp.hpp"
73
74 #include <opencv2/core/hal/hal.hpp>
75 #include "opencv2/core/hal/intrin.hpp"
76 #include <opencv2/core/utils/buffer_area.private.hpp>
77
78 namespace cv {
79
80 #if !defined(CV_CPU_DISPATCH_MODE) || !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY)
81 /******************************* Defs and macros *****************************/
82
83 // default width of descriptor histogram array
84 static const int SIFT_DESCR_WIDTH = 4;
85
86 // default number of bins per histogram in descriptor array
87 static const int SIFT_DESCR_HIST_BINS = 8;
88
89 // assumed gaussian blur for input image
90 static const float SIFT_INIT_SIGMA = 0.5f;
91
92 // width of border in which to ignore keypoints
93 static const int SIFT_IMG_BORDER = 5;
94
95 // maximum steps of keypoint interpolation before failure
96 static const int SIFT_MAX_INTERP_STEPS = 5;
97
98 // default number of bins in histogram for orientation assignment
99 static const int SIFT_ORI_HIST_BINS = 36;
100
101 // determines gaussian sigma for orientation assignment
102 static const float SIFT_ORI_SIG_FCTR = 1.5f;
103
104 // determines the radius of the region used in orientation assignment
105 static const float SIFT_ORI_RADIUS = 4.5f; // 3 * SIFT_ORI_SIG_FCTR;
106
107 // orientation magnitude relative to max that results in new feature
108 static const float SIFT_ORI_PEAK_RATIO = 0.8f;
109
110 // determines the size of a single descriptor orientation histogram
111 static const float SIFT_DESCR_SCL_FCTR = 3.f;
112
113 // threshold on magnitude of elements of descriptor vector
114 static const float SIFT_DESCR_MAG_THR = 0.2f;
115
116 // factor used to convert floating-point descriptor to unsigned char
117 static const float SIFT_INT_DESCR_FCTR = 512.f;
118
119 #define DoG_TYPE_SHORT 0
120 #if DoG_TYPE_SHORT
121 // intermediate type used for DoG pyramids
122 typedef short sift_wt;
123 static const int SIFT_FIXPT_SCALE = 48;
124 #else
125 // intermediate type used for DoG pyramids
126 typedef float sift_wt;
127 static const int SIFT_FIXPT_SCALE = 1;
128 #endif
129
130 #endif // definitions and macros
131
132
133 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
134
135 void findScaleSpaceExtrema(
136 int octave,
137 int layer,
138 int threshold,
139 int idx,
140 int step,
141 int cols,
142 int nOctaveLayers,
143 double contrastThreshold,
144 double edgeThreshold,
145 double sigma,
146 const std::vector<Mat>& gauss_pyr,
147 const std::vector<Mat>& dog_pyr,
148 std::vector<KeyPoint>& kpts,
149 const cv::Range& range);
150
151 void calcSIFTDescriptor(
152 const Mat& img, Point2f ptf, float ori, float scl,
153 int d, int n, Mat& dst, int row
154 );
155
156
157 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
158
159 // Computes a gradient orientation histogram at a specified pixel
160 static
calcOrientationHist(const Mat & img,Point pt,int radius,float sigma,float * hist,int n)161 float calcOrientationHist(
162 const Mat& img, Point pt, int radius,
163 float sigma, float* hist, int n
164 )
165 {
166 CV_TRACE_FUNCTION();
167
168 int i, j, k, len = (radius*2+1)*(radius*2+1);
169
170 float expf_scale = -1.f/(2.f * sigma * sigma);
171
172 cv::utils::BufferArea area;
173 float *X = 0, *Y = 0, *Mag, *Ori = 0, *W = 0, *temphist = 0;
174 area.allocate(X, len, CV_SIMD_WIDTH);
175 area.allocate(Y, len, CV_SIMD_WIDTH);
176 area.allocate(Ori, len, CV_SIMD_WIDTH);
177 area.allocate(W, len, CV_SIMD_WIDTH);
178 area.allocate(temphist, n+4, CV_SIMD_WIDTH);
179 area.commit();
180 temphist += 2;
181 Mag = X;
182
183 for( i = 0; i < n; i++ )
184 temphist[i] = 0.f;
185
186 for( i = -radius, k = 0; i <= radius; i++ )
187 {
188 int y = pt.y + i;
189 if( y <= 0 || y >= img.rows - 1 )
190 continue;
191 for( j = -radius; j <= radius; j++ )
192 {
193 int x = pt.x + j;
194 if( x <= 0 || x >= img.cols - 1 )
195 continue;
196
197 float dx = (float)(img.at<sift_wt>(y, x+1) - img.at<sift_wt>(y, x-1));
198 float dy = (float)(img.at<sift_wt>(y-1, x) - img.at<sift_wt>(y+1, x));
199
200 X[k] = dx; Y[k] = dy; W[k] = (i*i + j*j)*expf_scale;
201 k++;
202 }
203 }
204
205 len = k;
206
207 // compute gradient values, orientations and the weights over the pixel neighborhood
208 cv::hal::exp32f(W, W, len);
209 cv::hal::fastAtan2(Y, X, Ori, len, true);
210 cv::hal::magnitude32f(X, Y, Mag, len);
211
212 k = 0;
213 #if CV_SIMD
214 const int vecsize = v_float32::nlanes;
215 v_float32 nd360 = vx_setall_f32(n/360.f);
216 v_int32 __n = vx_setall_s32(n);
217 int CV_DECL_ALIGNED(CV_SIMD_WIDTH) bin_buf[vecsize];
218 float CV_DECL_ALIGNED(CV_SIMD_WIDTH) w_mul_mag_buf[vecsize];
219
220 for( ; k <= len - vecsize; k += vecsize )
221 {
222 v_float32 w = vx_load_aligned( W + k );
223 v_float32 mag = vx_load_aligned( Mag + k );
224 v_float32 ori = vx_load_aligned( Ori + k );
225 v_int32 bin = v_round( nd360 * ori );
226
227 bin = v_select(bin >= __n, bin - __n, bin);
228 bin = v_select(bin < vx_setzero_s32(), bin + __n, bin);
229
230 w = w * mag;
231 v_store_aligned(bin_buf, bin);
232 v_store_aligned(w_mul_mag_buf, w);
233 for(int vi = 0; vi < vecsize; vi++)
234 {
235 temphist[bin_buf[vi]] += w_mul_mag_buf[vi];
236 }
237 }
238 #endif
239 for( ; k < len; k++ )
240 {
241 int bin = cvRound((n/360.f)*Ori[k]);
242 if( bin >= n )
243 bin -= n;
244 if( bin < 0 )
245 bin += n;
246 temphist[bin] += W[k]*Mag[k];
247 }
248
249 // smooth the histogram
250 temphist[-1] = temphist[n-1];
251 temphist[-2] = temphist[n-2];
252 temphist[n] = temphist[0];
253 temphist[n+1] = temphist[1];
254
255 i = 0;
256 #if CV_SIMD
257 v_float32 d_1_16 = vx_setall_f32(1.f/16.f);
258 v_float32 d_4_16 = vx_setall_f32(4.f/16.f);
259 v_float32 d_6_16 = vx_setall_f32(6.f/16.f);
260 for( ; i <= n - v_float32::nlanes; i += v_float32::nlanes )
261 {
262 v_float32 tn2 = vx_load_aligned(temphist + i-2);
263 v_float32 tn1 = vx_load(temphist + i-1);
264 v_float32 t0 = vx_load(temphist + i);
265 v_float32 t1 = vx_load(temphist + i+1);
266 v_float32 t2 = vx_load(temphist + i+2);
267 v_float32 _hist = v_fma(tn2 + t2, d_1_16,
268 v_fma(tn1 + t1, d_4_16, t0 * d_6_16));
269 v_store(hist + i, _hist);
270 }
271 #endif
272 for( ; i < n; i++ )
273 {
274 hist[i] = (temphist[i-2] + temphist[i+2])*(1.f/16.f) +
275 (temphist[i-1] + temphist[i+1])*(4.f/16.f) +
276 temphist[i]*(6.f/16.f);
277 }
278
279 float maxval = hist[0];
280 for( i = 1; i < n; i++ )
281 maxval = std::max(maxval, hist[i]);
282
283 return maxval;
284 }
285
286
287 //
288 // Interpolates a scale-space extremum's location and scale to subpixel
289 // accuracy to form an image feature. Rejects features with low contrast.
290 // Based on Section 4 of Lowe's paper.
291 static
adjustLocalExtrema(const std::vector<Mat> & dog_pyr,KeyPoint & kpt,int octv,int & layer,int & r,int & c,int nOctaveLayers,float contrastThreshold,float edgeThreshold,float sigma)292 bool adjustLocalExtrema(
293 const std::vector<Mat>& dog_pyr, KeyPoint& kpt, int octv,
294 int& layer, int& r, int& c, int nOctaveLayers,
295 float contrastThreshold, float edgeThreshold, float sigma
296 )
297 {
298 CV_TRACE_FUNCTION();
299
300 const float img_scale = 1.f/(255*SIFT_FIXPT_SCALE);
301 const float deriv_scale = img_scale*0.5f;
302 const float second_deriv_scale = img_scale;
303 const float cross_deriv_scale = img_scale*0.25f;
304
305 float xi=0, xr=0, xc=0, contr=0;
306 int i = 0;
307
308 for( ; i < SIFT_MAX_INTERP_STEPS; i++ )
309 {
310 int idx = octv*(nOctaveLayers+2) + layer;
311 const Mat& img = dog_pyr[idx];
312 const Mat& prev = dog_pyr[idx-1];
313 const Mat& next = dog_pyr[idx+1];
314
315 Vec3f dD((img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1))*deriv_scale,
316 (img.at<sift_wt>(r+1, c) - img.at<sift_wt>(r-1, c))*deriv_scale,
317 (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c))*deriv_scale);
318
319 float v2 = (float)img.at<sift_wt>(r, c)*2;
320 float dxx = (img.at<sift_wt>(r, c+1) + img.at<sift_wt>(r, c-1) - v2)*second_deriv_scale;
321 float dyy = (img.at<sift_wt>(r+1, c) + img.at<sift_wt>(r-1, c) - v2)*second_deriv_scale;
322 float dss = (next.at<sift_wt>(r, c) + prev.at<sift_wt>(r, c) - v2)*second_deriv_scale;
323 float dxy = (img.at<sift_wt>(r+1, c+1) - img.at<sift_wt>(r+1, c-1) -
324 img.at<sift_wt>(r-1, c+1) + img.at<sift_wt>(r-1, c-1))*cross_deriv_scale;
325 float dxs = (next.at<sift_wt>(r, c+1) - next.at<sift_wt>(r, c-1) -
326 prev.at<sift_wt>(r, c+1) + prev.at<sift_wt>(r, c-1))*cross_deriv_scale;
327 float dys = (next.at<sift_wt>(r+1, c) - next.at<sift_wt>(r-1, c) -
328 prev.at<sift_wt>(r+1, c) + prev.at<sift_wt>(r-1, c))*cross_deriv_scale;
329
330 Matx33f H(dxx, dxy, dxs,
331 dxy, dyy, dys,
332 dxs, dys, dss);
333
334 Vec3f X = H.solve(dD, DECOMP_LU);
335
336 xi = -X[2];
337 xr = -X[1];
338 xc = -X[0];
339
340 if( std::abs(xi) < 0.5f && std::abs(xr) < 0.5f && std::abs(xc) < 0.5f )
341 break;
342
343 if( std::abs(xi) > (float)(INT_MAX/3) ||
344 std::abs(xr) > (float)(INT_MAX/3) ||
345 std::abs(xc) > (float)(INT_MAX/3) )
346 return false;
347
348 c += cvRound(xc);
349 r += cvRound(xr);
350 layer += cvRound(xi);
351
352 if( layer < 1 || layer > nOctaveLayers ||
353 c < SIFT_IMG_BORDER || c >= img.cols - SIFT_IMG_BORDER ||
354 r < SIFT_IMG_BORDER || r >= img.rows - SIFT_IMG_BORDER )
355 return false;
356 }
357
358 // ensure convergence of interpolation
359 if( i >= SIFT_MAX_INTERP_STEPS )
360 return false;
361
362 {
363 int idx = octv*(nOctaveLayers+2) + layer;
364 const Mat& img = dog_pyr[idx];
365 const Mat& prev = dog_pyr[idx-1];
366 const Mat& next = dog_pyr[idx+1];
367 Matx31f dD((img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1))*deriv_scale,
368 (img.at<sift_wt>(r+1, c) - img.at<sift_wt>(r-1, c))*deriv_scale,
369 (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c))*deriv_scale);
370 float t = dD.dot(Matx31f(xc, xr, xi));
371
372 contr = img.at<sift_wt>(r, c)*img_scale + t * 0.5f;
373 if( std::abs( contr ) * nOctaveLayers < contrastThreshold )
374 return false;
375
376 // principal curvatures are computed using the trace and det of Hessian
377 float v2 = img.at<sift_wt>(r, c)*2.f;
378 float dxx = (img.at<sift_wt>(r, c+1) + img.at<sift_wt>(r, c-1) - v2)*second_deriv_scale;
379 float dyy = (img.at<sift_wt>(r+1, c) + img.at<sift_wt>(r-1, c) - v2)*second_deriv_scale;
380 float dxy = (img.at<sift_wt>(r+1, c+1) - img.at<sift_wt>(r+1, c-1) -
381 img.at<sift_wt>(r-1, c+1) + img.at<sift_wt>(r-1, c-1)) * cross_deriv_scale;
382 float tr = dxx + dyy;
383 float det = dxx * dyy - dxy * dxy;
384
385 if( det <= 0 || tr*tr*edgeThreshold >= (edgeThreshold + 1)*(edgeThreshold + 1)*det )
386 return false;
387 }
388
389 kpt.pt.x = (c + xc) * (1 << octv);
390 kpt.pt.y = (r + xr) * (1 << octv);
391 kpt.octave = octv + (layer << 8) + (cvRound((xi + 0.5)*255) << 16);
392 kpt.size = sigma*powf(2.f, (layer + xi) / nOctaveLayers)*(1 << octv)*2;
393 kpt.response = std::abs(contr);
394
395 return true;
396 }
397
398 namespace {
399
400 class findScaleSpaceExtremaT
401 {
402 public:
findScaleSpaceExtremaT(int _o,int _i,int _threshold,int _idx,int _step,int _cols,int _nOctaveLayers,double _contrastThreshold,double _edgeThreshold,double _sigma,const std::vector<Mat> & _gauss_pyr,const std::vector<Mat> & _dog_pyr,std::vector<KeyPoint> & kpts)403 findScaleSpaceExtremaT(
404 int _o,
405 int _i,
406 int _threshold,
407 int _idx,
408 int _step,
409 int _cols,
410 int _nOctaveLayers,
411 double _contrastThreshold,
412 double _edgeThreshold,
413 double _sigma,
414 const std::vector<Mat>& _gauss_pyr,
415 const std::vector<Mat>& _dog_pyr,
416 std::vector<KeyPoint>& kpts)
417
418 : o(_o),
419 i(_i),
420 threshold(_threshold),
421 idx(_idx),
422 step(_step),
423 cols(_cols),
424 nOctaveLayers(_nOctaveLayers),
425 contrastThreshold(_contrastThreshold),
426 edgeThreshold(_edgeThreshold),
427 sigma(_sigma),
428 gauss_pyr(_gauss_pyr),
429 dog_pyr(_dog_pyr),
430 kpts_(kpts)
431 {
432 // nothing
433 }
process(const cv::Range & range)434 void process(const cv::Range& range)
435 {
436 CV_TRACE_FUNCTION();
437
438 const int begin = range.start;
439 const int end = range.end;
440
441 static const int n = SIFT_ORI_HIST_BINS;
442 float CV_DECL_ALIGNED(CV_SIMD_WIDTH) hist[n];
443
444 const Mat& img = dog_pyr[idx];
445 const Mat& prev = dog_pyr[idx-1];
446 const Mat& next = dog_pyr[idx+1];
447
448 for( int r = begin; r < end; r++)
449 {
450 const sift_wt* currptr = img.ptr<sift_wt>(r);
451 const sift_wt* prevptr = prev.ptr<sift_wt>(r);
452 const sift_wt* nextptr = next.ptr<sift_wt>(r);
453 int c = SIFT_IMG_BORDER;
454
455 #if CV_SIMD && !(DoG_TYPE_SHORT)
456 const int vecsize = v_float32::nlanes;
457 for( ; c <= cols-SIFT_IMG_BORDER - vecsize; c += vecsize)
458 {
459 v_float32 val = vx_load(&currptr[c]);
460 v_float32 _00,_01,_02;
461 v_float32 _10, _12;
462 v_float32 _20,_21,_22;
463
464 v_float32 vmin,vmax;
465
466
467 v_float32 cond = v_abs(val) > vx_setall_f32((float)threshold);
468 if (!v_check_any(cond))
469 {
470 continue;
471 }
472
473 _00 = vx_load(&currptr[c-step-1]); _01 = vx_load(&currptr[c-step]); _02 = vx_load(&currptr[c-step+1]);
474 _10 = vx_load(&currptr[c -1]); _12 = vx_load(&currptr[c +1]);
475 _20 = vx_load(&currptr[c+step-1]); _21 = vx_load(&currptr[c+step]); _22 = vx_load(&currptr[c+step+1]);
476
477 vmax = v_max(v_max(v_max(_00,_01),v_max(_02,_10)),v_max(v_max(_12,_20),v_max(_21,_22)));
478 vmin = v_min(v_min(v_min(_00,_01),v_min(_02,_10)),v_min(v_min(_12,_20),v_min(_21,_22)));
479
480 v_float32 condp = cond & (val > vx_setall_f32(0)) & (val >= vmax);
481 v_float32 condm = cond & (val < vx_setall_f32(0)) & (val <= vmin);
482
483 cond = condp | condm;
484 if (!v_check_any(cond))
485 {
486 continue;
487 }
488
489 _00 = vx_load(&prevptr[c-step-1]); _01 = vx_load(&prevptr[c-step]); _02 = vx_load(&prevptr[c-step+1]);
490 _10 = vx_load(&prevptr[c -1]); _12 = vx_load(&prevptr[c +1]);
491 _20 = vx_load(&prevptr[c+step-1]); _21 = vx_load(&prevptr[c+step]); _22 = vx_load(&prevptr[c+step+1]);
492
493 vmax = v_max(v_max(v_max(_00,_01),v_max(_02,_10)),v_max(v_max(_12,_20),v_max(_21,_22)));
494 vmin = v_min(v_min(v_min(_00,_01),v_min(_02,_10)),v_min(v_min(_12,_20),v_min(_21,_22)));
495
496 condp &= (val >= vmax);
497 condm &= (val <= vmin);
498
499 cond = condp | condm;
500 if (!v_check_any(cond))
501 {
502 continue;
503 }
504
505 v_float32 _11p = vx_load(&prevptr[c]);
506 v_float32 _11n = vx_load(&nextptr[c]);
507
508 v_float32 max_middle = v_max(_11n,_11p);
509 v_float32 min_middle = v_min(_11n,_11p);
510
511 _00 = vx_load(&nextptr[c-step-1]); _01 = vx_load(&nextptr[c-step]); _02 = vx_load(&nextptr[c-step+1]);
512 _10 = vx_load(&nextptr[c -1]); _12 = vx_load(&nextptr[c +1]);
513 _20 = vx_load(&nextptr[c+step-1]); _21 = vx_load(&nextptr[c+step]); _22 = vx_load(&nextptr[c+step+1]);
514
515 vmax = v_max(v_max(v_max(_00,_01),v_max(_02,_10)),v_max(v_max(_12,_20),v_max(_21,_22)));
516 vmin = v_min(v_min(v_min(_00,_01),v_min(_02,_10)),v_min(v_min(_12,_20),v_min(_21,_22)));
517
518 condp &= (val >= v_max(vmax,max_middle));
519 condm &= (val <= v_min(vmin,min_middle));
520
521 cond = condp | condm;
522 if (!v_check_any(cond))
523 {
524 continue;
525 }
526
527 int mask = v_signmask(cond);
528 for (int k = 0; k<vecsize;k++)
529 {
530 if ((mask & (1<<k)) == 0)
531 continue;
532
533 CV_TRACE_REGION("pixel_candidate_simd");
534
535 KeyPoint kpt;
536 int r1 = r, c1 = c+k, layer = i;
537 if( !adjustLocalExtrema(dog_pyr, kpt, o, layer, r1, c1,
538 nOctaveLayers, (float)contrastThreshold,
539 (float)edgeThreshold, (float)sigma) )
540 continue;
541 float scl_octv = kpt.size*0.5f/(1 << o);
542 float omax = calcOrientationHist(gauss_pyr[o*(nOctaveLayers+3) + layer],
543 Point(c1, r1),
544 cvRound(SIFT_ORI_RADIUS * scl_octv),
545 SIFT_ORI_SIG_FCTR * scl_octv,
546 hist, n);
547 float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO);
548 for( int j = 0; j < n; j++ )
549 {
550 int l = j > 0 ? j - 1 : n - 1;
551 int r2 = j < n-1 ? j + 1 : 0;
552
553 if( hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr )
554 {
555 float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]);
556 bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin;
557 kpt.angle = 360.f - (float)((360.f/n) * bin);
558 if(std::abs(kpt.angle - 360.f) < FLT_EPSILON)
559 kpt.angle = 0.f;
560
561 kpts_.push_back(kpt);
562 }
563 }
564 }
565 }
566
567 #endif //CV_SIMD && !(DoG_TYPE_SHORT)
568
569 // vector loop reminder, better predictibility and less branch density
570 for( ; c < cols-SIFT_IMG_BORDER; c++)
571 {
572 sift_wt val = currptr[c];
573 if (std::abs(val) <= threshold)
574 continue;
575
576 sift_wt _00,_01,_02;
577 sift_wt _10, _12;
578 sift_wt _20,_21,_22;
579 _00 = currptr[c-step-1]; _01 = currptr[c-step]; _02 = currptr[c-step+1];
580 _10 = currptr[c -1]; _12 = currptr[c +1];
581 _20 = currptr[c+step-1]; _21 = currptr[c+step]; _22 = currptr[c+step+1];
582
583 bool calculate = false;
584 if (val > 0)
585 {
586 sift_wt vmax = std::max(std::max(std::max(_00,_01),std::max(_02,_10)),std::max(std::max(_12,_20),std::max(_21,_22)));
587 if (val >= vmax)
588 {
589 _00 = prevptr[c-step-1]; _01 = prevptr[c-step]; _02 = prevptr[c-step+1];
590 _10 = prevptr[c -1]; _12 = prevptr[c +1];
591 _20 = prevptr[c+step-1]; _21 = prevptr[c+step]; _22 = prevptr[c+step+1];
592 vmax = std::max(std::max(std::max(_00,_01),std::max(_02,_10)),std::max(std::max(_12,_20),std::max(_21,_22)));
593 if (val >= vmax)
594 {
595 _00 = nextptr[c-step-1]; _01 = nextptr[c-step]; _02 = nextptr[c-step+1];
596 _10 = nextptr[c -1]; _12 = nextptr[c +1];
597 _20 = nextptr[c+step-1]; _21 = nextptr[c+step]; _22 = nextptr[c+step+1];
598 vmax = std::max(std::max(std::max(_00,_01),std::max(_02,_10)),std::max(std::max(_12,_20),std::max(_21,_22)));
599 if (val >= vmax)
600 {
601 sift_wt _11p = prevptr[c], _11n = nextptr[c];
602 calculate = (val >= std::max(_11p,_11n));
603 }
604 }
605 }
606
607 } else { // val cant be zero here (first abs took care of zero), must be negative
608 sift_wt vmin = std::min(std::min(std::min(_00,_01),std::min(_02,_10)),std::min(std::min(_12,_20),std::min(_21,_22)));
609 if (val <= vmin)
610 {
611 _00 = prevptr[c-step-1]; _01 = prevptr[c-step]; _02 = prevptr[c-step+1];
612 _10 = prevptr[c -1]; _12 = prevptr[c +1];
613 _20 = prevptr[c+step-1]; _21 = prevptr[c+step]; _22 = prevptr[c+step+1];
614 vmin = std::min(std::min(std::min(_00,_01),std::min(_02,_10)),std::min(std::min(_12,_20),std::min(_21,_22)));
615 if (val <= vmin)
616 {
617 _00 = nextptr[c-step-1]; _01 = nextptr[c-step]; _02 = nextptr[c-step+1];
618 _10 = nextptr[c -1]; _12 = nextptr[c +1];
619 _20 = nextptr[c+step-1]; _21 = nextptr[c+step]; _22 = nextptr[c+step+1];
620 vmin = std::min(std::min(std::min(_00,_01),std::min(_02,_10)),std::min(std::min(_12,_20),std::min(_21,_22)));
621 if (val <= vmin)
622 {
623 sift_wt _11p = prevptr[c], _11n = nextptr[c];
624 calculate = (val <= std::min(_11p,_11n));
625 }
626 }
627 }
628 }
629
630 if (calculate)
631 {
632 CV_TRACE_REGION("pixel_candidate");
633
634 KeyPoint kpt;
635 int r1 = r, c1 = c, layer = i;
636 if( !adjustLocalExtrema(dog_pyr, kpt, o, layer, r1, c1,
637 nOctaveLayers, (float)contrastThreshold,
638 (float)edgeThreshold, (float)sigma) )
639 continue;
640 float scl_octv = kpt.size*0.5f/(1 << o);
641 float omax = calcOrientationHist(gauss_pyr[o*(nOctaveLayers+3) + layer],
642 Point(c1, r1),
643 cvRound(SIFT_ORI_RADIUS * scl_octv),
644 SIFT_ORI_SIG_FCTR * scl_octv,
645 hist, n);
646 float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO);
647 for( int j = 0; j < n; j++ )
648 {
649 int l = j > 0 ? j - 1 : n - 1;
650 int r2 = j < n-1 ? j + 1 : 0;
651
652 if( hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr )
653 {
654 float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]);
655 bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin;
656 kpt.angle = 360.f - (float)((360.f/n) * bin);
657 if(std::abs(kpt.angle - 360.f) < FLT_EPSILON)
658 kpt.angle = 0.f;
659
660 kpts_.push_back(kpt);
661 }
662 }
663 }
664 }
665 }
666 }
667 private:
668 int o, i;
669 int threshold;
670 int idx, step, cols;
671 int nOctaveLayers;
672 double contrastThreshold;
673 double edgeThreshold;
674 double sigma;
675 const std::vector<Mat>& gauss_pyr;
676 const std::vector<Mat>& dog_pyr;
677 std::vector<KeyPoint>& kpts_;
678 };
679
680 } // namespace
681
682
findScaleSpaceExtrema(int octave,int layer,int threshold,int idx,int step,int cols,int nOctaveLayers,double contrastThreshold,double edgeThreshold,double sigma,const std::vector<Mat> & gauss_pyr,const std::vector<Mat> & dog_pyr,std::vector<KeyPoint> & kpts,const cv::Range & range)683 void findScaleSpaceExtrema(
684 int octave,
685 int layer,
686 int threshold,
687 int idx,
688 int step,
689 int cols,
690 int nOctaveLayers,
691 double contrastThreshold,
692 double edgeThreshold,
693 double sigma,
694 const std::vector<Mat>& gauss_pyr,
695 const std::vector<Mat>& dog_pyr,
696 std::vector<KeyPoint>& kpts,
697 const cv::Range& range)
698 {
699 CV_TRACE_FUNCTION();
700
701 findScaleSpaceExtremaT(octave, layer, threshold, idx,
702 step, cols,
703 nOctaveLayers, contrastThreshold, edgeThreshold, sigma,
704 gauss_pyr, dog_pyr,
705 kpts)
706 .process(range);
707 }
708
calcSIFTDescriptor(const Mat & img,Point2f ptf,float ori,float scl,int d,int n,Mat & dstMat,int row)709 void calcSIFTDescriptor(
710 const Mat& img, Point2f ptf, float ori, float scl,
711 int d, int n, Mat& dstMat, int row
712 )
713 {
714 CV_TRACE_FUNCTION();
715
716 Point pt(cvRound(ptf.x), cvRound(ptf.y));
717 float cos_t = cosf(ori*(float)(CV_PI/180));
718 float sin_t = sinf(ori*(float)(CV_PI/180));
719 float bins_per_rad = n / 360.f;
720 float exp_scale = -1.f/(d * d * 0.5f);
721 float hist_width = SIFT_DESCR_SCL_FCTR * scl;
722 int radius = cvRound(hist_width * 1.4142135623730951f * (d + 1) * 0.5f);
723 // Clip the radius to the diagonal of the image to avoid autobuffer too large exception
724 radius = std::min(radius, (int)std::sqrt(((double) img.cols)*img.cols + ((double) img.rows)*img.rows));
725 cos_t /= hist_width;
726 sin_t /= hist_width;
727
728 int i, j, k, len = (radius*2+1)*(radius*2+1), histlen = (d+2)*(d+2)*(n+2);
729 int rows = img.rows, cols = img.cols;
730
731 cv::utils::BufferArea area;
732 float *X = 0, *Y = 0, *Mag, *Ori = 0, *W = 0, *RBin = 0, *CBin = 0, *hist = 0, *rawDst = 0;
733 area.allocate(X, len, CV_SIMD_WIDTH);
734 area.allocate(Y, len, CV_SIMD_WIDTH);
735 area.allocate(Ori, len, CV_SIMD_WIDTH);
736 area.allocate(W, len, CV_SIMD_WIDTH);
737 area.allocate(RBin, len, CV_SIMD_WIDTH);
738 area.allocate(CBin, len, CV_SIMD_WIDTH);
739 area.allocate(hist, histlen, CV_SIMD_WIDTH);
740 area.allocate(rawDst, len, CV_SIMD_WIDTH);
741 area.commit();
742 Mag = Y;
743
744 for( i = 0; i < d+2; i++ )
745 {
746 for( j = 0; j < d+2; j++ )
747 for( k = 0; k < n+2; k++ )
748 hist[(i*(d+2) + j)*(n+2) + k] = 0.;
749 }
750
751 for( i = -radius, k = 0; i <= radius; i++ )
752 for( j = -radius; j <= radius; j++ )
753 {
754 // Calculate sample's histogram array coords rotated relative to ori.
755 // Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e.
756 // r_rot = 1.5) have full weight placed in row 1 after interpolation.
757 float c_rot = j * cos_t - i * sin_t;
758 float r_rot = j * sin_t + i * cos_t;
759 float rbin = r_rot + d/2 - 0.5f;
760 float cbin = c_rot + d/2 - 0.5f;
761 int r = pt.y + i, c = pt.x + j;
762
763 if( rbin > -1 && rbin < d && cbin > -1 && cbin < d &&
764 r > 0 && r < rows - 1 && c > 0 && c < cols - 1 )
765 {
766 float dx = (float)(img.at<sift_wt>(r, c+1) - img.at<sift_wt>(r, c-1));
767 float dy = (float)(img.at<sift_wt>(r-1, c) - img.at<sift_wt>(r+1, c));
768 X[k] = dx; Y[k] = dy; RBin[k] = rbin; CBin[k] = cbin;
769 W[k] = (c_rot * c_rot + r_rot * r_rot)*exp_scale;
770 k++;
771 }
772 }
773
774 len = k;
775 cv::hal::fastAtan2(Y, X, Ori, len, true);
776 cv::hal::magnitude32f(X, Y, Mag, len);
777 cv::hal::exp32f(W, W, len);
778
779 k = 0;
780 #if CV_SIMD
781 {
782 const int vecsize = v_float32::nlanes;
783 int CV_DECL_ALIGNED(CV_SIMD_WIDTH) idx_buf[vecsize];
784 float CV_DECL_ALIGNED(CV_SIMD_WIDTH) rco_buf[8*vecsize];
785 const v_float32 __ori = vx_setall_f32(ori);
786 const v_float32 __bins_per_rad = vx_setall_f32(bins_per_rad);
787 const v_int32 __n = vx_setall_s32(n);
788 const v_int32 __1 = vx_setall_s32(1);
789 const v_int32 __d_plus_2 = vx_setall_s32(d+2);
790 const v_int32 __n_plus_2 = vx_setall_s32(n+2);
791 for( ; k <= len - vecsize; k += vecsize )
792 {
793 v_float32 rbin = vx_load_aligned(RBin + k);
794 v_float32 cbin = vx_load_aligned(CBin + k);
795 v_float32 obin = (vx_load_aligned(Ori + k) - __ori) * __bins_per_rad;
796 v_float32 mag = vx_load_aligned(Mag + k) * vx_load_aligned(W + k);
797
798 v_int32 r0 = v_floor(rbin);
799 v_int32 c0 = v_floor(cbin);
800 v_int32 o0 = v_floor(obin);
801 rbin -= v_cvt_f32(r0);
802 cbin -= v_cvt_f32(c0);
803 obin -= v_cvt_f32(o0);
804
805 o0 = v_select(o0 < vx_setzero_s32(), o0 + __n, o0);
806 o0 = v_select(o0 >= __n, o0 - __n, o0);
807
808 v_float32 v_r1 = mag*rbin, v_r0 = mag - v_r1;
809 v_float32 v_rc11 = v_r1*cbin, v_rc10 = v_r1 - v_rc11;
810 v_float32 v_rc01 = v_r0*cbin, v_rc00 = v_r0 - v_rc01;
811 v_float32 v_rco111 = v_rc11*obin, v_rco110 = v_rc11 - v_rco111;
812 v_float32 v_rco101 = v_rc10*obin, v_rco100 = v_rc10 - v_rco101;
813 v_float32 v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011;
814 v_float32 v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001;
815
816 v_int32 idx = v_muladd(v_muladd(r0+__1, __d_plus_2, c0+__1), __n_plus_2, o0);
817 v_store_aligned(idx_buf, idx);
818
819 v_store_aligned(rco_buf, v_rco000);
820 v_store_aligned(rco_buf+vecsize, v_rco001);
821 v_store_aligned(rco_buf+vecsize*2, v_rco010);
822 v_store_aligned(rco_buf+vecsize*3, v_rco011);
823 v_store_aligned(rco_buf+vecsize*4, v_rco100);
824 v_store_aligned(rco_buf+vecsize*5, v_rco101);
825 v_store_aligned(rco_buf+vecsize*6, v_rco110);
826 v_store_aligned(rco_buf+vecsize*7, v_rco111);
827
828 for(int id = 0; id < vecsize; id++)
829 {
830 hist[idx_buf[id]] += rco_buf[id];
831 hist[idx_buf[id]+1] += rco_buf[vecsize + id];
832 hist[idx_buf[id]+(n+2)] += rco_buf[2*vecsize + id];
833 hist[idx_buf[id]+(n+3)] += rco_buf[3*vecsize + id];
834 hist[idx_buf[id]+(d+2)*(n+2)] += rco_buf[4*vecsize + id];
835 hist[idx_buf[id]+(d+2)*(n+2)+1] += rco_buf[5*vecsize + id];
836 hist[idx_buf[id]+(d+3)*(n+2)] += rco_buf[6*vecsize + id];
837 hist[idx_buf[id]+(d+3)*(n+2)+1] += rco_buf[7*vecsize + id];
838 }
839 }
840 }
841 #endif
842 for( ; k < len; k++ )
843 {
844 float rbin = RBin[k], cbin = CBin[k];
845 float obin = (Ori[k] - ori)*bins_per_rad;
846 float mag = Mag[k]*W[k];
847
848 int r0 = cvFloor( rbin );
849 int c0 = cvFloor( cbin );
850 int o0 = cvFloor( obin );
851 rbin -= r0;
852 cbin -= c0;
853 obin -= o0;
854
855 if( o0 < 0 )
856 o0 += n;
857 if( o0 >= n )
858 o0 -= n;
859
860 // histogram update using tri-linear interpolation
861 float v_r1 = mag*rbin, v_r0 = mag - v_r1;
862 float v_rc11 = v_r1*cbin, v_rc10 = v_r1 - v_rc11;
863 float v_rc01 = v_r0*cbin, v_rc00 = v_r0 - v_rc01;
864 float v_rco111 = v_rc11*obin, v_rco110 = v_rc11 - v_rco111;
865 float v_rco101 = v_rc10*obin, v_rco100 = v_rc10 - v_rco101;
866 float v_rco011 = v_rc01*obin, v_rco010 = v_rc01 - v_rco011;
867 float v_rco001 = v_rc00*obin, v_rco000 = v_rc00 - v_rco001;
868
869 int idx = ((r0+1)*(d+2) + c0+1)*(n+2) + o0;
870 hist[idx] += v_rco000;
871 hist[idx+1] += v_rco001;
872 hist[idx+(n+2)] += v_rco010;
873 hist[idx+(n+3)] += v_rco011;
874 hist[idx+(d+2)*(n+2)] += v_rco100;
875 hist[idx+(d+2)*(n+2)+1] += v_rco101;
876 hist[idx+(d+3)*(n+2)] += v_rco110;
877 hist[idx+(d+3)*(n+2)+1] += v_rco111;
878 }
879
880 // finalize histogram, since the orientation histograms are circular
881 for( i = 0; i < d; i++ )
882 for( j = 0; j < d; j++ )
883 {
884 int idx = ((i+1)*(d+2) + (j+1))*(n+2);
885 hist[idx] += hist[idx+n];
886 hist[idx+1] += hist[idx+n+1];
887 for( k = 0; k < n; k++ )
888 rawDst[(i*d + j)*n + k] = hist[idx+k];
889 }
890 // copy histogram to the descriptor,
891 // apply hysteresis thresholding
892 // and scale the result, so that it can be easily converted
893 // to byte array
894 float nrm2 = 0;
895 len = d*d*n;
896 k = 0;
897 #if CV_SIMD
898 {
899 v_float32 __nrm2 = vx_setzero_f32();
900 v_float32 __rawDst;
901 for( ; k <= len - v_float32::nlanes; k += v_float32::nlanes )
902 {
903 __rawDst = vx_load_aligned(rawDst + k);
904 __nrm2 = v_fma(__rawDst, __rawDst, __nrm2);
905 }
906 nrm2 = (float)v_reduce_sum(__nrm2);
907 }
908 #endif
909 for( ; k < len; k++ )
910 nrm2 += rawDst[k]*rawDst[k];
911
912 float thr = std::sqrt(nrm2)*SIFT_DESCR_MAG_THR;
913
914 i = 0, nrm2 = 0;
915 #if 0 //CV_AVX2
916 // This code cannot be enabled because it sums nrm2 in a different order,
917 // thus producing slightly different results
918 {
919 float CV_DECL_ALIGNED(CV_SIMD_WIDTH) nrm2_buf[8];
920 __m256 __dst;
921 __m256 __nrm2 = _mm256_setzero_ps();
922 __m256 __thr = _mm256_set1_ps(thr);
923 for( ; i <= len - 8; i += 8 )
924 {
925 __dst = _mm256_loadu_ps(&rawDst[i]);
926 __dst = _mm256_min_ps(__dst, __thr);
927 _mm256_storeu_ps(&rawDst[i], __dst);
928 #if CV_FMA3
929 __nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2);
930 #else
931 __nrm2 = _mm256_add_ps(__nrm2, _mm256_mul_ps(__dst, __dst));
932 #endif
933 }
934 _mm256_store_ps(nrm2_buf, __nrm2);
935 nrm2 = nrm2_buf[0] + nrm2_buf[1] + nrm2_buf[2] + nrm2_buf[3] +
936 nrm2_buf[4] + nrm2_buf[5] + nrm2_buf[6] + nrm2_buf[7];
937 }
938 #endif
939 for( ; i < len; i++ )
940 {
941 float val = std::min(rawDst[i], thr);
942 rawDst[i] = val;
943 nrm2 += val*val;
944 }
945 nrm2 = SIFT_INT_DESCR_FCTR/std::max(std::sqrt(nrm2), FLT_EPSILON);
946
947 #if 1
948 k = 0;
949 if( dstMat.type() == CV_32F )
950 {
951 float* dst = dstMat.ptr<float>(row);
952 #if CV_SIMD
953 v_float32 __dst;
954 v_float32 __min = vx_setzero_f32();
955 v_float32 __max = vx_setall_f32(255.0f); // max of uchar
956 v_float32 __nrm2 = vx_setall_f32(nrm2);
957 for( k = 0; k <= len - v_float32::nlanes; k += v_float32::nlanes )
958 {
959 __dst = vx_load_aligned(rawDst + k);
960 __dst = v_min(v_max(v_cvt_f32(v_round(__dst * __nrm2)), __min), __max);
961 v_store(dst + k, __dst);
962 }
963 #endif
964 for( ; k < len; k++ )
965 {
966 dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
967 }
968 }
969 else // CV_8U
970 {
971 uint8_t* dst = dstMat.ptr<uint8_t>(row);
972 #if CV_SIMD
973 v_float32 __dst0, __dst1;
974 v_uint16 __pack01;
975 v_float32 __nrm2 = vx_setall_f32(nrm2);
976 for( k = 0; k <= len - v_float32::nlanes * 2; k += v_float32::nlanes * 2 )
977 {
978 __dst0 = vx_load_aligned(rawDst + k);
979 __dst1 = vx_load_aligned(rawDst + k + v_float32::nlanes);
980
981 __pack01 = v_pack_u(v_round(__dst0 * __nrm2), v_round(__dst1 * __nrm2));
982 v_pack_store(dst + k, __pack01);
983 }
984 #endif
985 for( ; k < len; k++ )
986 {
987 dst[k] = saturate_cast<uchar>(rawDst[k]*nrm2);
988 }
989 }
990 #else
991 float* dst = dstMat.ptr<float>(row);
992 float nrm1 = 0;
993 for( k = 0; k < len; k++ )
994 {
995 rawDst[k] *= nrm2;
996 nrm1 += rawDst[k];
997 }
998 nrm1 = 1.f/std::max(nrm1, FLT_EPSILON);
999 if( dstMat.type() == CV_32F )
1000 {
1001 for( k = 0; k < len; k++ )
1002 {
1003 dst[k] = std::sqrt(rawDst[k] * nrm1);
1004 }
1005 }
1006 else // CV_8U
1007 {
1008 for( k = 0; k < len; k++ )
1009 {
1010 dst[k] = saturate_cast<uchar>(std::sqrt(rawDst[k] * nrm1)*SIFT_INT_DESCR_FCTR);
1011 }
1012 }
1013 #endif
1014 }
1015
1016 #endif
1017 CV_CPU_OPTIMIZATION_NAMESPACE_END
1018 } // namespace
1019