1 // Copyright (C) 2009  Davis E. King (davis@dlib.net)
2 // License: Boost Software License   See LICENSE.txt for the full license.
3 #ifndef DLIB_HESSIAN_PYRAMId_Hh_
4 #define DLIB_HESSIAN_PYRAMId_Hh_
5 
6 #include "hessian_pyramid_abstract.h"
7 #include "../algs.h"
8 #include "../image_transforms/integral_image.h"
9 #include "../array.h"
10 #include "../array2d.h"
11 #include "../noncopyable.h"
12 #include "../matrix.h"
13 #include "../stl_checked.h"
14 #include <algorithm>
15 #include <vector>
16 
17 namespace dlib
18 {
19 
20 // ----------------------------------------------------------------------------------------
21 
22     struct interest_point
23     {
interest_pointinterest_point24         interest_point() : scale(0), score(0), laplacian(0) {}
25 
26         dlib::vector<double,2> center;
27         double scale;
28         double score;
29         double laplacian;
30 
31         bool operator < (const interest_point& p) const { return score < p.score; }
32     };
33 
34 // ----------------------------------------------------------------------------------------
35 
serialize(const interest_point & item,std::ostream & out)36     inline void serialize(
37         const interest_point& item,
38         std::ostream& out
39     )
40     {
41         try
42         {
43             serialize(item.center,out);
44             serialize(item.scale,out);
45             serialize(item.score,out);
46             serialize(item.laplacian,out);
47         }
48         catch (serialization_error& e)
49         {
50             throw serialization_error(e.info + "\n   while serializing object of type interest_point");
51         }
52     }
53 
54 // ----------------------------------------------------------------------------------------
55 
deserialize(interest_point & item,std::istream & in)56     inline void deserialize(
57         interest_point& item,
58         std::istream& in
59     )
60     {
61         try
62         {
63             deserialize(item.center,in);
64             deserialize(item.scale,in);
65             deserialize(item.score,in);
66             deserialize(item.laplacian,in);
67         }
68         catch (serialization_error& e)
69         {
70             throw serialization_error(e.info + "\n   while deserializing object of type interest_point");
71         }
72     }
73 
74 // ----------------------------------------------------------------------------------------
75 
76     class hessian_pyramid : noncopyable
77     {
78     public:
hessian_pyramid()79         hessian_pyramid()
80         {
81             num_octaves = 0;
82             num_intervals = 0;
83             initial_step_size = 0;
84         }
85 
86         template <typename integral_image_type>
build_pyramid(const integral_image_type & img,long num_octaves,long num_intervals,long initial_step_size)87         void build_pyramid (
88             const integral_image_type& img,
89             long num_octaves,
90             long num_intervals,
91             long initial_step_size
92         )
93         {
94             DLIB_ASSERT(num_octaves > 0 && num_intervals > 0 && initial_step_size > 0,
95                 "\tvoid build_pyramid()"
96                 << "\n\tAll arguments to this function must be > 0"
97                 << "\n\t this:              " << this
98                 << "\n\t num_octaves:       " << num_octaves
99                 << "\n\t num_intervals:     " << num_intervals
100                 << "\n\t initial_step_size: " << initial_step_size
101             );
102 
103             this->num_octaves = num_octaves;
104             this->num_intervals = num_intervals;
105             this->initial_step_size = initial_step_size;
106 
107             // allocate space for the pyramid
108             pyramid.resize(num_octaves*num_intervals);
109             for (long o = 0; o < num_octaves; ++o)
110             {
111                 const long step_size = get_step_size(o);
112                 for (long i = 0; i < num_intervals; ++i)
113                 {
114                     pyramid[num_intervals*o + i].set_size(img.nr()/step_size, img.nc()/step_size);
115                 }
116             }
117 
118             // now fill out the pyramid with data
119             for (long o = 0; o < num_octaves; ++o)
120             {
121                 const long step_size = get_step_size(o);
122 
123                 for (long i = 0; i < num_intervals; ++i)
124                 {
125                     const long border_size = get_border_size(i)*step_size;
126                     const long lobe_size = static_cast<long>(std::pow(2.0, o+1.0)+0.5)*(i+1) + 1;
127                     const double area_inv = 1.0/std::pow(3.0*lobe_size, 2.0);
128 
129                     const long lobe_offset = lobe_size/2+1;
130                     const point tl(-lobe_offset,-lobe_offset);
131                     const point tr(lobe_offset,-lobe_offset);
132                     const point bl(-lobe_offset,lobe_offset);
133                     const point br(lobe_offset,lobe_offset);
134 
135                     for (long r = border_size; r < img.nr() - border_size; r += step_size)
136                     {
137                         for (long c = border_size; c < img.nc() - border_size; c += step_size)
138                         {
139                             const point p(c,r);
140 
141                             double Dxx = img.get_sum_of_area(centered_rect(p, lobe_size*3, 2*lobe_size-1)) -
142                                          img.get_sum_of_area(centered_rect(p, lobe_size,   2*lobe_size-1))*3.0;
143 
144                             double Dyy = img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size*3)) -
145                                          img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size))*3.0;
146 
147                             double Dxy = img.get_sum_of_area(centered_rect(p+bl, lobe_size, lobe_size)) +
148                                          img.get_sum_of_area(centered_rect(p+tr, lobe_size, lobe_size)) -
149                                          img.get_sum_of_area(centered_rect(p+tl, lobe_size, lobe_size)) -
150                                          img.get_sum_of_area(centered_rect(p+br, lobe_size, lobe_size));
151 
152                             // now we normalize the filter responses
153                             Dxx *= area_inv;
154                             Dyy *= area_inv;
155                             Dxy *= area_inv;
156 
157 
158                             double sign_of_laplacian = +1;
159                             if (Dxx + Dyy < 0)
160                                 sign_of_laplacian = -1;
161 
162                             double determinant = Dxx*Dyy - 0.81*Dxy*Dxy;
163 
164                             // If the determinant is negative then just blank it out by setting
165                             // it to zero.
166                             if (determinant < 0)
167                                 determinant = 0;
168 
169                             // Save the determinant of the Hessian into our image pyramid.  Also
170                             // pack the laplacian sign into the value so we can get it out later.
171                             pyramid[o*num_intervals + i][r/step_size][c/step_size] = sign_of_laplacian*determinant;
172 
173                         }
174                     }
175 
176                 }
177             }
178         }
179 
get_border_size(long interval)180         long get_border_size (
181             long interval
182         ) const
183         {
184             DLIB_ASSERT(0 <= interval && interval < intervals(),
185                 "\tlong get_border_size(interval)"
186                 << "\n\tInvalid interval value"
187                 << "\n\t this:   " << this
188                 << "\n\t interval: " << interval
189             );
190 
191             const double lobe_size = 2.0*(interval+1) + 1;
192             const double filter_size = 3*lobe_size;
193 
194             const long bs = static_cast<long>(std::ceil(filter_size/2.0));
195             return bs;
196         }
197 
get_step_size(long octave)198         long get_step_size (
199             long octave
200         ) const
201         {
202             DLIB_ASSERT(0 <= octave && octave < octaves(),
203                 "\tlong get_step_size(octave)"
204                 << "\n\tInvalid octave value"
205                 << "\n\t this:   " << this
206                 << "\n\t octave: " << octave
207             );
208 
209             return initial_step_size*static_cast<long>(std::pow(2.0, (double)octave)+0.5);
210         }
211 
nr(long octave)212         long nr (
213             long octave
214         ) const
215         {
216             DLIB_ASSERT(0 <= octave && octave < octaves(),
217                 "\tlong nr(octave)"
218                 << "\n\tInvalid octave value"
219                 << "\n\t this:   " << this
220                 << "\n\t octave: " << octave
221             );
222 
223             return pyramid[num_intervals*octave].nr();
224         }
225 
nc(long octave)226         long nc (
227             long octave
228         ) const
229         {
230             DLIB_ASSERT(0 <= octave && octave < octaves(),
231                 "\tlong nc(octave)"
232                 << "\n\tInvalid octave value"
233                 << "\n\t this:   " << this
234                 << "\n\t octave: " << octave
235             );
236 
237             return pyramid[num_intervals*octave].nc();
238         }
239 
get_value(long octave,long interval,long r,long c)240         double get_value (
241             long octave,
242             long interval,
243             long r,
244             long c
245         ) const
246         {
247             DLIB_ASSERT(0 <= octave && octave < octaves() &&
248                         0 <= interval && interval < intervals() &&
249                         get_border_size(interval) <= r && r < nr(octave)-get_border_size(interval) &&
250                         get_border_size(interval) <= c && c < nc(octave)-get_border_size(interval),
251                 "\tdouble get_value(octave, interval, r, c)"
252                 << "\n\tInvalid inputs to this function"
253                 << "\n\t this:      " << this
254                 << "\n\t octave:    " << octave
255                 << "\n\t interval:  " << interval
256                 << "\n\t octaves:   " << octaves()
257                 << "\n\t intervals: " << intervals()
258                 << "\n\t r:         " << r
259                 << "\n\t c:         " << c
260                 << "\n\t nr(octave): " << nr(octave)
261                 << "\n\t nc(octave): " << nc(octave)
262                 << "\n\t get_border_size(interval): " << get_border_size(interval)
263             );
264 
265             return std::abs(pyramid[num_intervals*octave + interval][r][c]);
266         }
267 
get_laplacian(long octave,long interval,long r,long c)268         double get_laplacian (
269             long octave,
270             long interval,
271             long r,
272             long c
273         ) const
274         {
275             DLIB_ASSERT(0 <= octave && octave < octaves() &&
276                         0 <= interval && interval < intervals() &&
277                         get_border_size(interval) <= r && r < nr(octave)-get_border_size(interval) &&
278                         get_border_size(interval) <= c && c < nc(octave)-get_border_size(interval),
279                 "\tdouble get_laplacian(octave, interval, r, c)"
280                 << "\n\tInvalid inputs to this function"
281                 << "\n\t this:      " << this
282                 << "\n\t octave:    " << octave
283                 << "\n\t interval:  " << interval
284                 << "\n\t octaves:   " << octaves()
285                 << "\n\t intervals: " << intervals()
286                 << "\n\t r:         " << r
287                 << "\n\t c:         " << c
288                 << "\n\t nr(octave): " << nr(octave)
289                 << "\n\t nc(octave): " << nc(octave)
290                 << "\n\t get_border_size(interval): " << get_border_size(interval)
291             );
292 
293             // return the sign of the laplacian
294             if (pyramid[num_intervals*octave + interval][r][c] > 0)
295                 return +1;
296             else
297                 return -1;
298         }
299 
octaves()300         long octaves (
301         ) const { return num_octaves; }
302 
intervals()303         long intervals (
304         ) const { return num_intervals; }
305 
306     private:
307 
308         long num_octaves;
309         long num_intervals;
310         long initial_step_size;
311 
312         typedef array2d<double> image_type;
313         typedef array<image_type> pyramid_type;
314 
315         pyramid_type pyramid;
316     };
317 
318 // ----------------------------------------------------------------------------------------
319 // ----------------------------------------------------------------------------------------
320 // ----------------------------------------------------------------------------------------
321 
322     namespace hessian_pyramid_helpers
323     {
is_maximum_in_region(const hessian_pyramid & pyr,long o,long i,long r,long c)324         inline bool is_maximum_in_region(
325             const hessian_pyramid& pyr,
326             long o,
327             long i,
328             long r,
329             long c
330         )
331         {
332             // First check if this point is near the edge of the octave
333             // If it is then we say it isn't a maximum as these points are
334             // not as reliable.
335             if (i <= 0 || i+1 >= pyr.intervals())
336             {
337                 return false;
338             }
339 
340             const double val = pyr.get_value(o,i,r,c);
341 
342             // now check if there are any bigger values around this guy
343             for (long ii = i-1; ii <= i+1; ++ii)
344             {
345                 for (long rr = r-1; rr <= r+1; ++rr)
346                 {
347                     for (long cc = c-1; cc <= c+1; ++cc)
348                     {
349                         if (pyr.get_value(o,ii,rr,cc) > val)
350                             return false;
351                     }
352                 }
353             }
354 
355             return true;
356         }
357 
358     // ------------------------------------------------------------------------------------
359 
get_hessian_gradient(const hessian_pyramid & pyr,long o,long i,long r,long c)360         inline const matrix<double,3,1> get_hessian_gradient (
361             const hessian_pyramid& pyr,
362             long o,
363             long i,
364             long r,
365             long c
366         )
367         {
368             matrix<double,3,1> grad;
369             grad(0) = (pyr.get_value(o,i,r,c+1) - pyr.get_value(o,i,r,c-1))/2.0;
370             grad(1) = (pyr.get_value(o,i,r+1,c) - pyr.get_value(o,i,r-1,c))/2.0;
371             grad(2) = (pyr.get_value(o,i+1,r,c) - pyr.get_value(o,i-1,r,c))/2.0;
372             return grad;
373         }
374 
375     // ------------------------------------------------------------------------------------
376 
get_hessian_hessian(const hessian_pyramid & pyr,long o,long i,long r,long c)377         inline const matrix<double,3,3> get_hessian_hessian (
378             const hessian_pyramid& pyr,
379             long o,
380             long i,
381             long r,
382             long c
383         )
384         {
385             matrix<double,3,3> hess;
386             const double val = pyr.get_value(o,i,r,c);
387 
388             double Dxx = (pyr.get_value(o,i,r,c+1) + pyr.get_value(o,i,r,c-1)) - 2*val;
389             double Dyy = (pyr.get_value(o,i,r+1,c) + pyr.get_value(o,i,r-1,c)) - 2*val;
390             double Dss = (pyr.get_value(o,i+1,r,c) + pyr.get_value(o,i-1,r,c)) - 2*val;
391 
392             double Dxy = (pyr.get_value(o,i,r+1,c+1) + pyr.get_value(o,i,r-1,c-1) -
393                           pyr.get_value(o,i,r-1,c+1) - pyr.get_value(o,i,r+1,c-1)) / 4.0;
394 
395             double Dxs = (pyr.get_value(o,i+1,r,c+1) + pyr.get_value(o,i-1,r,c-1) -
396                           pyr.get_value(o,i-1,r,c+1) - pyr.get_value(o,i+1,r,c-1)) / 4.0;
397 
398             double Dys = (pyr.get_value(o,i+1,r+1,c) + pyr.get_value(o,i-1,r-1,c) -
399                           pyr.get_value(o,i-1,r+1,c) - pyr.get_value(o,i+1,r-1,c)) / 4.0;
400 
401 
402             hess = Dxx, Dxy, Dxs,
403             Dxy, Dyy, Dys,
404             Dxs, Dys, Dss;
405 
406             return hess;
407         }
408 
409     // ------------------------------------------------------------------------------------
410 
interpolate_point(const hessian_pyramid & pyr,long o,long i,long r,long c)411         inline const interest_point interpolate_point (
412             const hessian_pyramid& pyr,
413             long o,
414             long i,
415             long r,
416             long c
417         )
418         {
419             dlib::vector<double,2> p(c,r);
420 
421             dlib::vector<double,3> start_point(c,r,i);
422             dlib::vector<double,3> interpolated_point = -inv(get_hessian_hessian(pyr,o,i,r,c))*get_hessian_gradient(pyr,o,i,r,c);
423 
424             //cout << "inter: " <<  trans(interpolated_point);
425 
426             interest_point temp;
427             if (max(abs(interpolated_point)) < 0.5)
428             {
429                 p = (start_point+interpolated_point)*pyr.get_step_size(o);
430                 const double lobe_size = std::pow(2.0, o+1.0)*(i+interpolated_point.z()+1) + 1;
431                 const double filter_size = 3*lobe_size;
432                 const double scale = 1.2/9.0 * filter_size;
433 
434                 temp.center = p;
435                 temp.scale = scale;
436                 temp.score = pyr.get_value(o,i,r,c);
437                 temp.laplacian = pyr.get_laplacian(o,i,r,c);
438             }
439             else
440             {
441                 // this indicates to the caller that no interest point was found.
442                 temp.score = -1;
443             }
444 
445             return temp;
446         }
447 
448     }
449 
450 // ----------------------------------------------------------------------------------------
451 
452     template <typename Alloc>
get_interest_points(const hessian_pyramid & pyr,double threshold,std::vector<interest_point,Alloc> & result_points)453     void get_interest_points (
454         const hessian_pyramid& pyr,
455         double threshold,
456         std::vector<interest_point,Alloc>& result_points
457     )
458     {
459         DLIB_ASSERT(threshold >= 0,
460             "\tvoid get_interest_points()"
461             << "\n\t Invalid arguments to this function"
462             << "\n\t threshold: " << threshold
463         );
464         using namespace std;
465         using namespace hessian_pyramid_helpers;
466 
467         result_points.clear();
468 
469         for (long o = 0; o < pyr.octaves(); ++o)
470         {
471             const long nr = pyr.nr(o);
472             const long nc = pyr.nc(o);
473 
474             // do non-maximum suppression on all the intervals in the current octave and
475             // accumulate the results in result_points
476             for (long i = 1; i < pyr.intervals()-1;  i += 1)
477             {
478                 const long border_size = pyr.get_border_size(i+1);
479                 for (long r = border_size+1; r < nr - border_size-1; r += 1)
480                 {
481                     for (long c = border_size+1; c < nc - border_size-1; c += 1)
482                     {
483                         double max_val = pyr.get_value(o,i,r,c);
484                         long max_i = i;
485                         long max_r = r;
486                         long max_c = c;
487 
488 
489                         // If the max point we found is really a maximum in its own region and
490                         // is big enough then add it to the results.
491                         if (max_val >= threshold && is_maximum_in_region(pyr, o, max_i, max_r, max_c))
492                         {
493                             //cout << max_val << endl;
494                             interest_point sp = interpolate_point (pyr, o, max_i, max_r, max_c);
495                             if (sp.score >= threshold)
496                             {
497                                 result_points.push_back(sp);
498                             }
499                         }
500 
501                     }
502                 }
503             }
504         }
505 
506     }
507 
508 // ----------------------------------------------------------------------------------------
509 
510     template <typename Alloc>
get_interest_points(const hessian_pyramid & pyr,double threshold,std_vector_c<interest_point,Alloc> & result_points)511     void get_interest_points (
512         const hessian_pyramid& pyr,
513         double threshold,
514         std_vector_c<interest_point,Alloc>& result_points
515     )
516     /*!
517         This function is just an overload that automatically casts std_vector_c objects
518         into std::vector objects.  (Usually this is automatic but the template argument
519         there messes up the conversion so we have to do it explicitly)
520     !*/
521     {
522         std::vector<interest_point,Alloc>& v = result_points;
523         get_interest_points(pyr, threshold, v);
524     }
525 
526 // ----------------------------------------------------------------------------------------
527 
528 }
529 
530 #endif  // DLIB_HESSIAN_PYRAMId_Hh_
531 
532