1 // Copyright (C) 2009 Davis E. King (davis@dlib.net) 2 // License: Boost Software License See LICENSE.txt for the full license. 3 #ifndef DLIB_HESSIAN_PYRAMId_Hh_ 4 #define DLIB_HESSIAN_PYRAMId_Hh_ 5 6 #include "hessian_pyramid_abstract.h" 7 #include "../algs.h" 8 #include "../image_transforms/integral_image.h" 9 #include "../array.h" 10 #include "../array2d.h" 11 #include "../noncopyable.h" 12 #include "../matrix.h" 13 #include "../stl_checked.h" 14 #include <algorithm> 15 #include <vector> 16 17 namespace dlib 18 { 19 20 // ---------------------------------------------------------------------------------------- 21 22 struct interest_point 23 { interest_pointinterest_point24 interest_point() : scale(0), score(0), laplacian(0) {} 25 26 dlib::vector<double,2> center; 27 double scale; 28 double score; 29 double laplacian; 30 31 bool operator < (const interest_point& p) const { return score < p.score; } 32 }; 33 34 // ---------------------------------------------------------------------------------------- 35 serialize(const interest_point & item,std::ostream & out)36 inline void serialize( 37 const interest_point& item, 38 std::ostream& out 39 ) 40 { 41 try 42 { 43 serialize(item.center,out); 44 serialize(item.scale,out); 45 serialize(item.score,out); 46 serialize(item.laplacian,out); 47 } 48 catch (serialization_error& e) 49 { 50 throw serialization_error(e.info + "\n while serializing object of type interest_point"); 51 } 52 } 53 54 // ---------------------------------------------------------------------------------------- 55 deserialize(interest_point & item,std::istream & in)56 inline void deserialize( 57 interest_point& item, 58 std::istream& in 59 ) 60 { 61 try 62 { 63 deserialize(item.center,in); 64 deserialize(item.scale,in); 65 deserialize(item.score,in); 66 deserialize(item.laplacian,in); 67 } 68 catch (serialization_error& e) 69 { 70 throw serialization_error(e.info + "\n while deserializing object of type interest_point"); 71 } 72 } 73 74 // ---------------------------------------------------------------------------------------- 75 76 class hessian_pyramid : noncopyable 77 { 78 public: hessian_pyramid()79 hessian_pyramid() 80 { 81 num_octaves = 0; 82 num_intervals = 0; 83 initial_step_size = 0; 84 } 85 86 template <typename integral_image_type> build_pyramid(const integral_image_type & img,long num_octaves,long num_intervals,long initial_step_size)87 void build_pyramid ( 88 const integral_image_type& img, 89 long num_octaves, 90 long num_intervals, 91 long initial_step_size 92 ) 93 { 94 DLIB_ASSERT(num_octaves > 0 && num_intervals > 0 && initial_step_size > 0, 95 "\tvoid build_pyramid()" 96 << "\n\tAll arguments to this function must be > 0" 97 << "\n\t this: " << this 98 << "\n\t num_octaves: " << num_octaves 99 << "\n\t num_intervals: " << num_intervals 100 << "\n\t initial_step_size: " << initial_step_size 101 ); 102 103 this->num_octaves = num_octaves; 104 this->num_intervals = num_intervals; 105 this->initial_step_size = initial_step_size; 106 107 // allocate space for the pyramid 108 pyramid.resize(num_octaves*num_intervals); 109 for (long o = 0; o < num_octaves; ++o) 110 { 111 const long step_size = get_step_size(o); 112 for (long i = 0; i < num_intervals; ++i) 113 { 114 pyramid[num_intervals*o + i].set_size(img.nr()/step_size, img.nc()/step_size); 115 } 116 } 117 118 // now fill out the pyramid with data 119 for (long o = 0; o < num_octaves; ++o) 120 { 121 const long step_size = get_step_size(o); 122 123 for (long i = 0; i < num_intervals; ++i) 124 { 125 const long border_size = get_border_size(i)*step_size; 126 const long lobe_size = static_cast<long>(std::pow(2.0, o+1.0)+0.5)*(i+1) + 1; 127 const double area_inv = 1.0/std::pow(3.0*lobe_size, 2.0); 128 129 const long lobe_offset = lobe_size/2+1; 130 const point tl(-lobe_offset,-lobe_offset); 131 const point tr(lobe_offset,-lobe_offset); 132 const point bl(-lobe_offset,lobe_offset); 133 const point br(lobe_offset,lobe_offset); 134 135 for (long r = border_size; r < img.nr() - border_size; r += step_size) 136 { 137 for (long c = border_size; c < img.nc() - border_size; c += step_size) 138 { 139 const point p(c,r); 140 141 double Dxx = img.get_sum_of_area(centered_rect(p, lobe_size*3, 2*lobe_size-1)) - 142 img.get_sum_of_area(centered_rect(p, lobe_size, 2*lobe_size-1))*3.0; 143 144 double Dyy = img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size*3)) - 145 img.get_sum_of_area(centered_rect(p, 2*lobe_size-1, lobe_size))*3.0; 146 147 double Dxy = img.get_sum_of_area(centered_rect(p+bl, lobe_size, lobe_size)) + 148 img.get_sum_of_area(centered_rect(p+tr, lobe_size, lobe_size)) - 149 img.get_sum_of_area(centered_rect(p+tl, lobe_size, lobe_size)) - 150 img.get_sum_of_area(centered_rect(p+br, lobe_size, lobe_size)); 151 152 // now we normalize the filter responses 153 Dxx *= area_inv; 154 Dyy *= area_inv; 155 Dxy *= area_inv; 156 157 158 double sign_of_laplacian = +1; 159 if (Dxx + Dyy < 0) 160 sign_of_laplacian = -1; 161 162 double determinant = Dxx*Dyy - 0.81*Dxy*Dxy; 163 164 // If the determinant is negative then just blank it out by setting 165 // it to zero. 166 if (determinant < 0) 167 determinant = 0; 168 169 // Save the determinant of the Hessian into our image pyramid. Also 170 // pack the laplacian sign into the value so we can get it out later. 171 pyramid[o*num_intervals + i][r/step_size][c/step_size] = sign_of_laplacian*determinant; 172 173 } 174 } 175 176 } 177 } 178 } 179 get_border_size(long interval)180 long get_border_size ( 181 long interval 182 ) const 183 { 184 DLIB_ASSERT(0 <= interval && interval < intervals(), 185 "\tlong get_border_size(interval)" 186 << "\n\tInvalid interval value" 187 << "\n\t this: " << this 188 << "\n\t interval: " << interval 189 ); 190 191 const double lobe_size = 2.0*(interval+1) + 1; 192 const double filter_size = 3*lobe_size; 193 194 const long bs = static_cast<long>(std::ceil(filter_size/2.0)); 195 return bs; 196 } 197 get_step_size(long octave)198 long get_step_size ( 199 long octave 200 ) const 201 { 202 DLIB_ASSERT(0 <= octave && octave < octaves(), 203 "\tlong get_step_size(octave)" 204 << "\n\tInvalid octave value" 205 << "\n\t this: " << this 206 << "\n\t octave: " << octave 207 ); 208 209 return initial_step_size*static_cast<long>(std::pow(2.0, (double)octave)+0.5); 210 } 211 nr(long octave)212 long nr ( 213 long octave 214 ) const 215 { 216 DLIB_ASSERT(0 <= octave && octave < octaves(), 217 "\tlong nr(octave)" 218 << "\n\tInvalid octave value" 219 << "\n\t this: " << this 220 << "\n\t octave: " << octave 221 ); 222 223 return pyramid[num_intervals*octave].nr(); 224 } 225 nc(long octave)226 long nc ( 227 long octave 228 ) const 229 { 230 DLIB_ASSERT(0 <= octave && octave < octaves(), 231 "\tlong nc(octave)" 232 << "\n\tInvalid octave value" 233 << "\n\t this: " << this 234 << "\n\t octave: " << octave 235 ); 236 237 return pyramid[num_intervals*octave].nc(); 238 } 239 get_value(long octave,long interval,long r,long c)240 double get_value ( 241 long octave, 242 long interval, 243 long r, 244 long c 245 ) const 246 { 247 DLIB_ASSERT(0 <= octave && octave < octaves() && 248 0 <= interval && interval < intervals() && 249 get_border_size(interval) <= r && r < nr(octave)-get_border_size(interval) && 250 get_border_size(interval) <= c && c < nc(octave)-get_border_size(interval), 251 "\tdouble get_value(octave, interval, r, c)" 252 << "\n\tInvalid inputs to this function" 253 << "\n\t this: " << this 254 << "\n\t octave: " << octave 255 << "\n\t interval: " << interval 256 << "\n\t octaves: " << octaves() 257 << "\n\t intervals: " << intervals() 258 << "\n\t r: " << r 259 << "\n\t c: " << c 260 << "\n\t nr(octave): " << nr(octave) 261 << "\n\t nc(octave): " << nc(octave) 262 << "\n\t get_border_size(interval): " << get_border_size(interval) 263 ); 264 265 return std::abs(pyramid[num_intervals*octave + interval][r][c]); 266 } 267 get_laplacian(long octave,long interval,long r,long c)268 double get_laplacian ( 269 long octave, 270 long interval, 271 long r, 272 long c 273 ) const 274 { 275 DLIB_ASSERT(0 <= octave && octave < octaves() && 276 0 <= interval && interval < intervals() && 277 get_border_size(interval) <= r && r < nr(octave)-get_border_size(interval) && 278 get_border_size(interval) <= c && c < nc(octave)-get_border_size(interval), 279 "\tdouble get_laplacian(octave, interval, r, c)" 280 << "\n\tInvalid inputs to this function" 281 << "\n\t this: " << this 282 << "\n\t octave: " << octave 283 << "\n\t interval: " << interval 284 << "\n\t octaves: " << octaves() 285 << "\n\t intervals: " << intervals() 286 << "\n\t r: " << r 287 << "\n\t c: " << c 288 << "\n\t nr(octave): " << nr(octave) 289 << "\n\t nc(octave): " << nc(octave) 290 << "\n\t get_border_size(interval): " << get_border_size(interval) 291 ); 292 293 // return the sign of the laplacian 294 if (pyramid[num_intervals*octave + interval][r][c] > 0) 295 return +1; 296 else 297 return -1; 298 } 299 octaves()300 long octaves ( 301 ) const { return num_octaves; } 302 intervals()303 long intervals ( 304 ) const { return num_intervals; } 305 306 private: 307 308 long num_octaves; 309 long num_intervals; 310 long initial_step_size; 311 312 typedef array2d<double> image_type; 313 typedef array<image_type> pyramid_type; 314 315 pyramid_type pyramid; 316 }; 317 318 // ---------------------------------------------------------------------------------------- 319 // ---------------------------------------------------------------------------------------- 320 // ---------------------------------------------------------------------------------------- 321 322 namespace hessian_pyramid_helpers 323 { is_maximum_in_region(const hessian_pyramid & pyr,long o,long i,long r,long c)324 inline bool is_maximum_in_region( 325 const hessian_pyramid& pyr, 326 long o, 327 long i, 328 long r, 329 long c 330 ) 331 { 332 // First check if this point is near the edge of the octave 333 // If it is then we say it isn't a maximum as these points are 334 // not as reliable. 335 if (i <= 0 || i+1 >= pyr.intervals()) 336 { 337 return false; 338 } 339 340 const double val = pyr.get_value(o,i,r,c); 341 342 // now check if there are any bigger values around this guy 343 for (long ii = i-1; ii <= i+1; ++ii) 344 { 345 for (long rr = r-1; rr <= r+1; ++rr) 346 { 347 for (long cc = c-1; cc <= c+1; ++cc) 348 { 349 if (pyr.get_value(o,ii,rr,cc) > val) 350 return false; 351 } 352 } 353 } 354 355 return true; 356 } 357 358 // ------------------------------------------------------------------------------------ 359 get_hessian_gradient(const hessian_pyramid & pyr,long o,long i,long r,long c)360 inline const matrix<double,3,1> get_hessian_gradient ( 361 const hessian_pyramid& pyr, 362 long o, 363 long i, 364 long r, 365 long c 366 ) 367 { 368 matrix<double,3,1> grad; 369 grad(0) = (pyr.get_value(o,i,r,c+1) - pyr.get_value(o,i,r,c-1))/2.0; 370 grad(1) = (pyr.get_value(o,i,r+1,c) - pyr.get_value(o,i,r-1,c))/2.0; 371 grad(2) = (pyr.get_value(o,i+1,r,c) - pyr.get_value(o,i-1,r,c))/2.0; 372 return grad; 373 } 374 375 // ------------------------------------------------------------------------------------ 376 get_hessian_hessian(const hessian_pyramid & pyr,long o,long i,long r,long c)377 inline const matrix<double,3,3> get_hessian_hessian ( 378 const hessian_pyramid& pyr, 379 long o, 380 long i, 381 long r, 382 long c 383 ) 384 { 385 matrix<double,3,3> hess; 386 const double val = pyr.get_value(o,i,r,c); 387 388 double Dxx = (pyr.get_value(o,i,r,c+1) + pyr.get_value(o,i,r,c-1)) - 2*val; 389 double Dyy = (pyr.get_value(o,i,r+1,c) + pyr.get_value(o,i,r-1,c)) - 2*val; 390 double Dss = (pyr.get_value(o,i+1,r,c) + pyr.get_value(o,i-1,r,c)) - 2*val; 391 392 double Dxy = (pyr.get_value(o,i,r+1,c+1) + pyr.get_value(o,i,r-1,c-1) - 393 pyr.get_value(o,i,r-1,c+1) - pyr.get_value(o,i,r+1,c-1)) / 4.0; 394 395 double Dxs = (pyr.get_value(o,i+1,r,c+1) + pyr.get_value(o,i-1,r,c-1) - 396 pyr.get_value(o,i-1,r,c+1) - pyr.get_value(o,i+1,r,c-1)) / 4.0; 397 398 double Dys = (pyr.get_value(o,i+1,r+1,c) + pyr.get_value(o,i-1,r-1,c) - 399 pyr.get_value(o,i-1,r+1,c) - pyr.get_value(o,i+1,r-1,c)) / 4.0; 400 401 402 hess = Dxx, Dxy, Dxs, 403 Dxy, Dyy, Dys, 404 Dxs, Dys, Dss; 405 406 return hess; 407 } 408 409 // ------------------------------------------------------------------------------------ 410 interpolate_point(const hessian_pyramid & pyr,long o,long i,long r,long c)411 inline const interest_point interpolate_point ( 412 const hessian_pyramid& pyr, 413 long o, 414 long i, 415 long r, 416 long c 417 ) 418 { 419 dlib::vector<double,2> p(c,r); 420 421 dlib::vector<double,3> start_point(c,r,i); 422 dlib::vector<double,3> interpolated_point = -inv(get_hessian_hessian(pyr,o,i,r,c))*get_hessian_gradient(pyr,o,i,r,c); 423 424 //cout << "inter: " << trans(interpolated_point); 425 426 interest_point temp; 427 if (max(abs(interpolated_point)) < 0.5) 428 { 429 p = (start_point+interpolated_point)*pyr.get_step_size(o); 430 const double lobe_size = std::pow(2.0, o+1.0)*(i+interpolated_point.z()+1) + 1; 431 const double filter_size = 3*lobe_size; 432 const double scale = 1.2/9.0 * filter_size; 433 434 temp.center = p; 435 temp.scale = scale; 436 temp.score = pyr.get_value(o,i,r,c); 437 temp.laplacian = pyr.get_laplacian(o,i,r,c); 438 } 439 else 440 { 441 // this indicates to the caller that no interest point was found. 442 temp.score = -1; 443 } 444 445 return temp; 446 } 447 448 } 449 450 // ---------------------------------------------------------------------------------------- 451 452 template <typename Alloc> get_interest_points(const hessian_pyramid & pyr,double threshold,std::vector<interest_point,Alloc> & result_points)453 void get_interest_points ( 454 const hessian_pyramid& pyr, 455 double threshold, 456 std::vector<interest_point,Alloc>& result_points 457 ) 458 { 459 DLIB_ASSERT(threshold >= 0, 460 "\tvoid get_interest_points()" 461 << "\n\t Invalid arguments to this function" 462 << "\n\t threshold: " << threshold 463 ); 464 using namespace std; 465 using namespace hessian_pyramid_helpers; 466 467 result_points.clear(); 468 469 for (long o = 0; o < pyr.octaves(); ++o) 470 { 471 const long nr = pyr.nr(o); 472 const long nc = pyr.nc(o); 473 474 // do non-maximum suppression on all the intervals in the current octave and 475 // accumulate the results in result_points 476 for (long i = 1; i < pyr.intervals()-1; i += 1) 477 { 478 const long border_size = pyr.get_border_size(i+1); 479 for (long r = border_size+1; r < nr - border_size-1; r += 1) 480 { 481 for (long c = border_size+1; c < nc - border_size-1; c += 1) 482 { 483 double max_val = pyr.get_value(o,i,r,c); 484 long max_i = i; 485 long max_r = r; 486 long max_c = c; 487 488 489 // If the max point we found is really a maximum in its own region and 490 // is big enough then add it to the results. 491 if (max_val >= threshold && is_maximum_in_region(pyr, o, max_i, max_r, max_c)) 492 { 493 //cout << max_val << endl; 494 interest_point sp = interpolate_point (pyr, o, max_i, max_r, max_c); 495 if (sp.score >= threshold) 496 { 497 result_points.push_back(sp); 498 } 499 } 500 501 } 502 } 503 } 504 } 505 506 } 507 508 // ---------------------------------------------------------------------------------------- 509 510 template <typename Alloc> get_interest_points(const hessian_pyramid & pyr,double threshold,std_vector_c<interest_point,Alloc> & result_points)511 void get_interest_points ( 512 const hessian_pyramid& pyr, 513 double threshold, 514 std_vector_c<interest_point,Alloc>& result_points 515 ) 516 /*! 517 This function is just an overload that automatically casts std_vector_c objects 518 into std::vector objects. (Usually this is automatic but the template argument 519 there messes up the conversion so we have to do it explicitly) 520 !*/ 521 { 522 std::vector<interest_point,Alloc>& v = result_points; 523 get_interest_points(pyr, threshold, v); 524 } 525 526 // ---------------------------------------------------------------------------------------- 527 528 } 529 530 #endif // DLIB_HESSIAN_PYRAMId_Hh_ 531 532