1 // Copyright (C) 2015 Davis E. King (davis@dlib.net) 2 // License: Boost Software License See LICENSE.txt for the full license. 3 #ifndef DLIB_CORRELATION_TrACKER_H_ 4 #define DLIB_CORRELATION_TrACKER_H_ 5 6 #include "correlation_tracker_abstract.h" 7 #include "../geometry.h" 8 #include "../matrix.h" 9 #include "../array2d.h" 10 #include "../image_transforms/assign_image.h" 11 #include "../image_transforms/interpolation.h" 12 13 14 namespace dlib 15 { 16 17 // ---------------------------------------------------------------------------------------- 18 19 class correlation_tracker 20 { 21 public: 22 23 explicit correlation_tracker (unsigned long filter_size = 6, 24 unsigned long num_scale_levels = 5, 25 unsigned long scale_window_size = 23, 26 double regularizer_space = 0.001, 27 double nu_space = 0.025, 28 double regularizer_scale = 0.001, 29 double nu_scale = 0.025, 30 double scale_pyramid_alpha = 1.020 31 ) 32 : filter_size(1 << filter_size), num_scale_levels(1 << num_scale_levels), 33 scale_window_size(scale_window_size), 34 regularizer_space(regularizer_space), nu_space(nu_space), 35 regularizer_scale(regularizer_scale), nu_scale(nu_scale), 36 scale_pyramid_alpha(scale_pyramid_alpha) 37 { 38 // Create the cosine mask used for space filtering. 39 mask = make_cosine_mask(); 40 41 // Create the cosine mask used for the scale filtering. 42 scale_cos_mask.resize(get_num_scale_levels()); 43 const long max_level = get_num_scale_levels()/2; 44 for (unsigned long k = 0; k < get_num_scale_levels(); ++k) 45 { 46 double dist = std::abs((double)k-max_level)/max_level*pi/2; 47 dist = std::min(dist, pi/2); 48 scale_cos_mask[k] = std::cos(dist); 49 } 50 } 51 52 template <typename image_type> start_track(const image_type & img,const drectangle & p)53 void start_track ( 54 const image_type& img, 55 const drectangle& p 56 ) 57 { 58 DLIB_CASSERT(p.is_empty() == false, 59 "\t void correlation_tracker::start_track()" 60 << "\n\t You can't give an empty rectangle." 61 ); 62 63 B.set_size(0,0); 64 65 point_transform_affine tform = inv(make_chip(img, p, F)); 66 for (unsigned long i = 0; i < F.size(); ++i) 67 fft_inplace(F[i]); 68 make_target_location_image(tform(center(p)), G); 69 A.resize(F.size()); 70 for (unsigned long i = 0; i < F.size(); ++i) 71 { 72 A[i] = pointwise_multiply(G, F[i]); 73 B += squared(real(F[i]))+squared(imag(F[i])); 74 } 75 76 position = p; 77 78 // now do the scale space stuff 79 make_scale_space(img, Fs); 80 for (unsigned long i = 0; i < Fs.size(); ++i) 81 fft_inplace(Fs[i]); 82 make_scale_target_location_image(get_num_scale_levels()/2, Gs); 83 Bs.set_size(0); 84 As.resize(Fs.size()); 85 for (unsigned long i = 0; i < Fs.size(); ++i) 86 { 87 As[i] = pointwise_multiply(Gs, Fs[i]); 88 Bs += squared(real(Fs[i]))+squared(imag(Fs[i])); 89 } 90 } 91 92 get_filter_size()93 unsigned long get_filter_size ( 94 ) const { return filter_size; } 95 get_num_scale_levels()96 unsigned long get_num_scale_levels( 97 ) const { return num_scale_levels; } 98 get_scale_window_size()99 unsigned long get_scale_window_size ( 100 ) const { return scale_window_size; } 101 get_regularizer_space()102 double get_regularizer_space ( 103 ) const { return regularizer_space; } get_nu_space()104 inline double get_nu_space ( 105 ) const { return nu_space;} 106 get_regularizer_scale()107 double get_regularizer_scale ( 108 ) const { return regularizer_scale; } get_nu_scale()109 double get_nu_scale ( 110 ) const { return nu_scale;} 111 get_position()112 drectangle get_position ( 113 ) const 114 { 115 return position; 116 } 117 get_scale_pyramid_alpha()118 double get_scale_pyramid_alpha ( 119 ) const { return scale_pyramid_alpha; } 120 121 122 template <typename image_type> update_noscale(const image_type & img,const drectangle & guess)123 double update_noscale( 124 const image_type& img, 125 const drectangle& guess 126 ) 127 { 128 DLIB_CASSERT(get_position().is_empty() == false, 129 "\t double correlation_tracker::update()" 130 << "\n\t You must call start_track() first before calling update()." 131 ); 132 133 134 const point_transform_affine tform = make_chip(img, guess, F); 135 for (unsigned long i = 0; i < F.size(); ++i) 136 fft_inplace(F[i]); 137 138 // use the current filter to predict the object's location 139 G = 0; 140 for (unsigned long i = 0; i < F.size(); ++i) 141 G += pointwise_multiply(F[i],conj(A[i])); 142 G = pointwise_multiply(G, reciprocal(B+get_regularizer_space())); 143 ifft_inplace(G); 144 const dlib::vector<double,2> pp = max_point_interpolated(real(G)); 145 146 147 // Compute the peak to side lobe ratio. 148 const point p = pp; 149 running_stats<double> rs; 150 const rectangle peak = centered_rect(p, 8,8); 151 for (long r = 0; r < G.nr(); ++r) 152 { 153 for (long c = 0; c < G.nc(); ++c) 154 { 155 if (!peak.contains(point(c,r))) 156 rs.add(G(r,c).real()); 157 } 158 } 159 const double psr = (G(p.y(),p.x()).real()-rs.mean())/rs.stddev(); 160 161 // update the position of the object 162 position = translate_rect(guess, tform(pp)-center(guess)); 163 164 // now update the position filters 165 make_target_location_image(pp, G); 166 B *= (1-get_nu_space()); 167 for (unsigned long i = 0; i < F.size(); ++i) 168 { 169 A[i] = get_nu_space()*pointwise_multiply(G, F[i]) + (1-get_nu_space())*A[i]; 170 B += get_nu_space()*(squared(real(F[i]))+squared(imag(F[i]))); 171 } 172 173 return psr; 174 } 175 176 template <typename image_type> update(const image_type & img,const drectangle & guess)177 double update ( 178 const image_type& img, 179 const drectangle& guess 180 ) 181 { 182 double psr = update_noscale(img, guess); 183 184 // Now predict the scale change 185 make_scale_space(img, Fs); 186 for (unsigned long i = 0; i < Fs.size(); ++i) 187 fft_inplace(Fs[i]); 188 Gs = 0; 189 for (unsigned long i = 0; i < Fs.size(); ++i) 190 Gs += pointwise_multiply(Fs[i],conj(As[i])); 191 Gs = pointwise_multiply(Gs, reciprocal(Bs+get_regularizer_scale())); 192 ifft_inplace(Gs); 193 const double pos = max_point_interpolated(real(Gs)).y(); 194 195 // update the rectangle's scale 196 position *= std::pow(get_scale_pyramid_alpha(), pos-(double)get_num_scale_levels()/2); 197 198 199 200 // Now update the scale filters 201 make_scale_target_location_image(pos, Gs); 202 Bs *= (1-get_nu_scale()); 203 for (unsigned long i = 0; i < Fs.size(); ++i) 204 { 205 As[i] = get_nu_scale()*pointwise_multiply(Gs, Fs[i]) + (1-get_nu_scale())*As[i]; 206 Bs += get_nu_scale()*(squared(real(Fs[i]))+squared(imag(Fs[i]))); 207 } 208 209 210 return psr; 211 } 212 213 template <typename image_type> update_noscale(const image_type & img)214 double update_noscale ( 215 const image_type& img 216 ) 217 { 218 return update_noscale(img, get_position()); 219 } 220 221 template <typename image_type> update(const image_type & img)222 double update( 223 const image_type& img 224 ) 225 { 226 return update(img, get_position()); 227 } 228 229 private: 230 231 template <typename image_type> make_scale_space(const image_type & img,std::vector<matrix<std::complex<double>,0,1>> & Fs)232 void make_scale_space( 233 const image_type& img, 234 std::vector<matrix<std::complex<double>,0,1> >& Fs 235 ) const 236 { 237 typedef typename image_traits<image_type>::pixel_type pixel_type; 238 239 // Make an image pyramid and put it into the chips array. 240 const long chip_size = get_scale_window_size(); 241 drectangle ppp = position*std::pow(get_scale_pyramid_alpha(), -(double)get_num_scale_levels()/2); 242 dlib::array<array2d<pixel_type> > chips; 243 std::vector<dlib::vector<double,2> > from_points, to_points; 244 from_points.push_back(point(0,0)); 245 from_points.push_back(point(chip_size-1,0)); 246 from_points.push_back(point(chip_size-1,chip_size-1)); 247 for (unsigned long i = 0; i < get_num_scale_levels(); ++i) 248 { 249 array2d<pixel_type> chip(chip_size,chip_size); 250 251 // pull box into chip 252 to_points.clear(); 253 to_points.push_back(ppp.tl_corner()); 254 to_points.push_back(ppp.tr_corner()); 255 to_points.push_back(ppp.br_corner()); 256 transform_image(img,chip,interpolate_bilinear(),find_affine_transform(from_points, to_points)); 257 258 chips.push_back(chip); 259 ppp *= get_scale_pyramid_alpha(); 260 } 261 262 263 // extract HOG for each chip 264 dlib::array<dlib::array<array2d<float> > > hogs(chips.size()); 265 for (unsigned long i = 0; i < chips.size(); ++i) 266 { 267 extract_fhog_features(chips[i], hogs[i], 4); 268 hogs[i].resize(32); 269 assign_image(hogs[i][31], chips[i]); 270 assign_image(hogs[i][31], mat(hogs[i][31])/255.0); 271 } 272 273 // Now copy the hog features into the Fs outputs and also apply the cosine 274 // windowing. 275 Fs.resize(hogs[0].size()*hogs[0][0].size()); 276 unsigned long i = 0; 277 for (long r = 0; r < hogs[0][0].nr(); ++r) 278 { 279 for (long c = 0; c < hogs[0][0].nc(); ++c) 280 { 281 for (unsigned long j = 0; j < hogs[0].size(); ++j) 282 { 283 Fs[i].set_size(hogs.size()); 284 for (unsigned long k = 0; k < hogs.size(); ++k) 285 { 286 Fs[i](k) = hogs[k][j][r][c]*scale_cos_mask[k]; 287 } 288 ++i; 289 } 290 } 291 } 292 } 293 294 template <typename image_type> make_chip(const image_type & img,drectangle p,std::vector<matrix<std::complex<double>>> & chip)295 point_transform_affine make_chip ( 296 const image_type& img, 297 drectangle p, 298 std::vector<matrix<std::complex<double> > >& chip 299 ) const 300 { 301 typedef typename image_traits<image_type>::pixel_type pixel_type; 302 array2d<pixel_type> temp; 303 const double padding = 1.4; 304 const chip_details details(p*padding, chip_dims(get_filter_size(), get_filter_size())); 305 extract_image_chip(img, details, temp); 306 307 308 chip.resize(32); 309 dlib::array<array2d<float> > hog; 310 extract_fhog_features(temp, hog, 1, 3,3 ); 311 for (unsigned long i = 0; i < hog.size(); ++i) 312 assign_image(chip[i], pointwise_multiply(matrix_cast<double>(mat(hog[i])), mask)); 313 314 assign_image(chip[31], temp); 315 assign_image(chip[31], pointwise_multiply(mat(chip[31]), mask)/255.0); 316 317 return inv(get_mapping_to_chip(details)); 318 } 319 make_target_location_image(const dlib::vector<double,2> & p,matrix<std::complex<double>> & g)320 void make_target_location_image ( 321 const dlib::vector<double,2>& p, 322 matrix<std::complex<double> >& g 323 ) const 324 { 325 g.set_size(get_filter_size(), get_filter_size()); 326 g = 0; 327 rectangle area = centered_rect(p, 21,21).intersect(get_rect(g)); 328 for (long r = area.top(); r <= area.bottom(); ++r) 329 { 330 for (long c = area.left(); c <= area.right(); ++c) 331 { 332 double dist = length(point(c,r)-p); 333 g(r,c) = std::exp(-dist/3.0); 334 } 335 } 336 fft_inplace(g); 337 g = conj(g); 338 } 339 340 make_scale_target_location_image(const double scale,matrix<std::complex<double>,0,1> & g)341 void make_scale_target_location_image ( 342 const double scale, 343 matrix<std::complex<double>,0,1>& g 344 ) const 345 { 346 g.set_size(get_num_scale_levels()); 347 for (long i = 0; i < g.size(); ++i) 348 { 349 double dist = std::pow((i-scale),2.0); 350 g(i) = std::exp(-dist/1.000); 351 } 352 fft_inplace(g); 353 g = conj(g); 354 } 355 make_cosine_mask()356 matrix<double> make_cosine_mask ( 357 ) const 358 { 359 const long size = get_filter_size(); 360 matrix<double> temp(size,size); 361 point cent = center(get_rect(temp)); 362 for (long r = 0; r < temp.nr(); ++r) 363 { 364 for (long c = 0; c < temp.nc(); ++c) 365 { 366 point delta = point(c,r)-cent; 367 double dist = length(delta)/(size/2.0)*(pi/2); 368 dist = std::min(dist*1.0, pi/2); 369 370 temp(r,c) = std::cos(dist); 371 } 372 } 373 return temp; 374 } 375 376 377 std::vector<matrix<std::complex<double> > > A, F; 378 matrix<double> B; 379 380 std::vector<matrix<std::complex<double>,0,1> > As, Fs; 381 matrix<double,0,1> Bs; 382 drectangle position; 383 384 matrix<double> mask; 385 std::vector<double> scale_cos_mask; 386 387 // G and Gs do not logically contribute to the state of this object. They are 388 // here just so we can void reallocating them over and over. 389 matrix<std::complex<double> > G; 390 matrix<std::complex<double>,0,1> Gs; 391 392 unsigned long filter_size; 393 unsigned long num_scale_levels; 394 unsigned long scale_window_size; 395 double regularizer_space; 396 double nu_space; 397 double regularizer_scale; 398 double nu_scale; 399 double scale_pyramid_alpha; 400 }; 401 } 402 403 #endif // DLIB_CORRELATION_TrACKER_H_ 404 405