1 // Copyright (C) 2015  Davis E. King (davis@dlib.net)
2 // License: Boost Software License   See LICENSE.txt for the full license.
3 #ifndef DLIB_CORRELATION_TrACKER_H_
4 #define DLIB_CORRELATION_TrACKER_H_
5 
6 #include "correlation_tracker_abstract.h"
7 #include "../geometry.h"
8 #include "../matrix.h"
9 #include "../array2d.h"
10 #include "../image_transforms/assign_image.h"
11 #include "../image_transforms/interpolation.h"
12 
13 
14 namespace dlib
15 {
16 
17 // ----------------------------------------------------------------------------------------
18 
19     class correlation_tracker
20     {
21     public:
22 
23         explicit correlation_tracker (unsigned long filter_size = 6,
24             unsigned long num_scale_levels = 5,
25             unsigned long scale_window_size = 23,
26             double regularizer_space = 0.001,
27             double nu_space = 0.025,
28             double regularizer_scale = 0.001,
29             double nu_scale = 0.025,
30             double scale_pyramid_alpha = 1.020
31         )
32             : filter_size(1 << filter_size), num_scale_levels(1 << num_scale_levels),
33             scale_window_size(scale_window_size),
34             regularizer_space(regularizer_space), nu_space(nu_space),
35             regularizer_scale(regularizer_scale), nu_scale(nu_scale),
36             scale_pyramid_alpha(scale_pyramid_alpha)
37         {
38             // Create the cosine mask used for space filtering.
39             mask = make_cosine_mask();
40 
41             // Create the cosine mask used for the scale filtering.
42             scale_cos_mask.resize(get_num_scale_levels());
43             const long max_level = get_num_scale_levels()/2;
44             for (unsigned long k = 0; k < get_num_scale_levels(); ++k)
45             {
46                 double dist = std::abs((double)k-max_level)/max_level*pi/2;
47                 dist = std::min(dist, pi/2);
48                 scale_cos_mask[k] = std::cos(dist);
49             }
50         }
51 
52         template <typename image_type>
start_track(const image_type & img,const drectangle & p)53         void start_track (
54             const image_type& img,
55             const drectangle& p
56         )
57         {
58             DLIB_CASSERT(p.is_empty() == false,
59                 "\t void correlation_tracker::start_track()"
60                 << "\n\t You can't give an empty rectangle."
61             );
62 
63             B.set_size(0,0);
64 
65             point_transform_affine tform = inv(make_chip(img, p, F));
66             for (unsigned long i = 0; i < F.size(); ++i)
67                 fft_inplace(F[i]);
68             make_target_location_image(tform(center(p)), G);
69             A.resize(F.size());
70             for (unsigned long i = 0; i < F.size(); ++i)
71             {
72                 A[i] = pointwise_multiply(G, F[i]);
73                 B += squared(real(F[i]))+squared(imag(F[i]));
74             }
75 
76             position = p;
77 
78             // now do the scale space stuff
79             make_scale_space(img, Fs);
80             for (unsigned long i = 0; i < Fs.size(); ++i)
81                 fft_inplace(Fs[i]);
82             make_scale_target_location_image(get_num_scale_levels()/2, Gs);
83             Bs.set_size(0);
84             As.resize(Fs.size());
85             for (unsigned long i = 0; i < Fs.size(); ++i)
86             {
87                 As[i] = pointwise_multiply(Gs, Fs[i]);
88                 Bs += squared(real(Fs[i]))+squared(imag(Fs[i]));
89             }
90         }
91 
92 
get_filter_size()93         unsigned long get_filter_size (
94         ) const { return filter_size; }
95 
get_num_scale_levels()96         unsigned long get_num_scale_levels(
97         ) const { return num_scale_levels; }
98 
get_scale_window_size()99         unsigned long get_scale_window_size (
100         ) const { return scale_window_size; }
101 
get_regularizer_space()102         double get_regularizer_space (
103         ) const { return regularizer_space; }
get_nu_space()104         inline double get_nu_space (
105         ) const { return nu_space;}
106 
get_regularizer_scale()107         double get_regularizer_scale (
108         ) const { return regularizer_scale; }
get_nu_scale()109         double get_nu_scale (
110         ) const { return nu_scale;}
111 
get_position()112         drectangle get_position (
113         ) const
114         {
115             return position;
116         }
117 
get_scale_pyramid_alpha()118         double get_scale_pyramid_alpha (
119         ) const { return scale_pyramid_alpha; }
120 
121 
122         template <typename image_type>
update_noscale(const image_type & img,const drectangle & guess)123         double update_noscale(
124             const image_type& img,
125             const drectangle& guess
126         )
127         {
128             DLIB_CASSERT(get_position().is_empty() == false,
129                 "\t double correlation_tracker::update()"
130                 << "\n\t You must call start_track() first before calling update()."
131             );
132 
133 
134             const point_transform_affine tform = make_chip(img, guess, F);
135             for (unsigned long i = 0; i < F.size(); ++i)
136                 fft_inplace(F[i]);
137 
138             // use the current filter to predict the object's location
139             G = 0;
140             for (unsigned long i = 0; i < F.size(); ++i)
141                 G += pointwise_multiply(F[i],conj(A[i]));
142             G = pointwise_multiply(G, reciprocal(B+get_regularizer_space()));
143             ifft_inplace(G);
144             const dlib::vector<double,2> pp = max_point_interpolated(real(G));
145 
146 
147             // Compute the peak to side lobe ratio.
148             const point p = pp;
149             running_stats<double> rs;
150             const rectangle peak = centered_rect(p, 8,8);
151             for (long r = 0; r < G.nr(); ++r)
152             {
153                 for (long c = 0; c < G.nc(); ++c)
154                 {
155                     if (!peak.contains(point(c,r)))
156                         rs.add(G(r,c).real());
157                 }
158             }
159             const double psr = (G(p.y(),p.x()).real()-rs.mean())/rs.stddev();
160 
161             // update the position of the object
162             position = translate_rect(guess, tform(pp)-center(guess));
163 
164             // now update the position filters
165             make_target_location_image(pp, G);
166             B *= (1-get_nu_space());
167             for (unsigned long i = 0; i < F.size(); ++i)
168             {
169                 A[i] = get_nu_space()*pointwise_multiply(G, F[i]) + (1-get_nu_space())*A[i];
170                 B += get_nu_space()*(squared(real(F[i]))+squared(imag(F[i])));
171             }
172 
173             return psr;
174         }
175 
176         template <typename image_type>
update(const image_type & img,const drectangle & guess)177         double update (
178             const image_type& img,
179             const drectangle& guess
180         )
181         {
182             double psr = update_noscale(img, guess);
183 
184             // Now predict the scale change
185             make_scale_space(img, Fs);
186             for (unsigned long i = 0; i < Fs.size(); ++i)
187                 fft_inplace(Fs[i]);
188             Gs = 0;
189             for (unsigned long i = 0; i < Fs.size(); ++i)
190                 Gs += pointwise_multiply(Fs[i],conj(As[i]));
191             Gs = pointwise_multiply(Gs, reciprocal(Bs+get_regularizer_scale()));
192             ifft_inplace(Gs);
193             const double pos = max_point_interpolated(real(Gs)).y();
194 
195             // update the rectangle's scale
196             position *= std::pow(get_scale_pyramid_alpha(), pos-(double)get_num_scale_levels()/2);
197 
198 
199 
200             // Now update the scale filters
201             make_scale_target_location_image(pos, Gs);
202             Bs *= (1-get_nu_scale());
203             for (unsigned long i = 0; i < Fs.size(); ++i)
204             {
205                 As[i] = get_nu_scale()*pointwise_multiply(Gs, Fs[i]) + (1-get_nu_scale())*As[i];
206                 Bs += get_nu_scale()*(squared(real(Fs[i]))+squared(imag(Fs[i])));
207             }
208 
209 
210             return psr;
211         }
212 
213         template <typename image_type>
update_noscale(const image_type & img)214         double update_noscale (
215             const image_type& img
216         )
217         {
218             return update_noscale(img, get_position());
219         }
220 
221         template <typename image_type>
update(const image_type & img)222         double update(
223             const image_type& img
224             )
225         {
226             return update(img, get_position());
227         }
228 
229     private:
230 
231         template <typename image_type>
make_scale_space(const image_type & img,std::vector<matrix<std::complex<double>,0,1>> & Fs)232         void make_scale_space(
233             const image_type& img,
234             std::vector<matrix<std::complex<double>,0,1> >& Fs
235         ) const
236         {
237             typedef typename image_traits<image_type>::pixel_type pixel_type;
238 
239             // Make an image pyramid and put it into the chips array.
240             const long chip_size = get_scale_window_size();
241             drectangle ppp = position*std::pow(get_scale_pyramid_alpha(), -(double)get_num_scale_levels()/2);
242             dlib::array<array2d<pixel_type> > chips;
243             std::vector<dlib::vector<double,2> > from_points, to_points;
244             from_points.push_back(point(0,0));
245             from_points.push_back(point(chip_size-1,0));
246             from_points.push_back(point(chip_size-1,chip_size-1));
247             for (unsigned long i = 0; i < get_num_scale_levels(); ++i)
248             {
249                 array2d<pixel_type> chip(chip_size,chip_size);
250 
251                 // pull box into chip
252                 to_points.clear();
253                 to_points.push_back(ppp.tl_corner());
254                 to_points.push_back(ppp.tr_corner());
255                 to_points.push_back(ppp.br_corner());
256                 transform_image(img,chip,interpolate_bilinear(),find_affine_transform(from_points, to_points));
257 
258                 chips.push_back(chip);
259                 ppp *= get_scale_pyramid_alpha();
260             }
261 
262 
263             // extract HOG for each chip
264             dlib::array<dlib::array<array2d<float> > > hogs(chips.size());
265             for (unsigned long i = 0; i < chips.size(); ++i)
266             {
267                 extract_fhog_features(chips[i], hogs[i], 4);
268                 hogs[i].resize(32);
269                 assign_image(hogs[i][31], chips[i]);
270                 assign_image(hogs[i][31], mat(hogs[i][31])/255.0);
271             }
272 
273             // Now copy the hog features into the Fs outputs and also apply the cosine
274             // windowing.
275             Fs.resize(hogs[0].size()*hogs[0][0].size());
276             unsigned long i = 0;
277             for (long r = 0; r < hogs[0][0].nr(); ++r)
278             {
279                 for (long c = 0; c < hogs[0][0].nc(); ++c)
280                 {
281                     for (unsigned long j = 0; j < hogs[0].size(); ++j)
282                     {
283                         Fs[i].set_size(hogs.size());
284                         for (unsigned long k = 0; k < hogs.size(); ++k)
285                         {
286                             Fs[i](k) = hogs[k][j][r][c]*scale_cos_mask[k];
287                         }
288                         ++i;
289                     }
290                 }
291             }
292         }
293 
294         template <typename image_type>
make_chip(const image_type & img,drectangle p,std::vector<matrix<std::complex<double>>> & chip)295         point_transform_affine make_chip (
296             const image_type& img,
297             drectangle p,
298             std::vector<matrix<std::complex<double> > >& chip
299         ) const
300         {
301             typedef typename image_traits<image_type>::pixel_type pixel_type;
302             array2d<pixel_type> temp;
303             const double padding = 1.4;
304             const chip_details details(p*padding, chip_dims(get_filter_size(), get_filter_size()));
305             extract_image_chip(img, details, temp);
306 
307 
308             chip.resize(32);
309             dlib::array<array2d<float> > hog;
310             extract_fhog_features(temp, hog, 1, 3,3 );
311             for (unsigned long i = 0; i < hog.size(); ++i)
312                 assign_image(chip[i], pointwise_multiply(matrix_cast<double>(mat(hog[i])), mask));
313 
314             assign_image(chip[31], temp);
315             assign_image(chip[31], pointwise_multiply(mat(chip[31]), mask)/255.0);
316 
317             return inv(get_mapping_to_chip(details));
318         }
319 
make_target_location_image(const dlib::vector<double,2> & p,matrix<std::complex<double>> & g)320         void make_target_location_image (
321             const dlib::vector<double,2>& p,
322             matrix<std::complex<double> >& g
323         ) const
324         {
325             g.set_size(get_filter_size(), get_filter_size());
326             g = 0;
327             rectangle area = centered_rect(p, 21,21).intersect(get_rect(g));
328             for (long r = area.top(); r <= area.bottom(); ++r)
329             {
330                 for (long c = area.left(); c <= area.right(); ++c)
331                 {
332                     double dist = length(point(c,r)-p);
333                     g(r,c) = std::exp(-dist/3.0);
334                 }
335             }
336             fft_inplace(g);
337             g = conj(g);
338         }
339 
340 
make_scale_target_location_image(const double scale,matrix<std::complex<double>,0,1> & g)341         void make_scale_target_location_image (
342             const double scale,
343             matrix<std::complex<double>,0,1>& g
344         ) const
345         {
346             g.set_size(get_num_scale_levels());
347             for (long i = 0; i < g.size(); ++i)
348             {
349                 double dist = std::pow((i-scale),2.0);
350                 g(i) = std::exp(-dist/1.000);
351             }
352             fft_inplace(g);
353             g = conj(g);
354         }
355 
make_cosine_mask()356         matrix<double> make_cosine_mask (
357         ) const
358         {
359             const long size = get_filter_size();
360             matrix<double> temp(size,size);
361             point cent = center(get_rect(temp));
362             for (long r = 0; r < temp.nr(); ++r)
363             {
364                 for (long c = 0; c < temp.nc(); ++c)
365                 {
366                     point delta = point(c,r)-cent;
367                     double dist = length(delta)/(size/2.0)*(pi/2);
368                     dist = std::min(dist*1.0, pi/2);
369 
370                     temp(r,c) = std::cos(dist);
371                 }
372             }
373             return temp;
374         }
375 
376 
377         std::vector<matrix<std::complex<double> > > A, F;
378         matrix<double> B;
379 
380         std::vector<matrix<std::complex<double>,0,1> > As, Fs;
381         matrix<double,0,1> Bs;
382         drectangle position;
383 
384         matrix<double> mask;
385         std::vector<double> scale_cos_mask;
386 
387         // G and Gs do not logically contribute to the state of this object.  They are
388         // here just so we can void reallocating them over and over.
389         matrix<std::complex<double> > G;
390         matrix<std::complex<double>,0,1> Gs;
391 
392         unsigned long filter_size;
393         unsigned long num_scale_levels;
394         unsigned long scale_window_size;
395         double regularizer_space;
396         double nu_space;
397         double regularizer_scale;
398         double nu_scale;
399         double scale_pyramid_alpha;
400     };
401 }
402 
403 #endif // DLIB_CORRELATION_TrACKER_H_
404 
405