1 // The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
2 /*
3     This example shows how you can use the dlib machine learning tools to make
4     an object tracker.  Depending on your tracking application there can be a
5     lot of components to a tracker.  However, a central element of many trackers
6     is the "detection to track" association step and this is the part of the
7     tracker we discuss in this example.  Therefore, in the code below we define
8     simple detection and track structures and then go through the steps needed
9     to learn, using training data, how to best associate detections to tracks.
10 
11     It should be noted that these tools are implemented essentially as wrappers
12     around the more general assignment learning tools present in dlib.  So if
13     you want to get an idea of how they work under the covers you should read
14     the assignment_learning_ex.cpp example program and its supporting
15     documentation.  However, to just use the learning-to-track tools you won't
16     need to understand these implementation details.
17 */
18 
19 
20 #include <iostream>
21 #include <dlib/svm_threaded.h>
22 #include <dlib/rand.h>
23 
24 using namespace std;
25 using namespace dlib;
26 
27 // ----------------------------------------------------------------------------------------
28 
29 struct detection
30 {
31     /*
32         When you use these tools you need to define two structures.  One represents a
33         detection and another a track.  In this example we call these structures detection
34         and track but you can name them however you like.  Moreover, You can put anything
35         you want in your detection structure.  The only requirement is that detection be
36         copyable and contain a public typedef named track_type that tells us the track type
37         meant for use with this detection object.
38     */
39     typedef struct track track_type;
40 
41 
42 
43     // Again, note that this field is NOT REQUIRED by the dlib tools.  You can put whatever
44     // you want in your detection object.  Here we are including a column vector of
45     // measurements from the sensor that generated the detection.  In this example we don't
46     // have a real sensor so we will simulate a very basic one using a random number
47     // generator.   But the idea is that you should be able to use the contents of your
48     // detection to somehow tell which track it goes with.  So these numbers should contain
49     // some identifying information about the real world object that caused this detection.
50     matrix<double,0,1> measurements;
51 };
52 
53 
54 struct track
55 {
56     /*
57         Here we define our corresponding track object.  This object has more requirements
58         than the detection.  In particular, the dlib machine learning tools require it to
59         have the following elements:
60             - A typedef named feature_vector_type
61             - It should be copyable and default constructable
62             - The three functions: get_similarity_features(), update_track(), and propagate_track()
63 
64         Just like the detection object, you can also add any additional fields you like.
65         In this example we keep it simple and say that a track maintains only a copy of the
66         most recent sensor measurements it has seen and also a number telling us how long
67         it has been since the track was updated with a detection.
68     */
69 
70     // This type should be a dlib::matrix capable of storing column vectors or an
71     // unsorted sparse vector type such as std::vector<std::pair<unsigned long,double>>.
72     typedef matrix<double,0,1> feature_vector_type;
73 
tracktrack74     track()
75     {
76         time_since_last_association = 0;
77     }
78 
get_similarity_featurestrack79     void get_similarity_features(const detection& det, feature_vector_type& feats) const
80     {
81         /*
82             The get_similarity_features() function takes a detection and outputs a feature
83             vector that tells the machine learning tools how "similar" the detection is to
84             the track.  The idea here is to output a set of numbers (i.e. the contents of
85             feats) that can be used to decide if det should be associated with this track.
86             In this example we output the difference between the last sensor measurements
87             for this track and the detection's measurements.  This works since we expect
88             the sensor measurements to be relatively constant for each track because that's
89             how our simple sensor simulator in this example works.  However, in a real
90             world application it's likely to be much more complex.  But here we keep things
91             simple.
92 
93             It should also be noted that get_similarity_features() must always output
94             feature vectors with the same number of dimensions.  Finally, the machine
95             learning tools are going to learn a linear function of feats and use that to
96             predict if det should associate to this track.  So try and define features that
97             you think would work in a linear function.  There are all kinds of ways to do
98             this.  If you want to get really clever about it you can even use kernel
99             methods like the empirical_kernel_map (see empirical_kernel_map_ex.cpp).  I
100             would start out with something simple first though.
101         */
102         feats = abs(last_measurements - det.measurements);
103     }
104 
update_tracktrack105     void update_track(const detection& det)
106     {
107         /*
108             This function is called when the dlib tools have decided that det should be
109             associated with this track.  So the point of update_track() is to, as the name
110             suggests, update the track with the given detection.  In general, you can do
111             whatever you want in this function.  Here we simply record the last measurement
112             state and reset the time since last association.
113         */
114         last_measurements = det.measurements;
115         time_since_last_association = 0;
116     }
117 
propagate_tracktrack118     void propagate_track()
119     {
120         /*
121             This function is called when the dlib tools have decided, for the current time
122             step, that none of the available detections associate with this track.  So the
123             point of this function is to perform a track update without a detection.  To
124             say that another way.  Every time you ask the dlib tools to perform detection
125             to track association they will update each track by calling either
126             update_track() or propagate_track().  Which function they call depends on
127             whether or not a detection was associated to the track.
128         */
129         ++time_since_last_association;
130     }
131 
132     matrix<double,0,1> last_measurements;
133     unsigned long time_since_last_association;
134 };
135 
136 // ----------------------------------------------------------------------------------------
137 
138 /*
139     Now that we have defined our detection and track structures we are going to define our
140     sensor simulator.  In it we will imagine that there are num_objects things in the world
141     and those things generate detections from our sensor.  Moreover, each detection from
142     the sensor comes with a measurement vector with num_properties elements.
143 
    So the first function, initialize_object_properties(), just randomly generates a
    property vector for each of the num_objects objects and saves them in a global
    variable.  Then when we are generating detections we will output copies of these
    property vectors that have been corrupted by a little bit of random noise.
148 */
149 
// Random number generator shared by the simulation.  It is used both to create the
// object properties and to add noise to the sampled detections.
dlib::rand rnd;
// Number of distinct objects that exist in the simulated world.
const long num_objects = 4;
// Number of elements in each object's property vector, and therefore in each
// detection's measurement vector.
const long num_properties = 6;
// Property vector of each object, indexed by object ID.  It is created holding
// num_objects default-constructed entries which initialize_object_properties() fills in.
std::vector<matrix<double,0,1> > object_properties(num_objects);
154 
initialize_object_properties()155 void initialize_object_properties()
156 {
157     for (unsigned long i = 0; i < object_properties.size(); ++i)
158         object_properties[i] = randm(num_properties,1,rnd);
159 }
160 
161 // So here is our function that samples a detection from our simulated sensor.  You tell it
162 // what object you want to sample a detection from and it returns a detection from that
163 // object.
sample_detection_from_sensor(long object_id)164 detection sample_detection_from_sensor(long object_id)
165 {
166     DLIB_CASSERT(object_id < num_objects,
167         "You can't ask to sample a detection from an object that doesn't exist.");
168     detection temp;
169     // Set the measurements equal to the object's true property values plus a little bit of
170     // noise.
171     temp.measurements = object_properties[object_id] + randm(num_properties,1,rnd)*0.1;
172     return temp;
173 }
174 
175 // ----------------------------------------------------------------------------------------
176 
// The labeled detections observed at one single time step.
typedef std::vector<labeled_detection<detection> > detections_at_single_time_step;
// A sequence of time steps, i.e. one complete labeled tracking history.
typedef std::vector<detections_at_single_time_step> track_history;
179 
make_random_tracking_data_for_training()180 track_history make_random_tracking_data_for_training()
181 {
182     /*
183         Since we are using machine learning we need some training data.  This function
184         samples data from our sensor and creates labeled track histories.  In these track
185         histories, each detection is labeled with its true track ID.  The goal of the
186         machine learning tools will then be to learn to associate all the detections with
187         the same ID to the same track object.
188     */
189 
190     track_history data;
191 
192     // At each time step we get a set of detections from the objects in the world.
193     // Simulate 100 time steps worth of data where there are 3 objects present.
194     const int num_time_steps = 100;
195     for (int i = 0; i < num_time_steps; ++i)
196     {
197         detections_at_single_time_step dets(3);
198         // sample a detection from object 0
199         dets[0].det = sample_detection_from_sensor(0);
200         dets[0].label = 0;
201 
202         // sample a detection from object 1
203         dets[1].det = sample_detection_from_sensor(1);
204         dets[1].label = 1;
205 
206         // sample a detection from object 2
207         dets[2].det = sample_detection_from_sensor(2);
208         dets[2].label = 2;
209 
210         data.push_back(dets);
211     }
212 
213     // Now let's imagine object 1 and 2 are gone but a new object, object 3 has arrived.
214     for (int i = 0; i < num_time_steps; ++i)
215     {
216         detections_at_single_time_step dets(2);
217         // sample a detection from object 0
218         dets[0].det = sample_detection_from_sensor(0);
219         dets[0].label = 0;
220 
221         // sample a detection from object 3
222         dets[1].det = sample_detection_from_sensor(3);
223         dets[1].label = 3;
224 
225         data.push_back(dets);
226     }
227 
228     return data;
229 }
230 
231 // ----------------------------------------------------------------------------------------
232 
make_random_detections(long num_dets)233 std::vector<detection> make_random_detections(long num_dets)
234 {
235     /*
236         Finally, when we test the tracker we learned we will need to sample regular old
237         unlabeled detections.  This function helps us do that.
238     */
239     DLIB_CASSERT(num_dets <= num_objects,
240         "You can't ask for more detections than there are objects in our little simulation.");
241 
242     std::vector<detection> dets(num_dets);
243     for (unsigned long i = 0; i < dets.size(); ++i)
244     {
245         dets[i] = sample_detection_from_sensor(i);
246     }
247     return dets;
248 }
249 
250 // ----------------------------------------------------------------------------------------
251 
main()252 int main()
253 {
254     initialize_object_properties();
255 
256 
257     // Get some training data.  Here we sample 5 independent track histories.  In a real
258     // world problem you would get this kind of data by, for example, collecting data from
259     // your sensor on 5 separate days where you did an independent collection each day.
260     // You can train a model with just one track history but the more you have the better.
261     std::vector<track_history> data;
262     data.push_back(make_random_tracking_data_for_training());
263     data.push_back(make_random_tracking_data_for_training());
264     data.push_back(make_random_tracking_data_for_training());
265     data.push_back(make_random_tracking_data_for_training());
266     data.push_back(make_random_tracking_data_for_training());
267 
268 
269     structural_track_association_trainer trainer;
270     // Note that the machine learning tools have a parameter.  This is the usual SVM C
271     // parameter that controls the trade-off between trying to fit the training data or
272     // producing a "simpler" solution.  You need to try a few different values of this
273     // parameter to find out what setting works best for your problem (try values in the
274     // range 0.001 to 1000000).
275     trainer.set_c(100);
276     // Now do the training.
277     track_association_function<detection> assoc = trainer.train(data);
278 
279     // We can test the accuracy of the learned association function on some track history
280     // data.  Here we test it on the data we trained on.  It outputs a single number that
281     // measures the fraction of detections which were correctly associated to their tracks.
282     // So a value of 1 indicates perfect tracking and a value of 0 indicates totally wrong
283     // tracking.
284     cout << "Association accuracy on training data: "<< test_track_association_function(assoc, data) << endl;
285     // It's very important to test the output of a machine learning method on data it
286     // wasn't trained on.  You can do that by calling test_track_association_function() on
287     // held out data.  You can also use cross-validation like so:
288     cout << "Association accuracy from 5-fold CV:   "<< cross_validate_track_association_trainer(trainer, data, 5) << endl;
289     // Unsurprisingly, the testing functions show that the assoc function we learned
290     // perfectly associates all detections to tracks in this easy data.
291 
292 
293 
294 
295     // OK.  So how do you use this assoc thing?  Let's use it to do some tracking!
296 
297     // tracks contains all our current tracks.  Initially it is empty.
298     std::vector<track> tracks;
299     cout << "number of tracks: "<< tracks.size() << endl;
300 
301     // Sample detections from 3 objects.
302     std::vector<detection> dets = make_random_detections(3);
303     // Calling assoc(), the function we just learned, performs the detection to track
304     // association.  It will also call each track's update_track() function with the
305     // associated detection.  For tracks that don't get a detection, it calls
306     // propagate_track().
307     assoc(tracks, dets);
308     // Now there are 3 things in tracks.
309     cout << "number of tracks: "<< tracks.size() << endl;
310 
311     // Run the tracker for a few more time steps...
312     dets = make_random_detections(3);
313     assoc(tracks, dets);
314     cout << "number of tracks: "<< tracks.size() << endl;
315 
316     dets = make_random_detections(3);
317     assoc(tracks, dets);
318     cout << "number of tracks: "<< tracks.size() << endl;
319 
320     // Now another object has appeared!  There are 4 objects now.
321     dets = make_random_detections(4);
322     assoc(tracks, dets);
323     // Now there are 4 tracks instead of 3!
324     cout << "number of tracks: "<< tracks.size() << endl;
325 
326     // That 4th object just vanished.  Let's look at the time_since_last_association values
327     // for each track.  We will see that one of the tracks isn't getting updated with
328     // detections anymore since the object it corresponds to is no longer present.
329     dets = make_random_detections(3);
330     assoc(tracks, dets);
331     cout << "number of tracks: "<< tracks.size() << endl;
332     for (unsigned long i = 0; i < tracks.size(); ++i)
333         cout << "   time since last association: "<< tracks[i].time_since_last_association << endl;
334 
335     dets = make_random_detections(3);
336     assoc(tracks, dets);
337     cout << "number of tracks: "<< tracks.size() << endl;
338     for (unsigned long i = 0; i < tracks.size(); ++i)
339         cout << "   time since last association: "<< tracks[i].time_since_last_association << endl;
340 
341 
342 
343 
344 
345 
346     // Finally, you can save your track_association_function to disk like so:
347     serialize("track_assoc.svm") << assoc;
348 
349     // And recall it from disk later like so:
350     deserialize("track_assoc.svm") >> assoc;
351 }
352 
353 // ----------------------------------------------------------------------------------------
354 
355