1 // The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
2 /*
3 This example shows how you can use the dlib machine learning tools to make
4 an object tracker. Depending on your tracking application there can be a
5 lot of components to a tracker. However, a central element of many trackers
6 is the "detection to track" association step and this is the part of the
7 tracker we discuss in this example. Therefore, in the code below we define
8 simple detection and track structures and then go through the steps needed
9 to learn, using training data, how to best associate detections to tracks.
10
11 It should be noted that these tools are implemented essentially as wrappers
12 around the more general assignment learning tools present in dlib. So if
13 you want to get an idea of how they work under the covers you should read
14 the assignment_learning_ex.cpp example program and its supporting
15 documentation. However, to just use the learning-to-track tools you won't
16 need to understand these implementation details.
17 */
18
19
20 #include <iostream>
21 #include <dlib/svm_threaded.h>
22 #include <dlib/rand.h>
23
24 using namespace std;
25 using namespace dlib;
26
27 // ----------------------------------------------------------------------------------------
28
29 struct detection
30 {
31 /*
32 When you use these tools you need to define two structures. One represents a
33 detection and another a track. In this example we call these structures detection
34 and track but you can name them however you like. Moreover, You can put anything
35 you want in your detection structure. The only requirement is that detection be
36 copyable and contain a public typedef named track_type that tells us the track type
37 meant for use with this detection object.
38 */
39 typedef struct track track_type;
40
41
42
43 // Again, note that this field is NOT REQUIRED by the dlib tools. You can put whatever
44 // you want in your detection object. Here we are including a column vector of
45 // measurements from the sensor that generated the detection. In this example we don't
46 // have a real sensor so we will simulate a very basic one using a random number
47 // generator. But the idea is that you should be able to use the contents of your
48 // detection to somehow tell which track it goes with. So these numbers should contain
49 // some identifying information about the real world object that caused this detection.
50 matrix<double,0,1> measurements;
51 };
52
53
54 struct track
55 {
56 /*
57 Here we define our corresponding track object. This object has more requirements
58 than the detection. In particular, the dlib machine learning tools require it to
59 have the following elements:
60 - A typedef named feature_vector_type
61 - It should be copyable and default constructable
62 - The three functions: get_similarity_features(), update_track(), and propagate_track()
63
64 Just like the detection object, you can also add any additional fields you like.
65 In this example we keep it simple and say that a track maintains only a copy of the
66 most recent sensor measurements it has seen and also a number telling us how long
67 it has been since the track was updated with a detection.
68 */
69
70 // This type should be a dlib::matrix capable of storing column vectors or an
71 // unsorted sparse vector type such as std::vector<std::pair<unsigned long,double>>.
72 typedef matrix<double,0,1> feature_vector_type;
73
tracktrack74 track()
75 {
76 time_since_last_association = 0;
77 }
78
get_similarity_featurestrack79 void get_similarity_features(const detection& det, feature_vector_type& feats) const
80 {
81 /*
82 The get_similarity_features() function takes a detection and outputs a feature
83 vector that tells the machine learning tools how "similar" the detection is to
84 the track. The idea here is to output a set of numbers (i.e. the contents of
85 feats) that can be used to decide if det should be associated with this track.
86 In this example we output the difference between the last sensor measurements
87 for this track and the detection's measurements. This works since we expect
88 the sensor measurements to be relatively constant for each track because that's
89 how our simple sensor simulator in this example works. However, in a real
90 world application it's likely to be much more complex. But here we keep things
91 simple.
92
93 It should also be noted that get_similarity_features() must always output
94 feature vectors with the same number of dimensions. Finally, the machine
95 learning tools are going to learn a linear function of feats and use that to
96 predict if det should associate to this track. So try and define features that
97 you think would work in a linear function. There are all kinds of ways to do
98 this. If you want to get really clever about it you can even use kernel
99 methods like the empirical_kernel_map (see empirical_kernel_map_ex.cpp). I
100 would start out with something simple first though.
101 */
102 feats = abs(last_measurements - det.measurements);
103 }
104
update_tracktrack105 void update_track(const detection& det)
106 {
107 /*
108 This function is called when the dlib tools have decided that det should be
109 associated with this track. So the point of update_track() is to, as the name
110 suggests, update the track with the given detection. In general, you can do
111 whatever you want in this function. Here we simply record the last measurement
112 state and reset the time since last association.
113 */
114 last_measurements = det.measurements;
115 time_since_last_association = 0;
116 }
117
propagate_tracktrack118 void propagate_track()
119 {
120 /*
121 This function is called when the dlib tools have decided, for the current time
122 step, that none of the available detections associate with this track. So the
123 point of this function is to perform a track update without a detection. To
124 say that another way. Every time you ask the dlib tools to perform detection
125 to track association they will update each track by calling either
126 update_track() or propagate_track(). Which function they call depends on
127 whether or not a detection was associated to the track.
128 */
129 ++time_since_last_association;
130 }
131
132 matrix<double,0,1> last_measurements;
133 unsigned long time_since_last_association;
134 };
135
136 // ----------------------------------------------------------------------------------------
137
138 /*
139 Now that we have defined our detection and track structures we are going to define our
140 sensor simulator. In it we will imagine that there are num_objects things in the world
141 and those things generate detections from our sensor. Moreover, each detection from
142 the sensor comes with a measurement vector with num_properties elements.
143
So the first function, initialize_object_properties(), just randomly generates a
property vector for each of the num_objects objects and saves these vectors in a
global variable. Then when we are generating detections we will output copies of
these property vectors that have been corrupted by a little bit of random noise.
148 */
149
150 dlib::rand rnd;
151 const long num_objects = 4;
152 const long num_properties = 6;
153 std::vector<matrix<double,0,1> > object_properties(num_objects);
154
initialize_object_properties()155 void initialize_object_properties()
156 {
157 for (unsigned long i = 0; i < object_properties.size(); ++i)
158 object_properties[i] = randm(num_properties,1,rnd);
159 }
160
161 // So here is our function that samples a detection from our simulated sensor. You tell it
162 // what object you want to sample a detection from and it returns a detection from that
163 // object.
sample_detection_from_sensor(long object_id)164 detection sample_detection_from_sensor(long object_id)
165 {
166 DLIB_CASSERT(object_id < num_objects,
167 "You can't ask to sample a detection from an object that doesn't exist.");
168 detection temp;
169 // Set the measurements equal to the object's true property values plus a little bit of
170 // noise.
171 temp.measurements = object_properties[object_id] + randm(num_properties,1,rnd)*0.1;
172 return temp;
173 }
174
175 // ----------------------------------------------------------------------------------------
176
177 typedef std::vector<labeled_detection<detection> > detections_at_single_time_step;
178 typedef std::vector<detections_at_single_time_step> track_history;
179
make_random_tracking_data_for_training()180 track_history make_random_tracking_data_for_training()
181 {
182 /*
183 Since we are using machine learning we need some training data. This function
184 samples data from our sensor and creates labeled track histories. In these track
185 histories, each detection is labeled with its true track ID. The goal of the
186 machine learning tools will then be to learn to associate all the detections with
187 the same ID to the same track object.
188 */
189
190 track_history data;
191
192 // At each time step we get a set of detections from the objects in the world.
193 // Simulate 100 time steps worth of data where there are 3 objects present.
194 const int num_time_steps = 100;
195 for (int i = 0; i < num_time_steps; ++i)
196 {
197 detections_at_single_time_step dets(3);
198 // sample a detection from object 0
199 dets[0].det = sample_detection_from_sensor(0);
200 dets[0].label = 0;
201
202 // sample a detection from object 1
203 dets[1].det = sample_detection_from_sensor(1);
204 dets[1].label = 1;
205
206 // sample a detection from object 2
207 dets[2].det = sample_detection_from_sensor(2);
208 dets[2].label = 2;
209
210 data.push_back(dets);
211 }
212
213 // Now let's imagine object 1 and 2 are gone but a new object, object 3 has arrived.
214 for (int i = 0; i < num_time_steps; ++i)
215 {
216 detections_at_single_time_step dets(2);
217 // sample a detection from object 0
218 dets[0].det = sample_detection_from_sensor(0);
219 dets[0].label = 0;
220
221 // sample a detection from object 3
222 dets[1].det = sample_detection_from_sensor(3);
223 dets[1].label = 3;
224
225 data.push_back(dets);
226 }
227
228 return data;
229 }
230
231 // ----------------------------------------------------------------------------------------
232
make_random_detections(long num_dets)233 std::vector<detection> make_random_detections(long num_dets)
234 {
235 /*
236 Finally, when we test the tracker we learned we will need to sample regular old
237 unlabeled detections. This function helps us do that.
238 */
239 DLIB_CASSERT(num_dets <= num_objects,
240 "You can't ask for more detections than there are objects in our little simulation.");
241
242 std::vector<detection> dets(num_dets);
243 for (unsigned long i = 0; i < dets.size(); ++i)
244 {
245 dets[i] = sample_detection_from_sensor(i);
246 }
247 return dets;
248 }
249
250 // ----------------------------------------------------------------------------------------
251
main()252 int main()
253 {
254 initialize_object_properties();
255
256
257 // Get some training data. Here we sample 5 independent track histories. In a real
258 // world problem you would get this kind of data by, for example, collecting data from
259 // your sensor on 5 separate days where you did an independent collection each day.
260 // You can train a model with just one track history but the more you have the better.
261 std::vector<track_history> data;
262 data.push_back(make_random_tracking_data_for_training());
263 data.push_back(make_random_tracking_data_for_training());
264 data.push_back(make_random_tracking_data_for_training());
265 data.push_back(make_random_tracking_data_for_training());
266 data.push_back(make_random_tracking_data_for_training());
267
268
269 structural_track_association_trainer trainer;
270 // Note that the machine learning tools have a parameter. This is the usual SVM C
271 // parameter that controls the trade-off between trying to fit the training data or
272 // producing a "simpler" solution. You need to try a few different values of this
273 // parameter to find out what setting works best for your problem (try values in the
274 // range 0.001 to 1000000).
275 trainer.set_c(100);
276 // Now do the training.
277 track_association_function<detection> assoc = trainer.train(data);
278
279 // We can test the accuracy of the learned association function on some track history
280 // data. Here we test it on the data we trained on. It outputs a single number that
281 // measures the fraction of detections which were correctly associated to their tracks.
282 // So a value of 1 indicates perfect tracking and a value of 0 indicates totally wrong
283 // tracking.
284 cout << "Association accuracy on training data: "<< test_track_association_function(assoc, data) << endl;
285 // It's very important to test the output of a machine learning method on data it
286 // wasn't trained on. You can do that by calling test_track_association_function() on
287 // held out data. You can also use cross-validation like so:
288 cout << "Association accuracy from 5-fold CV: "<< cross_validate_track_association_trainer(trainer, data, 5) << endl;
289 // Unsurprisingly, the testing functions show that the assoc function we learned
290 // perfectly associates all detections to tracks in this easy data.
291
292
293
294
295 // OK. So how do you use this assoc thing? Let's use it to do some tracking!
296
297 // tracks contains all our current tracks. Initially it is empty.
298 std::vector<track> tracks;
299 cout << "number of tracks: "<< tracks.size() << endl;
300
301 // Sample detections from 3 objects.
302 std::vector<detection> dets = make_random_detections(3);
303 // Calling assoc(), the function we just learned, performs the detection to track
304 // association. It will also call each track's update_track() function with the
305 // associated detection. For tracks that don't get a detection, it calls
306 // propagate_track().
307 assoc(tracks, dets);
308 // Now there are 3 things in tracks.
309 cout << "number of tracks: "<< tracks.size() << endl;
310
311 // Run the tracker for a few more time steps...
312 dets = make_random_detections(3);
313 assoc(tracks, dets);
314 cout << "number of tracks: "<< tracks.size() << endl;
315
316 dets = make_random_detections(3);
317 assoc(tracks, dets);
318 cout << "number of tracks: "<< tracks.size() << endl;
319
320 // Now another object has appeared! There are 4 objects now.
321 dets = make_random_detections(4);
322 assoc(tracks, dets);
323 // Now there are 4 tracks instead of 3!
324 cout << "number of tracks: "<< tracks.size() << endl;
325
326 // That 4th object just vanished. Let's look at the time_since_last_association values
327 // for each track. We will see that one of the tracks isn't getting updated with
328 // detections anymore since the object it corresponds to is no longer present.
329 dets = make_random_detections(3);
330 assoc(tracks, dets);
331 cout << "number of tracks: "<< tracks.size() << endl;
332 for (unsigned long i = 0; i < tracks.size(); ++i)
333 cout << " time since last association: "<< tracks[i].time_since_last_association << endl;
334
335 dets = make_random_detections(3);
336 assoc(tracks, dets);
337 cout << "number of tracks: "<< tracks.size() << endl;
338 for (unsigned long i = 0; i < tracks.size(); ++i)
339 cout << " time since last association: "<< tracks[i].time_since_last_association << endl;
340
341
342
343
344
345
346 // Finally, you can save your track_association_function to disk like so:
347 serialize("track_assoc.svm") << assoc;
348
349 // And recall it from disk later like so:
350 deserialize("track_assoc.svm") >> assoc;
351 }
352
353 // ----------------------------------------------------------------------------------------
354
355