// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*

    This example program shows how to use dlib's implementation of the paper:
        One Millisecond Face Alignment with an Ensemble of Regression Trees by
        Vahid Kazemi and Josephine Sullivan, CVPR 2014

    In particular, we will train a face landmarking model based on a small dataset
    and then evaluate it.  If you want to visualize the output of the trained
    model on some images then you can run the face_landmark_detection_ex.cpp
    example program with sp.dat as the input model.

    It should also be noted that this kind of model, while often used for face
    landmarking, is quite general and can be used for a variety of shape
    prediction tasks.  But here we demonstrate it only on a simple face
    landmarking task.
*/
18
19
20 #include <dlib/image_processing.h>
21 #include <dlib/data_io.h>
22 #include <iostream>
23
24 using namespace dlib;
25 using namespace std;
26
27 // ----------------------------------------------------------------------------------------
28
std::vector<std::vector<double> > get_interocular_distances (
    const std::vector<std::vector<full_object_detection> >& objects
);
/*!
    ensures
        - returns an object D such that:
            - D[i][j] == the distance, in pixels, between the eyes for the face
              represented by objects[i][j].  That is, D has the same nested shape
              as objects, with one interocular distance per face detection.
!*/
38
39 // ----------------------------------------------------------------------------------------
40
main(int argc,char ** argv)41 int main(int argc, char** argv)
42 {
43 try
44 {
45 // In this example we are going to train a shape_predictor based on the
46 // small faces dataset in the examples/faces directory. So the first
47 // thing we do is load that dataset. This means you need to supply the
48 // path to this faces folder as a command line argument so we will know
49 // where it is.
50 if (argc != 2)
51 {
52 cout << "Give the path to the examples/faces directory as the argument to this" << endl;
53 cout << "program. For example, if you are in the examples folder then execute " << endl;
54 cout << "this program by running: " << endl;
55 cout << " ./train_shape_predictor_ex faces" << endl;
56 cout << endl;
57 return 0;
58 }
59 const std::string faces_directory = argv[1];
60 // The faces directory contains a training dataset and a separate
61 // testing dataset. The training data consists of 4 images, each
62 // annotated with rectangles that bound each human face along with 68
63 // face landmarks on each face. The idea is to use this training data
64 // to learn to identify the position of landmarks on human faces in new
65 // images.
66 //
67 // Once you have trained a shape_predictor it is always important to
68 // test it on data it wasn't trained on. Therefore, we will also load
69 // a separate testing set of 5 images. Once we have a shape_predictor
70 // created from the training data we will see how well it works by
71 // running it on the testing images.
72 //
73 // So here we create the variables that will hold our dataset.
74 // images_train will hold the 4 training images and faces_train holds
75 // the locations and poses of each face in the training images. So for
76 // example, the image images_train[0] has the faces given by the
77 // full_object_detections in faces_train[0].
78 dlib::array<array2d<unsigned char> > images_train, images_test;
79 std::vector<std::vector<full_object_detection> > faces_train, faces_test;
80
81 // Now we load the data. These XML files list the images in each
82 // dataset and also contain the positions of the face boxes and
83 // landmarks (called parts in the XML file). Obviously you can use any
84 // kind of input format you like so long as you store the data into
85 // images_train and faces_train. But for convenience dlib comes with
86 // tools for creating and loading XML image dataset files. Here you see
87 // how to load the data. To create the XML files you can use the imglab
88 // tool which can be found in the tools/imglab folder. It is a simple
89 // graphical tool for labeling objects in images. To see how to use it
90 // read the tools/imglab/README.txt file.
91 load_image_dataset(images_train, faces_train, faces_directory+"/training_with_face_landmarks.xml");
92 load_image_dataset(images_test, faces_test, faces_directory+"/testing_with_face_landmarks.xml");
93
94 // Now make the object responsible for training the model.
95 shape_predictor_trainer trainer;
96 // This algorithm has a bunch of parameters you can mess with. The
97 // documentation for the shape_predictor_trainer explains all of them.
98 // You should also read Kazemi's paper which explains all the parameters
99 // in great detail. However, here I'm just setting three of them
100 // differently than their default values. I'm doing this because we
101 // have a very small dataset. In particular, setting the oversampling
102 // to a high amount (300) effectively boosts the training set size, so
103 // that helps this example.
104 trainer.set_oversampling_amount(300);
105 // I'm also reducing the capacity of the model by explicitly increasing
106 // the regularization (making nu smaller) and by using trees with
107 // smaller depths.
108 trainer.set_nu(0.05);
109 trainer.set_tree_depth(2);
110
111 // some parts of training process can be parallelized.
112 // Trainer will use this count of threads when possible
113 trainer.set_num_threads(2);
114
115 // Tell the trainer to print status messages to the console so we can
116 // see how long the training will take.
117 trainer.be_verbose();
118
119 // Now finally generate the shape model
120 shape_predictor sp = trainer.train(images_train, faces_train);
121
122
123 // Now that we have a model we can test it. This function measures the
124 // average distance between a face landmark output by the
125 // shape_predictor and where it should be according to the truth data.
126 // Note that there is an optional 4th argument that lets us rescale the
127 // distances. Here we are causing the output to scale each face's
128 // distances by the interocular distance, as is customary when
129 // evaluating face landmarking systems.
130 cout << "mean training error: "<<
131 test_shape_predictor(sp, images_train, faces_train, get_interocular_distances(faces_train)) << endl;
132
133 // The real test is to see how well it does on data it wasn't trained
134 // on. We trained it on a very small dataset so the accuracy is not
135 // extremely high, but it's still doing quite good. Moreover, if you
136 // train it on one of the large face landmarking datasets you will
137 // obtain state-of-the-art results, as shown in the Kazemi paper.
138 cout << "mean testing error: "<<
139 test_shape_predictor(sp, images_test, faces_test, get_interocular_distances(faces_test)) << endl;
140
141 // Finally, we save the model to disk so we can use it later.
142 serialize("sp.dat") << sp;
143 }
144 catch (exception& e)
145 {
146 cout << "\nexception thrown!" << endl;
147 cout << e.what() << endl;
148 }
149 }
150
151 // ----------------------------------------------------------------------------------------
152
interocular_distance(const full_object_detection & det)153 double interocular_distance (
154 const full_object_detection& det
155 )
156 {
157 dlib::vector<double,2> l, r;
158 double cnt = 0;
159 // Find the center of the left eye by averaging the points around
160 // the eye.
161 for (unsigned long i = 36; i <= 41; ++i)
162 {
163 l += det.part(i);
164 ++cnt;
165 }
166 l /= cnt;
167
168 // Find the center of the right eye by averaging the points around
169 // the eye.
170 cnt = 0;
171 for (unsigned long i = 42; i <= 47; ++i)
172 {
173 r += det.part(i);
174 ++cnt;
175 }
176 r /= cnt;
177
178 // Now return the distance between the centers of the eyes
179 return length(l-r);
180 }
181
// For each face detection in objects, computes its interocular distance.  The
// returned structure has the same nested shape as objects: result[i][j] is the
// distance, in pixels, between the eyes of the face objects[i][j].
std::vector<std::vector<double> > get_interocular_distances (
    const std::vector<std::vector<full_object_detection> >& objects
)
{
    std::vector<std::vector<double> > result;
    result.reserve(objects.size());
    for (const auto& image_dets : objects)
    {
        std::vector<double> dists;
        dists.reserve(image_dets.size());
        for (const auto& det : image_dets)
            dists.push_back(interocular_distance(det));
        result.push_back(dists);
    }
    return result;
}
196
197 // ----------------------------------------------------------------------------------------
198
199