1 // The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
2 /*
3 
4     This example program shows how to use dlib's implementation of the paper:
5         One Millisecond Face Alignment with an Ensemble of Regression Trees by
6         Vahid Kazemi and Josephine Sullivan, CVPR 2014
7 
8     In particular, we will train a face landmarking model based on a small dataset
9     and then evaluate it.  If you want to visualize the output of the trained
10     model on some images then you can run the face_landmark_detection_ex.cpp
11     example program with sp.dat as the input model.
12 
13     It should also be noted that this kind of model, while often used for face
14     landmarking, is quite general and can be used for a variety of shape
15     prediction tasks.  But here we demonstrate it only on a simple face
16     landmarking task.
17 */
18 
19 
20 #include <dlib/image_processing.h>
21 #include <dlib/data_io.h>
22 #include <iostream>
23 
24 using namespace dlib;
25 using namespace std;
26 
27 // ----------------------------------------------------------------------------------------
28 
29 std::vector<std::vector<double> > get_interocular_distances (
30     const std::vector<std::vector<full_object_detection> >& objects
31 );
32 /*!
33     ensures
34         - returns an object D such that:
35             - D[i][j] == the distance, in pixels, between the eyes for the face represented
36               by objects[i][j].
37 !*/
38 
39 // ----------------------------------------------------------------------------------------
40 
main(int argc,char ** argv)41 int main(int argc, char** argv)
42 {
43     try
44     {
45         // In this example we are going to train a shape_predictor based on the
46         // small faces dataset in the examples/faces directory.  So the first
47         // thing we do is load that dataset.  This means you need to supply the
48         // path to this faces folder as a command line argument so we will know
49         // where it is.
50         if (argc != 2)
51         {
52             cout << "Give the path to the examples/faces directory as the argument to this" << endl;
53             cout << "program.  For example, if you are in the examples folder then execute " << endl;
54             cout << "this program by running: " << endl;
55             cout << "   ./train_shape_predictor_ex faces" << endl;
56             cout << endl;
57             return 0;
58         }
59         const std::string faces_directory = argv[1];
60         // The faces directory contains a training dataset and a separate
61         // testing dataset.  The training data consists of 4 images, each
62         // annotated with rectangles that bound each human face along with 68
63         // face landmarks on each face.  The idea is to use this training data
64         // to learn to identify the position of landmarks on human faces in new
65         // images.
66         //
67         // Once you have trained a shape_predictor it is always important to
68         // test it on data it wasn't trained on.  Therefore, we will also load
69         // a separate testing set of 5 images.  Once we have a shape_predictor
70         // created from the training data we will see how well it works by
71         // running it on the testing images.
72         //
73         // So here we create the variables that will hold our dataset.
74         // images_train will hold the 4 training images and faces_train holds
75         // the locations and poses of each face in the training images.  So for
76         // example, the image images_train[0] has the faces given by the
77         // full_object_detections in faces_train[0].
78         dlib::array<array2d<unsigned char> > images_train, images_test;
79         std::vector<std::vector<full_object_detection> > faces_train, faces_test;
80 
81         // Now we load the data.  These XML files list the images in each
82         // dataset and also contain the positions of the face boxes and
83         // landmarks (called parts in the XML file).  Obviously you can use any
84         // kind of input format you like so long as you store the data into
85         // images_train and faces_train.  But for convenience dlib comes with
86         // tools for creating and loading XML image dataset files.  Here you see
87         // how to load the data.  To create the XML files you can use the imglab
88         // tool which can be found in the tools/imglab folder.  It is a simple
89         // graphical tool for labeling objects in images.  To see how to use it
90         // read the tools/imglab/README.txt file.
91         load_image_dataset(images_train, faces_train, faces_directory+"/training_with_face_landmarks.xml");
92         load_image_dataset(images_test, faces_test, faces_directory+"/testing_with_face_landmarks.xml");
93 
94         // Now make the object responsible for training the model.
95         shape_predictor_trainer trainer;
96         // This algorithm has a bunch of parameters you can mess with.  The
97         // documentation for the shape_predictor_trainer explains all of them.
98         // You should also read Kazemi's paper which explains all the parameters
99         // in great detail.  However, here I'm just setting three of them
100         // differently than their default values.  I'm doing this because we
101         // have a very small dataset.  In particular, setting the oversampling
102         // to a high amount (300) effectively boosts the training set size, so
103         // that helps this example.
104         trainer.set_oversampling_amount(300);
105         // I'm also reducing the capacity of the model by explicitly increasing
106         // the regularization (making nu smaller) and by using trees with
107         // smaller depths.
108         trainer.set_nu(0.05);
109         trainer.set_tree_depth(2);
110 
111         // some parts of training process can be parallelized.
112         // Trainer will use this count of threads when possible
113         trainer.set_num_threads(2);
114 
115         // Tell the trainer to print status messages to the console so we can
116         // see how long the training will take.
117         trainer.be_verbose();
118 
119         // Now finally generate the shape model
120         shape_predictor sp = trainer.train(images_train, faces_train);
121 
122 
123         // Now that we have a model we can test it.  This function measures the
124         // average distance between a face landmark output by the
125         // shape_predictor and where it should be according to the truth data.
126         // Note that there is an optional 4th argument that lets us rescale the
127         // distances.  Here we are causing the output to scale each face's
128         // distances by the interocular distance, as is customary when
129         // evaluating face landmarking systems.
130         cout << "mean training error: "<<
131             test_shape_predictor(sp, images_train, faces_train, get_interocular_distances(faces_train)) << endl;
132 
133         // The real test is to see how well it does on data it wasn't trained
134         // on.  We trained it on a very small dataset so the accuracy is not
135         // extremely high, but it's still doing quite good.  Moreover, if you
136         // train it on one of the large face landmarking datasets you will
137         // obtain state-of-the-art results, as shown in the Kazemi paper.
138         cout << "mean testing error:  "<<
139             test_shape_predictor(sp, images_test, faces_test, get_interocular_distances(faces_test)) << endl;
140 
141         // Finally, we save the model to disk so we can use it later.
142         serialize("sp.dat") << sp;
143     }
144     catch (exception& e)
145     {
146         cout << "\nexception thrown!" << endl;
147         cout << e.what() << endl;
148     }
149 }
150 
151 // ----------------------------------------------------------------------------------------
152 
interocular_distance(const full_object_detection & det)153 double interocular_distance (
154     const full_object_detection& det
155 )
156 {
157     dlib::vector<double,2> l, r;
158     double cnt = 0;
159     // Find the center of the left eye by averaging the points around
160     // the eye.
161     for (unsigned long i = 36; i <= 41; ++i)
162     {
163         l += det.part(i);
164         ++cnt;
165     }
166     l /= cnt;
167 
168     // Find the center of the right eye by averaging the points around
169     // the eye.
170     cnt = 0;
171     for (unsigned long i = 42; i <= 47; ++i)
172     {
173         r += det.part(i);
174         ++cnt;
175     }
176     r /= cnt;
177 
178     // Now return the distance between the centers of the eyes
179     return length(l-r);
180 }
181 
get_interocular_distances(const std::vector<std::vector<full_object_detection>> & objects)182 std::vector<std::vector<double> > get_interocular_distances (
183     const std::vector<std::vector<full_object_detection> >& objects
184 )
185 {
186     std::vector<std::vector<double> > temp(objects.size());
187     for (unsigned long i = 0; i < objects.size(); ++i)
188     {
189         for (unsigned long j = 0; j < objects[i].size(); ++j)
190         {
191             temp[i].push_back(interocular_distance(objects[i][j]));
192         }
193     }
194     return temp;
195 }
196 
197 // ----------------------------------------------------------------------------------------
198 
199