// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This is an example illustrating the use of the multiclass classification tools
    from the dlib C++ Library.  Specifically, this example will make points from
    three classes and show you how to train a multiclass classifier to recognize
    these three classes.

    The classes are as follows:
        - class 1: points very close to the origin
        - class 2: points on the circle of radius 10 around the origin
        - class 3: points on a circle of radius 4 centered at the point (25,25),
          well away from the origin
*/

#include <dlib/svm_threaded.h>

#include <iostream>
#include <vector>

#include <dlib/rand.h>

using namespace std;
using namespace dlib;

// Our data will be 2-dimensional.  So declare an appropriate type to contain these points.
typedef matrix<double,2,1> sample_type;

// ----------------------------------------------------------------------------------------

void generate_data (
    std::vector<sample_type>& samples,
    std::vector<double>& labels
);
/*!
    ensures
        - makes some 3-class data as described above.
        - creates 60 points from class 1
        - creates 70 points from class 2
        - creates 80 points from class 3
!*/

// ----------------------------------------------------------------------------------------

int main()
{
    try
    {
        std::vector<sample_type> samples;
        std::vector<double> labels;

        // First, get our labeled set of training data
        generate_data(samples, labels);

        cout << "samples.size(): "<< samples.size() << endl;

        // The main object in this example program is the one_vs_one_trainer.  It is essentially
        // a container class for regular binary classifier trainer objects.  In particular, it
        // uses the any_trainer object to store any kind of trainer object that implements a
        // .train(samples,labels) function which returns some kind of learned decision function.
        // It uses these binary classifiers to construct a voting multiclass classifier.  If
        // there are N classes then it trains N*(N-1)/2 binary classifiers, one for each pair of
        // labels, which then vote on the label of a sample.
        //
        // In this example program we will work with a one_vs_one_trainer object which stores any
        // kind of trainer that uses our sample_type samples.
        typedef one_vs_one_trainer<any_trainer<sample_type> > ovo_trainer;
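        // For the 3 classes in this example that works out to 3*(3-1)/2 = 3 binary
        // subproblems: 1 vs. 2, 1 vs. 3, and 2 vs. 3.  At prediction time each of
        // these classifiers casts a vote and the label with the most votes wins.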


        // Finally, make the one_vs_one_trainer.
        ovo_trainer trainer;


        // Next, we will make two different binary classification trainer objects.  One
        // which uses kernel ridge regression and RBF kernels and another which uses a
        // support vector machine and polynomial kernels.  The particular details don't matter.
        // The point of this part of the example is that you can use any kind of trainer object
        // with the one_vs_one_trainer.
        typedef polynomial_kernel<sample_type> poly_kernel;
        typedef radial_basis_kernel<sample_type> rbf_kernel;

        // make the binary trainers and set some parameters
        krr_trainer<rbf_kernel> rbf_trainer;
        svm_nu_trainer<poly_kernel> poly_trainer;
        poly_trainer.set_kernel(poly_kernel(0.1, 1, 2));
        rbf_trainer.set_kernel(rbf_kernel(0.1));
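        // For reference, the polynomial kernel's constructor arguments are
        // (gamma, coef, degree), giving k(a,b) = (gamma*trans(a)*b + coef)^degree,
        // so the kernel above is a degree 2 polynomial.  The radial_basis_kernel's
        // argument is its gamma, giving k(a,b) = exp(-gamma*||a-b||^2).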


        // Now tell the one_vs_one_trainer that, by default, it should use the rbf_trainer
        // to solve the individual binary classification subproblems.
        trainer.set_trainer(rbf_trainer);
        // We can also get more specific.  Here we tell the one_vs_one_trainer to use the
        // poly_trainer to solve the class 1 vs class 2 subproblem.  All the others will
        // still be solved with the rbf_trainer.
        trainer.set_trainer(poly_trainer, 1, 2);

        // Now let's do 5-fold cross-validation using the one_vs_one_trainer we just set up.
        // As an aside, always shuffle the order of the samples before doing cross-validation.
        // For a discussion of why this is a good idea see the svm_ex.cpp example.
        randomize_samples(samples, labels);
        cout << "cross validation: \n" << cross_validate_multiclass_trainer(trainer, samples, labels, 5) << endl;
        // The output is shown below.  It is the confusion matrix which describes the results.  Each row
        // corresponds to a class of data and each column to a prediction.  Reading from top to bottom,
        // the rows correspond to the class labels if the labels have been listed in sorted order.  So the
        // top row corresponds to class 1, the middle row to class 2, and the bottom row to class 3.  The
        // columns are organized similarly, with the left most column showing how many samples were predicted
        // as members of class 1.
        //
        // So in the results below we can see that, for the class 1 samples, 60 of them were correctly predicted
        // to be members of class 1 and 0 were incorrectly classified.  Similarly, the other two classes of data
        // are perfectly classified.
        /*
            cross validation:
            60  0  0
            0 70  0
            0  0 80
        */
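        // If you just want the overall accuracy as a single number, you can also capture
        // the returned confusion matrix and divide its trace by its total.  A small
        // sketch (the variable name cm is our own, not part of the dlib API):
        const matrix<double> cm = cross_validate_multiclass_trainer(trainer, samples, labels, 5);
        cout << "cross validation accuracy: " << sum(diag(cm))/sum(cm) << endl;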

        // Next, if you wanted to obtain the decision rule learned by a one_vs_one_trainer you
        // would store it into a one_vs_one_decision_function.
        one_vs_one_decision_function<ovo_trainer> df = trainer.train(samples, labels);

        cout << "predicted label: "<< df(samples[0])  << ", true label: "<< labels[0] << endl;
        cout << "predicted label: "<< df(samples[90]) << ", true label: "<< labels[90] << endl;
        // The output is:
        /*
            predicted label: 2, true label: 2
            predicted label: 1, true label: 1
        */


        // If you want to save a one_vs_one_decision_function to disk, you can do
        // so.  However, you must declare what kind of decision functions it contains.
        one_vs_one_decision_function<ovo_trainer,
        decision_function<poly_kernel>,  // This is the output of the poly_trainer
        decision_function<rbf_kernel>    // This is the output of the rbf_trainer
        > df2, df3;


        // Put df into df2 and then save df2 to disk.  Note that we could also have said
        // df2 = trainer.train(samples, labels); but doing it this way avoids retraining.
        df2 = df;
        serialize("df.dat") << df2;

        // load the function back in from disk and store it in df3.
        deserialize("df.dat") >> df3;


        // Test df3 to see that this worked.
        cout << endl;
        cout << "predicted label: "<< df3(samples[0])  << ", true label: "<< labels[0] << endl;
        cout << "predicted label: "<< df3(samples[90]) << ", true label: "<< labels[90] << endl;
        // Test df3 on the samples and labels and print the confusion matrix.
        cout << "test deserialized function: \n" << test_multiclass_decision_function(df3, samples, labels) << endl;



        // Finally, if you want to get the binary classifiers from inside a multiclass decision
        // function you can do it by calling get_binary_decision_functions() like so:
        one_vs_one_decision_function<ovo_trainer>::binary_function_table functs;
        functs = df.get_binary_decision_functions();
        cout << "number of binary decision functions in df: " << functs.size() << endl;
        // The functs object is a std::map which maps pairs of labels to binary decision
        // functions.  So we can access the individual decision functions like so:
        decision_function<poly_kernel> df_1_2 = any_cast<decision_function<poly_kernel> >(functs[make_unordered_pair(1,2)]);
        decision_function<rbf_kernel>  df_1_3 = any_cast<decision_function<rbf_kernel>  >(functs[make_unordered_pair(1,3)]);
        // df_1_2 contains the binary decision function that votes for class 1 vs. 2.
        // Similarly, df_1_3 contains the classifier that votes for 1 vs. 3.
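
        // You can also iterate over the whole table.  A brief sketch (the loop
        // variable name func is our own):
        for (const auto& func : functs)
            cout << "binary classifier for class " << func.first.first
                 << " vs. class " << func.first.second << endl;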

        // Note that the multiclass decision function doesn't know what kind of binary
        // decision functions it contains.  So we have to use any_cast to explicitly cast
        // them back into the concrete type.  If you make a mistake and try to any_cast a
        // binary decision function into the wrong type of function, any_cast will throw a
        // bad_any_cast exception.
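        // For example, the 1 vs. 2 subproblem was trained with the poly_trainer, so
        // asking for it as an rbf_kernel decision function fails at runtime.  A brief
        // sketch of what that looks like:
        try
        {
            any_cast<decision_function<rbf_kernel> >(functs[make_unordered_pair(1,2)]);
        }
        catch (bad_any_cast&)
        {
            cout << "the 1 vs. 2 classifier is not an rbf_kernel decision function" << endl;
        }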
    }
    catch (std::exception& e)
    {
        cout << "exception thrown!" << endl;
        cout << e.what() << endl;
    }
}

// ----------------------------------------------------------------------------------------

void generate_data (
    std::vector<sample_type>& samples,
    std::vector<double>& labels
)
{
    const long num = 50;

    sample_type m;

    dlib::rand rnd;

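    // Each of the three loops below samples points on a circle of the given radius:
    // pick x uniformly from [-radius, radius], then set y = +/-sqrt(radius^2 - x^2)
    // with a random sign so the point lands exactly on the circle.  (This is not
    // uniform along the arc, but that doesn't matter for this example.)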
    // make some samples near the origin
    double radius = 0.5;
    for (long i = 0; i < num+10; ++i)
    {
        double sign = 1;
        if (rnd.get_random_double() < 0.5)
            sign = -1;
        m(0) = 2*radius*rnd.get_random_double()-radius;
        m(1) = sign*sqrt(radius*radius - m(0)*m(0));

        // add this sample to our set of training samples
        samples.push_back(m);
        labels.push_back(1);
    }

    // make some samples in a circle around the origin but far away
    radius = 10.0;
    for (long i = 0; i < num+20; ++i)
    {
        double sign = 1;
        if (rnd.get_random_double() < 0.5)
            sign = -1;
        m(0) = 2*radius*rnd.get_random_double()-radius;
        m(1) = sign*sqrt(radius*radius - m(0)*m(0));

        // add this sample to our set of training samples
        samples.push_back(m);
        labels.push_back(2);
    }

    // make some samples in a circle around the point (25,25)
    radius = 4.0;
    for (long i = 0; i < num+30; ++i)
    {
        double sign = 1;
        if (rnd.get_random_double() < 0.5)
            sign = -1;
        m(0) = 2*radius*rnd.get_random_double()-radius;
        m(1) = sign*sqrt(radius*radius - m(0)*m(0));

        // translate this point away from the origin
        m(0) += 25;
        m(1) += 25;

        // add this sample to our set of training samples
        samples.push_back(m);
        labels.push_back(3);
    }
}

// ----------------------------------------------------------------------------------------