// Copyright (C) 2006  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.


#include <dlib/matrix.h>
#include <sstream>
#include <string>
#include <cstdlib>
#include <ctime>
#include <vector>
#include "../stl_checked.h"
#include "../array.h"
#include "../rand.h"
#include "checkerboard.h"
#include <dlib/statistics.h>

#include "tester.h"
#include <dlib/svm_threaded.h>


namespace
{

    using namespace test;
    using namespace dlib;
    using namespace std;

    logger dlog("test.svm");

// ----------------------------------------------------------------------------------------
    void test_clustering (
    )
    {
        dlog << LINFO << "   begin test_clustering()";
        // Here we declare that our samples will be 2 dimensional column vectors.
        typedef matrix<double,2,1> sample_type;

        // Now we are making a typedef for the kind of kernel we want to use.  I picked the
        // radial basis kernel because it only has one parameter and generally gives good
        // results without much fiddling.
        typedef radial_basis_kernel<sample_type> kernel_type;

        // Here we declare an instance of the kcentroid object.  The first argument to the constructor
        // is the kernel we wish to use.  The second is a parameter that determines the numerical
        // accuracy with which the object will perform part of the learning algorithm.  Generally
        // smaller values give better results but cause the algorithm to run slower.  You just have
        // to play with it to decide what balance of speed and accuracy is right for your problem.
        // Here we have set it to 0.01.
        kcentroid<kernel_type> kc(kernel_type(0.1),0.01);

        // Now we make an instance of the kkmeans object and tell it to use kcentroid objects
        // that are configured with the parameters from the kc object we defined above.
        kkmeans<kernel_type> test(kc);
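        // Once trained, the kkmeans object acts as a function: test(sample) returns the
        // index of the cluster center nearest to that sample, which is what the
        // assertions at the end of this test rely on.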

        std::vector<sample_type> samples;
        std::vector<sample_type> initial_centers;

        sample_type m;

        dlib::rand rnd;

        print_spinner();
        // we will make 50 points from each class
        const long num = 50;

        // make some samples on a circle of radius 0.5 around the origin
        double radius = 0.5;
        for (long i = 0; i < num; ++i)
        {
            double sign = 1;
            if (rnd.get_random_double() < 0.5)
                sign = -1;
            m(0) = 2*radius*rnd.get_random_double()-radius;
            m(1) = sign*sqrt(radius*radius - m(0)*m(0));

            // add this sample to the set of samples we will run k-means on
            samples.push_back(m);
        }

        // make some samples on a circle around the origin but far away (radius 10)
        radius = 10.0;
        for (long i = 0; i < num; ++i)
        {
            double sign = 1;
            if (rnd.get_random_double() < 0.5)
                sign = -1;
            m(0) = 2*radius*rnd.get_random_double()-radius;
            m(1) = sign*sqrt(radius*radius - m(0)*m(0));

            // add this sample to the set of samples we will run k-means on
            samples.push_back(m);
        }

        // make some samples on a circle around the point (25,25)
        radius = 4.0;
        for (long i = 0; i < num; ++i)
        {
            double sign = 1;
            if (rnd.get_random_double() < 0.5)
                sign = -1;
            m(0) = 2*radius*rnd.get_random_double()-radius;
            m(1) = sign*sqrt(radius*radius - m(0)*m(0));

            // translate this point away from the origin
            m(0) += 25;
            m(1) += 25;

            // add this sample to the set of samples we will run k-means on
            samples.push_back(m);
        }
        print_spinner();

        // tell the kkmeans object we made that we want to run k-means with k set to 3.
        // (i.e. we want 3 clusters)
        test.set_number_of_centers(3);

        // You need to pick some initial centers for the k-means algorithm.  So here
        // we will use the dlib::pick_initial_centers() function, which tries to find
        // n points that are far apart from each other.
        pick_initial_centers(3, initial_centers, samples, test.get_kernel());

        print_spinner();
        // now run the k-means algorithm on our set of samples.
        test.train(samples,initial_centers);
        print_spinner();

        const unsigned long class1 = test(samples[0]);
        const unsigned long class2 = test(samples[num]);
        const unsigned long class3 = test(samples[2*num]);
        // now loop over all our samples and check their predicted clusters.  In this
        // example every point should land in the same cluster as the other points from
        // its ring.
        for (unsigned long i = 0; i < samples.size()/3; ++i)
        {
            DLIB_TEST(test(samples[i]) == class1);
            DLIB_TEST(test(samples[i+num]) == class2);
            DLIB_TEST(test(samples[i+2*num]) == class3);
        }

        dlog << LINFO << "   end test_clustering()";
    }

// ----------------------------------------------------------------------------------------

    // Here is the sinc function we will be trying to learn with the krls
    // object.
    double sinc(double x)
    {
        if (x == 0)
            return 1;
        return sin(x)/x;
    }

    void test_regression (
    )
    {
        dlog << LINFO << "   begin test_regression()";
        // Here we declare that our samples will be 1 dimensional column vectors.  The reason for
        // using a matrix here is that in general you can use N dimensional vectors as inputs to the
        // krls object.  But here we only have 1 dimension to make the example simple.
        typedef matrix<double,1,1> sample_type;

        // Now we are making a typedef for the kind of kernel we want to use.  I picked the
        // radial basis kernel because it only has one parameter and generally gives good
        // results without much fiddling.
        typedef radial_basis_kernel<sample_type> kernel_type;

        // Here we declare an instance of the krls object.  The first argument to the constructor
        // is the kernel we wish to use.  The second is a parameter that determines the numerical
        // accuracy with which the object will perform part of the regression algorithm.  Generally
        // smaller values give better results but cause the algorithm to run slower.  You just have
        // to play with it to decide what balance of speed and accuracy is right for your problem.
        // Here we have set it to 0.001.
        krls<kernel_type> test(kernel_type(0.1),0.001);
        rvm_regression_trainer<kernel_type> rvm_test;
        rvm_test.set_kernel(test.get_kernel());

        krr_trainer<kernel_type> krr_test;
        krr_test.set_kernel(test.get_kernel());

        svr_trainer<kernel_type> svr_test;
        svr_test.set_kernel(test.get_kernel());
        svr_test.set_epsilon_insensitivity(0.0001);
        svr_test.set_c(10);

        rbf_network_trainer<kernel_type> rbf_test;
        rbf_test.set_kernel(test.get_kernel());
        rbf_test.set_num_centers(13);

        print_spinner();
        std::vector<sample_type> samples;
        std::vector<sample_type> samples2;
        std::vector<double> labels;
        std::vector<double> labels2;
        // now we train our object on a few samples of the sinc function.
        sample_type m;
        for (double x = -10; x <= 5; x += 0.6)
        {
            m(0) = x;
            test.train(m, sinc(x));

            samples.push_back(m);
            samples2.push_back(m);
            labels.push_back(sinc(x));
            labels2.push_back(2);
        }
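        // Note that labels2 is a constant target (always 2).  It is used near the end of
        // this test as a trivially learnable regression problem for the cross validation
        // checks.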

        print_spinner();
        decision_function<kernel_type> test2 = rvm_test.train(samples, labels);
        print_spinner();
        decision_function<kernel_type> test3 = rbf_test.train(samples, labels);
        print_spinner();
        decision_function<kernel_type> test4 = krr_test.train(samples, labels);
        print_spinner();
        decision_function<kernel_type> test5 = svr_test.train(samples, labels);
        print_spinner();

        // now we output the value of the sinc function for a few test points as well as
        // the value predicted by each trained object, and check that they agree closely.
        m(0) = 2.5; dlog << LDEBUG << "krls: " << sinc(m(0)) << "   " << test(m); DLIB_TEST(abs(sinc(m(0)) - test(m)) < 0.01);
        m(0) = 0.1; dlog << LDEBUG << "krls: " << sinc(m(0)) << "   " << test(m); DLIB_TEST(abs(sinc(m(0)) - test(m)) < 0.01);
        m(0) = -4;  dlog << LDEBUG << "krls: " << sinc(m(0)) << "   " << test(m); DLIB_TEST(abs(sinc(m(0)) - test(m)) < 0.01);
        m(0) = 5.0; dlog << LDEBUG << "krls: " << sinc(m(0)) << "   " << test(m); DLIB_TEST(abs(sinc(m(0)) - test(m)) < 0.01);

        m(0) = 2.5; dlog << LDEBUG << "rvm: " << sinc(m(0)) << "   " << test2(m); DLIB_TEST(abs(sinc(m(0)) - test2(m)) < 0.01);
        m(0) = 0.1; dlog << LDEBUG << "rvm: " << sinc(m(0)) << "   " << test2(m); DLIB_TEST(abs(sinc(m(0)) - test2(m)) < 0.01);
        m(0) = -4;  dlog << LDEBUG << "rvm: " << sinc(m(0)) << "   " << test2(m); DLIB_TEST(abs(sinc(m(0)) - test2(m)) < 0.01);
        m(0) = 5.0; dlog << LDEBUG << "rvm: " << sinc(m(0)) << "   " << test2(m); DLIB_TEST(abs(sinc(m(0)) - test2(m)) < 0.01);

        m(0) = 2.5; dlog << LDEBUG << "rbf: " << sinc(m(0)) << "   " << test3(m); DLIB_TEST(abs(sinc(m(0)) - test3(m)) < 0.01);
        m(0) = 0.1; dlog << LDEBUG << "rbf: " << sinc(m(0)) << "   " << test3(m); DLIB_TEST(abs(sinc(m(0)) - test3(m)) < 0.01);
        m(0) = -4;  dlog << LDEBUG << "rbf: " << sinc(m(0)) << "   " << test3(m); DLIB_TEST(abs(sinc(m(0)) - test3(m)) < 0.01);
        m(0) = 5.0; dlog << LDEBUG << "rbf: " << sinc(m(0)) << "   " << test3(m); DLIB_TEST(abs(sinc(m(0)) - test3(m)) < 0.01);

        m(0) = 2.5; dlog << LDEBUG << "krr: " << sinc(m(0)) << "   " << test4(m); DLIB_TEST(abs(sinc(m(0)) - test4(m)) < 0.01);
        m(0) = 0.1; dlog << LDEBUG << "krr: " << sinc(m(0)) << "   " << test4(m); DLIB_TEST(abs(sinc(m(0)) - test4(m)) < 0.01);
        m(0) = -4;  dlog << LDEBUG << "krr: " << sinc(m(0)) << "   " << test4(m); DLIB_TEST(abs(sinc(m(0)) - test4(m)) < 0.01);
        m(0) = 5.0; dlog << LDEBUG << "krr: " << sinc(m(0)) << "   " << test4(m); DLIB_TEST(abs(sinc(m(0)) - test4(m)) < 0.01);

        m(0) = 2.5; dlog << LDEBUG << "svr: " << sinc(m(0)) << "   " << test5(m); DLIB_TEST(abs(sinc(m(0)) - test5(m)) < 0.01);
        m(0) = 0.1; dlog << LDEBUG << "svr: " << sinc(m(0)) << "   " << test5(m); DLIB_TEST(abs(sinc(m(0)) - test5(m)) < 0.01);
        m(0) = -4;  dlog << LDEBUG << "svr: " << sinc(m(0)) << "   " << test5(m); DLIB_TEST(abs(sinc(m(0)) - test5(m)) < 0.01);
        m(0) = 5.0; dlog << LDEBUG << "svr: " << sinc(m(0)) << "   " << test5(m); DLIB_TEST(abs(sinc(m(0)) - test5(m)) < 0.01);


        randomize_samples(samples, labels);
        dlog << LINFO << "KRR MSE and R-squared: "<< cross_validate_regression_trainer(krr_test, samples, labels, 6);
        dlog << LINFO << "SVR MSE and R-squared: "<< cross_validate_regression_trainer(svr_test, samples, labels, 6);
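        // As the log lines above indicate, the first element of the matrix returned by
        // cross_validate_regression_trainer() is the mean squared error and the second
        // is the R-squared value; both are checked below.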
        matrix<double,1,4> cv = cross_validate_regression_trainer(krr_test, samples, labels, 6);
        DLIB_TEST(cv(0) < 1e-4);
        DLIB_TEST(cv(1) > 0.99);
        cv = cross_validate_regression_trainer(svr_test, samples, labels, 6);
        DLIB_TEST(cv(0) < 1e-4);
        DLIB_TEST(cv(1) > 0.99);




        randomize_samples(samples2, labels2);
        dlog << LINFO << "KRR MSE and R-squared: "<< cross_validate_regression_trainer(krr_test, samples2, labels2, 6);
        dlog << LINFO << "SVR MSE and R-squared: "<< cross_validate_regression_trainer(svr_test, samples2, labels2, 6);
        cv = cross_validate_regression_trainer(krr_test, samples2, labels2, 6);
        DLIB_TEST(cv(0) < 1e-4);
        cv = cross_validate_regression_trainer(svr_test, samples2, labels2, 6);
        DLIB_TEST(cv(0) < 1e-4);

        dlog << LINFO << "   end test_regression()";
    }

// ----------------------------------------------------------------------------------------

    void test_anomaly_detection (
    )
    {
        dlog << LINFO << "   begin test_anomaly_detection()";
        // Here we declare that our samples will be 2 dimensional column vectors.
        typedef matrix<double,2,1> sample_type;

        // Now we are making a typedef for the kind of kernel we want to use.  I picked the
        // radial basis kernel because it only has one parameter and generally gives good
        // results without much fiddling.
        typedef radial_basis_kernel<sample_type> kernel_type;

        // Here we declare an instance of the kcentroid object.  The first argument to the constructor
        // is the kernel we wish to use.  The second is a parameter that determines the numerical
        // accuracy with which the object will perform part of the learning algorithm.  Generally
        // smaller values give better results but cause the algorithm to run slower.  You just have
        // to play with it to decide what balance of speed and accuracy is right for your problem.
        // Here we have set it to 0.01.
        kcentroid<kernel_type> test(kernel_type(0.1),0.01);


        svm_one_class_trainer<kernel_type> one_class_trainer;
        one_class_trainer.set_nu(0.4);
        one_class_trainer.set_kernel(kernel_type(0.2));

        std::vector<sample_type> samples;

        // now we train our object on a few samples of the sinc function.
        sample_type m;
        for (double x = -15; x <= 8; x += 1)
        {
            m(0) = x;
            m(1) = sinc(x);
            test.train(m);
            samples.push_back(m);
        }

        decision_function<kernel_type> df = one_class_trainer.train(samples);
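        // df is a one-class decision function: it should be positive (to within a small
        // threshold) on points that look like the training samples and negative on
        // points that do not.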

        running_stats<double> rs;

        // Now let's output the distance from the centroid to some points that are from the sinc function.
        // These numbers should all be similar.  We will also calculate the statistics of these numbers
        // by accumulating them into the running_stats object called rs.  This will let us easily
        // find the mean and standard deviation of the distances for use below.
        dlog << LDEBUG << "Points that are on the sinc function:\n";
        m(0) = -1.5; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m);  rs.add(test(m));
        m(0) = -1.5; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m);  rs.add(test(m));
        m(0) = -0;   m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m);  rs.add(test(m));
        m(0) = -0.5; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m);  rs.add(test(m));
        m(0) = -4.1; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m);  rs.add(test(m));
        m(0) = -1.5; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m);  rs.add(test(m));
        m(0) = -0.5; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m);  rs.add(test(m));

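        // rs.scale(x) reports how many standard deviations x is from the mean of the
        // distances accumulated above, so points on the sinc curve should score low.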
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -0;   m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -0.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -4.1; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -0.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));

        const double thresh = 0.01;
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -0;   m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -0.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -4.1; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -0.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));

        dlog << LDEBUG;
        // Let's output the distance from the centroid to some points that are NOT from the sinc function.
        // These numbers should all be significantly bigger than the previous set of numbers.  We will also
        // use the rs.scale() function to find out how many standard deviations they are away from the
        // mean of the test points from the sinc function.  In this case our criterion for "significantly
        // bigger" is being many standard deviations away; the assertions below require more than 6.
        dlog << LDEBUG << "Points that are NOT on the sinc function:\n";
        m(0) = -1.5; m(1) = sinc(m(0))+4;
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -1.5; m(1) = sinc(m(0))+3;
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -0;   m(1) = -sinc(m(0));
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -0.5; m(1) = -sinc(m(0));
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -4.1; m(1) = sinc(m(0))+2;
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -1.5; m(1) = sinc(m(0))+0.9;
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -0.5; m(1) = sinc(m(0))+1;
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        dlog << LINFO << "   end test_anomaly_detection()";
    }

// ----------------------------------------------------------------------------------------

    void unittest_binary_classification (
    )
    /*!
        ensures
            - runs tests on the svm components for compliance with the specs
    !*/
    {
        dlog << LINFO << "   begin unittest_binary_classification()";
        print_spinner();


        typedef double scalar_type;
        typedef matrix<scalar_type,2,1> sample_type;

        std::vector<sample_type> x;
        std::vector<matrix<double,0,1> > x_linearized;
        std::vector<scalar_type> y;

        get_checkerboard_problem(x,y, 300, 2);
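        // get_checkerboard_problem() (declared in checkerboard.h above) fills x and y
        // with a two-class dataset whose +1/-1 labels follow a checkerboard pattern.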
        const scalar_type gamma = 1;

        typedef radial_basis_kernel<sample_type> kernel_type;

        rbf_network_trainer<kernel_type> rbf_trainer;
        rbf_trainer.set_kernel(kernel_type(gamma));
        rbf_trainer.set_num_centers(100);

        rvm_trainer<kernel_type> rvm_trainer;
        rvm_trainer.set_kernel(kernel_type(gamma));

        krr_trainer<kernel_type> krr_trainer;
        krr_trainer.use_classification_loss_for_loo_cv();
        krr_trainer.set_kernel(kernel_type(gamma));

        svm_pegasos<kernel_type> pegasos_trainer;
        pegasos_trainer.set_kernel(kernel_type(gamma));
        pegasos_trainer.set_lambda(0.00001);


        svm_c_ekm_trainer<kernel_type> ocas_ekm_trainer;
        ocas_ekm_trainer.set_kernel(kernel_type(gamma));
        ocas_ekm_trainer.set_c(100000);

        svm_nu_trainer<kernel_type> trainer;
        trainer.set_kernel(kernel_type(gamma));
        trainer.set_nu(0.05);

        svm_c_trainer<kernel_type> c_trainer;
        c_trainer.set_kernel(kernel_type(gamma));
        c_trainer.set_c(100);

        svm_c_linear_trainer<linear_kernel<matrix<double,0,1> > > lin_trainer;
        lin_trainer.set_c(100000);
        // use an ekm to linearize this dataset so we can use it with the lin_trainer
        empirical_kernel_map<kernel_type> ekm;
        ekm.load(kernel_type(gamma), x);
        for (unsigned long i = 0; i < x.size(); ++i)
            x_linearized.push_back(ekm.project(x[i]));
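        // x_linearized now holds the same samples projected into the span of the
        // empirical kernel map, so the linear trainer below effectively solves the
        // nonlinear RBF problem in that feature space.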


        print_spinner();
        matrix<scalar_type> rvm_cv = cross_validate_trainer_threaded(rvm_trainer, x,y, 4, 2);
        print_spinner();
        matrix<scalar_type> krr_cv = cross_validate_trainer_threaded(krr_trainer, x,y, 4, 2);
        print_spinner();
        matrix<scalar_type> svm_cv = cross_validate_trainer(trainer, x,y, 4);
        print_spinner();
        matrix<scalar_type> svm_c_cv = cross_validate_trainer(c_trainer, x,y, 4);
        print_spinner();
        matrix<scalar_type> rbf_cv = cross_validate_trainer_threaded(rbf_trainer, x,y, 10, 2);
        print_spinner();
        matrix<scalar_type> lin_cv = cross_validate_trainer_threaded(lin_trainer, x_linearized, y, 4, 2);
        print_spinner();
        matrix<scalar_type> ocas_ekm_cv = cross_validate_trainer_threaded(ocas_ekm_trainer, x, y, 4, 2);
        print_spinner();
        ocas_ekm_trainer.set_basis(randomly_subsample(x, 300));
        matrix<scalar_type> ocas_ekm_cv2 = cross_validate_trainer_threaded(ocas_ekm_trainer, x, y, 4, 2);
        print_spinner();
        matrix<scalar_type> peg_cv = cross_validate_trainer_threaded(batch(pegasos_trainer,1.0), x,y, 4, 2);
        print_spinner();
        matrix<scalar_type> peg_c_cv = cross_validate_trainer_threaded(batch_cached(pegasos_trainer,1.0), x,y, 4, 2);
        print_spinner();

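        // Each cross validation above returns a 1x2 matrix holding the fraction of +1
        // examples classified correctly followed by the fraction of -1 examples
        // classified correctly, so the mean(cv) > 0.9 checks below require both classes
        // to be mostly right.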
        dlog << LDEBUG << "rvm cv:        " << rvm_cv;
        dlog << LDEBUG << "krr cv:        " << krr_cv;
        dlog << LDEBUG << "nu-svm cv:     " << svm_cv;
        dlog << LDEBUG << "C-svm cv:      " << svm_c_cv;
        dlog << LDEBUG << "rbf cv:        " << rbf_cv;
        dlog << LDEBUG << "lin cv:        " << lin_cv;
        dlog << LDEBUG << "ocas_ekm cv:   " << ocas_ekm_cv;
        dlog << LDEBUG << "ocas_ekm cv2:  " << ocas_ekm_cv2;
        dlog << LDEBUG << "peg cv:        " << peg_cv;
        dlog << LDEBUG << "peg cached cv: " << peg_c_cv;

        // make sure the cached version of pegasos computes the same result
        DLIB_TEST_MSG(sum(abs(peg_cv - peg_c_cv)) < std::sqrt(std::numeric_limits<double>::epsilon()),
                      sum(abs(peg_cv - peg_c_cv)) << "   \n" << peg_cv << peg_c_cv  );

        DLIB_TEST_MSG(mean(rvm_cv) > 0.9, rvm_cv);
        DLIB_TEST_MSG(mean(krr_cv) > 0.9, krr_cv);
        DLIB_TEST_MSG(mean(svm_cv) > 0.9, svm_cv);
        DLIB_TEST_MSG(mean(svm_c_cv) > 0.9, svm_c_cv);
        DLIB_TEST_MSG(mean(rbf_cv) > 0.9, rbf_cv);
        DLIB_TEST_MSG(mean(lin_cv) > 0.9, lin_cv);
        DLIB_TEST_MSG(mean(peg_cv) > 0.9, peg_cv);
        DLIB_TEST_MSG(mean(peg_c_cv) > 0.9, peg_c_cv);
        DLIB_TEST_MSG(mean(ocas_ekm_cv) > 0.9, ocas_ekm_cv);
        DLIB_TEST_MSG(mean(ocas_ekm_cv2) > 0.9, ocas_ekm_cv2);

        const long num_sv = trainer.train(x,y).basis_vectors.size();
        print_spinner();
        const long num_rv = rvm_trainer.train(x,y).basis_vectors.size();
        print_spinner();
        dlog << LDEBUG << "num sv: " << num_sv;
        dlog << LDEBUG << "num rv: " << num_rv;
        print_spinner();
        ocas_ekm_trainer.clear_basis();
        const long num_bv = ocas_ekm_trainer.train(x,y).basis_vectors.size();
        dlog << LDEBUG << "num ekm bv: " << num_bv;

        DLIB_TEST(num_rv <= 17);
        DLIB_TEST_MSG(num_sv <= 45, num_sv);
        DLIB_TEST_MSG(num_bv <= 45, num_bv);

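        // reduced2() wraps the trainer so that the decision function it produces is
        // approximated by one using at most 19 basis vectors (verified at the end of
        // this function).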
        decision_function<kernel_type> df = reduced2(trainer, 19).train(x,y);
        print_spinner();

        matrix<scalar_type> svm_reduced_error = test_binary_decision_function(df, x, y);
        print_spinner();
        dlog << LDEBUG << "svm reduced test error: " << svm_reduced_error;
        dlog << LDEBUG << "svm reduced num sv: " << df.basis_vectors.size();
        DLIB_TEST(mean(svm_reduced_error) > 0.9);

        svm_cv = cross_validate_trainer(reduced(trainer,30), x,y, 4);
        dlog << LDEBUG << "svm reduced cv: " << svm_cv;
        DLIB_TEST_MSG(mean(svm_cv) > 0.9, svm_cv);

        DLIB_TEST(df.basis_vectors.size() <= 19);
        dlog << LINFO << "   end unittest_binary_classification()";
    }

// ----------------------------------------------------------------------------------------

    template <typename kernel_type>
    struct kernel_der_obj
    {
        typename kernel_type::sample_type x;
        kernel_type k;

        double operator()(const typename kernel_type::sample_type& y) const { return k(x,y); }
    };


    template <typename kernel_type>
    void test_kernel_derivative (
        const kernel_type& k,
        const typename kernel_type::sample_type& x,
        const typename kernel_type::sample_type& y
    )
    {
        kernel_der_obj<kernel_type> obj;
        obj.x = x;
        obj.k = k;
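        // Compare dlib's analytic kernel_derivative object against a numerical
        // derivative of the function y -> k(x,y); derivative() builds a central
        // differences approximation of the functor obj.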
        kernel_derivative<kernel_type> der(obj.k);
        DLIB_TEST(dlib::equal(derivative(obj)(y) , der(obj.x,y), 1e-5));
    }

    void test_kernel_derivative (
    )
    {
        typedef matrix<double, 2, 1> sample_type;

        sigmoid_kernel<sample_type> k1;
        radial_basis_kernel<sample_type> k2;
        linear_kernel<sample_type> k3;
        polynomial_kernel<sample_type> k4(2,3,4);

        offset_kernel<sigmoid_kernel<sample_type> > k5;
        offset_kernel<radial_basis_kernel<sample_type> > k6;

        dlib::rand rnd;

        sample_type x, y;
        for (int i = 0; i < 10; ++i)
        {
            x = randm(2,1,rnd);
            y = randm(2,1,rnd);
            test_kernel_derivative(k1, x, y);
            test_kernel_derivative(k2, x, y);
            test_kernel_derivative(k3, x, y);
            test_kernel_derivative(k4, x, y);
            test_kernel_derivative(k5, x, y);
            test_kernel_derivative(k6, x, y);
        }
    }

// ----------------------------------------------------------------------------------------

    void test_svm_trainer2()
    {
        typedef matrix<double, 2, 1> sample_type;
        typedef linear_kernel<sample_type> kernel_type;


        std::vector<sample_type> samples;
        std::vector<double> labels;

        sample_type samp;
        samp(0) = 1;
        samp(1) = 1;
        samples.push_back(samp);
        labels.push_back(+1);

        samp(0) = 1;
        samp(1) = 2;
        samples.push_back(samp);
        labels.push_back(-1);
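        // The two training points differ only in their second coordinate (1 vs 2), so a
        // correct linear decision function must separate them along that axis.  The
        // checks below verify each trainer puts both points on the correct side.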

        svm_c_trainer<kernel_type> trainer;

        decision_function<kernel_type> df = trainer.train(samples, labels);

        samp(0) = 1;
        samp(1) = 1;
        dlog << LINFO << "test +1 : "<< df(samp);
        DLIB_TEST(df(samp) > 0);
        samp(0) = 1;
        samp(1) = 2;
        dlog << LINFO << "test -1 : "<< df(samp);
        DLIB_TEST(df(samp) < 0);

        svm_nu_trainer<kernel_type> trainer2;
        df = trainer2.train(samples, labels);

        samp(0) = 1;
        samp(1) = 1;
        dlog << LINFO << "test +1 : "<< df(samp);
        DLIB_TEST(df(samp) > 0);
        samp(0) = 1;
        samp(1) = 2;
        dlog << LINFO << "test -1 : "<< df(samp);
        DLIB_TEST(df(samp) < 0);

    }

// ----------------------------------------------------------------------------------------

    class svm_tester : public tester
    {
    public:
        svm_tester (
        ) :
            tester ("test_svm",
                    "Runs tests on the svm/kernel algorithm components.")
        {}

        void perform_test (
        )
        {
            test_kernel_derivative();
            unittest_binary_classification();
            test_clustering();
            test_regression();
            test_anomaly_detection();
            test_svm_trainer2();
        }
    } a;

}
