// Copyright (C) 2006  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.


#include <dlib/matrix.h>
#include <sstream>
#include <string>
#include <cstdlib>
#include <ctime>
#include <vector>
#include "../stl_checked.h"
#include "../array.h"
#include "../rand.h"
#include "checkerboard.h"
#include <dlib/statistics.h>

#include "tester.h"
#include <dlib/svm_threaded.h>


namespace
{

    using namespace test;
    using namespace dlib;
    using namespace std;

    logger dlog("test.svm");

// ----------------------------------------------------------------------------------------

    void test_clustering (
    )
    {
        dlog << LINFO << " begin test_clustering()";
        // Here we declare that our samples will be 2 dimensional column vectors.
        typedef matrix<double,2,1> sample_type;

        // Now we are making a typedef for the kind of kernel we want to use.  I picked the
        // radial basis kernel because it only has one parameter and generally gives good
        // results without much fiddling.
        typedef radial_basis_kernel<sample_type> kernel_type;

        // Here we declare an instance of the kcentroid object.  The first argument to the constructor
        // is the kernel we wish to use.  The second is a parameter that determines the numerical
        // accuracy with which the object will perform part of the learning algorithm.  Generally
        // smaller values give better results but cause the algorithm to run slower.  You just have
        // to play with it to decide what balance of speed and accuracy is right for your problem.
        // Here we have set it to 0.01.
        kcentroid<kernel_type> kc(kernel_type(0.1),0.01);

        // Now we make an instance of the kkmeans object and tell it to use kcentroid objects
        // that are configured with the parameters from the kc object we defined above.
        kkmeans<kernel_type> test(kc);

        std::vector<sample_type> samples;
        std::vector<sample_type> initial_centers;

        sample_type m;

        dlib::rand rnd;

        print_spinner();
        // we will make 50 points from each class
        const long num = 50;

        // make some samples on a small circle around the origin
        double radius = 0.5;
        for (long i = 0; i < num; ++i)
        {
            double sign = 1;
            if (rnd.get_random_double() < 0.5)
                sign = -1;
            m(0) = 2*radius*rnd.get_random_double()-radius;
            m(1) = sign*sqrt(radius*radius - m(0)*m(0));

            // add this sample to the set of samples we will run k-means on
            samples.push_back(m);
        }

        // make some samples on a circle around the origin but far away from it
        radius = 10.0;
        for (long i = 0; i < num; ++i)
        {
            double sign = 1;
            if (rnd.get_random_double() < 0.5)
                sign = -1;
            m(0) = 2*radius*rnd.get_random_double()-radius;
            m(1) = sign*sqrt(radius*radius - m(0)*m(0));

            // add this sample to the set of samples we will run k-means on
            samples.push_back(m);
        }

        // make some samples on a circle around the point (25,25)
        radius = 4.0;
        for (long i = 0; i < num; ++i)
        {
            double sign = 1;
            if (rnd.get_random_double() < 0.5)
                sign = -1;
            m(0) = 2*radius*rnd.get_random_double()-radius;
            m(1) = sign*sqrt(radius*radius - m(0)*m(0));

            // translate this point away from the origin
            m(0) += 25;
            m(1) += 25;

            // add this sample to the set of samples we will run k-means on
            samples.push_back(m);
        }
        print_spinner();
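        // At this point we have three well separated classes: two concentric circles
        // centered on the origin (radii 0.5 and 10) and a third circle of radius 4
        // centered on (25,25).  Kernel k-means with k = 3 should recover exactly these
        // three groups, which is what the assertions below verify.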
        // tell the kkmeans object we made that we want to run k-means with k set to 3
        // (i.e. we want 3 clusters)
        test.set_number_of_centers(3);

        // You need to pick some initial centers for the k-means algorithm.  So here
        // we will use the dlib::pick_initial_centers() function which tries to find
        // n points that are far apart (basically).
        pick_initial_centers(3, initial_centers, samples, test.get_kernel());

        print_spinner();
        // now run the k-means algorithm on our set of samples.
        test.train(samples,initial_centers);
        print_spinner();

        const unsigned long class1 = test(samples[0]);
        const unsigned long class2 = test(samples[num]);
        const unsigned long class3 = test(samples[2*num]);
        // now loop over all our samples and verify that each one was assigned to the
        // same cluster as the other points from its class.
        for (unsigned long i = 0; i < samples.size()/3; ++i)
        {
            DLIB_TEST(test(samples[i]) == class1);
            DLIB_TEST(test(samples[i+num]) == class2);
            DLIB_TEST(test(samples[i+2*num]) == class3);
        }

        dlog << LINFO << " end test_clustering()";
    }

// ----------------------------------------------------------------------------------------

    // Here is the sinc function we will be trying to learn with the krls
    // object.
    double sinc(double x)
    {
        if (x == 0)
            return 1;
        return sin(x)/x;
    }

    void test_regression (
    )
    {
        dlog << LINFO << " begin test_regression()";
        // Here we declare that our samples will be 1 dimensional column vectors.  The reason for
        // using a matrix here is that in general you can use N dimensional vectors as inputs to the
        // krls object.  But here we only have 1 dimension to make the example simple.
        typedef matrix<double,1,1> sample_type;

        // Now we are making a typedef for the kind of kernel we want to use.  I picked the
        // radial basis kernel because it only has one parameter and generally gives good
        // results without much fiddling.
        typedef radial_basis_kernel<sample_type> kernel_type;

        // Here we declare an instance of the krls object.  The first argument to the constructor
        // is the kernel we wish to use.  The second is a parameter that determines the numerical
        // accuracy with which the object will perform part of the regression algorithm.  Generally
        // smaller values give better results but cause the algorithm to run slower.  You just have
        // to play with it to decide what balance of speed and accuracy is right for your problem.
        // Here we have set it to 0.001.
        krls<kernel_type> test(kernel_type(0.1),0.001);
        rvm_regression_trainer<kernel_type> rvm_test;
        rvm_test.set_kernel(test.get_kernel());

        krr_trainer<kernel_type> krr_test;
        krr_test.set_kernel(test.get_kernel());

        svr_trainer<kernel_type> svr_test;
        svr_test.set_kernel(test.get_kernel());
        svr_test.set_epsilon_insensitivity(0.0001);
        svr_test.set_c(10);

        rbf_network_trainer<kernel_type> rbf_test;
        rbf_test.set_kernel(test.get_kernel());
        rbf_test.set_num_centers(13);

        print_spinner();
        std::vector<sample_type> samples;
        std::vector<sample_type> samples2;
        std::vector<double> labels;
        std::vector<double> labels2;
        // now we train our object on a few samples of the sinc function.
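        // Besides training the krls object online, the loop below also collects the
        // same points into samples/labels for the batch trainers above.  samples2/labels2
        // form a second dataset whose target is the constant 2 for every input; it is
        // used near the end of this function to check that cross-validation reports
        // essentially zero error on a trivially learnable constant function.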
        sample_type m;
        for (double x = -10; x <= 5; x += 0.6)
        {
            m(0) = x;
            test.train(m, sinc(x));

            samples.push_back(m);
            samples2.push_back(m);
            labels.push_back(sinc(x));
            labels2.push_back(2);
        }

        print_spinner();
        decision_function<kernel_type> test2 = rvm_test.train(samples, labels);
        print_spinner();
        decision_function<kernel_type> test3 = rbf_test.train(samples, labels);
        print_spinner();
        decision_function<kernel_type> test4 = krr_test.train(samples, labels);
        print_spinner();
        decision_function<kernel_type> test5 = svr_test.train(samples, labels);
        print_spinner();

        // now we output the value of the sinc function for a few test points as well as
        // the value predicted by each trained object, and check that each prediction is
        // within 0.01 of the true value.
        m(0) = 2.5; dlog << LDEBUG << "krls: " << sinc(m(0)) << " " << test(m);  DLIB_TEST(abs(sinc(m(0)) - test(m)) < 0.01);
        m(0) = 0.1; dlog << LDEBUG << "krls: " << sinc(m(0)) << " " << test(m);  DLIB_TEST(abs(sinc(m(0)) - test(m)) < 0.01);
        m(0) = -4;  dlog << LDEBUG << "krls: " << sinc(m(0)) << " " << test(m);  DLIB_TEST(abs(sinc(m(0)) - test(m)) < 0.01);
        m(0) = 5.0; dlog << LDEBUG << "krls: " << sinc(m(0)) << " " << test(m);  DLIB_TEST(abs(sinc(m(0)) - test(m)) < 0.01);

        m(0) = 2.5; dlog << LDEBUG << "rvm:  " << sinc(m(0)) << " " << test2(m); DLIB_TEST(abs(sinc(m(0)) - test2(m)) < 0.01);
        m(0) = 0.1; dlog << LDEBUG << "rvm:  " << sinc(m(0)) << " " << test2(m); DLIB_TEST(abs(sinc(m(0)) - test2(m)) < 0.01);
        m(0) = -4;  dlog << LDEBUG << "rvm:  " << sinc(m(0)) << " " << test2(m); DLIB_TEST(abs(sinc(m(0)) - test2(m)) < 0.01);
        m(0) = 5.0; dlog << LDEBUG << "rvm:  " << sinc(m(0)) << " " << test2(m); DLIB_TEST(abs(sinc(m(0)) - test2(m)) < 0.01);

        m(0) = 2.5; dlog << LDEBUG << "rbf:  " << sinc(m(0)) << " " << test3(m); DLIB_TEST(abs(sinc(m(0)) - test3(m)) < 0.01);
        m(0) = 0.1; dlog << LDEBUG << "rbf:  " << sinc(m(0)) << " " << test3(m); DLIB_TEST(abs(sinc(m(0)) - test3(m)) < 0.01);
        m(0) = -4;  dlog << LDEBUG << "rbf:  " << sinc(m(0)) << " " << test3(m); DLIB_TEST(abs(sinc(m(0)) - test3(m)) < 0.01);
        m(0) = 5.0; dlog << LDEBUG << "rbf:  " << sinc(m(0)) << " " << test3(m); DLIB_TEST(abs(sinc(m(0)) - test3(m)) < 0.01);

        m(0) = 2.5; dlog << LDEBUG << "krr:  " << sinc(m(0)) << " " << test4(m); DLIB_TEST(abs(sinc(m(0)) - test4(m)) < 0.01);
        m(0) = 0.1; dlog << LDEBUG << "krr:  " << sinc(m(0)) << " " << test4(m); DLIB_TEST(abs(sinc(m(0)) - test4(m)) < 0.01);
        m(0) = -4;  dlog << LDEBUG << "krr:  " << sinc(m(0)) << " " << test4(m); DLIB_TEST(abs(sinc(m(0)) - test4(m)) < 0.01);
        m(0) = 5.0; dlog << LDEBUG << "krr:  " << sinc(m(0)) << " " << test4(m); DLIB_TEST(abs(sinc(m(0)) - test4(m)) < 0.01);

        m(0) = 2.5; dlog << LDEBUG << "svr:  " << sinc(m(0)) << " " << test5(m); DLIB_TEST(abs(sinc(m(0)) - test5(m)) < 0.01);
        m(0) = 0.1; dlog << LDEBUG << "svr:  " << sinc(m(0)) << " " << test5(m); DLIB_TEST(abs(sinc(m(0)) - test5(m)) < 0.01);
        m(0) = -4;  dlog << LDEBUG << "svr:  " << sinc(m(0)) << " " << test5(m); DLIB_TEST(abs(sinc(m(0)) - test5(m)) < 0.01);
        m(0) = 5.0; dlog << LDEBUG << "svr:  " << sinc(m(0)) << " " << test5(m); DLIB_TEST(abs(sinc(m(0)) - test5(m)) < 0.01);


        randomize_samples(samples, labels);
        dlog << LINFO << "KRR MSE and R-squared: "<< cross_validate_regression_trainer(krr_test, samples, labels, 6);
        dlog << LINFO << "SVR MSE and R-squared: "<< cross_validate_regression_trainer(svr_test, samples, labels, 6);
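        // cross_validate_regression_trainer() reports several statistics in a row
        // vector.  The assertions below rely only on the first two entries: cv(0) is
        // the mean squared error and cv(1) is the R-squared value, matching the log
        // lines above.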
        matrix<double,1,4> cv = cross_validate_regression_trainer(krr_test, samples, labels, 6);
        DLIB_TEST(cv(0) < 1e-4);
        DLIB_TEST(cv(1) > 0.99);
        cv = cross_validate_regression_trainer(svr_test, samples, labels, 6);
        DLIB_TEST(cv(0) < 1e-4);
        DLIB_TEST(cv(1) > 0.99);


        randomize_samples(samples2, labels2);
        dlog << LINFO << "KRR MSE and R-squared: "<< cross_validate_regression_trainer(krr_test, samples2, labels2, 6);
        dlog << LINFO << "SVR MSE and R-squared: "<< cross_validate_regression_trainer(svr_test, samples2, labels2, 6);
        cv = cross_validate_regression_trainer(krr_test, samples2, labels2, 6);
        DLIB_TEST(cv(0) < 1e-4);
        cv = cross_validate_regression_trainer(svr_test, samples2, labels2, 6);
        DLIB_TEST(cv(0) < 1e-4);

        dlog << LINFO << " end test_regression()";
    }

// ----------------------------------------------------------------------------------------

    void test_anomaly_detection (
    )
    {
        dlog << LINFO << " begin test_anomaly_detection()";
        // Here we declare that our samples will be 2 dimensional column vectors.
        typedef matrix<double,2,1> sample_type;

        // Now we are making a typedef for the kind of kernel we want to use.  I picked the
        // radial basis kernel because it only has one parameter and generally gives good
        // results without much fiddling.
        typedef radial_basis_kernel<sample_type> kernel_type;

        // Here we declare an instance of the kcentroid object.  The first argument to the constructor
        // is the kernel we wish to use.  The second is a parameter that determines the numerical
        // accuracy with which the object will perform part of the learning algorithm.  Generally
        // smaller values give better results but cause the algorithm to run slower.  You just have
        // to play with it to decide what balance of speed and accuracy is right for your problem.
        // Here we have set it to 0.01.
        kcentroid<kernel_type> test(kernel_type(0.1),0.01);


        svm_one_class_trainer<kernel_type> one_class_trainer;
        one_class_trainer.set_nu(0.4);
        one_class_trainer.set_kernel(kernel_type(0.2));

        std::vector<sample_type> samples;

        // now we train our kcentroid object on a few samples of the sinc function.
        sample_type m;
        for (double x = -15; x <= 8; x += 1)
        {
            m(0) = x;
            m(1) = sinc(x);
            test.train(m);
            samples.push_back(m);
        }

        decision_function<kernel_type> df = one_class_trainer.train(samples);

        running_stats<double> rs;

        // Now let's output the distance from the centroid to some points that are from the sinc function.
        // These numbers should all be similar.  We will also calculate the statistics of these numbers
        // by accumulating them into the running_stats object called rs.  This will let us easily
        // find the mean and standard deviation of the distances for use below.
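        // Note on rs.scale(): for a value x it returns (x - mean)/standard_deviation
        // computed over the numbers accumulated so far, i.e. how many standard
        // deviations x lies from the mean distance of the on-curve points below.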
        dlog << LDEBUG << "Points that are on the sinc function:\n";
        m(0) = -1.5; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m); rs.add(test(m));
        m(0) = -1.5; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m); rs.add(test(m));
        m(0) = -0;   m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m); rs.add(test(m));
        m(0) = -0.5; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m); rs.add(test(m));
        m(0) = -4.1; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m); rs.add(test(m));
        m(0) = -1.5; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m); rs.add(test(m));
        m(0) = -0.5; m(1) = sinc(m(0)); dlog << LDEBUG << "   " << test(m); rs.add(test(m));

        // every on-curve point should be within 2 standard deviations of the mean distance
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -0;   m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -0.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -4.1; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));
        m(0) = -0.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(rs.scale(test(m)) < 2, rs.scale(test(m)));

        // the one-class decision function should output a non-negative value (up to a
        // small numerical slack) for points that look like the training data
        const double thresh = 0.01;
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -0;   m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -0.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -4.1; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -1.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));
        m(0) = -0.5; m(1) = sinc(m(0)); DLIB_TEST_MSG(df(m)+thresh > 0, df(m));

        dlog << LDEBUG;
        // Let's output the distance from the centroid to some points that are NOT from the sinc function.
        // These numbers should all be significantly bigger than the previous set of numbers.  We will also
        // use the rs.scale() function to find out how many standard deviations they are away from the
        // mean of the test points from the sinc function.  In this case our criterion for "significantly
        // bigger" is being more than 6 standard deviations away from the points that actually are on
        // the sinc function, which is what the tests below require.
        dlog << LDEBUG << "Points that are NOT on the sinc function:\n";
        m(0) = -1.5; m(1) = sinc(m(0))+4;
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -1.5; m(1) = sinc(m(0))+3;
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -0;   m(1) = -sinc(m(0));
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -0.5; m(1) = -sinc(m(0));
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -4.1; m(1) = sinc(m(0))+2;
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -1.5; m(1) = sinc(m(0))+0.9;
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        m(0) = -0.5; m(1) = sinc(m(0))+1;
        dlog << LDEBUG << "   " << test(m) << " is " << rs.scale(test(m)) << " standard deviations from sinc.";
        DLIB_TEST_MSG(rs.scale(test(m)) > 6, rs.scale(test(m)));
        DLIB_TEST_MSG(df(m) + thresh < 0, df(m));

        dlog << LINFO << " end test_anomaly_detection()";
    }

// ----------------------------------------------------------------------------------------

    void unittest_binary_classification (
    )
    /*!
        ensures
            - runs tests on the svm components for compliance with the specs
    !*/
    {
        dlog << LINFO << " begin unittest_binary_classification()";
        print_spinner();


        typedef double scalar_type;
        typedef matrix<scalar_type,2,1> sample_type;

        std::vector<sample_type> x;
        std::vector<matrix<double,0,1> > x_linearized;
        std::vector<scalar_type> y;

        get_checkerboard_problem(x,y, 300, 2);
        const scalar_type gamma = 1;

        typedef radial_basis_kernel<sample_type> kernel_type;

        rbf_network_trainer<kernel_type> rbf_trainer;
        rbf_trainer.set_kernel(kernel_type(gamma));
        rbf_trainer.set_num_centers(100);

        rvm_trainer<kernel_type> rvm_trainer;
        rvm_trainer.set_kernel(kernel_type(gamma));

        krr_trainer<kernel_type> krr_trainer;
        krr_trainer.use_classification_loss_for_loo_cv();
        krr_trainer.set_kernel(kernel_type(gamma));

        svm_pegasos<kernel_type> pegasos_trainer;
        pegasos_trainer.set_kernel(kernel_type(gamma));
        pegasos_trainer.set_lambda(0.00001);


        svm_c_ekm_trainer<kernel_type> ocas_ekm_trainer;
        ocas_ekm_trainer.set_kernel(kernel_type(gamma));
        ocas_ekm_trainer.set_c(100000);

        svm_nu_trainer<kernel_type> trainer;
        trainer.set_kernel(kernel_type(gamma));
        trainer.set_nu(0.05);

        svm_c_trainer<kernel_type> c_trainer;
        c_trainer.set_kernel(kernel_type(gamma));
        c_trainer.set_c(100);

        svm_c_linear_trainer<linear_kernel<matrix<double,0,1> > > lin_trainer;
        lin_trainer.set_c(100000);
        // use an ekm to linearize this dataset so we can use it with the lin_trainer
        empirical_kernel_map<kernel_type> ekm;
        ekm.load(kernel_type(gamma), x);
        for (unsigned long i = 0; i < x.size(); ++i)
            x_linearized.push_back(ekm.project(x[i]));


        print_spinner();
        matrix<scalar_type> rvm_cv = cross_validate_trainer_threaded(rvm_trainer, x,y, 4, 2);
        print_spinner();
        matrix<scalar_type> krr_cv = cross_validate_trainer_threaded(krr_trainer, x,y, 4, 2);
        print_spinner();
        matrix<scalar_type> svm_cv = cross_validate_trainer(trainer, x,y, 4);
        print_spinner();
        matrix<scalar_type> svm_c_cv = cross_validate_trainer(c_trainer, x,y, 4);
        print_spinner();
        matrix<scalar_type> rbf_cv = cross_validate_trainer_threaded(rbf_trainer, x,y, 10, 2);
        print_spinner();
        matrix<scalar_type> lin_cv = cross_validate_trainer_threaded(lin_trainer, x_linearized, y, 4, 2);
        print_spinner();
        matrix<scalar_type> ocas_ekm_cv = cross_validate_trainer_threaded(ocas_ekm_trainer, x, y, 4, 2);
        print_spinner();
        ocas_ekm_trainer.set_basis(randomly_subsample(x, 300));
        matrix<scalar_type> ocas_ekm_cv2 = cross_validate_trainer_threaded(ocas_ekm_trainer, x, y, 4, 2);
        print_spinner();
        matrix<scalar_type> peg_cv = cross_validate_trainer_threaded(batch(pegasos_trainer,1.0), x,y, 4, 2);
        print_spinner();
        matrix<scalar_type> peg_c_cv = cross_validate_trainer_threaded(batch_cached(pegasos_trainer,1.0), x,y, 4, 2);
        print_spinner();

        dlog << LDEBUG << "rvm cv:        " << rvm_cv;
        dlog << LDEBUG << "krr cv:        " << krr_cv;
        dlog << LDEBUG << "nu-svm cv:     " << svm_cv;
        dlog << LDEBUG << "C-svm cv:      " << svm_c_cv;
        dlog << LDEBUG << "rbf cv:        " << rbf_cv;
        dlog << LDEBUG << "lin cv:        " << lin_cv;
        dlog << LDEBUG << "ocas_ekm cv:   " << ocas_ekm_cv;
        dlog << LDEBUG << "ocas_ekm cv2:  " << ocas_ekm_cv2;
        dlog << LDEBUG << "peg cv:        " << peg_cv;
        dlog << LDEBUG << "peg cached cv: " << peg_c_cv;
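        // Each cv result above is a row vector holding the fraction of +1 examples
        // classified correctly followed by the fraction of -1 examples classified
        // correctly, so mean(cv) below is the average per-class accuracy.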
        // make sure the cached version of pegasos computes the same result
        DLIB_TEST_MSG(sum(abs(peg_cv - peg_c_cv)) < std::sqrt(std::numeric_limits<double>::epsilon()),
                      sum(abs(peg_cv - peg_c_cv)) << "  \n" << peg_cv << peg_c_cv );

        DLIB_TEST_MSG(mean(rvm_cv) > 0.9, rvm_cv);
        DLIB_TEST_MSG(mean(krr_cv) > 0.9, krr_cv);
        DLIB_TEST_MSG(mean(svm_cv) > 0.9, svm_cv);
        DLIB_TEST_MSG(mean(svm_c_cv) > 0.9, svm_c_cv);
        DLIB_TEST_MSG(mean(rbf_cv) > 0.9, rbf_cv);
        DLIB_TEST_MSG(mean(lin_cv) > 0.9, lin_cv);
        DLIB_TEST_MSG(mean(peg_cv) > 0.9, peg_cv);
        DLIB_TEST_MSG(mean(peg_c_cv) > 0.9, peg_c_cv);
        DLIB_TEST_MSG(mean(ocas_ekm_cv) > 0.9, ocas_ekm_cv);
        DLIB_TEST_MSG(mean(ocas_ekm_cv2) > 0.9, ocas_ekm_cv2);

        const long num_sv = trainer.train(x,y).basis_vectors.size();
        print_spinner();
        const long num_rv = rvm_trainer.train(x,y).basis_vectors.size();
        print_spinner();
        dlog << LDEBUG << "num sv: " << num_sv;
        dlog << LDEBUG << "num rv: " << num_rv;
        print_spinner();
        ocas_ekm_trainer.clear_basis();
        const long num_bv = ocas_ekm_trainer.train(x,y).basis_vectors.size();
        dlog << LDEBUG << "num ekm bv: " << num_bv;

        DLIB_TEST(num_rv <= 17);
        DLIB_TEST_MSG(num_sv <= 45, num_sv);
        DLIB_TEST_MSG(num_bv <= 45, num_bv);

        // train a reduced decision function that uses at most 19 basis vectors
        decision_function<kernel_type> df = reduced2(trainer, 19).train(x,y);
        print_spinner();

        matrix<scalar_type> svm_reduced_error = test_binary_decision_function(df, x, y);
        print_spinner();
        dlog << LDEBUG << "svm reduced test error: " << svm_reduced_error;
        dlog << LDEBUG << "svm reduced num sv: " << df.basis_vectors.size();
        DLIB_TEST(mean(svm_reduced_error) > 0.9);

        svm_cv = cross_validate_trainer(reduced(trainer,30), x,y, 4);
        dlog << LDEBUG << "svm reduced cv: " << svm_cv;
        DLIB_TEST_MSG(mean(svm_cv) > 0.9, svm_cv);

        DLIB_TEST(df.basis_vectors.size() <= 19);
        dlog << LINFO << " end unittest_binary_classification()";
    }

// ----------------------------------------------------------------------------------------
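    // The objects below test dlib's analytic kernel derivatives.  kernel_der_obj fixes
    // one argument of a kernel so that k(x,.) becomes an ordinary function of a single
    // sample, and test_kernel_derivative() then checks that the analytic gradient from
    // kernel_derivative agrees with the numerical gradient computed by dlib::derivative().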
    template <typename kernel_type>
    struct kernel_der_obj
    {
        typename kernel_type::sample_type x;
        kernel_type k;

        // evaluate the kernel with its first argument fixed to x
        double operator()(const typename kernel_type::sample_type& y) const { return k(x,y); }
    };


    template <typename kernel_type>
    void test_kernel_derivative (
        const kernel_type& k,
        const typename kernel_type::sample_type& x,
        const typename kernel_type::sample_type& y
    )
    {
        kernel_der_obj<kernel_type> obj;
        obj.x = x;
        obj.k = k;
        kernel_derivative<kernel_type> der(obj.k);
        DLIB_TEST(dlib::equal(derivative(obj)(y) , der(obj.x,y), 1e-5));
    }

    void test_kernel_derivative (
    )
    {
        typedef matrix<double, 2, 1> sample_type;

        sigmoid_kernel<sample_type> k1;
        radial_basis_kernel<sample_type> k2;
        linear_kernel<sample_type> k3;
        polynomial_kernel<sample_type> k4(2,3,4);

        offset_kernel<sigmoid_kernel<sample_type> > k5;
        offset_kernel<radial_basis_kernel<sample_type> > k6;

        dlib::rand rnd;

        sample_type x, y;
        for (int i = 0; i < 10; ++i)
        {
            x = randm(2,1,rnd);
            y = randm(2,1,rnd);
            test_kernel_derivative(k1, x, y);
            test_kernel_derivative(k2, x, y);
            test_kernel_derivative(k3, x, y);
            test_kernel_derivative(k4, x, y);
            test_kernel_derivative(k5, x, y);
            test_kernel_derivative(k6, x, y);
        }
    }

// ----------------------------------------------------------------------------------------

    void test_svm_trainer2()
    {
        typedef matrix<double, 2, 1> sample_type;
        typedef linear_kernel<sample_type> kernel_type;


        // a minimal two point dataset: (1,1) is labeled +1 and (1,2) is labeled -1
        std::vector<sample_type> samples;
        std::vector<double> labels;

        sample_type samp;
        samp(0) = 1;
        samp(1) = 1;
        samples.push_back(samp);
        labels.push_back(+1);

        samp(0) = 1;
        samp(1) = 2;
        samples.push_back(samp);
        labels.push_back(-1);

        svm_c_trainer<kernel_type> trainer;

        decision_function<kernel_type> df = trainer.train(samples, labels);

        // the learned decision function should separate the two training points
        samp(0) = 1;
        samp(1) = 1;
        dlog << LINFO << "test +1 : "<< df(samp);
        DLIB_TEST(df(samp) > 0);
        samp(0) = 1;
        samp(1) = 2;
        dlog << LINFO << "test -1 : "<< df(samp);
        DLIB_TEST(df(samp) < 0);

        // the nu-svm trainer should separate them as well
        svm_nu_trainer<kernel_type> trainer2;
        df = trainer2.train(samples, labels);

        samp(0) = 1;
        samp(1) = 1;
        dlog << LINFO << "test +1 : "<< df(samp);
        DLIB_TEST(df(samp) > 0);
        samp(0) = 1;
        samp(1) = 2;
        dlog << LINFO << "test -1 : "<< df(samp);
        DLIB_TEST(df(samp) < 0);

    }

// ----------------------------------------------------------------------------------------

    class svm_tester : public tester
    {
    public:
        svm_tester (
        ) :
            tester ("test_svm",
                    "Runs tests on the svm/kernel algorithm components.")
        {}

        void perform_test (
        )
        {
            test_kernel_derivative();
            unittest_binary_classification();
            test_clustering();
            test_regression();
            test_anomaly_detection();
            test_svm_trainer2();
        }
    } a;

}