1 // Copyright (C) 2011 Davis E. King (davis@dlib.net) 2 // License: Boost Software License See LICENSE.txt for the full license. 3 4 #include <sstream> 5 #include <string> 6 #include <cstdlib> 7 #include <ctime> 8 #include <dlib/svm_threaded.h> 9 10 #include "tester.h" 11 12 namespace 13 { 14 using namespace test; 15 using namespace dlib; 16 using namespace std; 17 18 logger dlog("test.svm_struct"); 19 20 21 template < 22 typename matrix_type, 23 typename sample_type, 24 typename label_type 25 > 26 class test_multiclass_svm_problem : public structural_svm_problem_threaded<matrix_type, 27 std::vector<std::pair<unsigned long,typename matrix_type::type> > > 28 { 29 30 public: 31 typedef typename matrix_type::type scalar_type; 32 typedef std::vector<std::pair<unsigned long,scalar_type> > feature_vector_type; 33 test_multiclass_svm_problem(const std::vector<sample_type> & samples_,const std::vector<label_type> & labels_)34 test_multiclass_svm_problem ( 35 const std::vector<sample_type>& samples_, 36 const std::vector<label_type>& labels_ 37 ) : 38 structural_svm_problem_threaded<matrix_type, 39 std::vector<std::pair<unsigned long,typename matrix_type::type> > >(2), 40 samples(samples_), 41 labels(labels_), 42 dims(10+1) // +1 for the bias 43 { 44 for (int i = 0; i < 10; ++i) 45 { 46 distinct_labels.push_back(i); 47 } 48 } 49 get_num_dimensions() const50 virtual long get_num_dimensions ( 51 ) const 52 { 53 return dims*10; 54 } 55 get_num_samples() const56 virtual long get_num_samples ( 57 ) const 58 { 59 return static_cast<long>(samples.size()); 60 } 61 get_truth_joint_feature_vector(long idx,feature_vector_type & psi) const62 virtual void get_truth_joint_feature_vector ( 63 long idx, 64 feature_vector_type& psi 65 ) const 66 { 67 assign(psi, samples[idx]); 68 // Add a constant -1 to account for the bias term. 69 psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1))); 70 71 // Find which distinct label goes with this psi. 72 const long label_idx = index_of_max(mat(distinct_labels) == labels[idx]); 73 74 offset_feature_vector(psi, dims*label_idx); 75 } 76 separation_oracle(const long idx,const matrix_type & current_solution,scalar_type & loss,feature_vector_type & psi) const77 virtual void separation_oracle ( 78 const long idx, 79 const matrix_type& current_solution, 80 scalar_type& loss, 81 feature_vector_type& psi 82 ) const 83 { 84 scalar_type best_val = -std::numeric_limits<scalar_type>::infinity(); 85 unsigned long best_idx = 0; 86 87 // Figure out which label is the best. That is, what label maximizes 88 // LOSS(idx,y) + F(x,y). Note that y in this case is given by distinct_labels[i]. 89 for (unsigned long i = 0; i < distinct_labels.size(); ++i) 90 { 91 // Compute the F(x,y) part: 92 // perform: temp == dot(relevant part of current solution, samples[idx]) - current_bias 93 scalar_type temp = dot(rowm(current_solution, range(i*dims, (i+1)*dims-2)), samples[idx]) - current_solution((i+1)*dims-1); 94 95 // Add the LOSS(idx,y) part: 96 if (labels[idx] != distinct_labels[i]) 97 temp += 1; 98 99 // Now temp == LOSS(idx,y) + F(x,y). Check if it is the biggest we have seen. 100 if (temp > best_val) 101 { 102 best_val = temp; 103 best_idx = i; 104 } 105 } 106 107 assign(psi, samples[idx]); 108 // add a constant -1 to account for the bias term 109 psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1))); 110 111 offset_feature_vector(psi, dims*best_idx); 112 113 if (distinct_labels[best_idx] == labels[idx]) 114 loss = 0; 115 else 116 loss = 1; 117 } 118 119 private: 120 offset_feature_vector(feature_vector_type & sample,const unsigned long val) const121 void offset_feature_vector ( 122 feature_vector_type& sample, 123 const unsigned long val 124 ) const 125 { 126 if (val != 0) 127 { 128 for (typename feature_vector_type::iterator i = sample.begin(); i != sample.end(); ++i) 129 { 130 i->first += val; 131 } 132 } 133 } 134 135 136 const std::vector<sample_type>& samples; 137 const std::vector<label_type>& labels; 138 std::vector<label_type> distinct_labels; 139 const long dims; 140 }; 141 142 // ---------------------------------------------------------------------------------------- 143 144 template < 145 typename K, 146 typename label_type_ = typename K::scalar_type 147 > 148 class test_svm_multiclass_linear_trainer2 149 { 150 public: 151 typedef label_type_ label_type; 152 typedef K kernel_type; 153 typedef typename kernel_type::scalar_type scalar_type; 154 typedef typename kernel_type::sample_type sample_type; 155 typedef typename kernel_type::mem_manager_type mem_manager_type; 156 157 typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type; 158 159 test_svm_multiclass_linear_trainer2()160 test_svm_multiclass_linear_trainer2 ( 161 ) : 162 C(10), 163 eps(1e-4), 164 verbose(false) 165 { 166 } 167 train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels) const168 trained_function_type train ( 169 const std::vector<sample_type>& all_samples, 170 const std::vector<label_type>& all_labels 171 ) const 172 { 173 scalar_type svm_objective = 0; 174 return train(all_samples, all_labels, svm_objective); 175 } 176 train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels,scalar_type & svm_objective) const177 trained_function_type train ( 178 const std::vector<sample_type>& all_samples, 179 const std::vector<label_type>& all_labels, 180 scalar_type& svm_objective 181 ) const 182 { 183 // make sure requires clause is not broken 184 DLIB_ASSERT(is_learning_problem(all_samples,all_labels), 185 "\t trained_function_type test_svm_multiclass_linear_trainer2::train(all_samples,all_labels)" 186 << "\n\t invalid inputs were given to this function" 187 << "\n\t all_samples.size(): " << all_samples.size() 188 << "\n\t all_labels.size(): " << all_labels.size() 189 ); 190 191 typedef matrix<scalar_type,0,1> w_type; 192 w_type weights; 193 std::vector<sample_type> samples1(all_samples.begin(), all_samples.begin()+all_samples.size()/2); 194 std::vector<sample_type> samples2(all_samples.begin()+all_samples.size()/2, all_samples.end()); 195 196 std::vector<label_type> labels1(all_labels.begin(), all_labels.begin()+all_labels.size()/2); 197 std::vector<label_type> labels2(all_labels.begin()+all_labels.size()/2, all_labels.end()); 198 test_multiclass_svm_problem<w_type, sample_type, label_type> problem1(samples1, labels1); 199 test_multiclass_svm_problem<w_type, sample_type, label_type> problem2(samples2, labels2); 200 problem1.set_max_cache_size(3); 201 problem2.set_max_cache_size(0); 202 203 svm_struct_processing_node node1(problem1, 12345, 3); 204 svm_struct_processing_node node2(problem2, 12346, 0); 205 206 solver.set_inactive_plane_threshold(50); 207 solver.set_subproblem_epsilon(1e-4); 208 209 svm_struct_controller_node controller; 210 controller.set_c(C); 211 controller.set_epsilon(eps); 212 if (verbose) 213 controller.be_verbose(); 214 controller.add_processing_node("127.0.0.1", 12345); 215 controller.add_processing_node("localhost:12346"); 216 svm_objective = controller(solver, weights); 217 218 219 220 trained_function_type df; 221 222 const long dims = max_index_plus_one(all_samples); 223 df.labels = select_all_distinct_labels(all_labels); 224 df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1)); 225 df.b = colm(reshape(weights, df.labels.size(), dims+1), dims); 226 return df; 227 } 228 229 private: 230 scalar_type C; 231 scalar_type eps; 232 bool verbose; 233 mutable oca solver; 234 }; 235 236 // ---------------------------------------------------------------------------------------- 237 238 template < 239 typename K, 240 typename label_type_ = typename K::scalar_type 241 > 242 class test_svm_multiclass_linear_trainer3 243 { 244 public: 245 typedef label_type_ label_type; 246 typedef K kernel_type; 247 typedef typename kernel_type::scalar_type scalar_type; 248 typedef typename kernel_type::sample_type sample_type; 249 typedef typename kernel_type::mem_manager_type mem_manager_type; 250 251 typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type; 252 253 test_svm_multiclass_linear_trainer3()254 test_svm_multiclass_linear_trainer3 ( 255 ) : 256 C(10), 257 eps(1e-4), 258 verbose(false) 259 { 260 } 261 train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels) const262 trained_function_type train ( 263 const std::vector<sample_type>& all_samples, 264 const std::vector<label_type>& all_labels 265 ) const 266 { 267 scalar_type svm_objective = 0; 268 return train(all_samples, all_labels, svm_objective); 269 } 270 train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels,scalar_type & svm_objective) const271 trained_function_type train ( 272 const std::vector<sample_type>& all_samples, 273 const std::vector<label_type>& all_labels, 274 scalar_type& svm_objective 275 ) const 276 { 277 // make sure requires clause is not broken 278 DLIB_ASSERT(is_learning_problem(all_samples,all_labels), 279 "\t trained_function_type test_svm_multiclass_linear_trainer3::train(all_samples,all_labels)" 280 << "\n\t invalid inputs were given to this function" 281 << "\n\t all_samples.size(): " << all_samples.size() 282 << "\n\t all_labels.size(): " << all_labels.size() 283 ); 284 285 typedef matrix<scalar_type,0,1> w_type; 286 w_type weights; 287 test_multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels); 288 problem.set_max_cache_size(0); 289 290 problem.set_c(C); 291 problem.set_epsilon(eps); 292 293 if (verbose) 294 problem.be_verbose(); 295 296 solver.set_inactive_plane_threshold(50); 297 solver.set_subproblem_epsilon(1e-4); 298 svm_objective = solver(problem, weights); 299 300 301 trained_function_type df; 302 303 const long dims = max_index_plus_one(all_samples); 304 df.labels = select_all_distinct_labels(all_labels); 305 df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1)); 306 df.b = colm(reshape(weights, df.labels.size(), dims+1), dims); 307 return df; 308 } 309 310 private: 311 scalar_type C; 312 scalar_type eps; 313 bool verbose; 314 mutable oca solver; 315 }; 316 317 // ---------------------------------------------------------------------------------------- 318 319 template < 320 typename K, 321 typename label_type_ = typename K::scalar_type 322 > 323 class test_svm_multiclass_linear_trainer4 324 { 325 public: 326 typedef label_type_ label_type; 327 typedef K kernel_type; 328 typedef typename kernel_type::scalar_type scalar_type; 329 typedef typename kernel_type::sample_type sample_type; 330 typedef typename kernel_type::mem_manager_type mem_manager_type; 331 332 typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type; 333 334 test_svm_multiclass_linear_trainer4()335 test_svm_multiclass_linear_trainer4 ( 336 ) : 337 C(10), 338 eps(1e-4), 339 verbose(false) 340 { 341 } 342 train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels) const343 trained_function_type train ( 344 const std::vector<sample_type>& all_samples, 345 const std::vector<label_type>& all_labels 346 ) const 347 { 348 scalar_type svm_objective = 0; 349 return train(all_samples, all_labels, svm_objective); 350 } 351 train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels,scalar_type & svm_objective) const352 trained_function_type train ( 353 const std::vector<sample_type>& all_samples, 354 const std::vector<label_type>& all_labels, 355 scalar_type& svm_objective 356 ) const 357 { 358 // make sure requires clause is not broken 359 DLIB_ASSERT(is_learning_problem(all_samples,all_labels), 360 "\t trained_function_type test_svm_multiclass_linear_trainer4::train(all_samples,all_labels)" 361 << "\n\t invalid inputs were given to this function" 362 << "\n\t all_samples.size(): " << all_samples.size() 363 << "\n\t all_labels.size(): " << all_labels.size() 364 ); 365 366 typedef matrix<scalar_type,0,1> w_type; 367 w_type weights; 368 test_multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels); 369 problem.set_max_cache_size(3); 370 371 problem.set_c(C); 372 problem.set_epsilon(eps); 373 374 if (verbose) 375 problem.be_verbose(); 376 377 solver.set_inactive_plane_threshold(50); 378 solver.set_subproblem_epsilon(1e-4); 379 svm_objective = solver(problem, weights); 380 381 382 trained_function_type df; 383 384 const long dims = max_index_plus_one(all_samples); 385 df.labels = select_all_distinct_labels(all_labels); 386 df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1)); 387 df.b = colm(reshape(weights, df.labels.size(), dims+1), dims); 388 return df; 389 } 390 391 private: 392 scalar_type C; 393 scalar_type eps; 394 bool verbose; 395 mutable oca solver; 396 }; 397 398 // ---------------------------------------------------------------------------------------- 399 400 template < 401 typename K, 402 typename label_type_ = typename K::scalar_type 403 > 404 class test_svm_multiclass_linear_trainer5 405 { 406 public: 407 typedef label_type_ label_type; 408 typedef K kernel_type; 409 typedef typename kernel_type::scalar_type scalar_type; 410 typedef typename kernel_type::sample_type sample_type; 411 typedef typename kernel_type::mem_manager_type mem_manager_type; 412 413 typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type; 414 415 test_svm_multiclass_linear_trainer5()416 test_svm_multiclass_linear_trainer5 ( 417 ) : 418 C(10), 419 eps(1e-4), 420 verbose(false) 421 { 422 } 423 train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels) const424 trained_function_type train ( 425 const std::vector<sample_type>& all_samples, 426 const std::vector<label_type>& all_labels 427 ) const 428 { 429 scalar_type svm_objective = 0; 430 return train(all_samples, all_labels, svm_objective); 431 } 432 train(const std::vector<sample_type> & all_samples,const std::vector<label_type> & all_labels,scalar_type & svm_objective) const433 trained_function_type train ( 434 const std::vector<sample_type>& all_samples, 435 const std::vector<label_type>& all_labels, 436 scalar_type& svm_objective 437 ) const 438 { 439 // make sure requires clause is not broken 440 DLIB_ASSERT(is_learning_problem(all_samples,all_labels), 441 "\t trained_function_type test_svm_multiclass_linear_trainer5::train(all_samples,all_labels)" 442 << "\n\t invalid inputs were given to this function" 443 << "\n\t all_samples.size(): " << all_samples.size() 444 << "\n\t all_labels.size(): " << all_labels.size() 445 ); 446 447 typedef matrix<scalar_type,0,1> w_type; 448 w_type weights; 449 const long dims = max_index_plus_one(all_samples); 450 trained_function_type df; 451 df.labels = select_all_distinct_labels(all_labels); 452 multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels, df.labels, dims, 4); 453 problem.set_max_cache_size(3); 454 455 problem.set_c(C); 456 problem.set_epsilon(eps); 457 458 if (verbose) 459 problem.be_verbose(); 460 461 solver.set_inactive_plane_threshold(50); 462 solver.set_subproblem_epsilon(1e-4); 463 svm_objective = solver(problem, weights); 464 465 466 467 df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1)); 468 df.b = colm(reshape(weights, df.labels.size(), dims+1), dims); 469 return df; 470 } 471 472 private: 473 scalar_type C; 474 scalar_type eps; 475 bool verbose; 476 mutable oca solver; 477 }; 478 479 480 // ---------------------------------------------------------------------------------------- 481 482 typedef matrix<double,10,1> sample_type; 483 typedef double scalar_type; 484 make_dataset(std::vector<sample_type> & samples,std::vector<scalar_type> & labels,int num,dlib::rand & rnd)485 void make_dataset ( 486 std::vector<sample_type>& samples, 487 std::vector<scalar_type>& labels, 488 int num, 489 dlib::rand& rnd 490 ) 491 { 492 samples.clear(); 493 labels.clear(); 494 for (int i = 0; i < 10; ++i) 495 { 496 for (int j = 0; j < num; ++j) 497 { 498 sample_type samp; 499 samp = 0; 500 samp(i) = 10*rnd.get_random_double()+1; 501 502 samples.push_back(samp); 503 labels.push_back(i); 504 } 505 } 506 } 507 508 // ---------------------------------------------------------------------------------------- 509 510 class test_svm_struct : public tester 511 { 512 public: test_svm_struct()513 test_svm_struct ( 514 ) : 515 tester ("test_svm_struct", 516 "Runs tests on the structural svm components.") 517 {} 518 run_test(const std::vector<sample_type> & samples,const std::vector<scalar_type> & labels,const double true_obj)519 void run_test ( 520 const std::vector<sample_type>& samples, 521 const std::vector<scalar_type>& labels, 522 const double true_obj 523 ) 524 { 525 typedef linear_kernel<sample_type> kernel_type; 526 svm_multiclass_linear_trainer<kernel_type> trainer1; 527 test_svm_multiclass_linear_trainer2<kernel_type> trainer2; 528 test_svm_multiclass_linear_trainer3<kernel_type> trainer3; 529 test_svm_multiclass_linear_trainer4<kernel_type> trainer4; 530 test_svm_multiclass_linear_trainer5<kernel_type> trainer5; 531 532 trainer1.set_epsilon(1e-4); 533 trainer1.set_c(10); 534 535 536 multiclass_linear_decision_function<kernel_type,double> df1, df2, df3, df4, df5; 537 double obj1, obj2, obj3, obj4, obj5; 538 539 // Solve a multiclass SVM a whole bunch of different ways and make sure 540 // they all give the same answer. 541 print_spinner(); 542 df1 = trainer1.train(samples, labels, obj1); 543 print_spinner(); 544 df2 = trainer2.train(samples, labels, obj2); 545 print_spinner(); 546 df3 = trainer3.train(samples, labels, obj3); 547 print_spinner(); 548 df4 = trainer4.train(samples, labels, obj4); 549 print_spinner(); 550 df5 = trainer5.train(samples, labels, obj5); 551 print_spinner(); 552 553 dlog << LINFO << "obj1: "<< obj1; 554 dlog << LINFO << "obj2: "<< obj2; 555 dlog << LINFO << "obj3: "<< obj3; 556 dlog << LINFO << "obj4: "<< obj4; 557 dlog << LINFO << "obj5: "<< obj5; 558 DLIB_TEST(std::abs(obj1 - obj2) < 1e-2); 559 DLIB_TEST(std::abs(obj1 - obj3) < 1e-2); 560 DLIB_TEST(std::abs(obj1 - obj4) < 1e-2); 561 DLIB_TEST(std::abs(obj1 - obj5) < 1e-2); 562 DLIB_TEST(std::abs(obj1 - true_obj) < 1e-2); 563 DLIB_TEST(std::abs(obj2 - true_obj) < 1e-2); 564 DLIB_TEST(std::abs(obj3 - true_obj) < 1e-2); 565 DLIB_TEST(std::abs(obj4 - true_obj) < 1e-2); 566 DLIB_TEST(std::abs(obj5 - true_obj) < 1e-2); 567 568 dlog << LINFO << "weight error: "<< max(abs(df1.weights - df2.weights)); 569 dlog << LINFO << "weight error: "<< max(abs(df1.weights - df3.weights)); 570 dlog << LINFO << "weight error: "<< max(abs(df1.weights - df4.weights)); 571 dlog << LINFO << "weight error: "<< max(abs(df1.weights - df5.weights)); 572 573 DLIB_TEST(max(abs(df1.weights - df2.weights)) < 1e-2); 574 DLIB_TEST(max(abs(df1.weights - df3.weights)) < 1e-2); 575 DLIB_TEST(max(abs(df1.weights - df4.weights)) < 1e-2); 576 DLIB_TEST(max(abs(df1.weights - df5.weights)) < 1e-2); 577 578 dlog << LINFO << "b error: "<< max(abs(df1.b - df2.b)); 579 dlog << LINFO << "b error: "<< max(abs(df1.b - df3.b)); 580 dlog << LINFO << "b error: "<< max(abs(df1.b - df4.b)); 581 dlog << LINFO << "b error: "<< max(abs(df1.b - df5.b)); 582 DLIB_TEST(max(abs(df1.b - df2.b)) < 1e-2); 583 DLIB_TEST(max(abs(df1.b - df3.b)) < 1e-2); 584 DLIB_TEST(max(abs(df1.b - df4.b)) < 1e-2); 585 DLIB_TEST(max(abs(df1.b - df5.b)) < 1e-2); 586 587 matrix<double> res = test_multiclass_decision_function(df1, samples, labels); 588 dlog << LINFO << res; 589 dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res); 590 DLIB_TEST(sum(diag(res)) == samples.size()); 591 592 res = test_multiclass_decision_function(df2, samples, labels); 593 dlog << LINFO << res; 594 dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res); 595 DLIB_TEST(sum(diag(res)) == samples.size()); 596 597 res = test_multiclass_decision_function(df3, samples, labels); 598 dlog << LINFO << res; 599 dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res); 600 DLIB_TEST(sum(diag(res)) == samples.size()); 601 602 res = test_multiclass_decision_function(df4, samples, labels); 603 dlog << LINFO << res; 604 dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res); 605 DLIB_TEST(sum(diag(res)) == samples.size()); 606 607 res = test_multiclass_decision_function(df5, samples, labels); 608 dlog << LINFO << res; 609 dlog << LINFO << "accuracy: " << sum(diag(res))/sum(res); 610 DLIB_TEST(sum(diag(res)) == samples.size()); 611 } 612 perform_test()613 void perform_test ( 614 ) 615 { 616 std::vector<sample_type> samples; 617 std::vector<scalar_type> labels; 618 619 dlib::rand rnd; 620 621 dlog << LINFO << "test with 100 samples per class"; 622 make_dataset(samples, labels, 100, rnd); 623 run_test(samples, labels, 1.155); 624 625 dlog << LINFO << "test with 1 sample per class"; 626 make_dataset(samples, labels, 1, rnd); 627 run_test(samples, labels, 0.251); 628 629 dlog << LINFO << "test with 2 sample per class"; 630 make_dataset(samples, labels, 2, rnd); 631 run_test(samples, labels, 0.444); 632 } 633 } a; 634 635 636 637 } 638 639 640 641 642