1 // Copyright (C) 2010 Davis E. King (davis@dlib.net) 2 // License: Boost Software License See LICENSE.txt for the full license. 3 4 5 #include <sstream> 6 #include <string> 7 #include <cstdlib> 8 #include <ctime> 9 #include <dlib/statistics.h> 10 #include <dlib/statistics/running_gradient.h> 11 #include <dlib/rand.h> 12 #include <dlib/svm.h> 13 #include <algorithm> 14 #include <dlib/matrix.h> 15 #include <cmath> 16 17 #include "tester.h" 18 19 namespace 20 { 21 22 using namespace test; 23 using namespace dlib; 24 using namespace std; 25 26 logger dlog("test.statistics"); 27 28 29 30 class statistics_tester : public tester 31 { 32 public: statistics_tester()33 statistics_tester ( 34 ) : 35 tester ("test_statistics", 36 "Runs tests on the statistics component.") 37 {} 38 test_random_subset_selector()39 void test_random_subset_selector () 40 { 41 random_subset_selector<double> rand_set; 42 43 for (int j = 0; j < 30; ++j) 44 { 45 print_spinner(); 46 47 running_stats<double> rs, rs2; 48 49 rand_set.set_max_size(1000); 50 51 for (double i = 0; i < 100000; ++i) 52 { 53 rs.add(i); 54 rand_set.add(i); 55 } 56 57 58 for (unsigned long i = 0; i < rand_set.size(); ++i) 59 rs2.add(rand_set[i]); 60 61 62 dlog << LDEBUG << "true mean: " << rs.mean(); 63 dlog << LDEBUG << "true sampled: " << rs2.mean(); 64 double ratio = rs.mean()/rs2.mean(); 65 DLIB_TEST_MSG(0.96 < ratio && ratio < 1.04, " ratio: " << ratio); 66 } 67 68 69 { 70 random_subset_selector<int> r1, r2; 71 r1.set_max_size(300); 72 for (int i = 0; i < 4000; ++i) 73 r1.add(i); 74 75 ostringstream sout; 76 serialize(r1, sout); 77 istringstream sin(sout.str()); 78 deserialize(r2, sin); 79 80 DLIB_TEST(r1.size() == r2.size()); 81 DLIB_TEST(r1.max_size() == r2.max_size()); 82 DLIB_TEST(r1.next_add_accepts() == r2.next_add_accepts()); 83 DLIB_TEST(std::equal(r1.begin(), r1.end(), r2.begin())); 84 85 for (int i = 0; i < 4000; ++i) 86 { 87 r1.add(i); 88 r2.add(i); 89 } 90 91 DLIB_TEST(r1.size() == r2.size()); 92 DLIB_TEST(r1.max_size() == r2.max_size()); 93 DLIB_TEST(r1.next_add_accepts() == r2.next_add_accepts()); 94 DLIB_TEST(std::equal(r1.begin(), r1.end(), r2.begin())); 95 } 96 } 97 test_random_subset_selector2()98 void test_random_subset_selector2 () 99 { 100 random_subset_selector<double> rand_set; 101 DLIB_TEST(rand_set.next_add_accepts() == false); 102 DLIB_TEST(rand_set.size() == 0); 103 DLIB_TEST(rand_set.max_size() == 0); 104 105 for (int j = 0; j < 30; ++j) 106 { 107 print_spinner(); 108 109 running_stats<double> rs, rs2; 110 111 rand_set.set_max_size(1000); 112 DLIB_TEST(rand_set.next_add_accepts() == true); 113 114 for (double i = 0; i < 100000; ++i) 115 { 116 rs.add(i); 117 if (rand_set.next_add_accepts()) 118 rand_set.add(i); 119 else 120 rand_set.add(); 121 } 122 123 DLIB_TEST(rand_set.size() == 1000); 124 DLIB_TEST(rand_set.max_size() == 1000); 125 126 for (unsigned long i = 0; i < rand_set.size(); ++i) 127 rs2.add(rand_set[i]); 128 129 130 dlog << LDEBUG << "true mean: " << rs.mean(); 131 dlog << LDEBUG << "true sampled: " << rs2.mean(); 132 double ratio = rs.mean()/rs2.mean(); 133 DLIB_TEST_MSG(0.96 < ratio && ratio < 1.04, " ratio: " << ratio); 134 } 135 } 136 test_running_cross_covariance()137 void test_running_cross_covariance () 138 { 139 running_cross_covariance<matrix<double> > rcc1, rcc2; 140 141 matrix<double,0,1> xm, ym; 142 const int num = 40; 143 144 dlib::rand rnd; 145 for (int i = 0; i < num; ++i) 146 { 147 matrix<double,0,1> x = randm(4,1,rnd); 148 matrix<double,0,1> y = randm(4,1,rnd); 149 150 xm += x/num; 151 ym += y/num; 152 153 if (i < 15) 154 rcc1.add(x,y); 155 else 156 rcc2.add(x,y); 157 } 158 159 rnd.clear(); 160 matrix<double> cov; 161 for (int i = 0; i < num; ++i) 162 { 163 matrix<double,0,1> x = randm(4,1,rnd); 164 matrix<double,0,1> y = randm(4,1,rnd); 165 cov += (x-xm)*trans(y-ym); 166 } 167 cov /= num-1; 168 169 running_cross_covariance<matrix<double> > rcc = rcc1 + rcc2; 170 DLIB_TEST(max(abs(rcc.covariance_xy()-cov)) < 1e-14); 171 DLIB_TEST(max(abs(rcc.mean_x()-xm)) < 1e-14); 172 DLIB_TEST(max(abs(rcc.mean_y()-ym)) < 1e-14); 173 } 174 dense_to_sparse(const matrix<double,0,1> & x)175 std::map<unsigned long,double> dense_to_sparse ( 176 const matrix<double,0,1>& x 177 ) 178 { 179 std::map<unsigned long,double> temp; 180 for (long i = 0; i < x.size(); ++i) 181 temp[i] = x(i); 182 return temp; 183 } 184 test_running_cross_covariance_sparse()185 void test_running_cross_covariance_sparse() 186 { 187 running_cross_covariance<matrix<double> > rcc1, rcc2; 188 189 running_covariance<matrix<double> > rc1, rc2; 190 191 matrix<double,0,1> xm, ym; 192 const int num = 40; 193 194 rc1.set_dimension(4); 195 rc2.set_dimension(4); 196 197 rcc1.set_dimensions(4,5); 198 rcc2.set_dimensions(4,5); 199 200 dlib::rand rnd; 201 for (int i = 0; i < num; ++i) 202 { 203 matrix<double,0,1> x = randm(4,1,rnd); 204 matrix<double,0,1> y = randm(5,1,rnd); 205 206 xm += x/num; 207 ym += y/num; 208 209 if (i < 15) 210 { 211 rcc1.add(x,dense_to_sparse(y)); 212 rc1.add(x); 213 } 214 else if (i < 30) 215 { 216 rcc2.add(dense_to_sparse(x),y); 217 rc2.add(dense_to_sparse(x)); 218 } 219 else 220 { 221 rcc2.add(dense_to_sparse(x),dense_to_sparse(y)); 222 rc2.add(x); 223 } 224 } 225 226 rnd.clear(); 227 matrix<double> cov, cov2; 228 for (int i = 0; i < num; ++i) 229 { 230 matrix<double,0,1> x = randm(4,1,rnd); 231 matrix<double,0,1> y = randm(5,1,rnd); 232 cov += (x-xm)*trans(y-ym); 233 cov2 += (x-xm)*trans(x-xm); 234 } 235 cov /= num-1; 236 cov2 /= num-1; 237 238 running_cross_covariance<matrix<double> > rcc = rcc1 + rcc2; 239 DLIB_TEST_MSG(max(abs(rcc.covariance_xy()-cov)) < 1e-14, max(abs(rcc.covariance_xy()-cov))); 240 DLIB_TEST(max(abs(rcc.mean_x()-xm)) < 1e-14); 241 DLIB_TEST(max(abs(rcc.mean_y()-ym)) < 1e-14); 242 243 running_covariance<matrix<double> > rc = rc1 + rc2; 244 DLIB_TEST(max(abs(rc.covariance()-cov2)) < 1e-14); 245 DLIB_TEST(max(abs(rc.mean()-xm)) < 1e-14); 246 } 247 test_running_covariance()248 void test_running_covariance ( 249 ) 250 { 251 dlib::rand rnd; 252 std::vector<matrix<double,0,1> > vects; 253 254 running_covariance<matrix<double,0,1> > cov, cov2; 255 DLIB_TEST(cov.in_vector_size() == 0); 256 257 for (unsigned long dims = 1; dims < 5; ++dims) 258 { 259 for (unsigned long samps = 2; samps < 10; ++samps) 260 { 261 vects.clear(); 262 cov.clear(); 263 DLIB_TEST(cov.in_vector_size() == 0); 264 for (unsigned long i = 0; i < samps; ++i) 265 { 266 vects.push_back(randm(dims,1,rnd)); 267 cov.add(vects.back()); 268 269 } 270 DLIB_TEST(cov.in_vector_size() == (long)dims); 271 272 DLIB_TEST(equal(mean(mat(vects)), cov.mean())); 273 DLIB_TEST_MSG(equal(covariance(mat(vects)), cov.covariance()), 274 max(abs(covariance(mat(vects)) - cov.covariance())) 275 << " dims = " << dims << " samps = " << samps 276 ); 277 } 278 } 279 280 for (unsigned long dims = 1; dims < 5; ++dims) 281 { 282 for (unsigned long samps = 2; samps < 10; ++samps) 283 { 284 vects.clear(); 285 cov.clear(); 286 cov2.clear(); 287 DLIB_TEST(cov.in_vector_size() == 0); 288 for (unsigned long i = 0; i < samps; ++i) 289 { 290 vects.push_back(randm(dims,1,rnd)); 291 if ((i%2) == 0) 292 cov.add(vects.back()); 293 else 294 cov2.add(vects.back()); 295 296 } 297 DLIB_TEST((cov+cov2).in_vector_size() == (long)dims); 298 299 DLIB_TEST(equal(mean(mat(vects)), (cov+cov2).mean())); 300 DLIB_TEST_MSG(equal(covariance(mat(vects)), (cov+cov2).covariance()), 301 max(abs(covariance(mat(vects)) - (cov+cov2).covariance())) 302 << " dims = " << dims << " samps = " << samps 303 ); 304 } 305 } 306 307 } 308 test_running_stats()309 void test_running_stats() 310 { 311 print_spinner(); 312 313 running_stats<double> rs, rs2; 314 315 running_scalar_covariance<double> rsc1, rsc2; 316 running_scalar_covariance_decayed<double> rscd1(1000000), rscd2(1000000); 317 318 for (double i = 0; i < 100; ++i) 319 { 320 rs.add(i); 321 322 rsc1.add(i,i); 323 rsc2.add(i,i); 324 rsc2.add(i,-i); 325 326 rscd1.add(i,i); 327 rscd2.add(i,i); 328 rscd2.add(i,-i); 329 } 330 331 // make sure the running_stats and running_scalar_covariance agree 332 DLIB_TEST_MSG(std::abs(rs.mean() - rsc1.mean_x()) < 1e-10, std::abs(rs.mean() - rsc1.mean_x())); 333 DLIB_TEST(std::abs(rs.mean() - rsc1.mean_y()) < 1e-10); 334 DLIB_TEST(std::abs(rs.stddev() - rsc1.stddev_x()) < 1e-10); 335 DLIB_TEST(std::abs(rs.stddev() - rsc1.stddev_y()) < 1e-10); 336 DLIB_TEST(std::abs(rs.variance() - rsc1.variance_x()) < 1e-10); 337 DLIB_TEST(std::abs(rs.variance() - rsc1.variance_y()) < 1e-10); 338 DLIB_TEST(rs.current_n() == rsc1.current_n()); 339 340 DLIB_TEST(std::abs(rsc1.correlation() - 1) < 1e-10); 341 DLIB_TEST(std::abs(rsc2.correlation() - 0) < 1e-10); 342 343 344 DLIB_TEST_MSG(std::abs(rs.mean() - rscd1.mean_x()) < 1e-2, std::abs(rs.mean() - rscd1.mean_x()) << " " << rscd1.mean_x()); 345 DLIB_TEST(std::abs(rs.mean() - rscd1.mean_y()) < 1e-2); 346 DLIB_TEST_MSG(std::abs(rs.stddev() - rscd1.stddev_x()) < 1e-2, std::abs(rs.stddev() - rscd1.stddev_x())); 347 DLIB_TEST(std::abs(rs.stddev() - rscd1.stddev_y()) < 1e-2); 348 DLIB_TEST_MSG(std::abs(rs.variance() - rscd1.variance_x()) < 1e-2, std::abs(rs.variance() - rscd1.variance_x())); 349 DLIB_TEST(std::abs(rs.variance() - rscd1.variance_y()) < 1e-2); 350 DLIB_TEST(std::abs(rscd1.correlation() - 1) < 1e-2); 351 DLIB_TEST(std::abs(rscd2.correlation() - 0) < 1e-2); 352 353 354 355 // test serialization of running_stats 356 ostringstream sout; 357 serialize(rs, sout); 358 istringstream sin(sout.str()); 359 deserialize(rs2, sin); 360 // make sure the running_stats and running_scalar_covariance agree 361 DLIB_TEST_MSG(std::abs(rs2.mean() - rsc1.mean_x()) < 1e-10, std::abs(rs2.mean() - rsc1.mean_x())); 362 DLIB_TEST(std::abs(rs2.mean() - rsc1.mean_y()) < 1e-10); 363 DLIB_TEST(std::abs(rs2.stddev() - rsc1.stddev_x()) < 1e-10); 364 DLIB_TEST(std::abs(rs2.stddev() - rsc1.stddev_y()) < 1e-10); 365 DLIB_TEST(std::abs(rs2.variance() - rsc1.variance_x()) < 1e-10); 366 DLIB_TEST(std::abs(rs2.variance() - rsc1.variance_y()) < 1e-10); 367 DLIB_TEST(rs2.current_n() == rsc1.current_n()); 368 369 rsc1.clear(); 370 rsc1.add(1, -1); 371 rsc1.add(0, 0); 372 rsc1.add(1, -1); 373 rsc1.add(0, 0); 374 rsc1.add(1, -1); 375 rsc1.add(0, 0); 376 377 DLIB_TEST(std::abs(rsc1.covariance() - -0.3) < 1e-10); 378 } 379 test_skewness_and_kurtosis_1()380 void test_skewness_and_kurtosis_1() 381 { 382 383 dlib::rand rnum; 384 running_stats<double> rs1; 385 386 double tp = 0; 387 388 rnum.set_seed("DlibRocks"); 389 390 for(int i = 0; i< 1000000; i++) 391 { 392 tp = rnum.get_random_gaussian(); 393 rs1.add(tp); 394 } 395 396 // check the unbiased skewness and excess kurtosis of one million Gaussian 397 // draws are both near_vects zero. 398 DLIB_TEST(abs(rs1.skewness()) < 0.1); 399 DLIB_TEST(abs(rs1.ex_kurtosis()) < 0.1); 400 } 401 test_skewness_and_kurtosis_2()402 void test_skewness_and_kurtosis_2() 403 { 404 405 string str = "DlibRocks"; 406 407 for(int j = 0; j<5 ; j++) 408 { 409 matrix<double,1,100000> dat; 410 dlib::rand rnum; 411 running_stats<double> rs1; 412 413 double tp = 0; 414 double n = 100000; 415 double xb = 0; 416 417 double sknum = 0; 418 double skdenom = 0; 419 double unbi_skew = 0; 420 421 double exkurnum = 0; 422 double exkurdenom = 0; 423 double unbi_exkur = 0; 424 425 random_shuffle(str.begin(), str.end()); 426 rnum.set_seed(str); 427 428 for(int i = 0; i<n; i++) 429 { 430 tp = rnum.get_random_gaussian(); 431 rs1.add(tp); 432 dat(i)=tp; 433 xb += dat(i); 434 } 435 436 xb = xb/n; 437 438 for(int i = 0; i < n; i++ ) 439 { 440 sknum += pow(dat(i) - xb,3); 441 skdenom += pow(dat(i) - xb,2); 442 exkurnum += pow(dat(i) - xb,4); 443 exkurdenom += pow(dat(i)-xb,2); 444 } 445 446 sknum = sknum/n; 447 skdenom = pow(skdenom/n,1.5); 448 exkurnum = exkurnum/n; 449 exkurdenom = pow(exkurdenom/n,2); 450 451 unbi_skew = sqrt(n*(n-1))/(n-2)*sknum/skdenom; 452 unbi_exkur = (n-1)*((n+1)*(exkurnum/exkurdenom-3)+6)/((n-2)*(n-3)); 453 454 dlog << LINFO << "Skew Diff: " << unbi_skew - rs1.skewness(); 455 dlog << LINFO << "Kur Diff: " << unbi_exkur - rs1.ex_kurtosis(); 456 457 // Test an alternative implementation of the unbiased skewness and excess 458 // kurtosis against the one in running_stats. 459 DLIB_TEST(abs(unbi_skew - rs1.skewness()) < 1e-10); 460 DLIB_TEST(abs(unbi_exkur - rs1.ex_kurtosis()) < 1e-10); 461 } 462 } 463 test_randomize_samples()464 void test_randomize_samples() 465 { 466 std::vector<unsigned int> t(15),u(15),v(15); 467 468 for (unsigned long i = 0; i < t.size(); ++i) 469 { 470 t[i] = i; 471 u[i] = i+1; 472 v[i] = i+2; 473 } 474 randomize_samples(t,u,v); 475 476 DLIB_TEST(t.size() == 15); 477 DLIB_TEST(u.size() == 15); 478 DLIB_TEST(v.size() == 15); 479 480 for (unsigned long i = 0; i < t.size(); ++i) 481 { 482 const unsigned long val = t[i]; 483 DLIB_TEST(u[i] == val+1); 484 DLIB_TEST(v[i] == val+2); 485 } 486 } test_randomize_samples2()487 void test_randomize_samples2() 488 { 489 dlib::matrix<int,15,1> t(15),u(15),v(15); 490 491 for (long i = 0; i < t.size(); ++i) 492 { 493 t(i) = i; 494 u(i) = i+1; 495 v(i) = i+2; 496 } 497 randomize_samples(t,u,v); 498 499 DLIB_TEST(t.size() == 15); 500 DLIB_TEST(u.size() == 15); 501 DLIB_TEST(v.size() == 15); 502 503 for (long i = 0; i < t.size(); ++i) 504 { 505 const long val = t(i); 506 DLIB_TEST(u(i) == val+1); 507 DLIB_TEST(v(i) == val+2); 508 } 509 } 510 another_test()511 void another_test() 512 { 513 std::vector<double> a; 514 515 running_stats<double> rs1, rs2; 516 517 for (int i = 0; i < 10; ++i) 518 { 519 rs1.add(i); 520 a.push_back(i); 521 } 522 523 DLIB_TEST(std::abs(variance(mat(a)) - rs1.variance()) < 1e-13); 524 DLIB_TEST(std::abs(stddev(mat(a)) - rs1.stddev()) < 1e-13); 525 DLIB_TEST(std::abs(mean(mat(a)) - rs1.mean()) < 1e-13); 526 527 for (int i = 10; i < 20; ++i) 528 { 529 rs2.add(i); 530 a.push_back(i); 531 } 532 533 DLIB_TEST(std::abs(variance(mat(a)) - (rs1+rs2).variance()) < 1e-13); 534 DLIB_TEST(std::abs(mean(mat(a)) - (rs1+rs2).mean()) < 1e-13); 535 DLIB_TEST((rs1+rs2).current_n() == 20); 536 537 running_scalar_covariance<double> rc1, rc2, rc3; 538 dlib::rand rnd; 539 for (double i = 0; i < 10; ++i) 540 { 541 const double a = i + rnd.get_random_gaussian(); 542 const double b = i + rnd.get_random_gaussian(); 543 rc1.add(a,b); 544 rc3.add(a,b); 545 } 546 for (double i = 11; i < 20; ++i) 547 { 548 const double a = i + rnd.get_random_gaussian(); 549 const double b = i + rnd.get_random_gaussian(); 550 rc2.add(a,b); 551 rc3.add(a,b); 552 } 553 554 DLIB_TEST(std::abs((rc1+rc2).mean_x() - rc3.mean_x()) < 1e-13); 555 DLIB_TEST(std::abs((rc1+rc2).mean_y() - rc3.mean_y()) < 1e-13); 556 DLIB_TEST_MSG(std::abs((rc1+rc2).variance_x() - rc3.variance_x()) < 1e-13, std::abs((rc1+rc2).variance_x() - rc3.variance_x())); 557 DLIB_TEST(std::abs((rc1+rc2).variance_y() - rc3.variance_y()) < 1e-13); 558 DLIB_TEST(std::abs((rc1+rc2).covariance() - rc3.covariance()) < 1e-13); 559 DLIB_TEST((rc1+rc2).current_n() == rc3.current_n()); 560 561 } 562 test_average_precision()563 void test_average_precision() 564 { 565 std::vector<bool> items; 566 DLIB_TEST(average_precision(items) == 1); 567 DLIB_TEST(average_precision(items,1) == 0); 568 569 items.push_back(true); 570 DLIB_TEST(average_precision(items) == 1); 571 DLIB_TEST(std::abs(average_precision(items,1) - 0.5) < 1e-14); 572 573 items.push_back(true); 574 DLIB_TEST(average_precision(items) == 1); 575 DLIB_TEST(std::abs(average_precision(items,1) - 2.0/3.0) < 1e-14); 576 577 items.push_back(false); 578 579 DLIB_TEST(average_precision(items) == 1); 580 DLIB_TEST(std::abs(average_precision(items,1) - 2.0/3.0) < 1e-14); 581 582 items.push_back(true); 583 584 DLIB_TEST(std::abs(average_precision(items) - (2.0+3.0/4.0)/3.0) < 1e-14); 585 586 items.push_back(true); 587 588 DLIB_TEST(std::abs(average_precision(items) - (2.0 + 4.0/5.0 + 4.0/5.0)/4.0) < 1e-14); 589 DLIB_TEST(std::abs(average_precision(items,1) - (2.0 + 4.0/5.0 + 4.0/5.0)/5.0) < 1e-14); 590 } 591 592 593 template <typename sample_type> check_distance_metrics(const std::vector<frobmetric_training_sample<sample_type>> & samples)594 void check_distance_metrics ( 595 const std::vector<frobmetric_training_sample<sample_type> >& samples 596 ) 597 { 598 running_stats<double> rs; 599 for (unsigned long i = 0; i < samples.size(); ++i) 600 { 601 for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j) 602 { 603 const double d1 = length_squared(samples[i].anchor_vect - samples[i].near_vects[j]); 604 for (unsigned long k = 0; k < samples[i].far_vects.size(); ++k) 605 { 606 const double d2 = length_squared(samples[i].anchor_vect - samples[i].far_vects[k]); 607 rs.add(d2-d1); 608 } 609 } 610 } 611 612 dlog << LINFO << "dist gap max: "<< rs.max(); 613 dlog << LINFO << "dist gap min: "<< rs.min(); 614 dlog << LINFO << "dist gap mean: "<< rs.mean(); 615 dlog << LINFO << "dist gap stddev: "<< rs.stddev(); 616 DLIB_TEST(rs.min() >= 0.99); 617 DLIB_TEST(rs.mean() >= 0.9999); 618 } 619 test_vector_normalizer_frobmetric(dlib::rand & rnd)620 void test_vector_normalizer_frobmetric(dlib::rand& rnd) 621 { 622 print_spinner(); 623 typedef matrix<double,0,1> sample_type; 624 vector_normalizer_frobmetric<sample_type> normalizer; 625 626 std::vector<frobmetric_training_sample<sample_type> > samples; 627 frobmetric_training_sample<sample_type> samp; 628 629 const long key = 1; 630 const long dims = 5; 631 // Lets make some two class training data. Each sample will have dims dimensions but 632 // only the one with index equal to key will be meaningful. In particular, if the key 633 // dimension is > 0 then the sample is class +1 and -1 otherwise. 634 635 long k = 0; 636 for (int i = 0; i < 50; ++i) 637 { 638 samp.clear(); 639 samp.anchor_vect = gaussian_randm(dims,1,k++); 640 if (samp.anchor_vect(key) > 0) 641 samp.anchor_vect(key) = rnd.get_random_double() + 5; 642 else 643 samp.anchor_vect(key) = -(rnd.get_random_double() + 5); 644 645 matrix<double,0,1> temp; 646 647 for (int j = 0; j < 5; ++j) 648 { 649 // Don't always put an equal number of near_vects and far_vects vectors into the 650 // training samples. 651 const int numa = rnd.get_random_32bit_number()%2 + 1; 652 const int numb = rnd.get_random_32bit_number()%2 + 1; 653 654 for (int num = 0; num < numa; ++num) 655 { 656 temp = gaussian_randm(dims,1,k++); temp(key) = 0.1; 657 //temp = gaussian_randm(dims,1,k++); temp(key) = std::abs(temp(key)); 658 if (samp.anchor_vect(key) > 0) samp.near_vects.push_back(temp); 659 else samp.far_vects.push_back(temp); 660 } 661 662 for (int num = 0; num < numb; ++num) 663 { 664 temp = gaussian_randm(dims,1,k++); temp(key) = -0.1; 665 //temp = gaussian_randm(dims,1,k++); temp(key) = -std::abs(temp(key)); 666 if (samp.anchor_vect(key) < 0) samp.near_vects.push_back(temp); 667 else samp.far_vects.push_back(temp); 668 } 669 } 670 samples.push_back(samp); 671 } 672 673 normalizer.set_epsilon(0.0001); 674 normalizer.set_c(100); 675 normalizer.set_max_iterations(6000); 676 normalizer.train(samples); 677 678 dlog << LINFO << "learned transform: \n" << normalizer.transform(); 679 680 matrix<double,0,1> total; 681 682 for (unsigned long i = 0; i < samples.size(); ++i) 683 { 684 samples[i].anchor_vect = normalizer(samples[i].anchor_vect); 685 total += samples[i].anchor_vect; 686 for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j) 687 samples[i].near_vects[j] = normalizer(samples[i].near_vects[j]); 688 for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j) 689 samples[i].far_vects[j] = normalizer(samples[i].far_vects[j]); 690 } 691 total /= samples.size(); 692 dlog << LINFO << "sample transformed means: "<< trans(total); 693 DLIB_TEST(length(total) < 1e-9); 694 check_distance_metrics(samples); 695 696 // make sure serialization works 697 stringstream os; 698 serialize(normalizer, os); 699 vector_normalizer_frobmetric<sample_type> normalizer2; 700 deserialize(normalizer2, os); 701 DLIB_TEST(equal(normalizer.transform(), normalizer2.transform())); 702 DLIB_TEST(equal(normalizer.transformed_means(), normalizer2.transformed_means())); 703 DLIB_TEST(normalizer.in_vector_size() == normalizer2.in_vector_size()); 704 DLIB_TEST(normalizer.out_vector_size() == normalizer2.out_vector_size()); 705 DLIB_TEST(normalizer.get_max_iterations() == normalizer2.get_max_iterations()); 706 DLIB_TEST(std::abs(normalizer.get_c() - normalizer2.get_c()) < 1e-14); 707 DLIB_TEST(std::abs(normalizer.get_epsilon() - normalizer2.get_epsilon()) < 1e-14); 708 709 } 710 prior_frobnorm_test()711 void prior_frobnorm_test() 712 { 713 frobmetric_training_sample<matrix<double,0,1> > sample; 714 std::vector<frobmetric_training_sample<matrix<double,0,1> > > samples; 715 716 matrix<double,3,1> x, near_, far_; 717 x = 0,0,0; 718 near_ = 1,0,0; 719 far_ = 0,1,0; 720 721 sample.anchor_vect = x; 722 sample.near_vects.push_back(near_); 723 sample.far_vects.push_back(far_); 724 725 samples.push_back(sample); 726 727 vector_normalizer_frobmetric<matrix<double,0,1> > trainer; 728 trainer.set_c(100); 729 print_spinner(); 730 trainer.train(samples); 731 732 matrix<double,3,3> correct; 733 correct = 0, 0, 0, 734 0, 1, 0, 735 0, 0, 0; 736 737 dlog << LDEBUG << trainer.transform(); 738 DLIB_TEST(max(abs(trainer.transform()-correct)) < 1e-8); 739 740 trainer.set_uses_identity_matrix_prior(true); 741 print_spinner(); 742 trainer.train(samples); 743 correct = 1, 0, 0, 744 0, 2, 0, 745 0, 0, 1; 746 747 dlog << LDEBUG << trainer.transform(); 748 DLIB_TEST(max(abs(trainer.transform()-correct)) < 1e-8); 749 750 } 751 test_lda()752 void test_lda () 753 { 754 // This test makes sure we pick the right direction in a simple 2D -> 1D LDA 755 typedef matrix<double,2,1> sample_type; 756 757 std::vector<unsigned long> labels; 758 std::vector<sample_type> samples; 759 for (int i=0; i<4; i++) 760 { 761 sample_type s; 762 s(0) = i; 763 s(1) = i+1; 764 samples.push_back(s); 765 labels.push_back(1); 766 767 sample_type s1; 768 s1(0) = i+1; 769 s1(1) = i; 770 samples.push_back(s1); 771 labels.push_back(2); 772 } 773 774 matrix<double> X; 775 X.set_size(8,2); 776 for (int i=0; i<8; i++){ 777 X(i,0) = samples[i](0); 778 X(i,1) = samples[i](1); 779 } 780 781 matrix<double,0,1> mean; 782 783 dlib::compute_lda_transform(X,mean,labels,1); 784 785 std::vector<double> vals1, vals2; 786 for (unsigned long i = 0; i < samples.size(); ++i) 787 { 788 double val = X*samples[i]-mean; 789 if (i%2 == 0) 790 vals1.push_back(val); 791 else 792 vals2.push_back(val); 793 dlog << LINFO << "1D LDA output: " << val; 794 } 795 796 if (vals1[0] > vals2[0]) 797 swap(vals1, vals2); 798 799 const double err = equal_error_rate(vals1, vals2).first; 800 dlog << LINFO << "LDA ERR: " << err; 801 DLIB_TEST(err == 0); 802 DLIB_TEST(equal_error_rate(vals2, vals1).first == 1); 803 } 804 test_equal_error_rate()805 void test_equal_error_rate() 806 { 807 auto result = equal_error_rate({}, {}); 808 DLIB_TEST(result.first == 0); 809 DLIB_TEST(result.second == 0); 810 811 // no error case 812 result = equal_error_rate({1,1,1}, {2,2,2}); 813 DLIB_TEST_MSG(result.first == 0, result.first); 814 DLIB_TEST_MSG(result.second == 2, result.second); 815 816 // max error case 817 result = equal_error_rate({2,2,2}, {1,1,1}); 818 DLIB_TEST_MSG(result.first == 1, result.first); 819 DLIB_TEST_MSG(result.second == 2, result.second); 820 // Another way to have max error 821 result = equal_error_rate({1,1,1}, {1,1,1}); 822 DLIB_TEST_MSG(result.second == 1, result.second); 823 DLIB_TEST_MSG(result.first == 1, result.first); 824 825 // wildly unbalanced 826 result = equal_error_rate({}, {1,1,1}); 827 DLIB_TEST_MSG(result.first == 0, result.first); 828 829 // wildly unbalanced 830 result = equal_error_rate({1,1,1}, {}); 831 DLIB_TEST_MSG(result.first == 0, result.first); 832 833 // 25% error case 834 result = equal_error_rate({1,1,1,3}, {2, 2, 0, 2}); 835 DLIB_TEST_MSG(result.first == 0.25, result.first); 836 DLIB_TEST_MSG(result.second == 2, result.second); 837 } 838 test_running_stats_decayed()839 void test_running_stats_decayed() 840 { 841 print_spinner(); 842 std::vector<double> tmp(300); 843 std::vector<double> tmp_var(tmp.size()); 844 dlib::rand rnd; 845 const int num_rounds = 100000; 846 for (int rounds = 0; rounds < num_rounds; ++rounds) 847 { 848 running_stats_decayed<double> rs(100); 849 850 for (size_t i = 0; i < tmp.size(); ++i) 851 { 852 rs.add(rnd.get_random_gaussian() + 1); 853 tmp[i] += rs.mean(); 854 if (i > 0) 855 tmp_var[i] += rs.variance(); 856 } 857 } 858 859 // should print all 1s basically since the mean and variance should always be 1. 860 for (size_t i = 0; i < tmp.size(); ++i) 861 { 862 DLIB_TEST(std::abs(1-tmp[i]/num_rounds) < 0.001); 863 if (i > 1) 864 DLIB_TEST(std::abs(1-tmp_var[i]/num_rounds) < 0.01); 865 } 866 } 867 test_running_scalar_covariance_decayed()868 void test_running_scalar_covariance_decayed() 869 { 870 print_spinner(); 871 std::vector<double> tmp(300); 872 std::vector<double> tmp_var(tmp.size()); 873 std::vector<double> tmp_covar(tmp.size()); 874 dlib::rand rnd; 875 const int num_rounds = 500000; 876 for (int rounds = 0; rounds < num_rounds; ++rounds) 877 { 878 running_scalar_covariance_decayed<double> rs(100); 879 880 for (size_t i = 0; i < tmp.size(); ++i) 881 { 882 rs.add(rnd.get_random_gaussian() + 1, rnd.get_random_gaussian() + 1); 883 tmp[i] += (rs.mean_y()+rs.mean_x())/2; 884 if (i > 0) 885 { 886 tmp_var[i] += (rs.variance_y()+rs.variance_x())/2; 887 tmp_covar[i] += rs.covariance(); 888 } 889 } 890 } 891 892 // should print all 1s basically since the mean and variance should always be 1. 893 for (size_t i = 0; i < tmp.size(); ++i) 894 { 895 DLIB_TEST(std::abs(1-tmp[i]/num_rounds) < 0.001); 896 if (i > 1) 897 { 898 DLIB_TEST(std::abs(1-tmp_var[i]/num_rounds) < 0.01); 899 DLIB_TEST(std::abs(tmp_covar[i]/num_rounds) < 0.001); 900 } 901 } 902 } 903 test_probability_values_are_increasing()904 void test_probability_values_are_increasing() { 905 DLIB_TEST(probability_values_are_increasing(std::vector<double>{1,2,3,4,5,6,7,8}) > 0.99); 906 DLIB_TEST(probability_values_are_increasing(std::vector<double>{8,7,6,5,4,4,3,2}) < 0.01); 907 DLIB_TEST(probability_values_are_increasing_robust(std::vector<double>{1,2,3,4,5,6,7,8}) > 0.99); 908 DLIB_TEST(probability_values_are_increasing_robust(std::vector<double>{8,7,6,5,4,4,3,2}) < 0.01); 909 DLIB_TEST(probability_values_are_increasing(std::vector<double>{1,2,1e10,3,4,5,6,7,8}) < 0.3); 910 DLIB_TEST(probability_values_are_increasing_robust(std::vector<double>{1,2,1e100,3,4,5,6,7,8}) > 0.99); 911 } 912 test_event_corr()913 void test_event_corr() 914 { 915 print_spinner(); 916 DLIB_TEST(event_correlation(1000,1000,500,2000) == 0); 917 DLIB_TEST(std::abs(event_correlation(1000,1000,300,2000) + 164.565757010104) < 1e-11); 918 DLIB_TEST(std::abs(event_correlation(1000,1000,700,2000) - 164.565757010104) < 1e-11); 919 920 DLIB_TEST(event_correlation(10,1000,5,2000) == 0); 921 DLIB_TEST(event_correlation(1000,10,5,2000) == 0); 922 DLIB_TEST(std::abs(event_correlation(10,1000,1,2000) - event_correlation(1000,10,1,2000)) < 1e-11); 923 DLIB_TEST(std::abs(event_correlation(10,1000,9,2000) - event_correlation(1000,10,9,2000)) < 1e-11); 924 925 DLIB_TEST(std::abs(event_correlation(10,1000,1,2000) + 3.69672251700842) < 1e-11); 926 DLIB_TEST(std::abs(event_correlation(10,1000,9,2000) - 3.69672251700842) < 1e-11); 927 } 928 perform_test()929 void perform_test ( 930 ) 931 { 932 prior_frobnorm_test(); 933 dlib::rand rnd; 934 for (int i = 0; i < 5; ++i) 935 test_vector_normalizer_frobmetric(rnd); 936 937 test_random_subset_selector(); 938 test_random_subset_selector2(); 939 test_running_covariance(); 940 test_running_cross_covariance(); 941 test_running_cross_covariance_sparse(); 942 test_running_stats(); 943 test_skewness_and_kurtosis_1(); 944 test_skewness_and_kurtosis_2(); 945 test_randomize_samples(); 946 test_randomize_samples2(); 947 another_test(); 948 test_average_precision(); 949 test_lda(); 950 test_event_corr(); 951 test_running_stats_decayed(); 952 test_running_scalar_covariance_decayed(); 953 test_equal_error_rate(); 954 test_probability_values_are_increasing(); 955 } 956 } a; 957 958 } 959 960 961