1 // Copyright (C) 2008 Davis E. King (davis@dlib.net), Steve Taylor 2 // License: Boost Software License See LICENSE.txt for the full license. 3 #ifndef DLIB_STATISTICs_ 4 #define DLIB_STATISTICs_ 5 6 #include "statistics_abstract.h" 7 #include <limits> 8 #include <cmath> 9 #include "../algs.h" 10 #include "../matrix.h" 11 #include "../sparse_vector.h" 12 13 namespace dlib 14 { 15 16 // ---------------------------------------------------------------------------------------- 17 18 template < 19 typename T 20 > 21 class running_stats 22 { 23 public: 24 running_stats()25 running_stats() 26 { 27 clear(); 28 29 COMPILE_TIME_ASSERT (( 30 is_same_type<float,T>::value || 31 is_same_type<double,T>::value || 32 is_same_type<long double,T>::value 33 )); 34 } 35 clear()36 void clear() 37 { 38 sum = 0; 39 sum_sqr = 0; 40 sum_cub = 0; 41 sum_four = 0; 42 43 n = 0; 44 min_value = std::numeric_limits<T>::infinity(); 45 max_value = -std::numeric_limits<T>::infinity(); 46 } 47 add(const T & val)48 void add ( 49 const T& val 50 ) 51 { 52 sum += val; 53 sum_sqr += val*val; 54 sum_cub += cubed(val); 55 sum_four += quaded(val); 56 57 if (val < min_value) 58 min_value = val; 59 if (val > max_value) 60 max_value = val; 61 62 ++n; 63 } 64 current_n()65 T current_n ( 66 ) const 67 { 68 return n; 69 } 70 mean()71 T mean ( 72 ) const 73 { 74 if (n != 0) 75 return sum/n; 76 else 77 return 0; 78 } 79 max()80 T max ( 81 ) const 82 { 83 // make sure requires clause is not broken 84 DLIB_ASSERT(current_n() > 0, 85 "\tT running_stats::max" 86 << "\n\tyou have to add some numbers to this object first" 87 << "\n\tthis: " << this 88 ); 89 90 return max_value; 91 } 92 min()93 T min ( 94 ) const 95 { 96 // make sure requires clause is not broken 97 DLIB_ASSERT(current_n() > 0, 98 "\tT running_stats::min" 99 << "\n\tyou have to add some numbers to this object first" 100 << "\n\tthis: " << this 101 ); 102 103 return min_value; 104 } 105 variance()106 T variance ( 107 ) const 108 { 109 // make sure requires clause is not broken 110 DLIB_ASSERT(current_n() > 1, 111 "\tT running_stats::variance" 112 << "\n\tyou have to add some numbers to this object first" 113 << "\n\tthis: " << this 114 ); 115 116 T temp = 1/(n-1); 117 temp = temp*(sum_sqr - sum*sum/n); 118 // make sure the variance is never negative. This might 119 // happen due to numerical errors. 120 if (temp >= 0) 121 return temp; 122 else 123 return 0; 124 } 125 stddev()126 T stddev ( 127 ) const 128 { 129 // make sure requires clause is not broken 130 DLIB_ASSERT(current_n() > 1, 131 "\tT running_stats::stddev" 132 << "\n\tyou have to add some numbers to this object first" 133 << "\n\tthis: " << this 134 ); 135 136 return std::sqrt(variance()); 137 } 138 skewness()139 T skewness ( 140 ) const 141 { 142 // make sure requires clause is not broken 143 DLIB_ASSERT(current_n() > 2, 144 "\tT running_stats::skewness" 145 << "\n\tyou have to add some numbers to this object first" 146 << "\n\tthis: " << this 147 ); 148 149 T temp = 1/n; 150 T temp1 = std::sqrt(n*(n-1))/(n-2); 151 temp = temp1*temp*(sum_cub - 3*sum_sqr*sum*temp + 2*cubed(sum)*temp*temp)/ 152 (std::sqrt(std::pow(temp*(sum_sqr-sum*sum*temp),3))); 153 154 return temp; 155 } 156 ex_kurtosis()157 T ex_kurtosis ( 158 ) const 159 { 160 // make sure requires clause is not broken 161 DLIB_ASSERT(current_n() > 3, 162 "\tT running_stats::kurtosis" 163 << "\n\tyou have to add some numbers to this object first" 164 << "\n\tthis: " << this 165 ); 166 167 T temp = 1/n; 168 T m4 = temp*(sum_four - 4*sum_cub*sum*temp+6*sum_sqr*sum*sum*temp*temp 169 -3*quaded(sum)*cubed(temp)); 170 T m2 = temp*(sum_sqr-sum*sum*temp); 171 temp = (n-1)*((n+1)*m4/(m2*m2)-3*(n-1))/((n-2)*(n-3)); 172 173 return temp; 174 } 175 scale(const T & val)176 T scale ( 177 const T& val 178 ) const 179 { 180 // make sure requires clause is not broken 181 DLIB_ASSERT(current_n() > 1, 182 "\tT running_stats::variance" 183 << "\n\tyou have to add some numbers to this object first" 184 << "\n\tthis: " << this 185 ); 186 return (val-mean())/std::sqrt(variance()); 187 } 188 189 running_stats operator+ ( 190 const running_stats& rhs 191 ) const 192 { 193 running_stats temp(*this); 194 195 temp.sum += rhs.sum; 196 temp.sum_sqr += rhs.sum_sqr; 197 temp.sum_cub += rhs.sum_cub; 198 temp.sum_four += rhs.sum_four; 199 temp.n += rhs.n; 200 temp.min_value = std::min(rhs.min_value, min_value); 201 temp.max_value = std::max(rhs.max_value, max_value); 202 return temp; 203 } 204 205 template <typename U> 206 friend void serialize ( 207 const running_stats<U>& item, 208 std::ostream& out 209 ); 210 211 template <typename U> 212 friend void deserialize ( 213 running_stats<U>& item, 214 std::istream& in 215 ); 216 217 private: 218 T sum; 219 T sum_sqr; 220 T sum_cub; 221 T sum_four; 222 T n; 223 T min_value; 224 T max_value; 225 cubed(const T & val)226 T cubed (const T& val) const {return val*val*val; } quaded(const T & val)227 T quaded (const T& val) const {return val*val*val*val; } 228 }; 229 230 template <typename T> serialize(const running_stats<T> & item,std::ostream & out)231 void serialize ( 232 const running_stats<T>& item, 233 std::ostream& out 234 ) 235 { 236 int version = 2; 237 serialize(version, out); 238 239 serialize(item.sum, out); 240 serialize(item.sum_sqr, out); 241 serialize(item.sum_cub, out); 242 serialize(item.sum_four, out); 243 serialize(item.n, out); 244 serialize(item.min_value, out); 245 serialize(item.max_value, out); 246 } 247 248 template <typename T> deserialize(running_stats<T> & item,std::istream & in)249 void deserialize ( 250 running_stats<T>& item, 251 std::istream& in 252 ) 253 { 254 int version = 0; 255 deserialize(version, in); 256 if (version != 2) 257 throw dlib::serialization_error("Unexpected version number found while deserializing dlib::running_stats object."); 258 259 deserialize(item.sum, in); 260 deserialize(item.sum_sqr, in); 261 deserialize(item.sum_cub, in); 262 deserialize(item.sum_four, in); 263 deserialize(item.n, in); 264 deserialize(item.min_value, in); 265 deserialize(item.max_value, in); 266 } 267 268 // ---------------------------------------------------------------------------------------- 269 270 template < 271 typename T 272 > 273 class running_scalar_covariance 274 { 275 public: 276 running_scalar_covariance()277 running_scalar_covariance() 278 { 279 clear(); 280 281 COMPILE_TIME_ASSERT (( 282 is_same_type<float,T>::value || 283 is_same_type<double,T>::value || 284 is_same_type<long double,T>::value 285 )); 286 } 287 clear()288 void clear() 289 { 290 sum_xy = 0; 291 sum_x = 0; 292 sum_y = 0; 293 sum_xx = 0; 294 sum_yy = 0; 295 n = 0; 296 } 297 add(const T & x,const T & y)298 void add ( 299 const T& x, 300 const T& y 301 ) 302 { 303 sum_xy += x*y; 304 305 sum_xx += x*x; 306 sum_yy += y*y; 307 308 sum_x += x; 309 sum_y += y; 310 311 n += 1; 312 } 313 current_n()314 T current_n ( 315 ) const 316 { 317 return n; 318 } 319 mean_x()320 T mean_x ( 321 ) const 322 { 323 if (n != 0) 324 return sum_x/n; 325 else 326 return 0; 327 } 328 mean_y()329 T mean_y ( 330 ) const 331 { 332 if (n != 0) 333 return sum_y/n; 334 else 335 return 0; 336 } 337 covariance()338 T covariance ( 339 ) const 340 { 341 // make sure requires clause is not broken 342 DLIB_ASSERT(current_n() > 1, 343 "\tT running_scalar_covariance::covariance()" 344 << "\n\tyou have to add some numbers to this object first" 345 << "\n\tthis: " << this 346 ); 347 348 return 1/(n-1) * (sum_xy - sum_y*sum_x/n); 349 } 350 correlation()351 T correlation ( 352 ) const 353 { 354 // make sure requires clause is not broken 355 DLIB_ASSERT(current_n() > 1, 356 "\tT running_scalar_covariance::correlation()" 357 << "\n\tyou have to add some numbers to this object first" 358 << "\n\tthis: " << this 359 ); 360 361 return covariance() / std::sqrt(variance_x()*variance_y()); 362 } 363 variance_x()364 T variance_x ( 365 ) const 366 { 367 // make sure requires clause is not broken 368 DLIB_ASSERT(current_n() > 1, 369 "\tT running_scalar_covariance::variance_x()" 370 << "\n\tyou have to add some numbers to this object first" 371 << "\n\tthis: " << this 372 ); 373 374 T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n); 375 // make sure the variance is never negative. This might 376 // happen due to numerical errors. 377 if (temp >= 0) 378 return temp; 379 else 380 return 0; 381 } 382 variance_y()383 T variance_y ( 384 ) const 385 { 386 // make sure requires clause is not broken 387 DLIB_ASSERT(current_n() > 1, 388 "\tT running_scalar_covariance::variance_y()" 389 << "\n\tyou have to add some numbers to this object first" 390 << "\n\tthis: " << this 391 ); 392 393 T temp = 1/(n-1) * (sum_yy - sum_y*sum_y/n); 394 // make sure the variance is never negative. This might 395 // happen due to numerical errors. 396 if (temp >= 0) 397 return temp; 398 else 399 return 0; 400 } 401 stddev_x()402 T stddev_x ( 403 ) const 404 { 405 // make sure requires clause is not broken 406 DLIB_ASSERT(current_n() > 1, 407 "\tT running_scalar_covariance::stddev_x()" 408 << "\n\tyou have to add some numbers to this object first" 409 << "\n\tthis: " << this 410 ); 411 412 return std::sqrt(variance_x()); 413 } 414 stddev_y()415 T stddev_y ( 416 ) const 417 { 418 // make sure requires clause is not broken 419 DLIB_ASSERT(current_n() > 1, 420 "\tT running_scalar_covariance::stddev_y()" 421 << "\n\tyou have to add some numbers to this object first" 422 << "\n\tthis: " << this 423 ); 424 425 return std::sqrt(variance_y()); 426 } 427 428 running_scalar_covariance operator+ ( 429 const running_scalar_covariance& rhs 430 ) const 431 { 432 running_scalar_covariance temp(rhs); 433 434 temp.sum_xy += sum_xy; 435 temp.sum_x += sum_x; 436 temp.sum_y += sum_y; 437 temp.sum_xx += sum_xx; 438 temp.sum_yy += sum_yy; 439 temp.n += n; 440 return temp; 441 } 442 443 private: 444 445 T sum_xy; 446 T sum_x; 447 T sum_y; 448 T sum_xx; 449 T sum_yy; 450 T n; 451 }; 452 453 // ---------------------------------------------------------------------------------------- 454 455 template < 456 typename T 457 > 458 class running_scalar_covariance_decayed 459 { 460 public: 461 462 explicit running_scalar_covariance_decayed( 463 T decay_halflife = 1000 464 ) 465 { 466 DLIB_ASSERT(decay_halflife > 0); 467 468 sum_xy = 0; 469 sum_x = 0; 470 sum_y = 0; 471 sum_xx = 0; 472 sum_yy = 0; 473 forget = std::pow(0.5, 1/decay_halflife); 474 n = 0; 475 476 COMPILE_TIME_ASSERT (( 477 is_same_type<float,T>::value || 478 is_same_type<double,T>::value || 479 is_same_type<long double,T>::value 480 )); 481 } 482 forget_factor()483 T forget_factor ( 484 ) const 485 { 486 return forget; 487 } 488 add(const T & x,const T & y)489 void add ( 490 const T& x, 491 const T& y 492 ) 493 { 494 sum_xy = sum_xy*forget + x*y; 495 496 sum_xx = sum_xx*forget + x*x; 497 sum_yy = sum_yy*forget + y*y; 498 499 sum_x = sum_x*forget + x; 500 sum_y = sum_y*forget + y; 501 502 n = n*forget + 1; 503 } 504 current_n()505 T current_n ( 506 ) const 507 { 508 return n; 509 } 510 mean_x()511 T mean_x ( 512 ) const 513 { 514 if (n != 0) 515 return sum_x/n; 516 else 517 return 0; 518 } 519 mean_y()520 T mean_y ( 521 ) const 522 { 523 if (n != 0) 524 return sum_y/n; 525 else 526 return 0; 527 } 528 covariance()529 T covariance ( 530 ) const 531 { 532 // make sure requires clause is not broken 533 DLIB_ASSERT(current_n() > 1, 534 "\tT running_scalar_covariance_decayed::covariance()" 535 << "\n\tyou have to add some numbers to this object first" 536 << "\n\tthis: " << this 537 ); 538 539 return 1/(n-1) * (sum_xy - sum_y*sum_x/n); 540 } 541 correlation()542 T correlation ( 543 ) const 544 { 545 // make sure requires clause is not broken 546 DLIB_ASSERT(current_n() > 1, 547 "\tT running_scalar_covariance_decayed::correlation()" 548 << "\n\tyou have to add some numbers to this object first" 549 << "\n\tthis: " << this 550 ); 551 552 T temp = std::sqrt(variance_x()*variance_y()); 553 if (temp != 0) 554 return covariance() / temp; 555 else 556 return 0; // just say it's zero if there isn't any variance in x or y. 557 } 558 variance_x()559 T variance_x ( 560 ) const 561 { 562 // make sure requires clause is not broken 563 DLIB_ASSERT(current_n() > 1, 564 "\tT running_scalar_covariance_decayed::variance_x()" 565 << "\n\tyou have to add some numbers to this object first" 566 << "\n\tthis: " << this 567 ); 568 569 T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n); 570 // make sure the variance is never negative. This might 571 // happen due to numerical errors. 572 if (temp >= 0) 573 return temp; 574 else 575 return 0; 576 } 577 variance_y()578 T variance_y ( 579 ) const 580 { 581 // make sure requires clause is not broken 582 DLIB_ASSERT(current_n() > 1, 583 "\tT running_scalar_covariance_decayed::variance_y()" 584 << "\n\tyou have to add some numbers to this object first" 585 << "\n\tthis: " << this 586 ); 587 588 T temp = 1/(n-1) * (sum_yy - sum_y*sum_y/n); 589 // make sure the variance is never negative. This might 590 // happen due to numerical errors. 591 if (temp >= 0) 592 return temp; 593 else 594 return 0; 595 } 596 stddev_x()597 T stddev_x ( 598 ) const 599 { 600 // make sure requires clause is not broken 601 DLIB_ASSERT(current_n() > 1, 602 "\tT running_scalar_covariance_decayed::stddev_x()" 603 << "\n\tyou have to add some numbers to this object first" 604 << "\n\tthis: " << this 605 ); 606 607 return std::sqrt(variance_x()); 608 } 609 stddev_y()610 T stddev_y ( 611 ) const 612 { 613 // make sure requires clause is not broken 614 DLIB_ASSERT(current_n() > 1, 615 "\tT running_scalar_covariance_decayed::stddev_y()" 616 << "\n\tyou have to add some numbers to this object first" 617 << "\n\tthis: " << this 618 ); 619 620 return std::sqrt(variance_y()); 621 } 622 623 private: 624 625 T sum_xy; 626 T sum_x; 627 T sum_y; 628 T sum_xx; 629 T sum_yy; 630 T n; 631 T forget; 632 }; 633 634 // ---------------------------------------------------------------------------------------- 635 636 template < 637 typename T 638 > 639 class running_stats_decayed 640 { 641 public: 642 643 explicit running_stats_decayed( 644 T decay_halflife = 1000 645 ) 646 { 647 DLIB_ASSERT(decay_halflife > 0); 648 649 sum_x = 0; 650 sum_xx = 0; 651 forget = std::pow(0.5, 1/decay_halflife); 652 n = 0; 653 654 COMPILE_TIME_ASSERT (( 655 is_same_type<float,T>::value || 656 is_same_type<double,T>::value || 657 is_same_type<long double,T>::value 658 )); 659 } 660 forget_factor()661 T forget_factor ( 662 ) const 663 { 664 return forget; 665 } 666 add(const T & x)667 void add ( 668 const T& x 669 ) 670 { 671 672 sum_xx = sum_xx*forget + x*x; 673 674 sum_x = sum_x*forget + x; 675 676 n = n*forget + 1; 677 } 678 current_n()679 T current_n ( 680 ) const 681 { 682 return n; 683 } 684 mean()685 T mean ( 686 ) const 687 { 688 if (n != 0) 689 return sum_x/n; 690 else 691 return 0; 692 } 693 variance()694 T variance ( 695 ) const 696 { 697 // make sure requires clause is not broken 698 DLIB_ASSERT(current_n() > 1, 699 "\tT running_stats_decayed::variance()" 700 << "\n\tyou have to add some numbers to this object first" 701 << "\n\tthis: " << this 702 ); 703 704 T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n); 705 // make sure the variance is never negative. This might 706 // happen due to numerical errors. 707 if (temp >= 0) 708 return temp; 709 else 710 return 0; 711 } 712 stddev()713 T stddev ( 714 ) const 715 { 716 // make sure requires clause is not broken 717 DLIB_ASSERT(current_n() > 1, 718 "\tT running_stats_decayed::stddev()" 719 << "\n\tyou have to add some numbers to this object first" 720 << "\n\tthis: " << this 721 ); 722 723 return std::sqrt(variance()); 724 } 725 726 template <typename U> 727 friend void serialize ( 728 const running_stats_decayed<U>& item, 729 std::ostream& out 730 ); 731 732 template <typename U> 733 friend void deserialize ( 734 running_stats_decayed<U>& item, 735 std::istream& in 736 ); 737 738 private: 739 740 T sum_x; 741 T sum_xx; 742 T n; 743 T forget; 744 }; 745 746 template <typename T> serialize(const running_stats_decayed<T> & item,std::ostream & out)747 void serialize ( 748 const running_stats_decayed<T>& item, 749 std::ostream& out 750 ) 751 { 752 int version = 1; 753 serialize(version, out); 754 755 serialize(item.sum_x, out); 756 serialize(item.sum_xx, out); 757 serialize(item.n, out); 758 serialize(item.forget, out); 759 } 760 761 template <typename T> deserialize(running_stats_decayed<T> & item,std::istream & in)762 void deserialize ( 763 running_stats_decayed<T>& item, 764 std::istream& in 765 ) 766 { 767 int version = 0; 768 deserialize(version, in); 769 if (version != 1) 770 throw dlib::serialization_error("Unexpected version number found while deserializing dlib::running_stats_decayed object."); 771 772 deserialize(item.sum_x, in); 773 deserialize(item.sum_xx, in); 774 deserialize(item.n, in); 775 deserialize(item.forget, in); 776 } 777 778 // ---------------------------------------------------------------------------------------- 779 780 template < 781 typename T, 782 typename alloc 783 > mean_sign_agreement(const std::vector<T,alloc> & a,const std::vector<T,alloc> & b)784 double mean_sign_agreement ( 785 const std::vector<T,alloc>& a, 786 const std::vector<T,alloc>& b 787 ) 788 { 789 // make sure requires clause is not broken 790 DLIB_ASSERT(a.size() == b.size(), 791 "\t double mean_sign_agreement(a,b)" 792 << "\n\t a and b must be the same length." 793 << "\n\t a.size(): " << a.size() 794 << "\n\t b.size(): " << b.size() 795 ); 796 797 798 double temp = 0; 799 for (unsigned long i = 0; i < a.size(); ++i) 800 { 801 if ((a[i] >= 0 && b[i] >= 0) || 802 (a[i] < 0 && b[i] < 0)) 803 { 804 temp += 1; 805 } 806 } 807 808 return temp/a.size(); 809 } 810 811 // ---------------------------------------------------------------------------------------- 812 813 template < 814 typename T, 815 typename alloc 816 > correlation(const std::vector<T,alloc> & a,const std::vector<T,alloc> & b)817 double correlation ( 818 const std::vector<T,alloc>& a, 819 const std::vector<T,alloc>& b 820 ) 821 { 822 // make sure requires clause is not broken 823 DLIB_ASSERT(a.size() == b.size() && a.size() > 1, 824 "\t double correlation(a,b)" 825 << "\n\t a and b must be the same length and have more than one element." 826 << "\n\t a.size(): " << a.size() 827 << "\n\t b.size(): " << b.size() 828 ); 829 830 running_scalar_covariance<double> rs; 831 for (unsigned long i = 0; i < a.size(); ++i) 832 { 833 rs.add(a[i], b[i]); 834 } 835 return rs.correlation(); 836 } 837 838 // ---------------------------------------------------------------------------------------- 839 840 template < 841 typename T, 842 typename alloc 843 > covariance(const std::vector<T,alloc> & a,const std::vector<T,alloc> & b)844 double covariance ( 845 const std::vector<T,alloc>& a, 846 const std::vector<T,alloc>& b 847 ) 848 { 849 // make sure requires clause is not broken 850 DLIB_ASSERT(a.size() == b.size() && a.size() > 1, 851 "\t double covariance(a,b)" 852 << "\n\t a and b must be the same length and have more than one element." 853 << "\n\t a.size(): " << a.size() 854 << "\n\t b.size(): " << b.size() 855 ); 856 857 running_scalar_covariance<double> rs; 858 for (unsigned long i = 0; i < a.size(); ++i) 859 { 860 rs.add(a[i], b[i]); 861 } 862 return rs.covariance(); 863 } 864 865 // ---------------------------------------------------------------------------------------- 866 867 template < 868 typename T, 869 typename alloc 870 > r_squared(const std::vector<T,alloc> & a,const std::vector<T,alloc> & b)871 double r_squared ( 872 const std::vector<T,alloc>& a, 873 const std::vector<T,alloc>& b 874 ) 875 { 876 // make sure requires clause is not broken 877 DLIB_ASSERT(a.size() == b.size() && a.size() > 1, 878 "\t double r_squared(a,b)" 879 << "\n\t a and b must be the same length and have more than one element." 880 << "\n\t a.size(): " << a.size() 881 << "\n\t b.size(): " << b.size() 882 ); 883 884 return std::pow(correlation(a,b),2.0); 885 } 886 887 // ---------------------------------------------------------------------------------------- 888 889 template < 890 typename T, 891 typename alloc 892 > mean_squared_error(const std::vector<T,alloc> & a,const std::vector<T,alloc> & b)893 double mean_squared_error ( 894 const std::vector<T,alloc>& a, 895 const std::vector<T,alloc>& b 896 ) 897 { 898 // make sure requires clause is not broken 899 DLIB_ASSERT(a.size() == b.size(), 900 "\t double mean_squared_error(a,b)" 901 << "\n\t a and b must be the same length." 902 << "\n\t a.size(): " << a.size() 903 << "\n\t b.size(): " << b.size() 904 ); 905 906 return mean(squared(matrix_cast<double>(mat(a))-matrix_cast<double>(mat(b)))); 907 } 908 909 // ---------------------------------------------------------------------------------------- 910 911 template < 912 typename matrix_type 913 > 914 class running_covariance 915 { 916 /*! 917 INITIAL VALUE 918 - vect_size == 0 919 - total_count == 0 920 921 CONVENTION 922 - vect_size == in_vector_size() 923 - total_count == current_n() 924 925 - if (total_count != 0) 926 - total_sum == the sum of all vectors given to add() 927 - the covariance of all the elements given to add() is given 928 by: 929 - let avg == total_sum/total_count 930 - covariance == total_cov/total_count - avg*trans(avg) 931 !*/ 932 public: 933 934 typedef typename matrix_type::mem_manager_type mem_manager_type; 935 typedef typename matrix_type::type scalar_type; 936 typedef typename matrix_type::layout_type layout_type; 937 typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; 938 typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; 939 running_covariance()940 running_covariance( 941 ) 942 { 943 clear(); 944 } 945 clear()946 void clear( 947 ) 948 { 949 total_count = 0; 950 951 vect_size = 0; 952 953 total_sum.set_size(0); 954 total_cov.set_size(0,0); 955 } 956 in_vector_size()957 long in_vector_size ( 958 ) const 959 { 960 return vect_size; 961 } 962 current_n()963 long current_n ( 964 ) const 965 { 966 return static_cast<long>(total_count); 967 } 968 set_dimension(long size)969 void set_dimension ( 970 long size 971 ) 972 { 973 // make sure requires clause is not broken 974 DLIB_ASSERT( size > 0, 975 "\t void running_covariance::set_dimension()" 976 << "\n\t Invalid inputs were given to this function" 977 << "\n\t size: " << size 978 << "\n\t this: " << this 979 ); 980 981 clear(); 982 vect_size = size; 983 total_sum.set_size(size); 984 total_cov.set_size(size,size); 985 total_sum = 0; 986 total_cov = 0; 987 } 988 989 template <typename T> add(const T & val)990 typename disable_if<is_matrix<T> >::type add ( 991 const T& val 992 ) 993 { 994 // make sure requires clause is not broken 995 DLIB_ASSERT(((long)max_index_plus_one(val) <= in_vector_size() && in_vector_size() > 0), 996 "\t void running_covariance::add()" 997 << "\n\t Invalid inputs were given to this function" 998 << "\n\t max_index_plus_one(val): " << max_index_plus_one(val) 999 << "\n\t in_vector_size(): " << in_vector_size() 1000 << "\n\t this: " << this 1001 ); 1002 1003 for (typename T::const_iterator i = val.begin(); i != val.end(); ++i) 1004 { 1005 total_sum(i->first) += i->second; 1006 for (typename T::const_iterator j = val.begin(); j != val.end(); ++j) 1007 { 1008 total_cov(i->first, j->first) += i->second*j->second; 1009 } 1010 } 1011 1012 ++total_count; 1013 } 1014 1015 template <typename T> add(const T & val)1016 typename enable_if<is_matrix<T> >::type add ( 1017 const T& val 1018 ) 1019 { 1020 // make sure requires clause is not broken 1021 DLIB_ASSERT(is_col_vector(val) && (in_vector_size() == 0 || val.size() == in_vector_size()), 1022 "\t void running_covariance::add()" 1023 << "\n\t Invalid inputs were given to this function" 1024 << "\n\t is_col_vector(val): " << is_col_vector(val) 1025 << "\n\t in_vector_size(): " << in_vector_size() 1026 << "\n\t val.size(): " << val.size() 1027 << "\n\t this: " << this 1028 ); 1029 1030 vect_size = val.size(); 1031 if (total_count == 0) 1032 { 1033 total_cov = val*trans(val); 1034 total_sum = val; 1035 } 1036 else 1037 { 1038 total_cov += val*trans(val); 1039 total_sum += val; 1040 } 1041 ++total_count; 1042 } 1043 mean()1044 const column_matrix mean ( 1045 ) const 1046 { 1047 // make sure requires clause is not broken 1048 DLIB_ASSERT( in_vector_size() != 0, 1049 "\t running_covariance::mean()" 1050 << "\n\t This object can not execute this function in its current state." 1051 << "\n\t in_vector_size(): " << in_vector_size() 1052 << "\n\t current_n(): " << current_n() 1053 << "\n\t this: " << this 1054 ); 1055 1056 return total_sum/total_count; 1057 } 1058 covariance()1059 const general_matrix covariance ( 1060 ) const 1061 { 1062 // make sure requires clause is not broken 1063 DLIB_ASSERT( in_vector_size() != 0 && current_n() > 1, 1064 "\t running_covariance::covariance()" 1065 << "\n\t This object can not execute this function in its current state." 1066 << "\n\t in_vector_size(): " << in_vector_size() 1067 << "\n\t current_n(): " << current_n() 1068 << "\n\t this: " << this 1069 ); 1070 1071 return (total_cov - total_sum*trans(total_sum)/total_count)/(total_count-1); 1072 } 1073 1074 const running_covariance operator+ ( 1075 const running_covariance& item 1076 ) const 1077 { 1078 // make sure requires clause is not broken 1079 DLIB_ASSERT((in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()), 1080 "\t running_covariance running_covariance::operator+()" 1081 << "\n\t The two running_covariance objects being added must have compatible parameters" 1082 << "\n\t in_vector_size(): " << in_vector_size() 1083 << "\n\t item.in_vector_size(): " << item.in_vector_size() 1084 << "\n\t this: " << this 1085 ); 1086 1087 running_covariance temp(item); 1088 1089 // make sure we ignore empty matrices 1090 if (total_count != 0 && temp.total_count != 0) 1091 { 1092 temp.total_cov += total_cov; 1093 temp.total_sum += total_sum; 1094 temp.total_count += total_count; 1095 } 1096 else if (total_count != 0) 1097 { 1098 temp.total_cov = total_cov; 1099 temp.total_sum = total_sum; 1100 temp.total_count = total_count; 1101 } 1102 1103 return temp; 1104 } 1105 1106 1107 private: 1108 1109 general_matrix total_cov; 1110 column_matrix total_sum; 1111 scalar_type total_count; 1112 1113 long vect_size; 1114 }; 1115 1116 // ---------------------------------------------------------------------------------------- 1117 1118 template < 1119 typename matrix_type 1120 > 1121 class running_cross_covariance 1122 { 1123 /*! 1124 INITIAL VALUE 1125 - x_vect_size == 0 1126 - y_vect_size == 0 1127 - total_count == 0 1128 1129 CONVENTION 1130 - x_vect_size == x_vector_size() 1131 - y_vect_size == y_vector_size() 1132 - total_count == current_n() 1133 1134 - if (total_count != 0) 1135 - sum_x == the sum of all x vectors given to add() 1136 - sum_y == the sum of all y vectors given to add() 1137 - total_cov == sum of all x*trans(y) given to add() 1138 !*/ 1139 1140 public: 1141 1142 typedef typename matrix_type::mem_manager_type mem_manager_type; 1143 typedef typename matrix_type::type scalar_type; 1144 typedef typename matrix_type::layout_type layout_type; 1145 typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; 1146 typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; 1147 running_cross_covariance()1148 running_cross_covariance( 1149 ) 1150 { 1151 clear(); 1152 } 1153 clear()1154 void clear( 1155 ) 1156 { 1157 total_count = 0; 1158 1159 x_vect_size = 0; 1160 y_vect_size = 0; 1161 1162 sum_x.set_size(0); 1163 sum_y.set_size(0); 1164 total_cov.set_size(0,0); 1165 } 1166 x_vector_size()1167 long x_vector_size ( 1168 ) const 1169 { 1170 return x_vect_size; 1171 } 1172 y_vector_size()1173 long y_vector_size ( 1174 ) const 1175 { 1176 return y_vect_size; 1177 } 1178 set_dimensions(long x_size,long y_size)1179 void set_dimensions ( 1180 long x_size, 1181 long y_size 1182 ) 1183 { 1184 // make sure requires clause is not broken 1185 DLIB_ASSERT( x_size > 0 && y_size > 0, 1186 "\t void running_cross_covariance::set_dimensions()" 1187 << "\n\t Invalid inputs were given to this function" 1188 << "\n\t x_size: " << x_size 1189 << "\n\t y_size: " << y_size 1190 << "\n\t this: " << this 1191 ); 1192 1193 clear(); 1194 x_vect_size = x_size; 1195 y_vect_size = y_size; 1196 sum_x.set_size(x_size); 1197 sum_y.set_size(y_size); 1198 total_cov.set_size(x_size,y_size); 1199 1200 sum_x = 0; 1201 sum_y = 0; 1202 total_cov = 0; 1203 } 1204 current_n()1205 long current_n ( 1206 ) const 1207 { 1208 return static_cast<long>(total_count); 1209 } 1210 1211 template <typename T, typename U> add(const T & x,const U & y)1212 typename enable_if_c<!is_matrix<T>::value && !is_matrix<U>::value>::type add ( 1213 const T& x, 1214 const U& y 1215 ) 1216 { 1217 // make sure requires clause is not broken 1218 DLIB_ASSERT( ((long)max_index_plus_one(x) <= x_vector_size() && x_vector_size() > 0) && 1219 ((long)max_index_plus_one(y) <= y_vector_size() && y_vector_size() > 0) , 1220 "\t void running_cross_covariance::add()" 1221 << "\n\t Invalid inputs were given to this function" 1222 << "\n\t max_index_plus_one(x): " << max_index_plus_one(x) 1223 << "\n\t max_index_plus_one(y): " << max_index_plus_one(y) 1224 << "\n\t x_vector_size(): " << x_vector_size() 1225 << "\n\t y_vector_size(): " << y_vector_size() 1226 << "\n\t this: " << this 1227 ); 1228 1229 for (typename T::const_iterator i = x.begin(); i != x.end(); ++i) 1230 { 1231 sum_x(i->first) += i->second; 1232 for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) 1233 { 1234 total_cov(i->first, j->first) += i->second*j->second; 1235 } 1236 } 1237 1238 // do sum_y += y 1239 for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) 1240 { 1241 sum_y(j->first) += j->second; 1242 } 1243 1244 ++total_count; 1245 } 1246 1247 template <typename T, typename U> add(const T & x,const U & y)1248 typename enable_if_c<is_matrix<T>::value && !is_matrix<U>::value>::type add ( 1249 const T& x, 1250 const U& y 1251 ) 1252 { 1253 // make sure requires clause is not broken 1254 DLIB_ASSERT( (is_col_vector(x) && x.size() == x_vector_size() && x_vector_size() > 0) && 1255 ((long)max_index_plus_one(y) <= y_vector_size() && y_vector_size() > 0) , 1256 "\t void running_cross_covariance::add()" 1257 << "\n\t Invalid inputs were given to this function" 1258 << "\n\t is_col_vector(x): " << is_col_vector(x) 1259 << "\n\t x.size(): " << x.size() 1260 << "\n\t max_index_plus_one(y): " << max_index_plus_one(y) 1261 << "\n\t x_vector_size(): " << x_vector_size() 1262 << "\n\t y_vector_size(): " << y_vector_size() 1263 << "\n\t this: " << this 1264 ); 1265 1266 sum_x += x; 1267 1268 for (long i = 0; i < x.size(); ++i) 1269 { 1270 for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) 1271 { 1272 total_cov(i, j->first) += x(i)*j->second; 1273 } 1274 } 1275 1276 // do sum_y += y 1277 for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) 1278 { 1279 sum_y(j->first) += j->second; 1280 } 1281 1282 ++total_count; 1283 } 1284 1285 template <typename T, typename U> add(const T & x,const U & y)1286 typename enable_if_c<!is_matrix<T>::value && is_matrix<U>::value>::type add ( 1287 const T& x, 1288 const U& y 1289 ) 1290 { 1291 // make sure requires clause is not broken 1292 DLIB_ASSERT( ((long)max_index_plus_one(x) <= x_vector_size() && x_vector_size() > 0) && 1293 (is_col_vector(y) && y.size() == (long)y_vector_size() && y_vector_size() > 0) , 1294 "\t void running_cross_covariance::add()" 1295 << "\n\t Invalid inputs were given to this function" 1296 << "\n\t max_index_plus_one(x): " << max_index_plus_one(x) 1297 << "\n\t is_col_vector(y): " << is_col_vector(y) 1298 << "\n\t y.size(): " << y.size() 1299 << "\n\t x_vector_size(): " << x_vector_size() 1300 << "\n\t y_vector_size(): " << y_vector_size() 1301 << "\n\t this: " << this 1302 ); 1303 1304 for (typename T::const_iterator i = x.begin(); i != x.end(); ++i) 1305 { 1306 sum_x(i->first) += i->second; 1307 for (long j = 0; j < y.size(); ++j) 1308 { 1309 total_cov(i->first, j) += i->second*y(j); 1310 } 1311 } 1312 1313 sum_y += y; 1314 1315 ++total_count; 1316 } 1317 1318 template <typename T, typename U> add(const T & x,const U & y)1319 typename enable_if_c<is_matrix<T>::value && is_matrix<U>::value>::type add ( 1320 const T& x, 1321 const U& y 1322 ) 1323 { 1324 // make sure requires clause is not broken 1325 DLIB_ASSERT(is_col_vector(x) && (x_vector_size() == 0 || x.size() == x_vector_size()) && 1326 is_col_vector(y) && (y_vector_size() == 0 || y.size() == y_vector_size()) && 1327 x.size() != 0 && 1328 y.size() != 0, 1329 "\t void running_cross_covariance::add()" 1330 << "\n\t Invalid inputs were given to this function" 1331 << "\n\t is_col_vector(x): " << is_col_vector(x) 1332 << "\n\t x_vector_size(): " << x_vector_size() 1333 << "\n\t x.size(): " << x.size() 1334 << "\n\t is_col_vector(y): " << is_col_vector(y) 1335 << "\n\t y_vector_size(): " << y_vector_size() 1336 << "\n\t y.size(): " << y.size() 1337 << "\n\t this: " << this 1338 ); 1339 1340 x_vect_size = x.size(); 1341 y_vect_size = y.size(); 1342 if (total_count == 0) 1343 { 1344 total_cov = x*trans(y); 1345 sum_x = x; 1346 sum_y = y; 1347 } 1348 else 1349 { 1350 total_cov += x*trans(y); 1351 sum_x += x; 1352 sum_y += y; 1353 } 1354 ++total_count; 1355 } 1356 mean_x()1357 const column_matrix mean_x ( 1358 ) const 1359 { 1360 // make sure requires clause is not broken 1361 DLIB_ASSERT( current_n() != 0, 1362 "\t running_cross_covariance::mean()" 1363 << "\n\t This object can not execute this function in its current state." 1364 << "\n\t current_n(): " << current_n() 1365 << "\n\t this: " << this 1366 ); 1367 1368 return sum_x/total_count; 1369 } 1370 mean_y()1371 const column_matrix mean_y ( 1372 ) const 1373 { 1374 // make sure requires clause is not broken 1375 DLIB_ASSERT( current_n() != 0, 1376 "\t running_cross_covariance::mean()" 1377 << "\n\t This object can not execute this function in its current state." 1378 << "\n\t current_n(): " << current_n() 1379 << "\n\t this: " << this 1380 ); 1381 1382 return sum_y/total_count; 1383 } 1384 covariance_xy()1385 const general_matrix covariance_xy ( 1386 ) const 1387 { 1388 // make sure requires clause is not broken 1389 DLIB_ASSERT( current_n() > 1, 1390 "\t running_cross_covariance::covariance()" 1391 << "\n\t This object can not execute this function in its current state." 1392 << "\n\t x_vector_size(): " << x_vector_size() 1393 << "\n\t y_vector_size(): " << y_vector_size() 1394 << "\n\t current_n(): " << current_n() 1395 << "\n\t this: " << this 1396 ); 1397 1398 return (total_cov - sum_x*trans(sum_y)/total_count)/(total_count-1); 1399 } 1400 1401 const running_cross_covariance operator+ ( 1402 const running_cross_covariance& item 1403 ) const 1404 { 1405 // make sure requires clause is not broken 1406 DLIB_ASSERT((x_vector_size() == 0 || item.x_vector_size() == 0 || x_vector_size() == item.x_vector_size()) && 1407 (y_vector_size() == 0 || item.y_vector_size() == 0 || y_vector_size() == item.y_vector_size()), 1408 "\t running_cross_covariance running_cross_covariance::operator+()" 1409 << "\n\t The two running_cross_covariance objects being added must have compatible parameters" 1410 << "\n\t x_vector_size(): " << x_vector_size() 1411 << "\n\t item.x_vector_size(): " << item.x_vector_size() 1412 << "\n\t y_vector_size(): " << y_vector_size() 1413 << "\n\t item.y_vector_size(): " << item.y_vector_size() 1414 << "\n\t this: " << this 1415 ); 1416 1417 running_cross_covariance temp(item); 1418 1419 // make sure we ignore empty matrices 1420 if (total_count != 0 && temp.total_count != 0) 1421 { 1422 temp.total_cov += total_cov; 1423 temp.sum_x += sum_x; 1424 temp.sum_y += sum_y; 1425 temp.total_count += total_count; 1426 } 1427 else if (total_count != 0) 1428 { 1429 temp.total_cov = total_cov; 1430 temp.sum_x = sum_x; 1431 temp.sum_y = sum_y; 1432 temp.total_count = total_count; 1433 } 1434 1435 return temp; 1436 } 1437 1438 1439 private: 1440 1441 general_matrix total_cov; 1442 column_matrix sum_x; 1443 column_matrix sum_y; 1444 scalar_type total_count; 1445 1446 long x_vect_size; 1447 long y_vect_size; 1448 }; 1449 1450 // ---------------------------------------------------------------------------------------- 1451 1452 template < 1453 typename matrix_type 1454 > 1455 class vector_normalizer 1456 { 1457 public: 1458 typedef typename matrix_type::mem_manager_type mem_manager_type; 1459 typedef typename matrix_type::type scalar_type; 1460 typedef matrix_type result_type; 1461 1462 template <typename vector_type> train(const vector_type & samples)1463 void train ( 1464 const vector_type& samples 1465 ) 1466 { 1467 // make sure requires clause is not broken 1468 DLIB_ASSERT(samples.size() > 0, 1469 "\tvoid vector_normalizer::train()" 1470 << "\n\tyou have to give a nonempty set of samples to this function" 1471 << "\n\tthis: " << this 1472 ); 1473 1474 m = mean(mat(samples)); 1475 sd = reciprocal(sqrt(variance(mat(samples)))); 1476 1477 DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer::train() have infinite or NaN values"); 1478 } 1479 in_vector_size()1480 long in_vector_size ( 1481 ) const 1482 { 1483 return m.nr(); 1484 } 1485 out_vector_size()1486 long out_vector_size ( 1487 ) const 1488 { 1489 return m.nr(); 1490 } 1491 means()1492 const matrix_type& means ( 1493 ) const 1494 { 1495 return m; 1496 } 1497 std_devs()1498 const matrix_type& std_devs ( 1499 ) const 1500 { 1501 return sd; 1502 } 1503 operator()1504 const result_type& operator() ( 1505 const matrix_type& x 1506 ) const 1507 { 1508 // make sure requires clause is not broken 1509 DLIB_ASSERT(x.nr() == in_vector_size() && x.nc() == 1, 1510 "\tmatrix vector_normalizer::operator()" 1511 << "\n\t you have given invalid arguments to this function" 1512 << "\n\t x.nr(): " << x.nr() 1513 << "\n\t in_vector_size(): " << in_vector_size() 1514 << "\n\t x.nc(): " << x.nc() 1515 << "\n\t this: " << this 1516 ); 1517 1518 temp_out = pointwise_multiply(x-m, sd); 1519 return temp_out; 1520 } 1521 swap(vector_normalizer & item)1522 void swap ( 1523 vector_normalizer& item 1524 ) 1525 { 1526 m.swap(item.m); 1527 sd.swap(item.sd); 1528 temp_out.swap(item.temp_out); 1529 } 1530 1531 template <typename mt> 1532 friend void deserialize ( 1533 vector_normalizer<mt>& item, 1534 std::istream& in 1535 ); 1536 1537 template <typename mt> 1538 friend void serialize ( 1539 const vector_normalizer<mt>& item, 1540 std::ostream& out 1541 ); 1542 1543 private: 1544 1545 // ------------------- private data members ------------------- 1546 1547 matrix_type m, sd; 1548 1549 // This is just a temporary variable that doesn't contribute to the 1550 // state of this object. 1551 mutable matrix_type temp_out; 1552 }; 1553 1554 // ---------------------------------------------------------------------------------------- 1555 1556 template < 1557 typename matrix_type 1558 > swap(vector_normalizer<matrix_type> & a,vector_normalizer<matrix_type> & b)1559 inline void swap ( 1560 vector_normalizer<matrix_type>& a, 1561 vector_normalizer<matrix_type>& b 1562 ) { a.swap(b); } 1563 1564 // ---------------------------------------------------------------------------------------- 1565 1566 template < 1567 typename matrix_type 1568 > deserialize(vector_normalizer<matrix_type> & item,std::istream & in)1569 void deserialize ( 1570 vector_normalizer<matrix_type>& item, 1571 std::istream& in 1572 ) 1573 { 1574 deserialize(item.m, in); 1575 deserialize(item.sd, in); 1576 // Keep deserializing the pca matrix for backwards compatibility. 1577 matrix<double> pca; 1578 deserialize(pca, in); 1579 1580 if (pca.size() != 0) 1581 throw serialization_error("Error deserializing object of type vector_normalizer\n" 1582 "It looks like a serialized vector_normalizer_pca was accidentally deserialized into \n" 1583 "a vector_normalizer object."); 1584 } 1585 1586 // ---------------------------------------------------------------------------------------- 1587 1588 template < 1589 typename matrix_type 1590 > serialize(const vector_normalizer<matrix_type> & item,std::ostream & out)1591 void serialize ( 1592 const vector_normalizer<matrix_type>& item, 1593 std::ostream& out 1594 ) 1595 { 1596 serialize(item.m, out); 1597 serialize(item.sd, out); 1598 // Keep serializing the pca matrix for backwards compatibility. 1599 matrix<double> pca; 1600 serialize(pca, out); 1601 } 1602 1603 // ---------------------------------------------------------------------------------------- 1604 // ---------------------------------------------------------------------------------------- 1605 // ---------------------------------------------------------------------------------------- 1606 1607 template < 1608 typename matrix_type 1609 > 1610 class vector_normalizer_pca 1611 { 1612 public: 1613 typedef typename matrix_type::mem_manager_type mem_manager_type; 1614 typedef typename matrix_type::type scalar_type; 1615 typedef matrix<scalar_type,0,1,mem_manager_type> result_type; 1616 1617 template <typename vector_type> 1618 void train ( 1619 const vector_type& samples, 1620 const double eps = 0.99 1621 ) 1622 { 1623 // You are getting an error here because you are trying to apply PCA 1624 // to a vector of fixed length. But PCA is going to try and do 1625 // dimensionality reduction so you can't use a vector with a fixed dimension. 1626 COMPILE_TIME_ASSERT(matrix_type::NR == 0); 1627 1628 // make sure requires clause is not broken 1629 DLIB_ASSERT(samples.size() > 0, 1630 "\tvoid vector_normalizer_pca::train_pca()" 1631 << "\n\tyou have to give a nonempty set of samples to this function" 1632 << "\n\tthis: " << this 1633 ); 1634 DLIB_ASSERT(0 < eps && eps <= 1, 1635 "\tvoid vector_normalizer_pca::train_pca()" 1636 << "\n\tyou have to give a nonempty set of samples to this function" 1637 << "\n\tthis: " << this 1638 ); 1639 train_pca_impl(mat(samples),eps); 1640 1641 DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer_pca::train() have infinite or NaN values"); 1642 } 1643 in_vector_size()1644 long in_vector_size ( 1645 ) const 1646 { 1647 return m.nr(); 1648 } 1649 out_vector_size()1650 long out_vector_size ( 1651 ) const 1652 { 1653 return pca.nr(); 1654 } 1655 means()1656 const matrix<scalar_type,0,1,mem_manager_type>& means ( 1657 ) const 1658 { 1659 return m; 1660 } 1661 std_devs()1662 const matrix<scalar_type,0,1,mem_manager_type>& std_devs ( 1663 ) const 1664 { 1665 return sd; 1666 } 1667 pca_matrix()1668 const matrix<scalar_type,0,0,mem_manager_type>& pca_matrix ( 1669 ) const 1670 { 1671 return pca; 1672 } 1673 operator()1674 const result_type& operator() ( 1675 const matrix_type& x 1676 ) const 1677 { 1678 // make sure requires clause is not broken 1679 DLIB_ASSERT(x.nr() == in_vector_size() && x.nc() == 1, 1680 "\tmatrix vector_normalizer_pca::operator()" 1681 << "\n\t you have given invalid arguments to this function" 1682 << "\n\t x.nr(): " << x.nr() 1683 << "\n\t in_vector_size(): " << in_vector_size() 1684 << "\n\t x.nc(): " << x.nc() 1685 << "\n\t this: " << this 1686 ); 1687 1688 // If we have a pca transform matrix on hand then 1689 // also apply that. 1690 temp_out = pca*pointwise_multiply(x-m, sd); 1691 1692 return temp_out; 1693 } 1694 swap(vector_normalizer_pca & item)1695 void swap ( 1696 vector_normalizer_pca& item 1697 ) 1698 { 1699 m.swap(item.m); 1700 sd.swap(item.sd); 1701 pca.swap(item.pca); 1702 temp_out.swap(item.temp_out); 1703 } 1704 1705 template <typename T> 1706 friend void deserialize ( 1707 vector_normalizer_pca<T>& item, 1708 std::istream& in 1709 ); 1710 1711 template <typename T> 1712 friend void serialize ( 1713 const vector_normalizer_pca<T>& item, 1714 std::ostream& out 1715 ); 1716 1717 private: 1718 1719 template <typename mat_type> train_pca_impl(const mat_type & samples,const double eps)1720 void train_pca_impl ( 1721 const mat_type& samples, 1722 const double eps 1723 ) 1724 { 1725 m = mean(samples); 1726 sd = reciprocal(sqrt(variance(samples))); 1727 1728 // fill x with the normalized version of the input samples 1729 matrix<typename mat_type::type,0,1,mem_manager_type> x(samples); 1730 for (long r = 0; r < x.size(); ++r) 1731 x(r) = pointwise_multiply(x(r)-m, sd); 1732 1733 matrix<scalar_type,0,0,mem_manager_type> temp, eigen; 1734 matrix<scalar_type,0,1,mem_manager_type> eigenvalues; 1735 1736 // Compute the svd of the covariance matrix of the normalized inputs 1737 svd(covariance(x), temp, eigen, pca); 1738 eigenvalues = diag(eigen); 1739 1740 rsort_columns(pca, eigenvalues); 1741 1742 // figure out how many eigenvectors we want in our pca matrix 1743 const double thresh = sum(eigenvalues)*eps; 1744 long num_vectors = 0; 1745 double total = 0; 1746 for (long r = 0; r < eigenvalues.size() && total < thresh; ++r) 1747 { 1748 ++num_vectors; 1749 total += eigenvalues(r); 1750 } 1751 1752 // So now we know we want to use num_vectors of the first eigenvectors. So 1753 // pull those out and discard the rest. 1754 pca = trans(colm(pca,range(0,num_vectors-1))); 1755 1756 // Apply the pca transform to the data in x. Then we will normalize the 1757 // pca matrix below. 1758 for (long r = 0; r < x.nr(); ++r) 1759 { 1760 x(r) = pca*x(r); 1761 } 1762 1763 // Here we just scale the output features from the pca transform so 1764 // that the variance of each feature is 1. So this doesn't really change 1765 // what the pca is doing, it just makes sure the output features are 1766 // normalized. 1767 pca = trans(scale_columns(trans(pca), reciprocal(sqrt(variance(x))))); 1768 } 1769 1770 1771 // ------------------- private data members ------------------- 1772 1773 matrix<scalar_type,0,1,mem_manager_type> m, sd; 1774 matrix<scalar_type,0,0,mem_manager_type> pca; 1775 1776 // This is just a temporary variable that doesn't contribute to the 1777 // state of this object. 1778 mutable result_type temp_out; 1779 }; 1780 1781 template < 1782 typename matrix_type 1783 > swap(vector_normalizer_pca<matrix_type> & a,vector_normalizer_pca<matrix_type> & b)1784 inline void swap ( 1785 vector_normalizer_pca<matrix_type>& a, 1786 vector_normalizer_pca<matrix_type>& b 1787 ) { a.swap(b); } 1788 1789 // ---------------------------------------------------------------------------------------- 1790 1791 template < 1792 typename matrix_type 1793 > deserialize(vector_normalizer_pca<matrix_type> & item,std::istream & in)1794 void deserialize ( 1795 vector_normalizer_pca<matrix_type>& item, 1796 std::istream& in 1797 ) 1798 { 1799 deserialize(item.m, in); 1800 deserialize(item.sd, in); 1801 deserialize(item.pca, in); 1802 if (item.pca.nc() != item.m.nr()) 1803 throw serialization_error("Error deserializing object of type vector_normalizer_pca\n" 1804 "It looks like a serialized vector_normalizer was accidentally deserialized into \n" 1805 "a vector_normalizer_pca object."); 1806 } 1807 1808 template < 1809 typename matrix_type 1810 > serialize(const vector_normalizer_pca<matrix_type> & item,std::ostream & out)1811 void serialize ( 1812 const vector_normalizer_pca<matrix_type>& item, 1813 std::ostream& out 1814 ) 1815 { 1816 serialize(item.m, out); 1817 serialize(item.sd, out); 1818 serialize(item.pca, out); 1819 } 1820 1821 // ---------------------------------------------------------------------------------------- 1822 binomial_random_vars_are_different(uint64_t k1,uint64_t n1,uint64_t k2,uint64_t n2)1823 inline double binomial_random_vars_are_different ( 1824 uint64_t k1, 1825 uint64_t n1, 1826 uint64_t k2, 1827 uint64_t n2 1828 ) 1829 { 1830 DLIB_ASSERT(k1 <= n1, "k1: "<< k1 << " n1: "<< n1); 1831 DLIB_ASSERT(k2 <= n2, "k2: "<< k2 << " n2: "<< n2); 1832 1833 const double p1 = k1/(double)n1; 1834 const double p2 = k2/(double)n2; 1835 const double p = (k1+k2)/(double)(n1+n2); 1836 1837 auto ll = [](double p, uint64_t k, uint64_t n) { 1838 if (p == 0 || p == 1) 1839 return 0.0; 1840 return k*std::log(p) + (n-k)*std::log(1-p); 1841 }; 1842 1843 auto logll = ll(p1,k1,n1) + ll(p2,k2,n2) - ll(p,k1,n1) - ll(p,k2,n2); 1844 1845 // The basic statistic only tells you if the random variables are different. But 1846 // it's nice to know which way they are different, i.e., which one is bigger. So 1847 // stuff that information into the sign bit of the return value. 1848 if (p1>=p2) 1849 return logll; 1850 else 1851 return -logll; 1852 } 1853 1854 // ---------------------------------------------------------------------------------------- 1855 event_correlation(uint64_t A_count,uint64_t B_count,uint64_t AB_count,uint64_t total_num_observations)1856 inline double event_correlation ( 1857 uint64_t A_count, 1858 uint64_t B_count, 1859 uint64_t AB_count, 1860 uint64_t total_num_observations 1861 ) 1862 { 1863 DLIB_ASSERT(AB_count <= A_count && A_count <= total_num_observations, 1864 "AB_count: " << AB_count << ", A_count: "<< A_count << ", total_num_observations: " << total_num_observations); 1865 DLIB_ASSERT(AB_count <= B_count && B_count <= total_num_observations, 1866 "AB_count: " << AB_count << ", B_count: "<< B_count << ", total_num_observations: " << total_num_observations); 1867 1868 if (total_num_observations == 0) 1869 return 0; 1870 1871 DLIB_ASSERT(A_count + B_count - AB_count <= total_num_observations, 1872 "AB_count: " << AB_count << " A_count: " << A_count << ", B_count: "<< B_count << ", total_num_observations: " << total_num_observations); 1873 1874 1875 const auto AnotB_count = A_count - AB_count; 1876 const auto notB_count = total_num_observations - B_count; 1877 // How likely is it that the odds of A happening is different when conditioned on 1878 // whether or not B happened? 1879 return binomial_random_vars_are_different( 1880 AB_count, B_count, // A conditional on the presence of B 1881 AnotB_count, notB_count // A conditional on the absence of B 1882 ); 1883 } 1884 1885 // ---------------------------------------------------------------------------------------- 1886 1887 } 1888 1889 #endif // DLIB_STATISTICs_ 1890 1891