// Copyright (C) 2010  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.


#include <sstream>
#include <string>
#include <cstdlib>
#include <ctime>
#include <dlib/statistics.h>
#include <dlib/statistics/running_gradient.h>
#include <dlib/rand.h>
#include <dlib/svm.h>
#include <algorithm>
#include <dlib/matrix.h>
#include <cmath>

#include "tester.h"

namespace
{

    using namespace test;
    using namespace dlib;
    using namespace std;

    logger dlog("test.statistics");



    class statistics_tester : public tester
    {
    public:
        statistics_tester (
        ) :
            tester ("test_statistics",
                    "Runs tests on the statistics component.")
        {}

        void test_random_subset_selector ()
        {
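            // random_subset_selector keeps a bounded, uniformly sampled subset of
            // everything handed to add().  Feed it the integers 0..99999 and the mean of
            // the retained subset should closely track the mean of the full stream.  The
            // second block below checks that serialization round-trips the selector's
            // state.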
            random_subset_selector<double> rand_set;

            for (int j = 0; j < 30; ++j)
            {
                print_spinner();

                running_stats<double> rs, rs2;

                rand_set.set_max_size(1000);

                for (double i = 0; i < 100000; ++i)
                {
                    rs.add(i);
                    rand_set.add(i);
                }


                for (unsigned long i = 0; i < rand_set.size(); ++i)
                    rs2.add(rand_set[i]);


                dlog << LDEBUG << "true mean:    " << rs.mean();
                dlog << LDEBUG << "sampled mean: " << rs2.mean();
                double ratio = rs.mean()/rs2.mean();
                DLIB_TEST_MSG(0.96 < ratio  && ratio < 1.04, " ratio: " << ratio);
            }


            {
                random_subset_selector<int> r1, r2;
                r1.set_max_size(300);
                for (int i = 0; i < 4000; ++i)
                    r1.add(i);

                ostringstream sout;
                serialize(r1, sout);
                istringstream sin(sout.str());
                deserialize(r2, sin);

                DLIB_TEST(r1.size() == r2.size());
                DLIB_TEST(r1.max_size() == r2.max_size());
                DLIB_TEST(r1.next_add_accepts() == r2.next_add_accepts());
                DLIB_TEST(std::equal(r1.begin(), r1.end(), r2.begin()));

                for (int i = 0; i < 4000; ++i)
                {
                    r1.add(i);
                    r2.add(i);
                }

                DLIB_TEST(r1.size() == r2.size());
                DLIB_TEST(r1.max_size() == r2.max_size());
                DLIB_TEST(r1.next_add_accepts() == r2.next_add_accepts());
                DLIB_TEST(std::equal(r1.begin(), r1.end(), r2.begin()));
            }
        }

        void test_random_subset_selector2 ()
        {
            random_subset_selector<double> rand_set;
            DLIB_TEST(rand_set.next_add_accepts() == false);
            DLIB_TEST(rand_set.size() == 0);
            DLIB_TEST(rand_set.max_size() == 0);

            for (int j = 0; j < 30; ++j)
            {
                print_spinner();

                running_stats<double> rs, rs2;

                rand_set.set_max_size(1000);
                DLIB_TEST(rand_set.next_add_accepts() == true);

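                // next_add_accepts() says whether the next add() would actually be kept.
                // When it wouldn't, the argument-free add() lets the selector know an
                // item went by without paying to copy it.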
                for (double i = 0; i < 100000; ++i)
                {
                    rs.add(i);
                    if (rand_set.next_add_accepts())
                        rand_set.add(i);
                    else
                        rand_set.add();
                }

                DLIB_TEST(rand_set.size() == 1000);
                DLIB_TEST(rand_set.max_size() == 1000);

                for (unsigned long i = 0; i < rand_set.size(); ++i)
                    rs2.add(rand_set[i]);


                dlog << LDEBUG << "true mean:    " << rs.mean();
                dlog << LDEBUG << "sampled mean: " << rs2.mean();
                double ratio = rs.mean()/rs2.mean();
                DLIB_TEST_MSG(0.96 < ratio  && ratio < 1.04, " ratio: " << ratio);
            }
        }

        void test_running_cross_covariance ()
        {
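            // Stream random (x,y) pairs into two running_cross_covariance accumulators
            // and verify that their sum reproduces the mean and cross-covariance computed
            // directly from the same samples.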
            running_cross_covariance<matrix<double> > rcc1, rcc2;

            matrix<double,0,1> xm, ym;
            const int num = 40;

            dlib::rand rnd;
            for (int i = 0; i < num; ++i)
            {
                matrix<double,0,1> x = randm(4,1,rnd);
                matrix<double,0,1> y = randm(4,1,rnd);

                xm += x/num;
                ym += y/num;

                if (i < 15)
                    rcc1.add(x,y);
                else
                    rcc2.add(x,y);
            }

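            // clear() puts the generator back in its initial state, so the loop below
            // replays exactly the same samples and computes the covariance directly.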
            rnd.clear();
            matrix<double> cov;
            for (int i = 0; i < num; ++i)
            {
                matrix<double,0,1> x = randm(4,1,rnd);
                matrix<double,0,1> y = randm(4,1,rnd);
                cov += (x-xm)*trans(y-ym);
            }
            cov /= num-1;

            running_cross_covariance<matrix<double> > rcc = rcc1 + rcc2;
            DLIB_TEST(max(abs(rcc.covariance_xy()-cov)) < 1e-14);
            DLIB_TEST(max(abs(rcc.mean_x()-xm)) < 1e-14);
            DLIB_TEST(max(abs(rcc.mean_y()-ym)) < 1e-14);
        }

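        // Helper that converts a dense column vector into the sparse vector
        // representation dlib uses (a std::map from element index to value), so the
        // sparse overloads of add() can be exercised below.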
        std::map<unsigned long,double> dense_to_sparse (
            const matrix<double,0,1>& x
        )
        {
            std::map<unsigned long,double> temp;
            for (long i = 0; i < x.size(); ++i)
                temp[i] = x(i);
            return temp;
        }

        void test_running_cross_covariance_sparse()
        {
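            // Same idea as the dense test above, except the samples are fed in as a mix
            // of dense vectors and their sparse equivalents.  The results must match the
            // purely dense computation.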
            running_cross_covariance<matrix<double> > rcc1, rcc2;

            running_covariance<matrix<double> > rc1, rc2;

            matrix<double,0,1> xm, ym;
            const int num = 40;

            rc1.set_dimension(4);
            rc2.set_dimension(4);

            rcc1.set_dimensions(4,5);
            rcc2.set_dimensions(4,5);

            dlib::rand rnd;
            for (int i = 0; i < num; ++i)
            {
                matrix<double,0,1> x = randm(4,1,rnd);
                matrix<double,0,1> y = randm(5,1,rnd);

                xm += x/num;
                ym += y/num;

                if (i < 15)
                {
                    rcc1.add(x,dense_to_sparse(y));
                    rc1.add(x);
                }
                else if (i < 30)
                {
                    rcc2.add(dense_to_sparse(x),y);
                    rc2.add(dense_to_sparse(x));
                }
                else
                {
                    rcc2.add(dense_to_sparse(x),dense_to_sparse(y));
                    rc2.add(x);
                }
            }

            rnd.clear();
            matrix<double> cov, cov2;
            for (int i = 0; i < num; ++i)
            {
                matrix<double,0,1> x = randm(4,1,rnd);
                matrix<double,0,1> y = randm(5,1,rnd);
                cov += (x-xm)*trans(y-ym);
                cov2 += (x-xm)*trans(x-xm);
            }
            cov /= num-1;
            cov2 /= num-1;

            running_cross_covariance<matrix<double> > rcc = rcc1 + rcc2;
            DLIB_TEST_MSG(max(abs(rcc.covariance_xy()-cov)) < 1e-14, max(abs(rcc.covariance_xy()-cov)));
            DLIB_TEST(max(abs(rcc.mean_x()-xm)) < 1e-14);
            DLIB_TEST(max(abs(rcc.mean_y()-ym)) < 1e-14);

            running_covariance<matrix<double> > rc = rc1 + rc2;
            DLIB_TEST(max(abs(rc.covariance()-cov2)) < 1e-14);
            DLIB_TEST(max(abs(rc.mean()-xm)) < 1e-14);
        }

        void test_running_covariance (
        )
        {
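            // Compare the incrementally maintained mean and covariance against the batch
            // mean()/covariance() functions over a range of dimensions and sample counts.
            // The second pass also checks that adding two accumulators gives the same
            // answer as pushing every sample into a single one.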
            dlib::rand rnd;
            std::vector<matrix<double,0,1> > vects;

            running_covariance<matrix<double,0,1> > cov, cov2;
            DLIB_TEST(cov.in_vector_size() == 0);

            for (unsigned long dims = 1; dims < 5; ++dims)
            {
                for (unsigned long samps = 2; samps < 10; ++samps)
                {
                    vects.clear();
                    cov.clear();
                    DLIB_TEST(cov.in_vector_size() == 0);
                    for (unsigned long i = 0; i < samps; ++i)
                    {
                        vects.push_back(randm(dims,1,rnd));
                        cov.add(vects.back());

                    }
                    DLIB_TEST(cov.in_vector_size() == (long)dims);

                    DLIB_TEST(equal(mean(mat(vects)), cov.mean()));
                    DLIB_TEST_MSG(equal(covariance(mat(vects)), cov.covariance()),
                              max(abs(covariance(mat(vects)) - cov.covariance()))
                              << "   dims = " << dims << "   samps = " << samps
                              );
                }
            }

            for (unsigned long dims = 1; dims < 5; ++dims)
            {
                for (unsigned long samps = 2; samps < 10; ++samps)
                {
                    vects.clear();
                    cov.clear();
                    cov2.clear();
                    DLIB_TEST(cov.in_vector_size() == 0);
                    for (unsigned long i = 0; i < samps; ++i)
                    {
                        vects.push_back(randm(dims,1,rnd));
                        if ((i%2) == 0)
                            cov.add(vects.back());
                        else
                            cov2.add(vects.back());

                    }
                    DLIB_TEST((cov+cov2).in_vector_size() == (long)dims);

                    DLIB_TEST(equal(mean(mat(vects)), (cov+cov2).mean()));
                    DLIB_TEST_MSG(equal(covariance(mat(vects)), (cov+cov2).covariance()),
                              max(abs(covariance(mat(vects)) - (cov+cov2).covariance()))
                              << "   dims = " << dims << "   samps = " << samps
                              );
                }
            }

        }

        void test_running_stats()
        {
            print_spinner();

            running_stats<double> rs, rs2;

            running_scalar_covariance<double> rsc1, rsc2;
            running_scalar_covariance_decayed<double> rscd1(1000000), rscd2(1000000);
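            // The decayed covariance objects use a very long window (1000000), so over
            // the short run below they should nearly agree with the exact statistics;
            // that is why the checks on them use the looser 1e-2 tolerances.  rsc2 and
            // rscd2 see each x paired with both +x and -x, so their correlations should
            // come out near zero.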

            for (double i = 0; i < 100; ++i)
            {
                rs.add(i);

                rsc1.add(i,i);
                rsc2.add(i,i);
                rsc2.add(i,-i);

                rscd1.add(i,i);
                rscd2.add(i,i);
                rscd2.add(i,-i);
            }

            // make sure the running_stats and running_scalar_covariance agree
            DLIB_TEST_MSG(std::abs(rs.mean() - rsc1.mean_x()) < 1e-10, std::abs(rs.mean() - rsc1.mean_x()));
            DLIB_TEST(std::abs(rs.mean() - rsc1.mean_y()) < 1e-10);
            DLIB_TEST(std::abs(rs.stddev() - rsc1.stddev_x()) < 1e-10);
            DLIB_TEST(std::abs(rs.stddev() - rsc1.stddev_y()) < 1e-10);
            DLIB_TEST(std::abs(rs.variance() - rsc1.variance_x()) < 1e-10);
            DLIB_TEST(std::abs(rs.variance() - rsc1.variance_y()) < 1e-10);
            DLIB_TEST(rs.current_n() == rsc1.current_n());

            DLIB_TEST(std::abs(rsc1.correlation() - 1) < 1e-10);
            DLIB_TEST(std::abs(rsc2.correlation() - 0) < 1e-10);


            DLIB_TEST_MSG(std::abs(rs.mean() - rscd1.mean_x()) < 1e-2, std::abs(rs.mean() - rscd1.mean_x()) << " " << rscd1.mean_x());
            DLIB_TEST(std::abs(rs.mean() - rscd1.mean_y()) < 1e-2);
            DLIB_TEST_MSG(std::abs(rs.stddev() - rscd1.stddev_x()) < 1e-2, std::abs(rs.stddev() - rscd1.stddev_x()));
            DLIB_TEST(std::abs(rs.stddev() - rscd1.stddev_y()) < 1e-2);
            DLIB_TEST_MSG(std::abs(rs.variance() - rscd1.variance_x()) < 1e-2, std::abs(rs.variance() - rscd1.variance_x()));
            DLIB_TEST(std::abs(rs.variance() - rscd1.variance_y()) < 1e-2);
            DLIB_TEST(std::abs(rscd1.correlation() - 1) < 1e-2);
            DLIB_TEST(std::abs(rscd2.correlation() - 0) < 1e-2);



            // test serialization of running_stats
            ostringstream sout;
            serialize(rs, sout);
            istringstream sin(sout.str());
            deserialize(rs2, sin);
            // make sure the running_stats and running_scalar_covariance agree
            DLIB_TEST_MSG(std::abs(rs2.mean() - rsc1.mean_x()) < 1e-10, std::abs(rs2.mean() - rsc1.mean_x()));
            DLIB_TEST(std::abs(rs2.mean() - rsc1.mean_y()) < 1e-10);
            DLIB_TEST(std::abs(rs2.stddev() - rsc1.stddev_x()) < 1e-10);
            DLIB_TEST(std::abs(rs2.stddev() - rsc1.stddev_y()) < 1e-10);
            DLIB_TEST(std::abs(rs2.variance() - rsc1.variance_x()) < 1e-10);
            DLIB_TEST(std::abs(rs2.variance() - rsc1.variance_y()) < 1e-10);
            DLIB_TEST(rs2.current_n() == rsc1.current_n());

            rsc1.clear();
            rsc1.add(1, -1);
            rsc1.add(0, 0);
            rsc1.add(1, -1);
            rsc1.add(0, 0);
            rsc1.add(1, -1);
            rsc1.add(0, 0);

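            // For these six samples mean_x = 0.5 and mean_y = -0.5.  Each (1,-1) pair
            // contributes (0.5)(-0.5) = -0.25 and each (0,0) pair contributes
            // (-0.5)(0.5) = -0.25, so the sum is -1.5 and the sample covariance is
            // -1.5/(6-1) = -0.3.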
            DLIB_TEST(std::abs(rsc1.covariance() - -0.3) < 1e-10);
        }

        void test_skewness_and_kurtosis_1()
        {

            dlib::rand rnum;
            running_stats<double> rs1;

            double tp = 0;

            rnum.set_seed("DlibRocks");

            for(int i = 0; i< 1000000; i++)
            {
                tp = rnum.get_random_gaussian();
                rs1.add(tp);
            }

            // check that the unbiased skewness and excess kurtosis of one million Gaussian
            // draws are both near zero.
            DLIB_TEST(abs(rs1.skewness()) < 0.1);
            DLIB_TEST(abs(rs1.ex_kurtosis()) < 0.1);
        }

        void test_skewness_and_kurtosis_2()
        {

            string str = "DlibRocks";

            for(int j = 0; j<5 ; j++)
            {
                matrix<double,1,100000> dat;
                dlib::rand rnum;
                running_stats<double> rs1;

                double tp = 0;
                double n = 100000;
                double xb = 0;

                double sknum = 0;
                double skdenom = 0;
                double unbi_skew = 0;

                double exkurnum = 0;
                double exkurdenom = 0;
                double unbi_exkur = 0;

                random_shuffle(str.begin(), str.end());
                rnum.set_seed(str);

                for(int i = 0; i<n; i++)
                {
                    tp = rnum.get_random_gaussian();
                    rs1.add(tp);
                    dat(i)=tp;
                    xb += dat(i);
                }

                xb = xb/n;

                for(int i = 0; i < n; i++ )
                {
                    sknum += pow(dat(i) - xb,3);
                    skdenom += pow(dat(i) - xb,2);
                    exkurnum += pow(dat(i) - xb,4);
                    exkurdenom += pow(dat(i)-xb,2);
                }

                sknum = sknum/n;
                skdenom = pow(skdenom/n,1.5);
                exkurnum = exkurnum/n;
                exkurdenom = pow(exkurdenom/n,2);

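                // Unbiased (sample size adjusted) estimators, with m2, m3, m4 the sample
                // central moments:
                //   skewness:        G1 = sqrt(n(n-1))/(n-2) * m3/m2^(3/2)
                //   excess kurtosis: G2 = (n-1)/((n-2)(n-3)) * ((n+1)*(m4/m2^2 - 3) + 6)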
                unbi_skew = sqrt(n*(n-1))/(n-2)*sknum/skdenom;
                unbi_exkur = (n-1)*((n+1)*(exkurnum/exkurdenom-3)+6)/((n-2)*(n-3));

                dlog << LINFO << "Skew Diff: " <<  unbi_skew - rs1.skewness();
                dlog << LINFO << "Kur Diff: " << unbi_exkur - rs1.ex_kurtosis();

                // Test an alternative implementation of the unbiased skewness and excess
                // kurtosis against the one in running_stats.
                DLIB_TEST(abs(unbi_skew - rs1.skewness()) < 1e-10);
                DLIB_TEST(abs(unbi_exkur - rs1.ex_kurtosis()) < 1e-10);
            }
        }

        void test_randomize_samples()
        {
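            // randomize_samples() must apply the same random permutation to all three
            // containers.  The fixed offsets between t, u, and v make any mismatch easy
            // to detect.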
            std::vector<unsigned int> t(15),u(15),v(15);

            for (unsigned long i = 0; i < t.size(); ++i)
            {
                t[i] = i;
                u[i] = i+1;
                v[i] = i+2;
            }
            randomize_samples(t,u,v);

            DLIB_TEST(t.size() == 15);
            DLIB_TEST(u.size() == 15);
            DLIB_TEST(v.size() == 15);

            for (unsigned long i = 0; i < t.size(); ++i)
            {
                const unsigned long val = t[i];
                DLIB_TEST(u[i] == val+1);
                DLIB_TEST(v[i] == val+2);
            }
        }
        void test_randomize_samples2()
        {
            dlib::matrix<int,15,1> t(15),u(15),v(15);

            for (long i = 0; i < t.size(); ++i)
            {
                t(i) = i;
                u(i) = i+1;
                v(i) = i+2;
            }
            randomize_samples(t,u,v);

            DLIB_TEST(t.size() == 15);
            DLIB_TEST(u.size() == 15);
            DLIB_TEST(v.size() == 15);

            for (long i = 0; i < t.size(); ++i)
            {
                const long val = t(i);
                DLIB_TEST(u(i) == val+1);
                DLIB_TEST(v(i) == val+2);
            }
        }

        void another_test()
        {
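            // running_stats objects can be added together; the sum should behave exactly
            // as if all the samples had gone into a single accumulator.  The same is
            // checked for running_scalar_covariance further down.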
            std::vector<double> a;

            running_stats<double> rs1, rs2;

            for (int i = 0; i < 10; ++i)
            {
                rs1.add(i);
                a.push_back(i);
            }

            DLIB_TEST(std::abs(variance(mat(a)) - rs1.variance()) < 1e-13);
            DLIB_TEST(std::abs(stddev(mat(a)) - rs1.stddev()) < 1e-13);
            DLIB_TEST(std::abs(mean(mat(a)) - rs1.mean()) < 1e-13);

            for (int i = 10; i < 20; ++i)
            {
                rs2.add(i);
                a.push_back(i);
            }

            DLIB_TEST(std::abs(variance(mat(a)) - (rs1+rs2).variance()) < 1e-13);
            DLIB_TEST(std::abs(mean(mat(a)) - (rs1+rs2).mean()) < 1e-13);
            DLIB_TEST((rs1+rs2).current_n() == 20);

            running_scalar_covariance<double> rc1, rc2, rc3;
            dlib::rand rnd;
            for (double i = 0; i < 10; ++i)
            {
                const double a = i + rnd.get_random_gaussian();
                const double b = i + rnd.get_random_gaussian();
                rc1.add(a,b);
                rc3.add(a,b);
            }
            for (double i = 11; i < 20; ++i)
            {
                const double a = i + rnd.get_random_gaussian();
                const double b = i + rnd.get_random_gaussian();
                rc2.add(a,b);
                rc3.add(a,b);
            }

            DLIB_TEST(std::abs((rc1+rc2).mean_x() - rc3.mean_x()) < 1e-13);
            DLIB_TEST(std::abs((rc1+rc2).mean_y() - rc3.mean_y()) < 1e-13);
            DLIB_TEST_MSG(std::abs((rc1+rc2).variance_x() - rc3.variance_x()) < 1e-13, std::abs((rc1+rc2).variance_x() - rc3.variance_x()));
            DLIB_TEST(std::abs((rc1+rc2).variance_y() - rc3.variance_y()) < 1e-13);
            DLIB_TEST(std::abs((rc1+rc2).covariance() - rc3.covariance()) < 1e-13);
            DLIB_TEST((rc1+rc2).current_n() == rc3.current_n());

        }

        void test_average_precision()
        {
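            // average_precision() averages, over the relevant (true) items, the best
            // precision achieved at or after that item's rank.  The optional second
            // argument is the number of relevant items missing from the list entirely;
            // they enlarge the denominator without contributing any precision.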
            std::vector<bool> items;
            DLIB_TEST(average_precision(items) == 1);
            DLIB_TEST(average_precision(items,1) == 0);

            items.push_back(true);
            DLIB_TEST(average_precision(items) == 1);
            DLIB_TEST(std::abs(average_precision(items,1) - 0.5) < 1e-14);

            items.push_back(true);
            DLIB_TEST(average_precision(items) == 1);
            DLIB_TEST(std::abs(average_precision(items,1) - 2.0/3.0) < 1e-14);

            items.push_back(false);

            DLIB_TEST(average_precision(items) == 1);
            DLIB_TEST(std::abs(average_precision(items,1) - 2.0/3.0) < 1e-14);

            items.push_back(true);

            DLIB_TEST(std::abs(average_precision(items) - (2.0+3.0/4.0)/3.0) < 1e-14);

            items.push_back(true);

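            // The ranked items are now {T,T,F,T,T}.  The best precision at or after each
            // true item is 1, 1, 4/5 and 4/5; with one missing relevant item the average
            // is taken over 5 items instead of 4.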
            DLIB_TEST(std::abs(average_precision(items)   - (2.0 + 4.0/5.0 + 4.0/5.0)/4.0) < 1e-14);
            DLIB_TEST(std::abs(average_precision(items,1) - (2.0 + 4.0/5.0 + 4.0/5.0)/5.0) < 1e-14);
        }


        template <typename sample_type>
        void check_distance_metrics (
            const std::vector<frobmetric_training_sample<sample_type> >& samples
        )
        {
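            // After metric learning every far vector should end up farther (in squared
            // distance) from its anchor than every near vector, with a margin of at
            // least about 1.  Collect all the far-minus-near distance gaps and check the
            // smallest one.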
            running_stats<double> rs;
            for (unsigned long i = 0; i < samples.size(); ++i)
            {
                for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
                {
                    const double d1 = length_squared(samples[i].anchor_vect - samples[i].near_vects[j]);
                    for (unsigned long k = 0; k < samples[i].far_vects.size(); ++k)
                    {
                        const double d2 = length_squared(samples[i].anchor_vect - samples[i].far_vects[k]);
                        rs.add(d2-d1);
                    }
                }
            }

            dlog << LINFO << "dist gap max:    "<< rs.max();
            dlog << LINFO << "dist gap min:    "<< rs.min();
            dlog << LINFO << "dist gap mean:   "<< rs.mean();
            dlog << LINFO << "dist gap stddev: "<< rs.stddev();
            DLIB_TEST(rs.min() >= 0.99);
            DLIB_TEST(rs.mean() >= 0.9999);
        }

        void test_vector_normalizer_frobmetric(dlib::rand& rnd)
        {
            print_spinner();
            typedef matrix<double,0,1> sample_type;
            vector_normalizer_frobmetric<sample_type> normalizer;

            std::vector<frobmetric_training_sample<sample_type> > samples;
            frobmetric_training_sample<sample_type> samp;

            const long key = 1;
            const long dims = 5;
            // Let's make some two-class training data.  Each sample will have dims dimensions but
            // only the one with index equal to key will be meaningful.  In particular, if the key
            // dimension is > 0 then the sample is class +1 and -1 otherwise.

            long k = 0;
            for (int i = 0; i < 50; ++i)
            {
                samp.clear();
                samp.anchor_vect = gaussian_randm(dims,1,k++);
                if (samp.anchor_vect(key) > 0)
                    samp.anchor_vect(key) = rnd.get_random_double() + 5;
                else
                    samp.anchor_vect(key) = -(rnd.get_random_double() + 5);

                matrix<double,0,1> temp;

                for (int j = 0; j < 5; ++j)
                {
                    // Don't always put an equal number of vectors into near_vects and
                    // far_vects.
                    const int numa = rnd.get_random_32bit_number()%2 + 1;
                    const int numb = rnd.get_random_32bit_number()%2 + 1;

                    for (int num = 0; num < numa; ++num)
                    {
                        temp = gaussian_randm(dims,1,k++); temp(key) = 0.1;
                        //temp = gaussian_randm(dims,1,k++); temp(key) = std::abs(temp(key));
                        if (samp.anchor_vect(key) > 0) samp.near_vects.push_back(temp);
                        else                    samp.far_vects.push_back(temp);
                    }

                    for (int num = 0; num < numb; ++num)
                    {
                        temp = gaussian_randm(dims,1,k++); temp(key) = -0.1;
                        //temp = gaussian_randm(dims,1,k++); temp(key) = -std::abs(temp(key));
                        if (samp.anchor_vect(key) < 0) samp.near_vects.push_back(temp);
                        else                    samp.far_vects.push_back(temp);
                    }
                }
                samples.push_back(samp);
            }

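            // vector_normalizer_frobmetric learns a linear transform that centers the
            // data and pushes each anchor's far_vects at least a unit of squared distance
            // farther away than its near_vects; check_distance_metrics() below verifies
            // exactly that margin.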
            normalizer.set_epsilon(0.0001);
            normalizer.set_c(100);
            normalizer.set_max_iterations(6000);
            normalizer.train(samples);

            dlog << LINFO << "learned transform: \n" << normalizer.transform();

            matrix<double,0,1> total;

            for (unsigned long i = 0; i < samples.size(); ++i)
            {
                samples[i].anchor_vect = normalizer(samples[i].anchor_vect);
                total += samples[i].anchor_vect;
                for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
                    samples[i].near_vects[j] = normalizer(samples[i].near_vects[j]);
                for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j)
                    samples[i].far_vects[j] = normalizer(samples[i].far_vects[j]);
            }
            total /= samples.size();
            dlog << LINFO << "sample transformed means: "<< trans(total);
            DLIB_TEST(length(total) < 1e-9);
            check_distance_metrics(samples);

            // make sure serialization works
            stringstream os;
            serialize(normalizer, os);
            vector_normalizer_frobmetric<sample_type> normalizer2;
            deserialize(normalizer2, os);
            DLIB_TEST(equal(normalizer.transform(), normalizer2.transform()));
            DLIB_TEST(equal(normalizer.transformed_means(), normalizer2.transformed_means()));
            DLIB_TEST(normalizer.in_vector_size() == normalizer2.in_vector_size());
            DLIB_TEST(normalizer.out_vector_size() == normalizer2.out_vector_size());
            DLIB_TEST(normalizer.get_max_iterations() == normalizer2.get_max_iterations());
            DLIB_TEST(std::abs(normalizer.get_c() - normalizer2.get_c()) < 1e-14);
            DLIB_TEST(std::abs(normalizer.get_epsilon() - normalizer2.get_epsilon()) < 1e-14);

        }

        void prior_frobnorm_test()
        {
            frobmetric_training_sample<matrix<double,0,1> > sample;
            std::vector<frobmetric_training_sample<matrix<double,0,1> > > samples;

            matrix<double,3,1> x, near_, far_;
            x    = 0,0,0;
            near_ = 1,0,0;
            far_  = 0,1,0;

            sample.anchor_vect = x;
            sample.near_vects.push_back(near_);
            sample.far_vects.push_back(far_);

            samples.push_back(sample);

            vector_normalizer_frobmetric<matrix<double,0,1> > trainer;
            trainer.set_c(100);
            print_spinner();
            trainer.train(samples);

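            // Without a prior the transform should put weight only on dimension 1, the
            // direction along which the far vector differs from the anchor, and zero out
            // everything else.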
            matrix<double,3,3> correct;
            correct = 0, 0, 0,
                      0, 1, 0,
                      0, 0, 0;

            dlog << LDEBUG << trainer.transform();
            DLIB_TEST(max(abs(trainer.transform()-correct)) < 1e-8);

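            // With the identity matrix prior the learned transform should stay close to
            // the identity, again only adding extra weight on dimension 1.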
            trainer.set_uses_identity_matrix_prior(true);
            print_spinner();
            trainer.train(samples);
            correct = 1, 0, 0,
                      0, 2, 0,
                      0, 0, 1;

            dlog << LDEBUG << trainer.transform();
            DLIB_TEST(max(abs(trainer.transform()-correct)) < 1e-8);

        }

        void test_lda ()
        {
            // This test makes sure we pick the right direction in a simple 2D -> 1D LDA
            typedef matrix<double,2,1> sample_type;

            std::vector<unsigned long> labels;
            std::vector<sample_type> samples;
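            // Class 1 points sit one unit above the diagonal and class 2 points one unit
            // below it, so the discriminating direction is (up to sign and scale) (1,-1)
            // and a correct 1D projection separates the classes perfectly, giving an
            // equal error rate of 0.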
            for (int i=0; i<4; i++)
            {
                sample_type s;
                s(0) = i;
                s(1) = i+1;
                samples.push_back(s);
                labels.push_back(1);

                sample_type s1;
                s1(0) = i+1;
                s1(1) = i;
                samples.push_back(s1);
                labels.push_back(2);
            }

            matrix<double> X;
            X.set_size(8,2);
            for (int i=0; i<8; i++){
                X(i,0) = samples[i](0);
                X(i,1) = samples[i](1);
            }

            matrix<double,0,1> mean;

            dlib::compute_lda_transform(X,mean,labels,1);

            std::vector<double> vals1, vals2;
            for (unsigned long i = 0; i < samples.size(); ++i)
            {
                double val = X*samples[i]-mean;
                if (i%2 == 0)
                    vals1.push_back(val);
                else
                    vals2.push_back(val);
                dlog << LINFO << "1D LDA output: " << val;
            }

            if (vals1[0] > vals2[0])
                swap(vals1, vals2);

            const double err = equal_error_rate(vals1, vals2).first;
            dlog << LINFO << "LDA ERR: " << err;
            DLIB_TEST(err == 0);
            DLIB_TEST(equal_error_rate(vals2, vals1).first == 1);
        }

        void test_equal_error_rate()
        {
            auto result = equal_error_rate({}, {});
            DLIB_TEST(result.first == 0);
            DLIB_TEST(result.second == 0);

            // no error case
            result = equal_error_rate({1,1,1}, {2,2,2});
            DLIB_TEST_MSG(result.first == 0, result.first);
            DLIB_TEST_MSG(result.second == 2, result.second);

            // max error case
            result = equal_error_rate({2,2,2}, {1,1,1});
            DLIB_TEST_MSG(result.first == 1, result.first);
            DLIB_TEST_MSG(result.second == 2, result.second);
            // Another way to have max error
            result = equal_error_rate({1,1,1}, {1,1,1});
            DLIB_TEST_MSG(result.second == 1, result.second);
            DLIB_TEST_MSG(result.first == 1, result.first);

            // wildly unbalanced
            result = equal_error_rate({}, {1,1,1});
            DLIB_TEST_MSG(result.first == 0, result.first);

            // wildly unbalanced
            result = equal_error_rate({1,1,1}, {});
            DLIB_TEST_MSG(result.first == 0, result.first);

            // 25% error case
            result = equal_error_rate({1,1,1,3}, {2, 2, 0, 2});
            DLIB_TEST_MSG(result.first == 0.25, result.first);
            DLIB_TEST_MSG(result.second == 2, result.second);
        }

        void test_running_stats_decayed()
        {
            print_spinner();
            std::vector<double> tmp(300);
            std::vector<double> tmp_var(tmp.size());
            dlib::rand rnd;
            const int num_rounds = 100000;
            for (int rounds = 0; rounds < num_rounds; ++rounds)
            {
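                // running_stats_decayed(100) keeps exponentially forgetting statistics in
                // which roughly the last 100 samples dominate.  The inputs are N(1,1)
                // draws, so averaged over many independent rounds both the mean and the
                // variance estimates should settle near 1.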
                running_stats_decayed<double> rs(100);

                for (size_t i = 0; i < tmp.size(); ++i)
                {
                    rs.add(rnd.get_random_gaussian() + 1);
                    tmp[i] += rs.mean();
                    if (i > 0)
                        tmp_var[i] += rs.variance();
                }
            }

            // Every accumulated average should be essentially 1 since the mean and
            // variance of the sampled distribution are both 1.
            for (size_t i = 0; i < tmp.size(); ++i)
            {
                DLIB_TEST(std::abs(1-tmp[i]/num_rounds) < 0.001);
                if (i > 1)
                    DLIB_TEST(std::abs(1-tmp_var[i]/num_rounds) < 0.01);
            }
        }

        void test_running_scalar_covariance_decayed()
        {
            print_spinner();
            std::vector<double> tmp(300);
            std::vector<double> tmp_var(tmp.size());
            std::vector<double> tmp_covar(tmp.size());
            dlib::rand rnd;
            const int num_rounds = 500000;
            for (int rounds = 0; rounds < num_rounds; ++rounds)
            {
                running_scalar_covariance_decayed<double> rs(100);

                for (size_t i = 0; i < tmp.size(); ++i)
                {
                    rs.add(rnd.get_random_gaussian() + 1, rnd.get_random_gaussian() + 1);
                    tmp[i] += (rs.mean_y()+rs.mean_x())/2;
                    if (i > 0)
                    {
                        tmp_var[i] += (rs.variance_y()+rs.variance_x())/2;
                        tmp_covar[i] += rs.covariance();
                    }
                }
            }

            // The means and variances should all come out essentially 1 and the
            // covariances essentially 0, since the two input streams are independent
            // N(1,1) draws.
            for (size_t i = 0; i < tmp.size(); ++i)
            {
                DLIB_TEST(std::abs(1-tmp[i]/num_rounds) < 0.001);
                if (i > 1)
                {
                    DLIB_TEST(std::abs(1-tmp_var[i]/num_rounds) < 0.01);
                    DLIB_TEST(std::abs(tmp_covar[i]/num_rounds) < 0.001);
                }
            }
        }

        void test_probability_values_are_increasing() {
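            // Both functions estimate the probability that the given sequence is trending
            // upward.  The robust variant should shrug off a single extreme outlier (the
            // 1e100 below) while the plain version is thrown off by one (the 1e10 case).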
            DLIB_TEST(probability_values_are_increasing(std::vector<double>{1,2,3,4,5,6,7,8}) > 0.99);
            DLIB_TEST(probability_values_are_increasing(std::vector<double>{8,7,6,5,4,4,3,2}) < 0.01);
            DLIB_TEST(probability_values_are_increasing_robust(std::vector<double>{1,2,3,4,5,6,7,8}) > 0.99);
            DLIB_TEST(probability_values_are_increasing_robust(std::vector<double>{8,7,6,5,4,4,3,2}) < 0.01);
            DLIB_TEST(probability_values_are_increasing(std::vector<double>{1,2,1e10,3,4,5,6,7,8}) < 0.3);
            DLIB_TEST(probability_values_are_increasing_robust(std::vector<double>{1,2,1e100,3,4,5,6,7,8}) > 0.99);
        }

        void test_event_corr()
        {
            print_spinner();
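            // event_correlation(A_count, B_count, AB_count, total) should be 0 when the
            // two events co-occur exactly as often as independence predicts
            // (1000*1000/2000 = 500 here), negative when they co-occur less than that,
            // and positive when they co-occur more.  It should also be symmetric in its
            // first two arguments.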
            DLIB_TEST(event_correlation(1000,1000,500,2000) == 0);
            DLIB_TEST(std::abs(event_correlation(1000,1000,300,2000) + 164.565757010104) < 1e-11);
            DLIB_TEST(std::abs(event_correlation(1000,1000,700,2000) - 164.565757010104) < 1e-11);

            DLIB_TEST(event_correlation(10,1000,5,2000) == 0);
            DLIB_TEST(event_correlation(1000,10,5,2000) == 0);
            DLIB_TEST(std::abs(event_correlation(10,1000,1,2000) - event_correlation(1000,10,1,2000)) < 1e-11);
            DLIB_TEST(std::abs(event_correlation(10,1000,9,2000) - event_correlation(1000,10,9,2000)) < 1e-11);

            DLIB_TEST(std::abs(event_correlation(10,1000,1,2000) + 3.69672251700842) < 1e-11);
            DLIB_TEST(std::abs(event_correlation(10,1000,9,2000) - 3.69672251700842) < 1e-11);
        }

        void perform_test (
        )
        {
            prior_frobnorm_test();
            dlib::rand rnd;
            for (int i = 0; i < 5; ++i)
                test_vector_normalizer_frobmetric(rnd);

            test_random_subset_selector();
            test_random_subset_selector2();
            test_running_covariance();
            test_running_cross_covariance();
            test_running_cross_covariance_sparse();
            test_running_stats();
            test_skewness_and_kurtosis_1();
            test_skewness_and_kurtosis_2();
            test_randomize_samples();
            test_randomize_samples2();
            another_test();
            test_average_precision();
            test_lda();
            test_event_corr();
            test_running_stats_decayed();
            test_running_scalar_covariance_decayed();
            test_equal_error_rate();
            test_probability_values_are_increasing();
        }
    } a;

}

