1 /*  $Id: test_histogram.cpp 623450 2021-01-13 17:39:10Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Vladimir Ivanov
27 *
28 * File Description:
29 *   Demo program for CHistogram class
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <util/data_histogram.hpp>
37 #include <random>
38 
39 // must be last
40 #include <common/test_assert.h>
41 
42 USING_NCBI_SCOPE;
43 
44 
45 class CDataHistogramDemoApp : public CNcbiApplication
46 {
47 public:
Init(void)48     void Init(void) {
49         unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
50         arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), "CHistogram demo program");
51         SetupArgDescriptions(arg_desc.release());
52         // Init randomizer
53         rnd.seed(seed());
54     }
55     void EstimateNumberOfBins();
56     void SimpleLinear(void);
57     void SimpleLog(void);
58     void CombinedScale(void);
59     void CustomDataType(void);
60     void Clone(void);
61 
Run(void)62     int Run(void) {
63         SimpleLinear();
64         SimpleLog();
65         CombinedScale();
66         CustomDataType();
67         EstimateNumberOfBins();
68         Clone();
69         return 0;
70     }
71 
72 private:
73     std::random_device seed;
74     std::default_random_engine rnd;
75 };
76 
77 
// The demos are driven by macros rather than functions so that the same code
// works with any CHistogram template instantiation.
//
// Requirements at the point of expansion:
//   - ADD_INT_DATA / ADD_DOUBLE_DATA / ADD_DATA use a random engine named 'rnd';
//   - ADD_DATA additionally uses the variables 'v', 'range_dist' and
//     'major_dist', which ADD_INT_DATA / ADD_DOUBLE_DATA declare for it;
//   - PRINT_STATS (and thus RUN_INT / RUN_DOUBLE) calls PrintArray<>().
//
// Macro arguments that appear inside larger expressions are parenthesized or
// explicitly converted, so callers may safely pass compound expressions.

// Simple run: add 100 integer random values to the histogram within
// specified range with some anomalies (< 5%), then print its statistics.
// The generated values may exceed the range by 'delta' on each side, where
// 'delta' is 1/10 of the range width (2 if that truncates to zero).
//
#define RUN_INT(histogram, range_min, range_max) \
    {{ \
        int delta = int((float(range_max) - float(range_min)) / 10); \
        if (!delta) delta = 2; \
        ADD_INT_DATA(histogram, 100, 95, (range_min) - delta, (range_max) + delta, range_min, range_max); \
        PRINT_STATS(histogram); \
    }}

// Simple run: add 100 double random values to the histogram within
// specified range with some anomalies (< 5%), then print its statistics.
// 'delta' is computed in floating point even if the range is given with
// integer literals.
//
#define RUN_DOUBLE(histogram, range_min, range_max) \
    {{ \
        double delta = (double(range_max) - double(range_min)) / 10; \
        ADD_DOUBLE_DATA(histogram, 100, 95, (range_min) - delta, (range_max) + delta, range_min, range_max); \
        PRINT_STATS(histogram); \
    }}

// Add values: 'n' integer samples in range [range_min, range_max] with 'percent'% values
// falling into major scale range [major_min, major_max].
//
#define ADD_INT_DATA(histogram, n, percent, range_min, range_max, major_min, major_max) \
    {{ \
        std::uniform_int_distribution<int> range_dist(range_min, range_max); \
        std::uniform_int_distribution<int> major_dist(major_min, major_max); \
        int v; \
        ADD_DATA(histogram, n, percent, range_min, range_max, major_min, major_max); \
    }}

// Add values: 'n' double samples in range [range_min, range_max] with 'percent'% values
// falling into major scale range [major_min, major_max].
//
#define ADD_DOUBLE_DATA(histogram, n, percent, range_min, range_max, major_min, major_max) \
    {{ \
        std::uniform_real_distribution<double> range_dist(range_min, range_max); \
        std::uniform_real_distribution<double> major_dist(major_min, major_max); \
        double v; \
        ADD_DATA(histogram, n, percent, range_min, range_max, major_min, major_max); \
    }}

// Common sampling loop for ADD_INT_DATA / ADD_DOUBLE_DATA (not intended to be
// used directly). On every iteration a value is drawn from 'major_dist' while
// the running share of major draws, computed in integer percent, does not
// exceed 'percent'; otherwise it is drawn from 'range_dist'. The range
// arguments are unused here, they are accepted to keep the signatures parallel.
//
#define ADD_DATA(histogram, n, percent, range_min, range_max, major_min, major_max) \
    {{ \
        size_t n_major = 0; \
        size_t n_total = 1; \
        \
        for (size_t i = 0; i < size_t(n); i++) { \
            if ( n_major * 100 / n_total <= size_t(percent)) { \
                v = major_dist(rnd); \
                n_major++; \
            } else { \
                v = range_dist(rnd); \
            } \
            (histogram).Add(v); \
            n_total++; \
        } \
    }}


// Print test header: 'msg' framed by two lines of 70 dashes.
#define PRINT_HEADER(msg) \
    cout << endl \
         << string(70, '-') << endl \
         << (msg) << endl \
         << string(70, '-') << endl \
         << endl

// Print histogram statistics: range, bins (starts and counters), total and
// anomaly counts, and the sum.
#define PRINT_STATS(h) \
    {{ \
        cout << "Range          : [" << (h).GetMin() << ":" << (h).GetMax() << "]" << endl; \
        cout << "Number of bins : "  << (h).GetNumberOfBins() << endl; \
        cout << "Starts         : "; \
        PrintArray<>((h).GetNumberOfBins(), (h).GetBinStartsPtr()); \
        cout << "Counters       : "; \
        PrintArray<>((h).GetNumberOfBins(), (h).GetBinCountersPtr()); \
        cout << "Total count    : "  << (h).GetCount()             << endl; \
        cout << "Anomaly (lower): "  << (h).GetLowerAnomalyCount() << endl; \
        cout << "Anomaly (upper): "  << (h).GetUpperAnomalyCount() << endl; \
        cout << "Sum            : "  << (h).GetSum() << endl; \
        cout << endl; \
    }}
165 
/// Print the first 'n' elements of 'arr' to cout on a single line,
/// separated by ", " and terminated with a newline.
/// For n == 0 only the newline is printed.
template<typename T>
void PrintArray(size_t n, const T* arr)
{
    size_t index = 0;
    while (index < n) {
        // The separator goes between elements, never after the last one.
        if (index != 0) {
            cout << ", ";
        }
        cout << arr[index];
        ++index;
    }
    cout << endl;
}
175 
176 
SimpleLinear(void)177 void CDataHistogramDemoApp::SimpleLinear(void)
178 {
179     PRINT_HEADER("Simple linear monotonic/simmetrical scales");
180     {{
181         cout << "Linear scale from 0 to 10, 10 bins with size 1 (10x1)" << endl;
182         CHistogram<int> h(0, 10, 10);
183         RUN_INT(h, 0, 10);
184     }}
185     {{
186         cout << "Linear scale from 0 to 50, 10 bins with size 5 (10x5)" << endl;
187         CHistogram<int> h(0, 50, 10);
188         RUN_INT(h, 0, 50);
189     }}
190     {{
191         cout << "Linear scale from 0 to 50, int scale, 11 bins (10x4 + 1x10)" << endl;
192         CHistogram<int> h(0, 50, 11);
193         RUN_INT(h, 0, 50);
194     }}
195     {{
196         cout << "Linear scale from 0 to 50, double scale, 11 bins with size 4.5454" << endl;
197         CHistogram<int, double> h(0, 50, 11);
198         RUN_INT(h, 0, 50);
199     }}
200     {{
201         cout << "Linear scale from 0 to 50, int symmetrical scale, 11 bins (5x4 + 1x10 + 5x4)" << endl;
202         CHistogram<int> h(0, 50, 11, CHistogram<>::eLinear, CHistogram<>::eSymmetrical);
203         RUN_INT(h, 0, 50);
204     }}
205     {{
206         cout << "Linear scale from 0 to 50, int symmetrical scale, 12 bins (1x5 + 10x4 + 1x5)" << endl;
207         CHistogram<int> h(0, 50, 12, CHistogram<>::eLinear, CHistogram<>::eSymmetrical);
208         RUN_INT(h, 0, 50);
209     }}
210     {{
211         // For double scale type the scale view doesn't matter, because we have no truncation
212         // all bins have same size -- so we have the same results as for eMonotonic.
213         cout << "Simple linear scale from 0 to 50, double symmetrical scale, 11 bins with size 4.5454" << endl;
214         typedef CHistogram<int, double> H;
215         H h(0, 50, 11, H::eLinear, H::eSymmetrical);
216         RUN_INT(h, 0, 50);
217     }}
218 }
219 
220 
SimpleLog(void)221 void CDataHistogramDemoApp::SimpleLog(void)
222 {
223     // We use a common logarithmic scales (eLog10) here for simplicity,
224     // but you feel free to use binary or natural logarithmic scales instead.
225 
226    {{
227         PRINT_HEADER("Monotonic common logarithmic scale from 1 to 1000");
228 
229         typedef CHistogram<unsigned int, double> H;
230         {{
231             H h(1, 1000, 3, H::eLog10, H::eMonotonic);
232             RUN_INT(h, 1, 1000);
233         }}
234         {{
235             H h(1, 1000, 6, H::eLog10, H::eMonotonic);
236             RUN_INT(h, 1, 1000);
237         }}
238         {{
239             H h(1, 1000, 20, H::eLog10, H::eMonotonic);
240             RUN_INT(h, 1, 1000);
241         }}
242     }}
243     {{
244         PRINT_HEADER("Monotonic common logarithmic scale from 0 to 1000");
245 
246         typedef CHistogram<unsigned int, double> H;
247         {{
248             H h(0, 1000, 3, H::eLog10, H::eMonotonic);
249             RUN_INT(h, 0, 1000);
250         }}
251         {{
252             H h(0, 1000, 6, H::eLog10, H::eMonotonic);
253             RUN_INT(h, 0, 1000);
254         }}
255         {{
256             H h(0, 1000, 20, H::eLog10, H::eMonotonic);
257             RUN_INT(h, 0, 1000);
258         }}
259     }}
260     {{
261         PRINT_HEADER("Monotonic common logarithmic scale for negative numbers: from -1000 to -1");
262 
263         typedef CHistogram<int, double> H;
264         {{
265             H h(-1000, -1, 3, H::eLog10, H::eMonotonic);
266             RUN_INT(h, -1000, -1);
267         }}
268         {{
269             H h(-1000, -1, 6, H::eLog10, H::eMonotonic);
270             RUN_INT(h, -1000, -1);
271         }}
272         {{
273             H h(-1000, -1, 20, H::eLog10, H::eMonotonic);
274             RUN_INT(h, -1000, -1);
275         }}
276     }}
277     {{
278         PRINT_HEADER("Monotonic common logarithmic scale for mixed range: -100 to +100");
279 
280         typedef CHistogram<int, double> H;
281         {{
282             H h(-100, 100, 3, H::eLog10, H::eMonotonic);
283             RUN_INT(h, -100, 100);
284         }}
285         {{
286             H h(-100, 100, 6, H::eLog10, H::eMonotonic);
287             RUN_INT(h, -100, 100);
288         }}
289         {{
290             H h(-100, 100, 20, H::eLog10, H::eMonotonic);
291             RUN_INT(h, -100, 100);
292         }}
293     }}
294     {{
295         PRINT_HEADER("Monotonic common logarithmic scale for small numbers");
296 
297         typedef CHistogram<double, double> H;
298         {{
299             H h(0.00001, 1, 3, H::eLog10, H::eMonotonic);
300             RUN_DOUBLE(h, 0.00001, 1);
301         }}
302         {{
303             H h(0.000000001, 1, 3, H::eLog10, H::eMonotonic);
304             RUN_DOUBLE(h, 0.000000001, 1);
305         }}
306         {{
307             H h(0.000000001, 0.001, 3, H::eLog10, H::eMonotonic);
308             RUN_DOUBLE(h, 0.000000001, 0.001);
309         }}
310         {{
311             H h(0.000000001, 100, 6, H::eLog10, H::eMonotonic);
312             RUN_DOUBLE(h, 0.000000001, 100);
313         }}
314     }}
315     {{
316         PRINT_HEADER("Symmetrical common logarithmic scale from 0 to 1000");
317 
318         typedef CHistogram<unsigned int, double> H;
319 
320         for (auto i = 1; i <= 7; i++) {
321             H h(0, 1000, i, H::eLog10, H::eSymmetrical);
322             RUN_INT(h, 0, 1000);
323         }
324         {{
325             H h(0, 1000, 20, H::eLog10, H::eSymmetrical);
326             RUN_INT(h, 0, 1000);
327         }}
328         {{
329             H h(0, 1000, 25, H::eLog10, H::eSymmetrical);
330             RUN_INT(h, 0, 1000);
331         }}
332     }}
333     {{
334         PRINT_HEADER("Symmetrical common logarithmic scale from -1000 to 1000");
335 
336         typedef CHistogram<int, double> H;
337 
338         for (auto i = 1; i <= 7; i++) {
339             H h(-1000, 1000, i, H::eLog10, H::eSymmetrical);
340             RUN_INT(h, -1000, 1000);
341         }
342         {{
343             H h(-1000, 1000, 20, H::eLog10, H::eSymmetrical);
344             RUN_INT(h, -1000, 1000);
345         }}
346         {{
347             H h(-1000, 1000, 25, H::eLog10, H::eSymmetrical);
348             RUN_INT(h, -1000, 1000);
349         }}
350     }}
351     {{
352         PRINT_HEADER("Monotonic common logarithmic scale for mixed range: -100 to +100");
353 
354         typedef CHistogram<int, double> H;
355 
356         for (auto i = 1; i <= 7; i++) {
357             H h(-100, 100, i, H::eLog10, H::eSymmetrical);
358             RUN_INT(h, -100, 100);
359         }
360         {{
361             H h(-100, 100, 20, H::eLog10, H::eSymmetrical);
362             RUN_INT(h, -100, 100);
363         }}
364         {{
365             H h(-100, 100, 25, H::eLog10, H::eSymmetrical);
366             RUN_INT(h, -100, 100);
367         }}
368     }}
369     {{
370         PRINT_HEADER("Monotonic common logarithmic scale for small numbers");
371 
372         typedef CHistogram<double, double> H;
373 
374         for (auto i = 1; i <= 7; i++) {
375             H h(0.000000001, 1, i, H::eLog10, H::eSymmetrical);
376             RUN_DOUBLE(h, 0.000000001, 1);
377         }
378         {{
379             H h(0.000000001, 1, 3, H::eLog10, H::eSymmetrical);
380             RUN_DOUBLE(h, 0.000000001, 1);
381         }}
382         {{
383             H h(0.000000001, 0.001, 3, H::eLog10, H::eSymmetrical);
384             RUN_DOUBLE(h, 0.000000001, 0.001);
385         }}
386         {{
387             H h(0.000000001, 0.001, 7, H::eLog10, H::eSymmetrical);
388             RUN_DOUBLE(h, 0.000000001, 0.001);
389         }}
390         {{
391             H h(0.000000001, 100, 10, H::eLog10, H::eSymmetrical);
392             RUN_DOUBLE(h, 0.000000001, 100);
393         }}
394     }}
395 }
396 
397 
CombinedScale(void)398 void CDataHistogramDemoApp::CombinedScale(void)
399 {
400     {{
401         typedef CHistogram<unsigned int, unsigned int> H;
402         {{
403             PRINT_HEADER("Combined scale: monotonic linear scale from 0 to 100 + 1L");
404             H h(50, 100, 5, H::eLinear, H::eMonotonic);
405             h.AddLeftScale(0, 10, H::eLinear);
406             RUN_INT(h, 0, 100);
407         }}
408         {{
409             PRINT_HEADER("Combined scale: monotonic linear scale from 0 to 100 + 1R");
410             H h(0, 50, 5, H::eLinear, H::eMonotonic);
411             h.AddRightScale(100, 10, H::eLinear);
412             RUN_INT(h, 0, 100);
413         }}
414         {{
415             PRINT_HEADER("Combined scale: monotonic linear scale from 0 to 100 + 2L + 2R");
416             H h(50, 60, 10, H::eLinear, H::eMonotonic);  // [50:60]  10 x 1
417             h.AddLeftScale(20, 5, H::eLinear);           // [20:50]   5 x 6
418             h.AddLeftScale(0, 4, H::eLinear);            // [ 0:20]   4 x 5
419             h.AddRightScale(70, 5, H::eLinear);          // [60:70]   5 x 2
420             h.AddRightScale(100, 6, H::eLinear);         // [70:100]  6 x 5
421             RUN_INT(h, 0, 100);
422         }}
423     }}
424     {{
425         typedef CHistogram<int, double> H;
426         {{
427             PRINT_HEADER("Combined scale: monotonic linear scale from 0 to 10 (x10) + log10 to 1000 (x3)");
428             H h(0, 10, 10, H::eLinear, H::eMonotonic);
429             h.AddRightScale(1000, 3, H::eLog10);
430             RUN_INT(h, 0, 1000);
431         }}
432         {{
433             PRINT_HEADER("Combined scale: monotonic linear scale from 0 to -10 (x10) + log10 to -1000 (x3)");
434             H h(-10, 0, 10, H::eLinear, H::eMonotonic);
435             h.AddLeftScale(-1000, 3, H::eLog10);
436             RUN_INT(h, -1000, 0);
437         }}
438         {{
439             PRINT_HEADER("Combined scale: symmetrical log2 from 0 to 10 (x10) + log10 to 1000 (x5)");
440             H h(0, 10, 10, H::eLog2, H::eSymmetrical);
441             h.AddRightScale(1000, 5, H::eLog10);
442             RUN_INT(h, 0, 1000);
443         }}
444     }}
445     {{
446         {{
447             PRINT_HEADER("Combined scale: mix (int)");
448             typedef CHistogram<int> H;
449             H h(5, 100, 4, H::eLog, H::eSymmetrical);
450             h.AddLeftScale(0, 2, H::eLinear);
451             h.AddLeftScale(-5, 3, H::eLinear);
452             h.AddLeftScale(-1000, 3, H::eLog2);
453             h.AddLeftScale(-10000, 3, H::eLog10);
454             h.AddRightScale(110, 5, H::eLinear);
455             h.AddRightScale(200, 6, H::eLinear);
456             h.AddRightScale(10000, 10, H::eLog2);
457             RUN_INT(h, -1000, 1000);
458         }}
459         {{
460             // same as above, but no truncation due int type
461             PRINT_HEADER("Combined scale: mix (double)");
462             typedef CHistogram<double> H;
463             H h(5, 100, 4, H::eLog, H::eSymmetrical);
464             h.AddLeftScale(0, 2, H::eLinear);
465             h.AddLeftScale(-5, 3, H::eLinear);
466             h.AddLeftScale(-1000, 3, H::eLog2);
467             h.AddLeftScale(-10000, 3, H::eLog10);
468             h.AddRightScale(110, 5, H::eLinear);
469             h.AddRightScale(200, 6, H::eLinear);
470             h.AddRightScale(10000, 10, H::eLog2);
471             RUN_DOUBLE(h, -1000, 1000);
472         }}
473     }}
474 }
475 
476 
CustomDataType(void)477 void CDataHistogramDemoApp::CustomDataType(void)
478 {
479     // Custom data type should have:
480     //   - operator T() -- to convert to scale type T ('double' in our case)
481     //   - operator >()
482     //
483     struct SValue {
484         // Default constructor
485         SValue() : v1(0), v2(0) {};
486         // Constructor
487         SValue(size_t p1, size_t p2) : v1(p1), v2(p2) {};
488         // Conversion to double (scale type).
489         operator double(void) const {
490             return (double)v1 + (v2 == 0 ? 0 : 1/(double)v2);
491         }
492         // Comparison: operatior >
493         bool operator> (const SValue& other) const {
494             if (v1 > other.v1) return true;
495             if (v1 < other.v1) return false;
496             return v2 > other.v2;
497         }
498         // Some value(s)
499         size_t v1;
500         size_t v2;
501     };
502 
503     PRINT_HEADER("Custom data type: symmetrical natural logarithmic scale from (0,0) to (100,0)");
504     typedef CHistogram<SValue, double, unsigned long> H;
505     H h(SValue(0,0), SValue(100,0), 10, H::eLog, H::eSymmetrical);
506 
507     // Generate and add some random data
508     std::uniform_int_distribution<size_t>  d1(0, 99);
509     std::uniform_int_distribution<size_t>  d2(0, 9999);
510 
511     for (size_t i = 0; i < 1000; i++) {
512         h.Add(SValue(d1(rnd),d2(rnd)));
513     }
514     // Print result
515     PRINT_STATS(h);
516 }
517 
518 
EstimateNumberOfBins(void)519 void CDataHistogramDemoApp::EstimateNumberOfBins(void)
520 {
521     PRINT_HEADER("Estimate number of bins");
522     typedef CHistogram<> H;
523 
524     const int N = 12;
525     const size_t num[N] = { 1, 3, 7, 10, 20, 40, 60, 100, 500, 1000, 5000, 20000 };
526 
527     cout << "N          : ";
528     for (int i = 0; i < N; i++) {
529         cout << num[i] << " ";
530     }
531     cout << "\nSquareRoot : ";
532     for (int i = 0; i < N; i++) {
533         cout << H::EstimateNumberOfBins(num[i], H::eSquareRoot) << " ";
534     }
535     cout << "\nJuran      : ";
536     for (int i = 0; i < N; i++) {
537         cout << H::EstimateNumberOfBins(num[i], H::eJuran) << " ";
538     }
539     cout << "\nSturge     : ";
540     for (int i = 0; i < N; i++) {
541         cout << H::EstimateNumberOfBins(num[i], H::eSturge) << " ";
542     }
543     cout << "\nRice       : ";
544     for (int i = 0; i < N; i++) {
545         cout << H::EstimateNumberOfBins(num[i], H::eRice) << " ";
546     }
547     cout << endl;
548 }
549 
550 
Clone(void)551 void CDataHistogramDemoApp::Clone(void)
552 {
553     PRINT_HEADER("CloneStructure() and move semantics");
554     typedef CHistogram<> H;
555 
556     H h(0, 10, 10, H::eLinear);
557     RUN_INT(h, 0, 10);
558 
559     // clone
560     {{
561         H hclone(h.Clone());
562         PRINT_STATS(hclone);
563     }}
564     {{
565         H hclone;
566         hclone = h.Clone();
567         PRINT_STATS(hclone);
568     }}
569     {{
570         H hclone(0, 100000000, 5, H::eLog10);
571         hclone = h.Clone(H::eCloneStructureOnly);
572         PRINT_STATS(hclone);
573     }}
574 
575     // clone and steal counters
576     {{
577         // Create clone and add counters to it
578         H hclone(h.Clone(H::eCloneStructureOnly));
579         for (size_t i = 0; i < 10; i++) {
580             hclone.Add(rand() % 10);
581         }
582         PRINT_STATS(hclone);
583         // Move counters to original histogram (add)
584         h.StealCountersFrom(hclone);
585         PRINT_STATS(h);
586     }}
587 }
588 
589 
main(int argc,char ** argv)590 int main(int argc, char** argv)
591 {
592     return CDataHistogramDemoApp().AppMain(argc, argv);
593 }
594