1 /* $Id: test_histogram.cpp 623450 2021-01-13 17:39:10Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Vladimir Ivanov
27 *
28 * File Description:
29 * Demo program for CHistogram class
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <util/data_histogram.hpp>
37 #include <random>
38
39 // must be last
40 #include <common/test_assert.h>
41
42 USING_NCBI_SCOPE;
43
44
45 class CDataHistogramDemoApp : public CNcbiApplication
46 {
47 public:
Init(void)48 void Init(void) {
49 unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
50 arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), "CHistogram demo program");
51 SetupArgDescriptions(arg_desc.release());
52 // Init randomizer
53 rnd.seed(seed());
54 }
55 void EstimateNumberOfBins();
56 void SimpleLinear(void);
57 void SimpleLog(void);
58 void CombinedScale(void);
59 void CustomDataType(void);
60 void Clone(void);
61
Run(void)62 int Run(void) {
63 SimpleLinear();
64 SimpleLog();
65 CombinedScale();
66 CustomDataType();
67 EstimateNumberOfBins();
68 Clone();
69 return 0;
70 }
71
72 private:
73 std::random_device seed;
74 std::default_random_engine rnd;
75 };
76
77
// We use macros to work with histograms for simplicity;
// they work with any CHistogram template instantiation.
80
// Simple run: add 100 random integer values to the histogram and print
// its statistics. Most values (about 95%) are drawn from
// [range_min, range_max]; the rest are drawn from that range widened by
// 'delta' on both sides, so some of them may fall outside of the range.
// 'delta' is one tenth of the range span, or 2 if that truncates to zero.
//
// Macro arguments are parenthesized, so expressions can be passed safely.
//
#define RUN_INT(histogram, range_min, range_max) \
    {{ \
        int delta = int((float(range_max) - float(range_min)) / 10); \
        if (!delta) delta = 2; \
        ADD_INT_DATA(histogram, 100, 95, \
                     (range_min) - delta, (range_max) + delta, \
                     (range_min), (range_max)); \
        PRINT_STATS(histogram); \
    }}
91
// Simple run: add 100 random double values to the histogram and print
// its statistics. Most values (about 95%) are drawn from
// [range_min, range_max]; the rest are drawn from that range widened by
// 'delta' (one tenth of the range span) on both sides, so some of them
// may fall outside of the range.
//
// The span is computed in double precision even when both bounds are
// passed as integer expressions, and macro arguments are parenthesized.
//
#define RUN_DOUBLE(histogram, range_min, range_max) \
    {{ \
        double delta = (double(range_max) - double(range_min)) / 10; \
        ADD_DOUBLE_DATA(histogram, 100, 95, \
                        (range_min) - delta, (range_max) + delta, \
                        (range_min), (range_max)); \
        PRINT_STATS(histogram); \
    }}
101
// Add 'n' random integer samples to 'histogram'.
// Samples come either from the major range [major_min, major_max] or from
// the full range [range_min, range_max]; 'percent' controls the share of
// samples taken from the major range.
//
// The names 'v', 'major_dist' and 'range_dist' declared here are the ones
// ADD_DATA refers to, so they must not be renamed.
//
#define ADD_INT_DATA(histogram, n, percent, range_min, range_max, major_min, major_max) \
    { \
        int v; \
        std::uniform_int_distribution<int> major_dist(major_min, major_max); \
        std::uniform_int_distribution<int> range_dist(range_min, range_max); \
        ADD_DATA(histogram, n, percent, range_min, range_max, major_min, major_max); \
    }
112
// Add 'n' random double samples to 'histogram'.
// Samples come either from the major range [major_min, major_max] or from
// the full range [range_min, range_max]; 'percent' controls the share of
// samples taken from the major range.
//
// The names 'v', 'major_dist' and 'range_dist' declared here are the ones
// ADD_DATA refers to, so they must not be renamed.
//
#define ADD_DOUBLE_DATA(histogram, n, percent, range_min, range_max, major_min, major_max) \
    { \
        double v; \
        std::uniform_real_distribution<double> major_dist(major_min, major_max); \
        std::uniform_real_distribution<double> range_dist(range_min, range_max); \
        ADD_DATA(histogram, n, percent, range_min, range_max, major_min, major_max); \
    }
123
// Sampling loop shared by ADD_INT_DATA and ADD_DOUBLE_DATA.
//
// The expansion site must provide:
//   v          - variable that receives each generated sample;
//   major_dist - distribution for the major range;
//   range_dist - distribution for the full range;
//   rnd        - random engine.
//
// On every iteration the sample is taken from 'major_dist' while the
// running share of major samples (n_major * 100 / n_total, in integer
// arithmetic) does not exceed 'percent'; otherwise it is taken from
// 'range_dist'. The range_min/range_max/major_min/major_max arguments are
// not used by the expansion; they are kept for a uniform signature.
//
// 'n' and 'percent' are evaluated exactly once and converted to size_t,
// which avoids precedence surprises and signed/unsigned comparisons.
//
#define ADD_DATA(histogram, n, percent, range_min, range_max, major_min, major_max) \
    {{ \
        const size_t n_samples     = static_cast<size_t>(n); \
        const size_t major_percent = static_cast<size_t>(percent); \
        size_t n_major = 0; \
        /* Starts at 1 so that the ratio below never divides by zero */ \
        size_t n_total = 1; \
        \
        for (size_t i = 0; i < n_samples; i++) { \
            if (n_major * 100 / n_total <= major_percent) { \
                v = major_dist(rnd); \
                n_major++; \
            } else { \
                v = range_dist(rnd); \
            } \
            (histogram).Add(v); \
            n_total++; \
        } \
    }}
140
141
// Print test header: an empty line, then 'msg' framed by two rules of
// 70 dashes, then another empty line.
// Expands to a single expression without a trailing semicolon.
// 'msg' is parenthesized so that any streamable expression (for example
// a conditional expression) can be passed.
#define PRINT_HEADER(msg) \
    cout << endl \
         << string(70, '-') << endl \
         << (msg) << endl \
         << string(70, '-') << endl \
         << endl
149
// Print histogram statistics: value range, number of bins, bin starts,
// bin counters, total count, lower/upper anomaly counts and the sum.
// Relies on the PrintArray() helper defined in this file.
// 'h' is parenthesized so that any expression yielding a histogram
// can be passed.
#define PRINT_STATS(h) \
    {{ \
        cout << "Range : [" << (h).GetMin() << ":" << (h).GetMax() << "]" << endl; \
        cout << "Number of bins : " << (h).GetNumberOfBins() << endl; \
        cout << "Starts : "; \
        PrintArray<>((h).GetNumberOfBins(), (h).GetBinStartsPtr()); \
        cout << "Counters : "; \
        PrintArray<>((h).GetNumberOfBins(), (h).GetBinCountersPtr()); \
        cout << "Total count : " << (h).GetCount() << endl; \
        cout << "Anomaly (lower): " << (h).GetLowerAnomalyCount() << endl; \
        cout << "Anomaly (upper): " << (h).GetUpperAnomalyCount() << endl; \
        cout << "Sum : " << (h).GetSum() << endl; \
        cout << endl; \
    }}
165
/// Print 'n' elements of 'arr' on one line, separated by ", ",
/// and finish the line.
///
/// @param n    Number of elements to print; nothing but the line end
///             is printed when it is 0.
/// @param arr  Pointer to the first element.
template<typename T>
void PrintArray(size_t n, const T* arr)
{
    for (size_t idx = 0; idx != n; ++idx) {
        // Separator goes in front of every element except the first one
        if (idx != 0) {
            cout << ", ";
        }
        cout << arr[idx];
    }
    cout << endl;
}
175
176
SimpleLinear(void)177 void CDataHistogramDemoApp::SimpleLinear(void)
178 {
179 PRINT_HEADER("Simple linear monotonic/simmetrical scales");
180 {{
181 cout << "Linear scale from 0 to 10, 10 bins with size 1 (10x1)" << endl;
182 CHistogram<int> h(0, 10, 10);
183 RUN_INT(h, 0, 10);
184 }}
185 {{
186 cout << "Linear scale from 0 to 50, 10 bins with size 5 (10x5)" << endl;
187 CHistogram<int> h(0, 50, 10);
188 RUN_INT(h, 0, 50);
189 }}
190 {{
191 cout << "Linear scale from 0 to 50, int scale, 11 bins (10x4 + 1x10)" << endl;
192 CHistogram<int> h(0, 50, 11);
193 RUN_INT(h, 0, 50);
194 }}
195 {{
196 cout << "Linear scale from 0 to 50, double scale, 11 bins with size 4.5454" << endl;
197 CHistogram<int, double> h(0, 50, 11);
198 RUN_INT(h, 0, 50);
199 }}
200 {{
201 cout << "Linear scale from 0 to 50, int symmetrical scale, 11 bins (5x4 + 1x10 + 5x4)" << endl;
202 CHistogram<int> h(0, 50, 11, CHistogram<>::eLinear, CHistogram<>::eSymmetrical);
203 RUN_INT(h, 0, 50);
204 }}
205 {{
206 cout << "Linear scale from 0 to 50, int symmetrical scale, 12 bins (1x5 + 10x4 + 1x5)" << endl;
207 CHistogram<int> h(0, 50, 12, CHistogram<>::eLinear, CHistogram<>::eSymmetrical);
208 RUN_INT(h, 0, 50);
209 }}
210 {{
211 // For double scale type the scale view doesn't matter, because we have no truncation
212 // all bins have same size -- so we have the same results as for eMonotonic.
213 cout << "Simple linear scale from 0 to 50, double symmetrical scale, 11 bins with size 4.5454" << endl;
214 typedef CHistogram<int, double> H;
215 H h(0, 50, 11, H::eLinear, H::eSymmetrical);
216 RUN_INT(h, 0, 50);
217 }}
218 }
219
220
SimpleLog(void)221 void CDataHistogramDemoApp::SimpleLog(void)
222 {
223 // We use a common logarithmic scales (eLog10) here for simplicity,
224 // but you feel free to use binary or natural logarithmic scales instead.
225
226 {{
227 PRINT_HEADER("Monotonic common logarithmic scale from 1 to 1000");
228
229 typedef CHistogram<unsigned int, double> H;
230 {{
231 H h(1, 1000, 3, H::eLog10, H::eMonotonic);
232 RUN_INT(h, 1, 1000);
233 }}
234 {{
235 H h(1, 1000, 6, H::eLog10, H::eMonotonic);
236 RUN_INT(h, 1, 1000);
237 }}
238 {{
239 H h(1, 1000, 20, H::eLog10, H::eMonotonic);
240 RUN_INT(h, 1, 1000);
241 }}
242 }}
243 {{
244 PRINT_HEADER("Monotonic common logarithmic scale from 0 to 1000");
245
246 typedef CHistogram<unsigned int, double> H;
247 {{
248 H h(0, 1000, 3, H::eLog10, H::eMonotonic);
249 RUN_INT(h, 0, 1000);
250 }}
251 {{
252 H h(0, 1000, 6, H::eLog10, H::eMonotonic);
253 RUN_INT(h, 0, 1000);
254 }}
255 {{
256 H h(0, 1000, 20, H::eLog10, H::eMonotonic);
257 RUN_INT(h, 0, 1000);
258 }}
259 }}
260 {{
261 PRINT_HEADER("Monotonic common logarithmic scale for negative numbers: from -1000 to -1");
262
263 typedef CHistogram<int, double> H;
264 {{
265 H h(-1000, -1, 3, H::eLog10, H::eMonotonic);
266 RUN_INT(h, -1000, -1);
267 }}
268 {{
269 H h(-1000, -1, 6, H::eLog10, H::eMonotonic);
270 RUN_INT(h, -1000, -1);
271 }}
272 {{
273 H h(-1000, -1, 20, H::eLog10, H::eMonotonic);
274 RUN_INT(h, -1000, -1);
275 }}
276 }}
277 {{
278 PRINT_HEADER("Monotonic common logarithmic scale for mixed range: -100 to +100");
279
280 typedef CHistogram<int, double> H;
281 {{
282 H h(-100, 100, 3, H::eLog10, H::eMonotonic);
283 RUN_INT(h, -100, 100);
284 }}
285 {{
286 H h(-100, 100, 6, H::eLog10, H::eMonotonic);
287 RUN_INT(h, -100, 100);
288 }}
289 {{
290 H h(-100, 100, 20, H::eLog10, H::eMonotonic);
291 RUN_INT(h, -100, 100);
292 }}
293 }}
294 {{
295 PRINT_HEADER("Monotonic common logarithmic scale for small numbers");
296
297 typedef CHistogram<double, double> H;
298 {{
299 H h(0.00001, 1, 3, H::eLog10, H::eMonotonic);
300 RUN_DOUBLE(h, 0.00001, 1);
301 }}
302 {{
303 H h(0.000000001, 1, 3, H::eLog10, H::eMonotonic);
304 RUN_DOUBLE(h, 0.000000001, 1);
305 }}
306 {{
307 H h(0.000000001, 0.001, 3, H::eLog10, H::eMonotonic);
308 RUN_DOUBLE(h, 0.000000001, 0.001);
309 }}
310 {{
311 H h(0.000000001, 100, 6, H::eLog10, H::eMonotonic);
312 RUN_DOUBLE(h, 0.000000001, 100);
313 }}
314 }}
315 {{
316 PRINT_HEADER("Symmetrical common logarithmic scale from 0 to 1000");
317
318 typedef CHistogram<unsigned int, double> H;
319
320 for (auto i = 1; i <= 7; i++) {
321 H h(0, 1000, i, H::eLog10, H::eSymmetrical);
322 RUN_INT(h, 0, 1000);
323 }
324 {{
325 H h(0, 1000, 20, H::eLog10, H::eSymmetrical);
326 RUN_INT(h, 0, 1000);
327 }}
328 {{
329 H h(0, 1000, 25, H::eLog10, H::eSymmetrical);
330 RUN_INT(h, 0, 1000);
331 }}
332 }}
333 {{
334 PRINT_HEADER("Symmetrical common logarithmic scale from -1000 to 1000");
335
336 typedef CHistogram<int, double> H;
337
338 for (auto i = 1; i <= 7; i++) {
339 H h(-1000, 1000, i, H::eLog10, H::eSymmetrical);
340 RUN_INT(h, -1000, 1000);
341 }
342 {{
343 H h(-1000, 1000, 20, H::eLog10, H::eSymmetrical);
344 RUN_INT(h, -1000, 1000);
345 }}
346 {{
347 H h(-1000, 1000, 25, H::eLog10, H::eSymmetrical);
348 RUN_INT(h, -1000, 1000);
349 }}
350 }}
351 {{
352 PRINT_HEADER("Monotonic common logarithmic scale for mixed range: -100 to +100");
353
354 typedef CHistogram<int, double> H;
355
356 for (auto i = 1; i <= 7; i++) {
357 H h(-100, 100, i, H::eLog10, H::eSymmetrical);
358 RUN_INT(h, -100, 100);
359 }
360 {{
361 H h(-100, 100, 20, H::eLog10, H::eSymmetrical);
362 RUN_INT(h, -100, 100);
363 }}
364 {{
365 H h(-100, 100, 25, H::eLog10, H::eSymmetrical);
366 RUN_INT(h, -100, 100);
367 }}
368 }}
369 {{
370 PRINT_HEADER("Monotonic common logarithmic scale for small numbers");
371
372 typedef CHistogram<double, double> H;
373
374 for (auto i = 1; i <= 7; i++) {
375 H h(0.000000001, 1, i, H::eLog10, H::eSymmetrical);
376 RUN_DOUBLE(h, 0.000000001, 1);
377 }
378 {{
379 H h(0.000000001, 1, 3, H::eLog10, H::eSymmetrical);
380 RUN_DOUBLE(h, 0.000000001, 1);
381 }}
382 {{
383 H h(0.000000001, 0.001, 3, H::eLog10, H::eSymmetrical);
384 RUN_DOUBLE(h, 0.000000001, 0.001);
385 }}
386 {{
387 H h(0.000000001, 0.001, 7, H::eLog10, H::eSymmetrical);
388 RUN_DOUBLE(h, 0.000000001, 0.001);
389 }}
390 {{
391 H h(0.000000001, 100, 10, H::eLog10, H::eSymmetrical);
392 RUN_DOUBLE(h, 0.000000001, 100);
393 }}
394 }}
395 }
396
397
CombinedScale(void)398 void CDataHistogramDemoApp::CombinedScale(void)
399 {
400 {{
401 typedef CHistogram<unsigned int, unsigned int> H;
402 {{
403 PRINT_HEADER("Combined scale: monotonic linear scale from 0 to 100 + 1L");
404 H h(50, 100, 5, H::eLinear, H::eMonotonic);
405 h.AddLeftScale(0, 10, H::eLinear);
406 RUN_INT(h, 0, 100);
407 }}
408 {{
409 PRINT_HEADER("Combined scale: monotonic linear scale from 0 to 100 + 1R");
410 H h(0, 50, 5, H::eLinear, H::eMonotonic);
411 h.AddRightScale(100, 10, H::eLinear);
412 RUN_INT(h, 0, 100);
413 }}
414 {{
415 PRINT_HEADER("Combined scale: monotonic linear scale from 0 to 100 + 2L + 2R");
416 H h(50, 60, 10, H::eLinear, H::eMonotonic); // [50:60] 10 x 1
417 h.AddLeftScale(20, 5, H::eLinear); // [20:50] 5 x 6
418 h.AddLeftScale(0, 4, H::eLinear); // [ 0:20] 4 x 5
419 h.AddRightScale(70, 5, H::eLinear); // [60:70] 5 x 2
420 h.AddRightScale(100, 6, H::eLinear); // [70:100] 6 x 5
421 RUN_INT(h, 0, 100);
422 }}
423 }}
424 {{
425 typedef CHistogram<int, double> H;
426 {{
427 PRINT_HEADER("Combined scale: monotonic linear scale from 0 to 10 (x10) + log10 to 1000 (x3)");
428 H h(0, 10, 10, H::eLinear, H::eMonotonic);
429 h.AddRightScale(1000, 3, H::eLog10);
430 RUN_INT(h, 0, 1000);
431 }}
432 {{
433 PRINT_HEADER("Combined scale: monotonic linear scale from 0 to -10 (x10) + log10 to -1000 (x3)");
434 H h(-10, 0, 10, H::eLinear, H::eMonotonic);
435 h.AddLeftScale(-1000, 3, H::eLog10);
436 RUN_INT(h, -1000, 0);
437 }}
438 {{
439 PRINT_HEADER("Combined scale: symmetrical log2 from 0 to 10 (x10) + log10 to 1000 (x5)");
440 H h(0, 10, 10, H::eLog2, H::eSymmetrical);
441 h.AddRightScale(1000, 5, H::eLog10);
442 RUN_INT(h, 0, 1000);
443 }}
444 }}
445 {{
446 {{
447 PRINT_HEADER("Combined scale: mix (int)");
448 typedef CHistogram<int> H;
449 H h(5, 100, 4, H::eLog, H::eSymmetrical);
450 h.AddLeftScale(0, 2, H::eLinear);
451 h.AddLeftScale(-5, 3, H::eLinear);
452 h.AddLeftScale(-1000, 3, H::eLog2);
453 h.AddLeftScale(-10000, 3, H::eLog10);
454 h.AddRightScale(110, 5, H::eLinear);
455 h.AddRightScale(200, 6, H::eLinear);
456 h.AddRightScale(10000, 10, H::eLog2);
457 RUN_INT(h, -1000, 1000);
458 }}
459 {{
460 // same as above, but no truncation due int type
461 PRINT_HEADER("Combined scale: mix (double)");
462 typedef CHistogram<double> H;
463 H h(5, 100, 4, H::eLog, H::eSymmetrical);
464 h.AddLeftScale(0, 2, H::eLinear);
465 h.AddLeftScale(-5, 3, H::eLinear);
466 h.AddLeftScale(-1000, 3, H::eLog2);
467 h.AddLeftScale(-10000, 3, H::eLog10);
468 h.AddRightScale(110, 5, H::eLinear);
469 h.AddRightScale(200, 6, H::eLinear);
470 h.AddRightScale(10000, 10, H::eLog2);
471 RUN_DOUBLE(h, -1000, 1000);
472 }}
473 }}
474 }
475
476
CustomDataType(void)477 void CDataHistogramDemoApp::CustomDataType(void)
478 {
479 // Custom data type should have:
480 // - operator T() -- to convert to scale type T ('double' in our case)
481 // - operator >()
482 //
483 struct SValue {
484 // Default constructor
485 SValue() : v1(0), v2(0) {};
486 // Constructor
487 SValue(size_t p1, size_t p2) : v1(p1), v2(p2) {};
488 // Conversion to double (scale type).
489 operator double(void) const {
490 return (double)v1 + (v2 == 0 ? 0 : 1/(double)v2);
491 }
492 // Comparison: operatior >
493 bool operator> (const SValue& other) const {
494 if (v1 > other.v1) return true;
495 if (v1 < other.v1) return false;
496 return v2 > other.v2;
497 }
498 // Some value(s)
499 size_t v1;
500 size_t v2;
501 };
502
503 PRINT_HEADER("Custom data type: symmetrical natural logarithmic scale from (0,0) to (100,0)");
504 typedef CHistogram<SValue, double, unsigned long> H;
505 H h(SValue(0,0), SValue(100,0), 10, H::eLog, H::eSymmetrical);
506
507 // Generate and add some random data
508 std::uniform_int_distribution<size_t> d1(0, 99);
509 std::uniform_int_distribution<size_t> d2(0, 9999);
510
511 for (size_t i = 0; i < 1000; i++) {
512 h.Add(SValue(d1(rnd),d2(rnd)));
513 }
514 // Print result
515 PRINT_STATS(h);
516 }
517
518
EstimateNumberOfBins(void)519 void CDataHistogramDemoApp::EstimateNumberOfBins(void)
520 {
521 PRINT_HEADER("Estimate number of bins");
522 typedef CHistogram<> H;
523
524 const int N = 12;
525 const size_t num[N] = { 1, 3, 7, 10, 20, 40, 60, 100, 500, 1000, 5000, 20000 };
526
527 cout << "N : ";
528 for (int i = 0; i < N; i++) {
529 cout << num[i] << " ";
530 }
531 cout << "\nSquareRoot : ";
532 for (int i = 0; i < N; i++) {
533 cout << H::EstimateNumberOfBins(num[i], H::eSquareRoot) << " ";
534 }
535 cout << "\nJuran : ";
536 for (int i = 0; i < N; i++) {
537 cout << H::EstimateNumberOfBins(num[i], H::eJuran) << " ";
538 }
539 cout << "\nSturge : ";
540 for (int i = 0; i < N; i++) {
541 cout << H::EstimateNumberOfBins(num[i], H::eSturge) << " ";
542 }
543 cout << "\nRice : ";
544 for (int i = 0; i < N; i++) {
545 cout << H::EstimateNumberOfBins(num[i], H::eRice) << " ";
546 }
547 cout << endl;
548 }
549
550
Clone(void)551 void CDataHistogramDemoApp::Clone(void)
552 {
553 PRINT_HEADER("CloneStructure() and move semantics");
554 typedef CHistogram<> H;
555
556 H h(0, 10, 10, H::eLinear);
557 RUN_INT(h, 0, 10);
558
559 // clone
560 {{
561 H hclone(h.Clone());
562 PRINT_STATS(hclone);
563 }}
564 {{
565 H hclone;
566 hclone = h.Clone();
567 PRINT_STATS(hclone);
568 }}
569 {{
570 H hclone(0, 100000000, 5, H::eLog10);
571 hclone = h.Clone(H::eCloneStructureOnly);
572 PRINT_STATS(hclone);
573 }}
574
575 // clone and steal counters
576 {{
577 // Create clone and add counters to it
578 H hclone(h.Clone(H::eCloneStructureOnly));
579 for (size_t i = 0; i < 10; i++) {
580 hclone.Add(rand() % 10);
581 }
582 PRINT_STATS(hclone);
583 // Move counters to original histogram (add)
584 h.StealCountersFrom(hclone);
585 PRINT_STATS(h);
586 }}
587 }
588
589
main(int argc,char ** argv)590 int main(int argc, char** argv)
591 {
592 return CDataHistogramDemoApp().AppMain(argc, argv);
593 }
594