1 #ifndef DATA_HISTOGRAM__HPP
2 #define DATA_HISTOGRAM__HPP
3 
4 /*  $Id: data_histogram.hpp 623452 2021-01-13 17:39:53Z ivanov $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author:  Vladimir Ivanov
30  *
31  */
32 
33 /// @file data_histogram.hpp
34 /// Frequency histogram for data distribution of the numerical samples.
35 
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbistl.hpp>
38 
39 #define _USE_MATH_DEFINES // to define math constants in math.h
40 #include <math.h>         // log/pow functions
41 #include <cmath>          // additional overloads for pow()
42 #include <memory>
43 #include <mutex>
44 
45 
46 /** @addtogroup Statistics
47  *
48  * @{
49  */
50 
51 BEGIN_NCBI_SCOPE
52 
53 
54 
55 /////////////////////////////////////////////////////////////////////////////
56 ///
57 /// Helper types for CHistogram<>::GetSum() support
58 //
59 
60 // if T doesn't have 'foo' method  (operator+= in our case) with the signature
61 // that allows to compile the bellow expression then instantiating this template
62 // is Substitution Failure (SF) which Is Not An Error (INAE) if this happens
63 // during overload resolution
64 template <typename T>
65 using T_HistogramValueTypeHavePlus = decltype((T&)(std::declval<T>().operator+=(std::declval<const T&>())));
66 
67 // Checks that T is arithmetic type or have operator+= implemented
68 template <typename T>
g_HistogramValueTypeHavePlus()69 constexpr bool g_HistogramValueTypeHavePlus()
70 {
71     return cxx_is_supported<T_HistogramValueTypeHavePlus, T>::value  ||  std::is_arithmetic<T>::value;
72 }
73 
74 
75 /////////////////////////////////////////////////////////////////////////////
76 ///
77 /// CHistogram -- collect the distribution of the numerical data samples.
78 ///
79 /// CHistogram <TValue, TScale, TCounter>
80 ///
81 /// TValue -   type of values that distribution will be collected.
82 ///            This can be any numerical or user defined type, if it allow
83 ///            comparison and can be converted to scale type TScale, see note.
84 ///
85 /// TScale -   numerical type used to calculate bins starting positions and sizes.
86 ///            For integer types some scales, logarithmic for examples, will lead
87 ///            to truncating values and creating unequal bins, so TScale
88 ///            should be float/double based.
89 ///
90 /// TCounter - type to store counters. Usually this is a positive integer type:
91 ///            int, unsigned int, long, size_t, Uint8 and etc.
92 ///
93 /// @note
94 ///   TValue and TScale types should be equal, or allow comparison and conversion
95 ///   between them. Any types can be used as TValue if it have:
96 ///   = default constructor: TValue()  -- for initialization;
97 ///   - operator TScale() const        -- to convert to scale type TScale;
98 ///   - bool operator >(const TValue&) -- for comparison.
99 ///
100 /// @note
101 ///   CHistogram is not MT safe by default, it is intended to be created
102 ///   and collect data withing the same thread. If you want to use the same
103 ///   CHistogram class object across multiple threads, you need to provide
104 ///   some sort of MT protection to guard access to the histogram object,
105 ///   or enable internal MT protection, and call CHistogram::EnableMT()
106 ///   immediately after creating a histogram object.
107 
108 
109 template <typename TValue = int, typename TScale = TValue, typename TCounter = Uint8>
110 class CHistogram
111 {
112 public:
113     /// Scale type.
114     /// Most common is a linear scale, where each bin have the same size.
115     /// It is good if you have a limited range, but sometimes values are distributed
116     /// very wide, and if you want to count all of them, but concentrate on a
117     /// some range with most significant values, logarithmic scales helps a lot.
118     /// For logarithmic scales each next bin have a greater size.
119     /// Bins size increasing depends on a logarithmic base (used scale type).
120     ///
121     enum EScaleType {
122         eLinear = 1, ///< Arithmetic or linear scale
123         eLog,        ///< Natural logarithmic scale with a base e ~ 2.72
124         eLog2,       ///< Binary logarithmic scale with a base 2
125         eLog10       ///< Common logarithmic scale with a base 10
126     };
127 
128     /// Methods to build bins for a specified scale.
129     /// Applies to the main scale, defined in constructor, only.
130     enum EScaleView {
131         /// Use specified scale method to calculate bins sizes from a minimum
132         /// to a maximum value.
133         eMonotonic,
134         /// Determine a mean for a specified value range and calculates
135         /// bins sizes using specified scale to both sides from it, symmetrically.
136         eSymmetrical
137     };
138 
139 
140     /////////////////////////////////////////////////////////////////////////
141     // Initialization
142 
143     /// Constructor.
144     ///
145     /// @param min_value
146     ///   Minimum allowed value in range [min_value, max_value].
147     /// @param max_value
148     ///   Maximum allowed value in range [min_value, max_value].
149     /// @param n_bins
150     ///   Number of bins for the histogram (main scale).
151     /// @param scale_type
152     ///   Predefined scale type. Corresponding scale function will be used
153     ///   to calculate scale's bin ranges.
154     /// @param scale_view
155     ///   Scale view (symmetrical or monotonic).
156     /// @sa EScaleType, EScaleView, AddLeftScale, AddRightScale, Add
157     ///
158     CHistogram(TValue min_value, TValue max_value, unsigned n_bins,
159                EScaleType scale_type = eLinear,
160                EScaleView scale_view = eMonotonic);
161 
162     /// Add auxiliary left/right scales.
163     ///
164     /// The CHistogram constructor defines a main scale for some data range.
165     /// It can be very limited to get more precise results for the value distribution.
166     /// You can get the number of hits whose values fall outside the range of the main
167     /// scale using GetLowerAnomalyCount()/GetUpperAnomalyCount() methods,
168     /// but each returns a single number of hits outside of the range.
169     /// The other method is to use auxiliary scale(s) to count the number of hits
170     /// for the less significant range(s). You can add any number of scales from each side.
171     /// So, you can use linear scale with much greater bin sizes, or logarithmic scales,
172     /// that allow to cover a very wide range of data with a limited number of bins.
173     /// See EScaleType for details.
174     ///
175     /// @param min_value
176     ///   Minimum allowed value for the auxiliary scale.
177     ///   Its maximum value is the same as a minimum value for the main scale,
178     ///   or previously added scale.
179     /// @param n_bins
180     ///   Number of bins for the auxiliary scale.
181     /// @param scale_type
182     ///   Predefined scale type. Corresponding scale function will be used
183     ///   to calculate scale's bin ranges.
184     /// @note
185     ///   It is not allowed to add left/right auxiliary scales after starting counting hits.
186     ///   Please use these methods before Add(), or after Reset().
187     /// @sa CHistogram::CHistogram, AddRightScale, EScaleType, GetLowerAnomalyCount, GetUpperAnomalyCount
188     ///
189     void AddLeftScale (TValue min_value, unsigned n_bins, EScaleType scale_type);
190     void AddRightScale(TValue max_value, unsigned n_bins, EScaleType scale_type);
191 
192 
193     /////////////////////////////////////////////////////////////////////////
194     // Populating
195 
196     /// Sum type: double for all floating points TValue types, int64_t/uint64_t for integral, and TValue otherwise.
197     using TIntegral   = typename std::conditional<std::numeric_limits   <TValue>::is_signed, int64_t,     uint64_t >::type;
198     using TArithmetic = typename std::conditional<std::is_floating_point<TValue>::value,     double,      TIntegral>::type;
199     using TSum        = typename std::conditional<std::is_arithmetic    <TValue>::value,     TArithmetic, TValue   >::type;
200 
201     /// Add value to the data distribution.
202     /// Try to find an appropriate bin for a specified value and increase its counter on 1.
203     /// Also, calculates a sum of all added values if TValue type have addition support.
204     /// @sa GetStarts, GetCounters, GetSum
205     ///
206     template <typename V, typename S = TSum,
207               std::enable_if_t<g_HistogramValueTypeHavePlus<S>(), int> = 0>
Add(const V & v)208     void Add(const V& v) {
209         MT_Lock();
210         x_Add(v);
211         // Calculate sum, TSum have operator+=
212         m_Sum += v;
213         MT_Unlock();
214     }
215     template <typename V, typename S = TSum,
216              std::enable_if_t<!g_HistogramValueTypeHavePlus<S>(), int> = 0>
Add(const V & v)217     void Add(const V& v) {
218         MT_Lock();
219         x_Add(v);
220         MT_Unlock();
221     }
222 
223     // Return value for a class member, MT safe
224     #define RETURN_MT_SAFE(member) \
225         if (m_IsMT) {    \
226             MT_Lock();       \
227             auto v = member; \
228             MT_Unlock();     \
229             return v;        \
230         }                    \
231         return member
232 
233     /// Return the sum of all added values.
234     /// @return
235     ///   Returned type depends on TValue type, and converts to:
236     ///     - double   -- for all floating point types;
237     ///     - int64_t  -- signed integral types;
238     ///     - uint64_t -- unsigned integral types;
239     ///     - TValue   -- for all other types.
240     ///   The sum can be calculated for floating, integral types and all other
241     ///   TValue types that have 'operator +=' defined.
242     ///   If TValue type doesn't have such operator defined, empty value {} is returned.
243     ///   Note, compiler can convert this empty {} value to TScale type, because every TValue
244     ///   should have 'operator TScale() const' by design.
245     /// @note
246     ///   This method doesn't check calculated sum on overflow.
247     /// @sa Add, TSum
GetSum(void) const248     TSum GetSum(void) const { RETURN_MT_SAFE(m_Sum); }
249 
250     /// Reset all data counters.
251     void Reset();
252 
253     /// Add counters from 'other' histogram to this histogram, 'other' doesn't changes.
254     /// @note Both histograms should have the same structure.
255     /// @sa Clone, Reset, StealCountersFrom
256     void AddCountersFrom(const CHistogram& other);
257 
258     /// Add counters from 'other' histogram to this histogram,
259     /// then reset the counters of 'other' histogram.
260     /// @note Both histograms should have the same structure.
261     /// @sa Clone, Reset, AddCountersFrom
262     void StealCountersFrom(CHistogram& other);
263 
264 
265     /////////////////////////////////////////////////////////////////////////
266     // Structure
267 
268     /// Get the lower bound of the combined scale.
GetMin() const269     TValue GetMin() const { return m_Min; }
270 
271     /// Get the upper bound of the combined scale.
GetMax() const272     TValue GetMax() const { return m_Max; }
273 
274     /// Return the number ot bins on the combined scale.
275     /// @sa GetBinStarts, GetBinCounters, GetBinCountersPtr
GetNumberOfBins() const276     unsigned GetNumberOfBins() const { return m_NumBins; }
277 
278     /// Get starting positions for bins on the combined scale.
279     ///
280     /// Populate a vector with starting positions for bins on the combined scale.
281     /// The size of the vector changes to be equal the number of bins.
282     /// @sa GetNumberOfBins, GetBinCounters, GetBinStartsPtr
GetBinStarts(vector<TScale> & positions)283     void GetBinStarts(vector<TScale>& positions) {
284         MT_Lock();
285         positions.resize(m_NumBins);
286         positions.assign(m_Starts.get(), m_Starts.get() + m_NumBins);
287         MT_Unlock();
288     }
289 
290     /// Get starting positions for bins on the combined scale (not MT safe).
291     ///
292     /// Returns a pointer to array. The number of bins can be obtained with GetNumberOfBins().
293     /// and a number of a counters in each bin -- with GetBinCounters().
294     /// @warning
295     ///   Be aware that any change in the histogram's structure, adding additional scales,
296     ///   invalidate all data and pointer itself. This method is not intended to use in MT
297     ///   environment. Please use vector version GetBinStarts(vector<TScale>&).
298     /// @sa GetNumberOfBins, GetBinCountersPtr, GetBinStarts, EnableMT
GetBinStartsPtr() const299     const TScale* GetBinStartsPtr() const { return m_Starts.get(); }
300 
301 
302     /////////////////////////////////////////////////////////////////////////
303     // Results
304 
305     /// Get counters for the combined scale's bins.
306     ///
307     /// Populate a vector with counters for the combined scale's bins.
308     /// The size of the vector changes to be equal to the number of bins.
309     /// @sa GetNumberOfBins, GetBinStarts, GetBinCountersPtr, Add
GetBinCounters(vector<TCounter> & counters)310     void GetBinCounters(vector<TCounter>& counters) {
311         MT_Lock();
312         counters.resize(m_NumBins);
313         counters.assign(m_Counters.get(), m_Counters.get() + m_NumBins);
314         MT_Unlock();
315     }
316 
317     /// Get counters for the combined scale's bins (not MT safe).
318     ///
319     /// Returns a pointer to array. The number of bins can be obtained with GetNumberOfBins().
320     /// @warning
321     ///   Be aware that any change in the histogram's structure invalidate all data
322     ///   and pointer itself. This method is not intended to use in MT environment.
323     ///   Please use vector version GetBinCounters(vector<TCounter>&).
324     /// @sa GetNumberOfBins, GetBinStartsPtr, GetBinCounters, Add, EnableMT
GetBinCountersPtr() const325     const TCounter* GetBinCountersPtr() const { return m_Counters.get(); }
326 
327     /// Get total number of hits whose value fell between GetMin() and GetMax().
328     /// The number of hits whose values fall outside that range can be obtained
329     /// using GetLowerAnomalyCount() and GetUpperAnomalyCount() methods.
330     /// @sa GetMin, GetMax, GetLowerAnomalyCount, GetUpperAnomalyCount, GetBinCounters
GetCount() const331     TCounter GetCount() const { RETURN_MT_SAFE(m_Count); }
332 
333     /// Get number of hits whose values were less than GetMin().
334     /// @sa GetUpperAnomalyCount, GetCount
GetLowerAnomalyCount() const335     size_t GetLowerAnomalyCount() const {RETURN_MT_SAFE(m_LowerAnomalyCount); }
336 
337     /// Get number of hits whose values were greater than GetMax().
338     /// @sa GetLowerAnomalyCount, GetCount
GetUpperAnomalyCount() const339     size_t GetUpperAnomalyCount() const { RETURN_MT_SAFE(m_UpperAnomalyCount); }
340 
341 
342     /////////////////////////////////////////////////////////////////////////
343     // Helpers
344 
345     /// Rules to calculate an estimated numbers of bins on the base of the expected
346     /// number of observations.
347     /// Note, that there is no "best" number of bins, and different bin sizes can reveal
348     /// different features of the data. All these methods generally make strong assumptions
349     /// about the shape of the distribution. Depending on the actual data distribution
350     /// and the goals of the analysis, different bin widths may be appropriate,
351     /// so experimentation is usually needed to determine an appropriate width.
352     /// @note
353     ///   All methods applies to the linear scale only, that have a fixed bin sizes.
354     /// @sa
355     ///   EstimateNumberOfBins
356     ///
357     enum EEstimateNumberOfBinsRule {
358         /// Square root rule. Used by Excel histograms and many others.
359         /// Default value for EstimateNumberOfBins() as well.
360         eSquareRoot = 0,
361         /// Juran's "Quality Control Handbook" that provide guidelines
362         /// to select the number of bins for histograms.
363         eJuran,
364         /// Herbert Sturge's rule. It works best for continuous data that is
365         /// normally distributed and symmetrical. As long as your data is not skewed,
366         /// using Sturge's rule should give you a nice-looking, easy to read
367         /// histogram that represents the data well.
368         eSturge,
369         /// Rice's rule. Presented as a simple alternative to Sturge's rule.
370         eRice
371     };
372 
373     /// Estimate numbers of bins on the base of the expected number of observations 'n'.
374     /// @note
375     ///   Applies to the linear scale only.
376     /// @sa
377     ///   EEstimateNumberOfBinsRules
378     static unsigned EstimateNumberOfBins(size_t n, EEstimateNumberOfBinsRule rule = 0);
379 
380 
381     /////////////////////////////////////////////////////////////////////////
382     // Move semantics
383 
384     /// Default constructor.
385     ///
386     /// Creates empty object. The object itself is invalid without min/max values,
387     /// and any scale. Should be used with conjunction of Clone() only.
388     /// @sa Clone
389     ///
390     CHistogram(void);
391 
392     /// Move constructor.
393     ///
394     /// Move 'other' histogram data to the current object. 'other' became invalid.
395     /// @example
396     ///     CHistogram<> h1(min, max, n, type);
397     ///     CHistogram<> h2(h1.Clone(how));
398     /// @sa Clone
399     ///
400     CHistogram(CHistogram&& other);
401 
402     /// Move assignment operator.
403     ///
404     /// Move 'other' histogram data to the current object. 'other' became invalid.
405     /// @example
406     ///     CHistogram<> h1(min, max, n, type);
407     ///     CHistogram<> h2;
408     ///     h2 = h1.Clone(how);
409     /// @sa Clone
410     ///
411     CHistogram& operator=(CHistogram&& other);
412 
413     enum EClone {
414         eCloneAll = 0,       ///< Clone whole histogram, with scale and counters
415         eCloneStructureOnly  ///< Clone structure only (the counters will be zeroed)
416     };
417 
418     /// Clone histogram structure.
419     ///
420     /// Creates a copy of the histogram structure depending on clone method.
421     /// By default it is a whole copy, but you can clone histogram's structure only, without counters.
422     ///
423     CHistogram Clone(EClone how = eCloneAll) const;
424 
425 
426     /////////////////////////////////////////////////////////////////////////
427     // Optional MT protection
428 
429     /// Add MT protection to histogram.
430     ///
431     /// By default CHistogram is not MT protected and intended to be created
432     /// and used withing the same thread. Calling this method add MT protection
433     /// on adding additional scales, adding and resetting values, cloning and
434     /// moving histograms. Please call this method immediately after creating
435     /// each histogram, before accessing it from any other thread, or deadlock can occurs.
436     ///
EnableMT(void)437     void EnableMT(void) {
438         m_IsMT = true;
439     };
440     /// MT locking
MT_Lock() const441     void MT_Lock() const {
442         if (m_IsMT) m_Mutex.lock();
443     }
444     // MT unlocking
MT_Unlock() const445     void MT_Unlock() const {
446         if (m_IsMT) m_Mutex.unlock();
447     }
448 
449 protected:
450     // Note, none of x_*() methods have MT locking, it should be done in public methods only.
451 
452     /// Calculate bins starting positions.
453     /// Calculate positions for 'n' number of bins in [start,end] value range,
454     /// starting from 'pos' bin index.
455     /// If start_value > end_value, calculating going from the right to left.
456     /// This is applicable for symmetrical scales, or multi-scales,
457     /// with left scale added.
458     void x_CalculateBins(TScale start_value, TScale end_value,
459                          unsigned pos, unsigned n,
460                          EScaleType scale_type, EScaleView scale_view);
461 
462     /// Calculate bins starting positions for a linear scale.
463     /// Account for an integer TScale type and calculation truncation.
464     void x_CalculateBinsLinear(
465                          TScale start_value, TScale end_value, TScale step,
466                          TScale* arr, unsigned pos, unsigned n,
467                          EScaleView scale_view);
468 
469     /// Calculate bins starting positions for a logarithmic scale.
470     void x_CalculateBinsLog(
471                          TScale start_value, TScale end_value,
472                          TScale* arr, unsigned pos, unsigned n,
473                          EScaleType scale_type, EScaleView scale_view);
474 
475     /// Scale function.
476     /// Calculates scale position on a base of data value.
477     TScale x_Func(EScaleType scale_type, TScale value);
478 
479     /// Inverse scale function.
480     /// Calculates a data value on a base of scale value/position.
481     TScale x_FuncInverse(EScaleType scale_type, TScale scale_value);
482 
483     /// Add value to the data distribution (internal version without locking).
484     void x_Add(TValue value);
485 
486     /// Add value to the data distribution using a linear search method.
487     /// Usually faster than bisection method on a small number of bins,
488     /// or if the values have a tendency to fall into the starting bins
489     /// for a used scale.
490     /// @sa Add, x_AddBisection
491     void x_AddLinear(TValue value);
492 
493     /// Add value to the data distribution using a bisection search method.
494     /// Usually faster on a long scales with a lot of bins.
495     /// @sa Add, x_AddLinear
496     void x_AddBisection(TValue value);
497 
498     /// Move data from 'other' histogram. 'other' became invalid.
499     void x_MoveFrom(CHistogram& other);
500 
501     /// Add counters from 'other' histogram.
502     void x_AddCountersFrom(CHistogram& other);
503 
504     /// Check that 'a' and 'b' scale values are equal (or almost equal for floating scales).
505     bool x_IsEqual(TScale a, TScale b);
506 
507     /// Reset all data counters (internal version without locking).
508     void x_Reset();
509 
510     /// Prevent copying.
511     /// See move constructor and move assignment operator to use with move-semantics.
512     CHistogram(const CHistogram&);
513     CHistogram& operator=(const CHistogram&);
514 
515 protected:
516     TValue    m_Min;         ///< Minimum value (the lower bound of combined scale)
517     TValue    m_Max;         ///< Maximum value (the upper bound of combined scale)
518     unsigned  m_NumBins;     ///< Number of bins (m_Starts[]/m_Counts[] length)
519     TSum      m_Sum = {};    ///< Sum of the all added values (if applicable for TValue)
520 
521     std::unique_ptr<TScale[]>   m_Starts;    ///< Combined scale: starting bins positions
522     std::unique_ptr<TCounter[]> m_Counters;  ///< Combined scale: counters - the number of measurements for each bin
523 
524     TCounter  m_Count;              ///< Number of counted values (sum all m_Counters[])
525     TCounter  m_LowerAnomalyCount;  ///< Number of anomaly values < m_Min
526     TCounter  m_UpperAnomalyCount;  ///< Number of anomaly values > m_Max
527 
528     bool      m_IsMT;               ///< MT protection flag
529     mutable std::mutex m_Mutex;     ///< MT protection mutex
530 };
531 
532 
533 
534 /// A series of same-structured histograms covering logarithmically (base 2)
535 /// increasing time periods... roughly
536 ///
537 template <typename TValue, typename TScale, typename TCounter>
538 class CHistogramTimeSeries
539 {
540 public:
541     /// @param model_histogram
542     ///   This histogram will be used as a template for all histogram objects
543     ///   in this time series.
544     using THistogram = CHistogram<TValue,TScale,TCounter>;
545     CHistogramTimeSeries(THistogram& model_histogram);
546 
547     /// Add value to the data distribution.
548     /// Try to find an appropriate bin for a specified value and increment its counter by 1.
549     /// @sa CHistogram::Add()
550     void Add(TValue value);
551 
552     /// Merge the most recent (now active) histogram data into the time series.
553     /// This method is generally supposed to be called periodically, thus
554     /// defining the unit of time (tick) as far as this API is concerned.
555     void Rotate();
556 
557     /// Reset to the initial state
558     void Reset();
559 
560     /// Type of the unit of time
561     using TTicks = unsigned int;
562 
563     /// A histograms which covers a certain number of ticks
564     struct STimeBin {
565         // Copy constructor and assignment operator should be passed with 'const'
566         // qualifier to operate on list<STimeBin>. But Clone() is not 'const'
567         // due optional protection, so we use const_cast<> here. It is safe,
568         // because Clone() change internal mutex only.
STimeBinCHistogramTimeSeries::STimeBin569         STimeBin(const STimeBin& other) :
570             histogram(other.histogram.Clone(THistogram::eCloneAll)),
571             n_ticks(other.n_ticks)
572         {}
operator =CHistogramTimeSeries::STimeBin573         STimeBin& operator=(const STimeBin& other)
574         {
575             histogram = other.histogram.Clone(THistogram::eCloneAll);
576             n_ticks = other.n_ticks;
577         }
STimeBinCHistogramTimeSeries::STimeBin578         STimeBin(THistogram&& h, TTicks t) :
579             histogram(std::move(h)), n_ticks(t)
580         {}
581 
582         THistogram histogram;  ///< Histogram for the ticks
583         TTicks     n_ticks;    ///< Number of ticks in this histogram
584     };
585     /// Type of the series of histograms
586     using TTimeBins = list<STimeBin>;
587 
588     /// Histograms -- in the order from the most recent to the least recent
589     TTimeBins GetHistograms() const;
590 
591     /// Number of ticks the histogram series has handled.
592     /// Initially the number of ticks is zero.
GetCurrentTick(void) const593     TTicks GetCurrentTick(void) const { return m_CurrentTick; }
594 
595 private:
596     void x_AppendBin(const THistogram& model_histogram, TTicks n_ticks);
597     void x_Shift(size_t index, typename TTimeBins::iterator current_it);
598 
599 private:
600     TTimeBins           m_TimeBins;
601     mutable std::mutex  m_Mutex;         // for Rotate() and GetHistograms()
602     TTicks              m_CurrentTick;
603 };
604 
605 
606 /* @} */
607 
608 
609 //////////////////////////////////////////////////////////////////////////////
610 //
611 // Inline
612 //
613 
614 template <typename TValue, typename TScale, typename TCounter>
CHistogram(TValue min_value,TValue max_value,unsigned n_bins,EScaleType scale_type,EScaleView scale_view)615 CHistogram<TValue, TScale, TCounter>::CHistogram
616 (
617     TValue      min_value,
618     TValue      max_value,
619     unsigned    n_bins,
620     EScaleType  scale_type,
621     EScaleView  scale_view
622 )
623     : m_Min(min_value), m_Max(max_value), m_NumBins(n_bins), m_IsMT(false)
624 {
625     if ( m_Min > m_Max ) {
626         NCBI_THROW(CCoreException, eInvalidArg, "Minimum value cannot exceed maximum value");
627     }
628     if ( !m_NumBins ) {
629         NCBI_THROW(CCoreException, eInvalidArg, "Number of bins cannot be zero");
630     }
631     // Allocate memory
632     m_Starts.reset(new TScale[m_NumBins]);
633     m_Counters.reset(new TCounter[m_NumBins]);
634 
635     x_CalculateBins(m_Min, m_Max, 0, m_NumBins, scale_type, scale_view);
636     // Reset counters
637     x_Reset();
638 }
639 
640 
641 template <typename TValue, typename TScale, typename TCounter>
642 void
AddLeftScale(TValue min_value,unsigned n_bins,EScaleType scale_type)643 CHistogram<TValue, TScale, TCounter>::AddLeftScale(
644     TValue min_value, unsigned n_bins, EScaleType scale_type)
645 {
646     MT_Lock();
647 
648     if ( min_value >= m_Min ) {
649         MT_Unlock();
650         NCBI_THROW(CCoreException, eInvalidArg, "New minimum value cannot exceed minimum value for the histogram");
651     }
652     if ( !n_bins ) {
653         MT_Unlock();
654         NCBI_THROW(CCoreException, eInvalidArg, "Number of bins cannot be zero");
655     }
656     if (m_Count + m_LowerAnomalyCount + m_UpperAnomalyCount) {
657         MT_Unlock();
658         NCBI_THROW(CCoreException, eInvalidArg, "Please call AddLeftScale() before Add()");
659     }
660     unsigned n_prev = m_NumBins;
661     m_NumBins += n_bins;
662 
663     // Reallocate memory for starting positions and counters
664 
665     std::unique_ptr<TScale[]> tmp_starts(new TScale[m_NumBins]);
666     memcpy(tmp_starts.get() + n_bins, m_Starts.get(), sizeof(TScale) * n_prev);
667     m_Starts.swap(tmp_starts);
668     m_Counters.reset(new TCounter[m_NumBins]);
669     memset(m_Counters.get(), 0, m_NumBins * sizeof(TCounter));
670 
671     // Calculate scale for newly added bins: from right to left
672     x_CalculateBins(m_Min, min_value, n_bins-1, n_bins, scale_type, eMonotonic);
673     m_Min = min_value;
674 
675     MT_Unlock();
676 }
677 
678 
679 template <typename TValue, typename TScale, typename TCounter>
680 void
AddRightScale(TValue max_value,unsigned n_bins,EScaleType scale_type)681 CHistogram<TValue, TScale, TCounter>::AddRightScale(
682     TValue max_value, unsigned n_bins, EScaleType scale_type)
683 {
684     MT_Lock();
685 
686     if ( max_value <= m_Max ) {
687         MT_Unlock();
688         NCBI_THROW(CCoreException, eInvalidArg, "New maximum value cannot be less than a maximum value for the histogram");
689     }
690     if ( !n_bins ) {
691         MT_Unlock();
692         NCBI_THROW(CCoreException, eInvalidArg, "Number of bins cannot be zero");
693     }
694     if (m_Count + m_LowerAnomalyCount + m_UpperAnomalyCount) {
695         MT_Unlock();
696         NCBI_THROW(CCoreException, eInvalidArg, "Please call AddRightScale() before Add()");
697     }
698     unsigned n_prev = m_NumBins;
699     m_NumBins += n_bins;
700 
701     // Reallocate memory for starting positions and counters
702 
703     std::unique_ptr<TScale[]> tmp_starts(new TScale[m_NumBins]);
704     memcpy(tmp_starts.get(), m_Starts.get(), sizeof(TScale) * n_prev);
705     m_Starts.swap(tmp_starts);
706     m_Counters.reset(new TCounter[m_NumBins]);
707     memset(m_Counters.get(), 0, m_NumBins * sizeof(TCounter));
708 
709     // Calculate scale for newly added bins: from left to right
710     x_CalculateBins(m_Max, max_value, n_prev, n_bins, scale_type, eMonotonic);
711     m_Max = max_value;
712 
713     MT_Unlock();
714 }
715 
716 
717 template <typename TValue, typename TScale, typename TCounter>
718 unsigned
EstimateNumberOfBins(size_t n,EEstimateNumberOfBinsRule rule)719 CHistogram<TValue, TScale, TCounter>::EstimateNumberOfBins(size_t n, EEstimateNumberOfBinsRule rule)
720 {
721     switch (rule) {
722     case eSquareRoot:
723         return (unsigned) ceil(sqrt(n));
724     case eJuran:
725         // It have no info for n < 20, but 5 is a reasonable minimum
726         if (n <    20) return  5;
727         if (n <=   50) return  6;
728         if (n <=  100) return  7;
729         if (n <=  200) return  8;
730         if (n <=  500) return  9;
731         if (n <= 1000) return 10;
732         return 11;  // handbook: 11-20  for 1000+ observations
733     case eSturge:
734         return 1 + (unsigned) ceil(log2(n));
735     case eRice:
736         return (unsigned) ceil(pow(n, double(1)/3));
737     }
738     _TROUBLE;
739     // unreachable, just to avoid warning
740     return 0;
741 }
742 
743 
744 template <typename TValue, typename TScale, typename TCounter>
745 void
Reset(void)746 CHistogram<TValue, TScale, TCounter>::Reset(void)
747 {
748     MT_Lock();
749     x_Reset();
750     MT_Unlock();
751 }
752 
753 
754 template <typename TValue, typename TScale, typename TCounter>
755 void
x_Reset(void)756 CHistogram<TValue, TScale, TCounter>::x_Reset(void)
757 {
758     // Reset counters
759     m_Count = m_LowerAnomalyCount = m_UpperAnomalyCount = 0;
760     // Reset bins
761     memset(m_Counters.get(), 0, m_NumBins * sizeof(TCounter));
762     // Reset sum (it can be any type, so use default constructor)
763     m_Sum = {};
764 }
765 
766 
767 template <typename TValue, typename TScale, typename TCounter>
768 void
x_Add(TValue value)769 CHistogram<TValue, TScale, TCounter>::x_Add(TValue value)
770 {
771     if (value < m_Min) {
772         m_LowerAnomalyCount++;
773         return;
774     }
775     if (value > m_Max) {
776         m_UpperAnomalyCount++;
777         return;
778     }
779     if (m_NumBins < 50) {
780         x_AddLinear(value);
781         return;
782     }
783     x_AddBisection(value);
784 }
785 
786 
787 template <typename TValue, typename TScale, typename TCounter>
788 void
x_AddLinear(TValue value)789 CHistogram<TValue, TScale, TCounter>::x_AddLinear(TValue value)
790 {
791     size_t i = 1;
792     while (i < m_NumBins) {
793         if (value < m_Starts[i]) {
794             break;
795         }
796         i++;
797     }
798     // The current bin with index 'i' have a greater value, so put value into the previous bin
799     m_Counters[i-1]++;
800     m_Count++;
801 }
802 
803 
804 template <typename TValue, typename TScale, typename TCounter>
805 void
x_AddBisection(TValue value)806 CHistogram<TValue, TScale, TCounter>::x_AddBisection(TValue value)
807 {
808     // Bisection search
809 
810     size_t left = 0, right = m_NumBins;
811     size_t i = 0, d;
812 
813     while (d = (right - left), d > 1) {
814         i = left + d / 2;
815         if (value < m_Starts[i]) {
816             right = i;
817         } else {
818             left = i;
819         }
820     }
821     // Algorithm can finish on the current or next bin, depending on even/odd number of bins
822     if (value < m_Starts[i]) {
823         i--;
824     }
825     m_Counters[i]++;
826     m_Count++;
827 }
828 
829 
830 template <typename TValue, typename TScale, typename TCounter>
831 void
x_CalculateBins(TScale start_value,TScale end_value,unsigned pos,unsigned n,EScaleType scale_type,EScaleView scale_view)832 CHistogram<TValue, TScale, TCounter>::x_CalculateBins
833 (
834     TScale      start_value,
835     TScale      end_value,
836     unsigned    pos,
837     unsigned    n,
838     EScaleType  scale_type,
839     EScaleView  scale_view
840 )
841 {
842     const char* errmsg_step = "Impossible to calculate scale step, please change TScale type, range, or number of bins";
843     const char* errmsg_dup  = "Impossible to calculate scales bin starting position, please change TScale type, range, or number of bins";
844 
845     TScale* arr = m_Starts.get();
846 
847     if (scale_type == eLinear) {
848         // Special processing for linear scales to account for
849         // an integer TScale type and calculation truncation.
850         TScale step = (max(start_value, end_value) - min(start_value, end_value)) / n;
851         if ( step == 0 ) {
852             NCBI_THROW(CCoreException, eCore, errmsg_step);
853         }
854         x_CalculateBinsLinear(start_value, end_value, step, arr, pos, n, scale_view);
855         return;
856     }
857 
858     // Log scales only
859     _ASSERT( scale_type == eLog   ||
860              scale_type == eLog2  ||
861              scale_type == eLog10 );
862 
863     x_CalculateBinsLog(start_value, end_value, arr, pos, n, scale_type, scale_view);
864 
865     // Check that we don't have bins with the same starting positions.
866     // This can happens mostly if TScale is an integer type due truncation.
867     // Linear scale doesn't need this, because we check 'step' instead (see above).
868     // Checking whole combined scale.
869     //
870     for (unsigned i = 1; i < m_NumBins; i++) {
871         if ( arr[i] <= arr[i-1] ) {
872             NCBI_THROW(CCoreException, eCore, errmsg_dup);
873         }
874     }
875     return;
876 }
877 
878 
879 template <typename TValue, typename TScale, typename TCounter>
880 void
x_CalculateBinsLinear(TScale start_value,TScale end_value,TScale step,TScale * arr,unsigned pos,unsigned n,EScaleView scale_view)881 CHistogram<TValue, TScale, TCounter>::x_CalculateBinsLinear
882 (
883     TScale      start_value,
884     TScale      end_value,
885     TScale      step,
886     TScale*     arr,
887     unsigned    pos,
888     unsigned    n,
889     EScaleView  scale_view
890 )
891 {
892     if (scale_view == eSymmetrical) {
893         // Account for an integer TScale type and calculation truncation.
894         // If TScale type doesn't allow integer division to the specified
895         // number of bins without residue, some bins will be larger than others.
896         // For monotonic view this will be always a very last bin at the end of
897         // the scale, but for the symmetrical view it depends on parity of 'n'.
898         // For even number of bins most left and right bins can be larger.
899         // For odd number of bins the center bin can be larger than other.
900 
901         if (n % 2 == 0) {
902             // Calculate from the center to both sides with step 'step'
903             TScale median = start_value + (end_value - start_value)/2;
904             x_CalculateBinsLinear(median, start_value, step, arr, pos + n/2 - 1, n/2, eMonotonic);
905             x_CalculateBinsLinear(median, end_value,   step, arr, pos + n/2,     n/2, eMonotonic);
906             // We already know value for the most left bin, assign it
907             // implicitly for such symmetrical scales to have bigger
908             // starting bin. We got bigger ending bin automatically.
909             arr[pos] = start_value;
910 
911         } else {
912             // Calculate from both sides to the center with step 'step'
913             // 2nd parameter doesn't matter, it just show direction.
914             x_CalculateBinsLinear(start_value, end_value, step, arr, pos, n/2 + 1, eMonotonic);
915             x_CalculateBinsLinear(end_value, start_value, step, arr, n-1, n/2,     eMonotonic);
916         }
917         return;
918     }
919 
920     // Calculate from left to right
921     if (start_value <= end_value) {
922         for (unsigned i = 0; i < n; i++) {
923             arr[pos+i] = start_value + step*i;
924         }
925     }
926     // Calculate from right to left
927     else {
928         for (unsigned i = 1; i <= n; i++) {
929             arr[pos+1-i] = start_value - step*i;
930         }
931     }
932 }
933 
934 
935 template <typename TValue, typename TScale, typename TCounter>
936 void
x_CalculateBinsLog(TScale start_value,TScale end_value,TScale * arr,unsigned pos,unsigned n,EScaleType scale_type,EScaleView scale_view)937 CHistogram<TValue, TScale, TCounter>::x_CalculateBinsLog
938 (
939     TScale      start_value,
940     TScale      end_value,
941     TScale*     arr,
942     unsigned    pos,
943     unsigned    n,
944     EScaleType  scale_type,
945     EScaleView  scale_view
946 )
947 {
948     if (scale_view == eSymmetrical) {
949         // Account for an integer TScale type and calculation truncation.
950         // If TScale type doesn't allow integer division to the specified
951         // number of bins without residue, some bins will be larger than others.
952         // For monotonic view this will be always a very last bin at the end of
953         // the scale, but for the symmetrical view it depends on parity of 'n'.
954         // For even number of bins most left and right bins can be larger.
955         // For odd number of bins the center bin can be larger than other.
956 
957         TScale median = start_value + (end_value - start_value)/2;
958         // Set most left bin directly, we already know its value
959         arr[pos] = start_value;
960         unsigned n2 = n/2;
961 
962         if (n % 2 == 0) {
963             // Calculate from the center to both sides
964             x_CalculateBinsLog(median, start_value, arr, pos + n2 - 1, n2, scale_type, eMonotonic);
965             x_CalculateBinsLog(median, end_value,   arr, pos + n2    , n2, scale_type, eMonotonic);
966         } else {
967             if (n == 1) {
968                 return;
969             }
970             _ASSERT(n > 1);
971 
972             // Median is located in the middle of the center bin.
973             // Too calculate starting point for this center bin end its ending point,
974             // by the way also a starting point for the next bin, we double the number of bins
975             // on each side of the median, and calculate values for each half-mark on a scale.
976             // After that copy values for needed half-marks only.
977 
978             std::unique_ptr<TScale[]> tmp(new TScale[n*2]);
979             TScale* tmp_arr = tmp.get();
980             // Calculate from the center to both sides (n % 2 == 0 case)
981             x_CalculateBinsLog(median, start_value, tmp_arr, n - 1, n, scale_type, eMonotonic);
982             x_CalculateBinsLog(median, end_value,   tmp_arr, n    , n, scale_type, eMonotonic);
983 
984             // For central bin: copy values for 2 half-marks from both sides of the center
985             arr[pos + n2]     = tmp_arr[n - 1];
986             arr[pos + n2 + 1] = tmp_arr[n + 1];
987 
988             // For other bins: copy values for every second half-mark from 2 half-marks above
989             for (unsigned i = 0; i < n2; i++) {
990                 arr[pos + i] = tmp_arr[i*2];
991             }
992             for (unsigned i=2, j=n+3; i <= n2; i++, j+=2) {
993                 arr[pos + n2 + i] = tmp_arr[j];
994             }
995         }
996         return;
997     }
998 
999     // Logarithm functions cannot operate on a negative values or 0.
1000     // so it will be necessary to transform data range. One of the methods
1001     // is too shift data to start it from 1. This method also helps to normalize
1002     // logarithmic scale, and we will use it for all data, even positive.
1003     // So, we calculate logarithms scale for a range of values [1:D+1],
1004     // where D is a distance between starting and ending points.
1005     // Because x^0 = 1, we need calculate only the maximum scale value log(D+1),
1006     // And every scale mark in between with the step log(D+1)/N.
1007     // Inverse function helps to convert every scale mark back to a bin
1008     // starting position.
1009 
1010     // Calculate from left to right
1011     if (start_value <= end_value) {
1012         TScale step = x_Func(scale_type, end_value - start_value + 1) / n;
1013         for (unsigned i = 1; i < n; i++) {
1014             // Calculate data value for a given scale mark
1015             arr[pos+i] = start_value + x_FuncInverse(scale_type, step*i) - 1;
1016         }
1017         // Set most left bin directly, we already know its value,
1018         // to avoid possible rounding errors.
1019         arr[pos] = start_value;
1020     }
1021     // Calculate from right to left
1022     else {
1023         TScale step = x_Func(scale_type, start_value - end_value + 1) / n;
1024         for (unsigned i = 1; i < n; i++) {
1025             // Calculate data value for a given scale mark
1026             arr[pos+1-i] = start_value - x_FuncInverse(scale_type, step*i) + 1;
1027         }
1028         // Set most left bin directly, we already know its value,
1029         // to avoid possible rounding errors.
1030         arr[pos+1-n] = end_value;
1031     }
1032 }
1033 
1034 
1035 template <typename TValue, typename TScale, typename TCounter>
1036 TScale
1037 CHistogram<TValue, TScale, TCounter>::x_Func(EScaleType scale_type, TScale value)
1038 {
1039     switch (scale_type) {
1040     case eLog:
1041         return (TScale)log(value);
1042     case eLog2:
1043         return (TScale)log2(value);
1044     case eLog10:
1045         return (TScale)log10(value);
1046     case eLinear:
1047         // It is last, because eLinear has served by x_CalculateBinsLinear() directly
1048         return value;
1049     }
1050     _TROUBLE;
1051     // unreachable, just to avoid warning
1052     return value;
1053 }
1054 
1055 
1056 template <typename TValue, typename TScale, typename TCounter>
1057 TScale
1058 CHistogram<TValue, TScale, TCounter>::x_FuncInverse(EScaleType scale_type, TScale scale_value)
1059 {
1060     switch (scale_type) {
1061     case eLog:
1062         return (TScale)pow(M_E, scale_value);
1063     case eLog2:
1064         return (TScale)pow(2, scale_value);
1065     case eLog10:
1066         return (TScale)pow(10, scale_value);
1067     case eLinear:
1068         // It is last, because eLinear has served by x_CalculateBinsLinear() directly
1069         return scale_value;
1070     }
1071     _TROUBLE;
1072     // unreachable, just to avoid warning
1073     return scale_value;
1074 }
1075 
1076 
1077 // Move semantics
1078 
1079 template <typename TValue, typename TScale, typename TCounter>
CHistogram()1080 CHistogram<TValue, TScale, TCounter>::CHistogram()
1081     : m_NumBins(0), m_IsMT(false)
1082 {
1083     // Reset counters
1084     x_Reset();
1085 }
1086 
1087 
1088 template <typename TValue, typename TScale, typename TCounter>
CHistogram(CHistogram && other)1089 CHistogram<TValue, TScale, TCounter>::CHistogram(CHistogram&& other)
1090 {
1091     if (this == &other) return;
1092     other.MT_Lock();
1093     x_MoveFrom(other);
1094     other.MT_Unlock();
1095 };
1096 
1097 
1098 template <typename TValue, typename TScale, typename TCounter>
1099 CHistogram<TValue, TScale, TCounter>&
operator =(CHistogram && other)1100 CHistogram<TValue, TScale, TCounter>::operator=(CHistogram&& other)
1101 {
1102     if (this == &other) return *this;
1103     other.MT_Lock();
1104     x_MoveFrom(other);
1105     other.MT_Unlock();
1106     return *this;
1107  };
1108 
1109 
1110 template <typename TValue, typename TScale, typename TCounter>
1111 CHistogram<TValue, TScale, TCounter>
Clone(EClone how) const1112 CHistogram<TValue, TScale, TCounter>::Clone(EClone how) const
1113 {
1114     MT_Lock();
1115 
1116     CHistogram h;
1117     h.m_Min     = m_Min;
1118     h.m_Max     = m_Max;
1119     h.m_NumBins = m_NumBins;
1120     h.m_IsMT    = m_IsMT;
1121 
1122     h.m_Starts.reset(new TScale[m_NumBins]);
1123     h.m_Counters.reset(new TCounter[m_NumBins]);
1124     memcpy(h.m_Starts.get(), m_Starts.get(), sizeof(TScale) * m_NumBins);
1125 
1126     switch (how) {
1127     case eCloneStructureOnly:
1128         // Reset counters
1129         h.m_Sum = 0;
1130         h.m_Count = 0;
1131         h.m_LowerAnomalyCount = 0;
1132         h.m_UpperAnomalyCount = 0;
1133         memset(h.m_Counters.get(), 0, m_NumBins * sizeof(TCounter));
1134         break;
1135     case eCloneAll:
1136         // Copy counters
1137         h.m_Sum = m_Sum;
1138         h.m_Count = m_Count;
1139         h.m_LowerAnomalyCount = m_LowerAnomalyCount;
1140         h.m_UpperAnomalyCount = m_UpperAnomalyCount;
1141         memcpy(h.m_Counters.get(), m_Counters.get(), sizeof(TCounter) * m_NumBins);
1142         break;
1143     default:
1144         MT_Unlock();
1145         _TROUBLE;
1146     }
1147     MT_Unlock();
1148     return h;
1149 }
1150 
1151 
1152 template <typename TValue, typename TScale, typename TCounter>
1153 void
x_MoveFrom(CHistogram & other)1154 CHistogram<TValue, TScale, TCounter>::x_MoveFrom(CHistogram& other)
1155 {
1156     m_Min               = other.m_Min;
1157     m_Max               = other.m_Max;
1158     m_NumBins           = other.m_NumBins;
1159     m_Sum               = other.m_Sum;
1160     m_Count             = other.m_Count;
1161     m_LowerAnomalyCount = other.m_LowerAnomalyCount;
1162     m_UpperAnomalyCount = other.m_UpperAnomalyCount;
1163     m_IsMT              = other.m_IsMT;
1164 
1165     m_Starts.reset(other.m_Starts.release());
1166     m_Counters.reset(other.m_Counters.release());
1167     other.m_NumBins = 0;
1168     other.m_Sum = 0;
1169 
1170     return;
1171 }
1172 
1173 
1174 template <typename TValue, typename TScale, typename TCounter>
1175 bool
x_IsEqual(TScale a,TScale b)1176 CHistogram<TValue, TScale, TCounter>::x_IsEqual(TScale a, TScale b)
1177 {
1178     if (std::numeric_limits<TScale>::is_integer) {
1179         return a == b;
1180     }
1181     // Approximately check for floating numbers
1182     return std::fabs(a - b) < 0.0001;
1183 }
1184 
1185 
1186 template <typename TValue, typename TScale, typename TCounter>
1187 void
AddCountersFrom(const CHistogram & other)1188 CHistogram<TValue, TScale, TCounter>::AddCountersFrom(const CHistogram& other)
1189 {
1190     if (this == &other) return;
1191 
1192     MT_Lock();
1193     other.MT_Lock();
1194     try {
1195         x_AddCountersFrom(const_cast<CHistogram&>(other));
1196     }
1197     catch (...) {
1198         MT_Unlock();
1199         other.MT_Unlock();
1200         throw;
1201     }
1202     MT_Unlock();
1203     other.MT_Unlock();
1204 }
1205 
1206 
1207 template <typename TValue, typename TScale, typename TCounter>
1208 void
StealCountersFrom(CHistogram & other)1209 CHistogram<TValue, TScale, TCounter>::StealCountersFrom(CHistogram& other)
1210 {
1211     if (this == &other) return;
1212 
1213     MT_Lock();
1214     other.MT_Lock();
1215     try {
1216         x_AddCountersFrom(other);
1217         other.x_Reset();
1218     }
1219     catch (...) {
1220         MT_Unlock();
1221         other.MT_Unlock();
1222         throw;
1223     }
1224     MT_Unlock();
1225     other.MT_Unlock();
1226 }
1227 
1228 
1229 template <typename TValue, typename TScale, typename TCounter>
1230 void
x_AddCountersFrom(CHistogram & other)1231 CHistogram<TValue, TScale, TCounter>::x_AddCountersFrom(CHistogram& other)
1232 {
1233     // Check structure
1234     if ( m_NumBins != other.m_NumBins  ||
1235         !x_IsEqual(m_Min, other.m_Min) ||
1236         !x_IsEqual(m_Max, other.m_Max)
1237         ) {
1238         NCBI_THROW(CCoreException, eInvalidArg, "Histograms have different structure");
1239     }
1240     // Check scale
1241     TScale* starts_cur   = m_Starts.get();
1242     TScale* starts_other = other.m_Starts.get();
1243     for (unsigned i = 0; i < m_NumBins; i++) {
1244         if (!x_IsEqual(starts_cur[i], starts_other[i])) {
1245             NCBI_THROW(CCoreException, eInvalidArg, "Histograms have different starting positions");
1246         }
1247     }
1248     // Add counters
1249     TCounter* counters_cur   = m_Counters.get();
1250     TCounter* counters_other = other.m_Counters.get();
1251     for (unsigned i = 0; i < m_NumBins; i++) {
1252         counters_cur[i] += counters_other[i];
1253     }
1254     m_Count += other.m_Count;
1255     m_LowerAnomalyCount += other.m_LowerAnomalyCount;
1256     m_UpperAnomalyCount += other.m_UpperAnomalyCount;
1257     m_Sum += other.m_Sum;
1258 }
1259 
1260 
1261 
1262 //============================================================================
1263 //
1264 // CHistogramTimeSeries
1265 //
1266 //============================================================================
1267 
1268 template <typename TValue, typename TScale, typename TCounter>
CHistogramTimeSeries(THistogram & model_histogram)1269 CHistogramTimeSeries<TValue, TScale, TCounter>::CHistogramTimeSeries(THistogram& model_histogram)
1270     : m_CurrentTick(0)
1271 {
1272     // Need to create the first item in the list
1273     x_AppendBin(model_histogram, 1);
1274 }
1275 
1276 
1277 template <typename TValue, typename TScale, typename TCounter>
1278 void
Add(TValue value)1279 CHistogramTimeSeries<TValue, TScale, TCounter>::Add(TValue value)
1280 {
1281     // The first bin always exists
1282     m_TimeBins.front().histogram.Add(value);
1283 }
1284 
1285 
1286 template <typename TValue, typename TScale, typename TCounter>
1287 void
Rotate()1288 CHistogramTimeSeries<TValue, TScale, TCounter>::Rotate()
1289 {
1290     ++m_CurrentTick;
1291 
1292     m_Mutex.lock();
1293     x_Shift(0, m_TimeBins.begin());
1294     m_Mutex.unlock();
1295 }
1296 
1297 
1298 template <typename TValue, typename TScale, typename TCounter>
1299 void
Reset()1300 CHistogramTimeSeries<TValue, TScale, TCounter>::Reset()
1301 {
1302     m_Mutex.lock();
1303     m_CurrentTick = 0;
1304     while (m_TimeBins.size() > 1)
1305         m_TimeBins.pop_back();
1306     m_TimeBins.front().histogram.Reset();
1307     m_TimeBins.front().n_ticks = 1;     // Just in case
1308     m_Mutex.unlock();
1309 }
1310 
1311 
1312 template <typename TValue, typename TScale, typename TCounter>
1313 void
x_Shift(size_t index,typename ncbi::CHistogramTimeSeries<TValue,TScale,TCounter>::TTimeBins::iterator current_it)1314 CHistogramTimeSeries<TValue, TScale, TCounter>::x_Shift(size_t index,
1315        typename ncbi::CHistogramTimeSeries<TValue, TScale, TCounter>::TTimeBins::iterator current_it)
1316 {
1317     if (m_TimeBins.size() <= index + 1) {
1318         // There is not enough bins. Need to add one
1319         x_AppendBin(m_TimeBins.front().histogram, TTicks(1) << index);
1320 
1321         // Move from index to index + 1
1322         auto next_it = current_it;
1323         ++next_it;
1324         next_it->histogram.StealCountersFrom(current_it->histogram);
1325 
1326         if (index != 0)
1327             current_it->n_ticks /= 2;
1328         return;
1329     }
1330 
1331     auto next_it = current_it;
1332     ++next_it;
1333 
1334     if (next_it->n_ticks == TTicks(1) << index) {
1335         // Half full bin; make it full and just accumulate
1336         next_it->n_ticks *= 2;
1337         next_it->histogram.StealCountersFrom(current_it->histogram);
1338         if (index != 0)
1339             current_it->n_ticks /= 2;
1340         return;
1341     }
1342 
1343     // The next bin is full; need to move further
1344     x_Shift(index + 1, next_it);
1345     // Move to the vacant next
1346     next_it->histogram.StealCountersFrom(current_it->histogram);
1347     // The current one is a half now
1348     if (index != 0)
1349         current_it->n_ticks /= 2;
1350 }
1351 
1352 
1353 template <typename TValue, typename TScale, typename TCounter>
1354 typename CHistogramTimeSeries<TValue, TScale, TCounter>::TTimeBins
GetHistograms() const1355 CHistogramTimeSeries<TValue, TScale, TCounter>::GetHistograms() const
1356 {
1357     m_Mutex.lock();
1358     TTimeBins ret = m_TimeBins;
1359     m_Mutex.unlock();
1360     return ret;
1361 }
1362 
1363 
1364 template <typename TValue, typename TScale, typename TCounter>
1365 void
x_AppendBin(const THistogram & model_histogram,TTicks n_ticks)1366 CHistogramTimeSeries<TValue, TScale, TCounter>::x_AppendBin(
1367     const THistogram& model_histogram, TTicks n_ticks)
1368 {
1369     m_TimeBins.push_back(
1370             STimeBin{model_histogram.Clone(THistogram::eCloneStructureOnly),
1371                      n_ticks} );
1372 }
1373 
1374 
1375 END_NCBI_SCOPE
1376 
1377 #endif  /* DATA_HISTOGRAM__HPP */
1378