1 #ifndef DATA_HISTOGRAM__HPP
2 #define DATA_HISTOGRAM__HPP
3
4 /* $Id: data_histogram.hpp 623452 2021-01-13 17:39:53Z ivanov $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Vladimir Ivanov
30 *
31 */
32
33 /// @file data_histogram.hpp
34 /// Frequency histogram for data distribution of the numerical samples.
35
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbistl.hpp>
38
39 #define _USE_MATH_DEFINES // to define math constants in math.h
40 #include <math.h> // log/pow functions
41 #include <cmath> // additional overloads for pow()
42 #include <memory>
43 #include <mutex>
44
45
46 /** @addtogroup Statistics
47 *
48 * @{
49 */
50
51 BEGIN_NCBI_SCOPE
52
53
54
55 /////////////////////////////////////////////////////////////////////////////
56 ///
57 /// Helper types for CHistogram<>::GetSum() support
58 //
59
60 // if T doesn't have 'foo' method (operator+= in our case) with the signature
61 // that allows to compile the bellow expression then instantiating this template
62 // is Substitution Failure (SF) which Is Not An Error (INAE) if this happens
63 // during overload resolution
64 template <typename T>
65 using T_HistogramValueTypeHavePlus = decltype((T&)(std::declval<T>().operator+=(std::declval<const T&>())));
66
67 // Checks that T is arithmetic type or have operator+= implemented
68 template <typename T>
g_HistogramValueTypeHavePlus()69 constexpr bool g_HistogramValueTypeHavePlus()
70 {
71 return cxx_is_supported<T_HistogramValueTypeHavePlus, T>::value || std::is_arithmetic<T>::value;
72 }
73
74
75 /////////////////////////////////////////////////////////////////////////////
76 ///
77 /// CHistogram -- collect the distribution of the numerical data samples.
78 ///
79 /// CHistogram <TValue, TScale, TCounter>
80 ///
81 /// TValue - type of values that distribution will be collected.
82 /// This can be any numerical or user defined type, if it allow
83 /// comparison and can be converted to scale type TScale, see note.
84 ///
85 /// TScale - numerical type used to calculate bins starting positions and sizes.
86 /// For integer types some scales, logarithmic for examples, will lead
87 /// to truncating values and creating unequal bins, so TScale
88 /// should be float/double based.
89 ///
90 /// TCounter - type to store counters. Usually this is a positive integer type:
91 /// int, unsigned int, long, size_t, Uint8 and etc.
92 ///
93 /// @note
94 /// TValue and TScale types should be equal, or allow comparison and conversion
95 /// between them. Any types can be used as TValue if it have:
96 /// = default constructor: TValue() -- for initialization;
97 /// - operator TScale() const -- to convert to scale type TScale;
98 /// - bool operator >(const TValue&) -- for comparison.
99 ///
100 /// @note
101 /// CHistogram is not MT safe by default, it is intended to be created
102 /// and collect data withing the same thread. If you want to use the same
103 /// CHistogram class object across multiple threads, you need to provide
104 /// some sort of MT protection to guard access to the histogram object,
105 /// or enable internal MT protection, and call CHistogram::EnableMT()
106 /// immediately after creating a histogram object.
107
108
109 template <typename TValue = int, typename TScale = TValue, typename TCounter = Uint8>
110 class CHistogram
111 {
112 public:
113 /// Scale type.
114 /// Most common is a linear scale, where each bin have the same size.
115 /// It is good if you have a limited range, but sometimes values are distributed
116 /// very wide, and if you want to count all of them, but concentrate on a
117 /// some range with most significant values, logarithmic scales helps a lot.
118 /// For logarithmic scales each next bin have a greater size.
119 /// Bins size increasing depends on a logarithmic base (used scale type).
120 ///
121 enum EScaleType {
122 eLinear = 1, ///< Arithmetic or linear scale
123 eLog, ///< Natural logarithmic scale with a base e ~ 2.72
124 eLog2, ///< Binary logarithmic scale with a base 2
125 eLog10 ///< Common logarithmic scale with a base 10
126 };
127
128 /// Methods to build bins for a specified scale.
129 /// Applies to the main scale, defined in constructor, only.
130 enum EScaleView {
131 /// Use specified scale method to calculate bins sizes from a minimum
132 /// to a maximum value.
133 eMonotonic,
134 /// Determine a mean for a specified value range and calculates
135 /// bins sizes using specified scale to both sides from it, symmetrically.
136 eSymmetrical
137 };
138
139
140 /////////////////////////////////////////////////////////////////////////
141 // Initialization
142
143 /// Constructor.
144 ///
145 /// @param min_value
146 /// Minimum allowed value in range [min_value, max_value].
147 /// @param max_value
148 /// Maximum allowed value in range [min_value, max_value].
149 /// @param n_bins
150 /// Number of bins for the histogram (main scale).
151 /// @param scale_type
152 /// Predefined scale type. Corresponding scale function will be used
153 /// to calculate scale's bin ranges.
154 /// @param scale_view
155 /// Scale view (symmetrical or monotonic).
156 /// @sa EScaleType, EScaleView, AddLeftScale, AddRightScale, Add
157 ///
158 CHistogram(TValue min_value, TValue max_value, unsigned n_bins,
159 EScaleType scale_type = eLinear,
160 EScaleView scale_view = eMonotonic);
161
162 /// Add auxiliary left/right scales.
163 ///
164 /// The CHistogram constructor defines a main scale for some data range.
165 /// It can be very limited to get more precise results for the value distribution.
166 /// You can get the number of hits whose values fall outside the range of the main
167 /// scale using GetLowerAnomalyCount()/GetUpperAnomalyCount() methods,
168 /// but each returns a single number of hits outside of the range.
169 /// The other method is to use auxiliary scale(s) to count the number of hits
170 /// for the less significant range(s). You can add any number of scales from each side.
171 /// So, you can use linear scale with much greater bin sizes, or logarithmic scales,
172 /// that allow to cover a very wide range of data with a limited number of bins.
173 /// See EScaleType for details.
174 ///
175 /// @param min_value
176 /// Minimum allowed value for the auxiliary scale.
177 /// Its maximum value is the same as a minimum value for the main scale,
178 /// or previously added scale.
179 /// @param n_bins
180 /// Number of bins for the auxiliary scale.
181 /// @param scale_type
182 /// Predefined scale type. Corresponding scale function will be used
183 /// to calculate scale's bin ranges.
184 /// @note
185 /// It is not allowed to add left/right auxiliary scales after starting counting hits.
186 /// Please use these methods before Add(), or after Reset().
187 /// @sa CHistogram::CHistogram, AddRightScale, EScaleType, GetLowerAnomalyCount, GetUpperAnomalyCount
188 ///
189 void AddLeftScale (TValue min_value, unsigned n_bins, EScaleType scale_type);
190 void AddRightScale(TValue max_value, unsigned n_bins, EScaleType scale_type);
191
192
193 /////////////////////////////////////////////////////////////////////////
194 // Populating
195
196 /// Sum type: double for all floating points TValue types, int64_t/uint64_t for integral, and TValue otherwise.
197 using TIntegral = typename std::conditional<std::numeric_limits <TValue>::is_signed, int64_t, uint64_t >::type;
198 using TArithmetic = typename std::conditional<std::is_floating_point<TValue>::value, double, TIntegral>::type;
199 using TSum = typename std::conditional<std::is_arithmetic <TValue>::value, TArithmetic, TValue >::type;
200
201 /// Add value to the data distribution.
202 /// Try to find an appropriate bin for a specified value and increase its counter on 1.
203 /// Also, calculates a sum of all added values if TValue type have addition support.
204 /// @sa GetStarts, GetCounters, GetSum
205 ///
206 template <typename V, typename S = TSum,
207 std::enable_if_t<g_HistogramValueTypeHavePlus<S>(), int> = 0>
Add(const V & v)208 void Add(const V& v) {
209 MT_Lock();
210 x_Add(v);
211 // Calculate sum, TSum have operator+=
212 m_Sum += v;
213 MT_Unlock();
214 }
215 template <typename V, typename S = TSum,
216 std::enable_if_t<!g_HistogramValueTypeHavePlus<S>(), int> = 0>
Add(const V & v)217 void Add(const V& v) {
218 MT_Lock();
219 x_Add(v);
220 MT_Unlock();
221 }
222
223 // Return value for a class member, MT safe
224 #define RETURN_MT_SAFE(member) \
225 if (m_IsMT) { \
226 MT_Lock(); \
227 auto v = member; \
228 MT_Unlock(); \
229 return v; \
230 } \
231 return member
232
233 /// Return the sum of all added values.
234 /// @return
235 /// Returned type depends on TValue type, and converts to:
236 /// - double -- for all floating point types;
237 /// - int64_t -- signed integral types;
238 /// - uint64_t -- unsigned integral types;
239 /// - TValue -- for all other types.
240 /// The sum can be calculated for floating, integral types and all other
241 /// TValue types that have 'operator +=' defined.
242 /// If TValue type doesn't have such operator defined, empty value {} is returned.
243 /// Note, compiler can convert this empty {} value to TScale type, because every TValue
244 /// should have 'operator TScale() const' by design.
245 /// @note
246 /// This method doesn't check calculated sum on overflow.
247 /// @sa Add, TSum
GetSum(void) const248 TSum GetSum(void) const { RETURN_MT_SAFE(m_Sum); }
249
250 /// Reset all data counters.
251 void Reset();
252
253 /// Add counters from 'other' histogram to this histogram, 'other' doesn't changes.
254 /// @note Both histograms should have the same structure.
255 /// @sa Clone, Reset, StealCountersFrom
256 void AddCountersFrom(const CHistogram& other);
257
258 /// Add counters from 'other' histogram to this histogram,
259 /// then reset the counters of 'other' histogram.
260 /// @note Both histograms should have the same structure.
261 /// @sa Clone, Reset, AddCountersFrom
262 void StealCountersFrom(CHistogram& other);
263
264
265 /////////////////////////////////////////////////////////////////////////
266 // Structure
267
268 /// Get the lower bound of the combined scale.
GetMin() const269 TValue GetMin() const { return m_Min; }
270
271 /// Get the upper bound of the combined scale.
GetMax() const272 TValue GetMax() const { return m_Max; }
273
274 /// Return the number ot bins on the combined scale.
275 /// @sa GetBinStarts, GetBinCounters, GetBinCountersPtr
GetNumberOfBins() const276 unsigned GetNumberOfBins() const { return m_NumBins; }
277
278 /// Get starting positions for bins on the combined scale.
279 ///
280 /// Populate a vector with starting positions for bins on the combined scale.
281 /// The size of the vector changes to be equal the number of bins.
282 /// @sa GetNumberOfBins, GetBinCounters, GetBinStartsPtr
GetBinStarts(vector<TScale> & positions)283 void GetBinStarts(vector<TScale>& positions) {
284 MT_Lock();
285 positions.resize(m_NumBins);
286 positions.assign(m_Starts.get(), m_Starts.get() + m_NumBins);
287 MT_Unlock();
288 }
289
290 /// Get starting positions for bins on the combined scale (not MT safe).
291 ///
292 /// Returns a pointer to array. The number of bins can be obtained with GetNumberOfBins().
293 /// and a number of a counters in each bin -- with GetBinCounters().
294 /// @warning
295 /// Be aware that any change in the histogram's structure, adding additional scales,
296 /// invalidate all data and pointer itself. This method is not intended to use in MT
297 /// environment. Please use vector version GetBinStarts(vector<TScale>&).
298 /// @sa GetNumberOfBins, GetBinCountersPtr, GetBinStarts, EnableMT
GetBinStartsPtr() const299 const TScale* GetBinStartsPtr() const { return m_Starts.get(); }
300
301
302 /////////////////////////////////////////////////////////////////////////
303 // Results
304
305 /// Get counters for the combined scale's bins.
306 ///
307 /// Populate a vector with counters for the combined scale's bins.
308 /// The size of the vector changes to be equal to the number of bins.
309 /// @sa GetNumberOfBins, GetBinStarts, GetBinCountersPtr, Add
GetBinCounters(vector<TCounter> & counters)310 void GetBinCounters(vector<TCounter>& counters) {
311 MT_Lock();
312 counters.resize(m_NumBins);
313 counters.assign(m_Counters.get(), m_Counters.get() + m_NumBins);
314 MT_Unlock();
315 }
316
317 /// Get counters for the combined scale's bins (not MT safe).
318 ///
319 /// Returns a pointer to array. The number of bins can be obtained with GetNumberOfBins().
320 /// @warning
321 /// Be aware that any change in the histogram's structure invalidate all data
322 /// and pointer itself. This method is not intended to use in MT environment.
323 /// Please use vector version GetBinCounters(vector<TCounter>&).
324 /// @sa GetNumberOfBins, GetBinStartsPtr, GetBinCounters, Add, EnableMT
GetBinCountersPtr() const325 const TCounter* GetBinCountersPtr() const { return m_Counters.get(); }
326
327 /// Get total number of hits whose value fell between GetMin() and GetMax().
328 /// The number of hits whose values fall outside that range can be obtained
329 /// using GetLowerAnomalyCount() and GetUpperAnomalyCount() methods.
330 /// @sa GetMin, GetMax, GetLowerAnomalyCount, GetUpperAnomalyCount, GetBinCounters
GetCount() const331 TCounter GetCount() const { RETURN_MT_SAFE(m_Count); }
332
333 /// Get number of hits whose values were less than GetMin().
334 /// @sa GetUpperAnomalyCount, GetCount
GetLowerAnomalyCount() const335 size_t GetLowerAnomalyCount() const {RETURN_MT_SAFE(m_LowerAnomalyCount); }
336
337 /// Get number of hits whose values were greater than GetMax().
338 /// @sa GetLowerAnomalyCount, GetCount
GetUpperAnomalyCount() const339 size_t GetUpperAnomalyCount() const { RETURN_MT_SAFE(m_UpperAnomalyCount); }
340
341
342 /////////////////////////////////////////////////////////////////////////
343 // Helpers
344
345 /// Rules to calculate an estimated numbers of bins on the base of the expected
346 /// number of observations.
347 /// Note, that there is no "best" number of bins, and different bin sizes can reveal
348 /// different features of the data. All these methods generally make strong assumptions
349 /// about the shape of the distribution. Depending on the actual data distribution
350 /// and the goals of the analysis, different bin widths may be appropriate,
351 /// so experimentation is usually needed to determine an appropriate width.
352 /// @note
353 /// All methods applies to the linear scale only, that have a fixed bin sizes.
354 /// @sa
355 /// EstimateNumberOfBins
356 ///
357 enum EEstimateNumberOfBinsRule {
358 /// Square root rule. Used by Excel histograms and many others.
359 /// Default value for EstimateNumberOfBins() as well.
360 eSquareRoot = 0,
361 /// Juran's "Quality Control Handbook" that provide guidelines
362 /// to select the number of bins for histograms.
363 eJuran,
364 /// Herbert Sturge's rule. It works best for continuous data that is
365 /// normally distributed and symmetrical. As long as your data is not skewed,
366 /// using Sturge's rule should give you a nice-looking, easy to read
367 /// histogram that represents the data well.
368 eSturge,
369 /// Rice's rule. Presented as a simple alternative to Sturge's rule.
370 eRice
371 };
372
373 /// Estimate numbers of bins on the base of the expected number of observations 'n'.
374 /// @note
375 /// Applies to the linear scale only.
376 /// @sa
377 /// EEstimateNumberOfBinsRules
378 static unsigned EstimateNumberOfBins(size_t n, EEstimateNumberOfBinsRule rule = 0);
379
380
381 /////////////////////////////////////////////////////////////////////////
382 // Move semantics
383
384 /// Default constructor.
385 ///
386 /// Creates empty object. The object itself is invalid without min/max values,
387 /// and any scale. Should be used with conjunction of Clone() only.
388 /// @sa Clone
389 ///
390 CHistogram(void);
391
392 /// Move constructor.
393 ///
394 /// Move 'other' histogram data to the current object. 'other' became invalid.
395 /// @example
396 /// CHistogram<> h1(min, max, n, type);
397 /// CHistogram<> h2(h1.Clone(how));
398 /// @sa Clone
399 ///
400 CHistogram(CHistogram&& other);
401
402 /// Move assignment operator.
403 ///
404 /// Move 'other' histogram data to the current object. 'other' became invalid.
405 /// @example
406 /// CHistogram<> h1(min, max, n, type);
407 /// CHistogram<> h2;
408 /// h2 = h1.Clone(how);
409 /// @sa Clone
410 ///
411 CHistogram& operator=(CHistogram&& other);
412
413 enum EClone {
414 eCloneAll = 0, ///< Clone whole histogram, with scale and counters
415 eCloneStructureOnly ///< Clone structure only (the counters will be zeroed)
416 };
417
418 /// Clone histogram structure.
419 ///
420 /// Creates a copy of the histogram structure depending on clone method.
421 /// By default it is a whole copy, but you can clone histogram's structure only, without counters.
422 ///
423 CHistogram Clone(EClone how = eCloneAll) const;
424
425
426 /////////////////////////////////////////////////////////////////////////
427 // Optional MT protection
428
429 /// Add MT protection to histogram.
430 ///
431 /// By default CHistogram is not MT protected and intended to be created
432 /// and used withing the same thread. Calling this method add MT protection
433 /// on adding additional scales, adding and resetting values, cloning and
434 /// moving histograms. Please call this method immediately after creating
435 /// each histogram, before accessing it from any other thread, or deadlock can occurs.
436 ///
EnableMT(void)437 void EnableMT(void) {
438 m_IsMT = true;
439 };
440 /// MT locking
MT_Lock() const441 void MT_Lock() const {
442 if (m_IsMT) m_Mutex.lock();
443 }
444 // MT unlocking
MT_Unlock() const445 void MT_Unlock() const {
446 if (m_IsMT) m_Mutex.unlock();
447 }
448
449 protected:
450 // Note, none of x_*() methods have MT locking, it should be done in public methods only.
451
452 /// Calculate bins starting positions.
453 /// Calculate positions for 'n' number of bins in [start,end] value range,
454 /// starting from 'pos' bin index.
455 /// If start_value > end_value, calculating going from the right to left.
456 /// This is applicable for symmetrical scales, or multi-scales,
457 /// with left scale added.
458 void x_CalculateBins(TScale start_value, TScale end_value,
459 unsigned pos, unsigned n,
460 EScaleType scale_type, EScaleView scale_view);
461
462 /// Calculate bins starting positions for a linear scale.
463 /// Account for an integer TScale type and calculation truncation.
464 void x_CalculateBinsLinear(
465 TScale start_value, TScale end_value, TScale step,
466 TScale* arr, unsigned pos, unsigned n,
467 EScaleView scale_view);
468
469 /// Calculate bins starting positions for a logarithmic scale.
470 void x_CalculateBinsLog(
471 TScale start_value, TScale end_value,
472 TScale* arr, unsigned pos, unsigned n,
473 EScaleType scale_type, EScaleView scale_view);
474
475 /// Scale function.
476 /// Calculates scale position on a base of data value.
477 TScale x_Func(EScaleType scale_type, TScale value);
478
479 /// Inverse scale function.
480 /// Calculates a data value on a base of scale value/position.
481 TScale x_FuncInverse(EScaleType scale_type, TScale scale_value);
482
483 /// Add value to the data distribution (internal version without locking).
484 void x_Add(TValue value);
485
486 /// Add value to the data distribution using a linear search method.
487 /// Usually faster than bisection method on a small number of bins,
488 /// or if the values have a tendency to fall into the starting bins
489 /// for a used scale.
490 /// @sa Add, x_AddBisection
491 void x_AddLinear(TValue value);
492
493 /// Add value to the data distribution using a bisection search method.
494 /// Usually faster on a long scales with a lot of bins.
495 /// @sa Add, x_AddLinear
496 void x_AddBisection(TValue value);
497
498 /// Move data from 'other' histogram. 'other' became invalid.
499 void x_MoveFrom(CHistogram& other);
500
501 /// Add counters from 'other' histogram.
502 void x_AddCountersFrom(CHistogram& other);
503
504 /// Check that 'a' and 'b' scale values are equal (or almost equal for floating scales).
505 bool x_IsEqual(TScale a, TScale b);
506
507 /// Reset all data counters (internal version without locking).
508 void x_Reset();
509
510 /// Prevent copying.
511 /// See move constructor and move assignment operator to use with move-semantics.
512 CHistogram(const CHistogram&);
513 CHistogram& operator=(const CHistogram&);
514
515 protected:
516 TValue m_Min; ///< Minimum value (the lower bound of combined scale)
517 TValue m_Max; ///< Maximum value (the upper bound of combined scale)
518 unsigned m_NumBins; ///< Number of bins (m_Starts[]/m_Counts[] length)
519 TSum m_Sum = {}; ///< Sum of the all added values (if applicable for TValue)
520
521 std::unique_ptr<TScale[]> m_Starts; ///< Combined scale: starting bins positions
522 std::unique_ptr<TCounter[]> m_Counters; ///< Combined scale: counters - the number of measurements for each bin
523
524 TCounter m_Count; ///< Number of counted values (sum all m_Counters[])
525 TCounter m_LowerAnomalyCount; ///< Number of anomaly values < m_Min
526 TCounter m_UpperAnomalyCount; ///< Number of anomaly values > m_Max
527
528 bool m_IsMT; ///< MT protection flag
529 mutable std::mutex m_Mutex; ///< MT protection mutex
530 };
531
532
533
534 /// A series of same-structured histograms covering logarithmically (base 2)
535 /// increasing time periods... roughly
536 ///
537 template <typename TValue, typename TScale, typename TCounter>
538 class CHistogramTimeSeries
539 {
540 public:
541 /// @param model_histogram
542 /// This histogram will be used as a template for all histogram objects
543 /// in this time series.
544 using THistogram = CHistogram<TValue,TScale,TCounter>;
545 CHistogramTimeSeries(THistogram& model_histogram);
546
547 /// Add value to the data distribution.
548 /// Try to find an appropriate bin for a specified value and increment its counter by 1.
549 /// @sa CHistogram::Add()
550 void Add(TValue value);
551
552 /// Merge the most recent (now active) histogram data into the time series.
553 /// This method is generally supposed to be called periodically, thus
554 /// defining the unit of time (tick) as far as this API is concerned.
555 void Rotate();
556
557 /// Reset to the initial state
558 void Reset();
559
560 /// Type of the unit of time
561 using TTicks = unsigned int;
562
563 /// A histograms which covers a certain number of ticks
564 struct STimeBin {
565 // Copy constructor and assignment operator should be passed with 'const'
566 // qualifier to operate on list<STimeBin>. But Clone() is not 'const'
567 // due optional protection, so we use const_cast<> here. It is safe,
568 // because Clone() change internal mutex only.
STimeBinCHistogramTimeSeries::STimeBin569 STimeBin(const STimeBin& other) :
570 histogram(other.histogram.Clone(THistogram::eCloneAll)),
571 n_ticks(other.n_ticks)
572 {}
operator =CHistogramTimeSeries::STimeBin573 STimeBin& operator=(const STimeBin& other)
574 {
575 histogram = other.histogram.Clone(THistogram::eCloneAll);
576 n_ticks = other.n_ticks;
577 }
STimeBinCHistogramTimeSeries::STimeBin578 STimeBin(THistogram&& h, TTicks t) :
579 histogram(std::move(h)), n_ticks(t)
580 {}
581
582 THistogram histogram; ///< Histogram for the ticks
583 TTicks n_ticks; ///< Number of ticks in this histogram
584 };
585 /// Type of the series of histograms
586 using TTimeBins = list<STimeBin>;
587
588 /// Histograms -- in the order from the most recent to the least recent
589 TTimeBins GetHistograms() const;
590
591 /// Number of ticks the histogram series has handled.
592 /// Initially the number of ticks is zero.
GetCurrentTick(void) const593 TTicks GetCurrentTick(void) const { return m_CurrentTick; }
594
595 private:
596 void x_AppendBin(const THistogram& model_histogram, TTicks n_ticks);
597 void x_Shift(size_t index, typename TTimeBins::iterator current_it);
598
599 private:
600 TTimeBins m_TimeBins;
601 mutable std::mutex m_Mutex; // for Rotate() and GetHistograms()
602 TTicks m_CurrentTick;
603 };
604
605
606 /* @} */
607
608
609 //////////////////////////////////////////////////////////////////////////////
610 //
611 // Inline
612 //
613
614 template <typename TValue, typename TScale, typename TCounter>
CHistogram(TValue min_value,TValue max_value,unsigned n_bins,EScaleType scale_type,EScaleView scale_view)615 CHistogram<TValue, TScale, TCounter>::CHistogram
616 (
617 TValue min_value,
618 TValue max_value,
619 unsigned n_bins,
620 EScaleType scale_type,
621 EScaleView scale_view
622 )
623 : m_Min(min_value), m_Max(max_value), m_NumBins(n_bins), m_IsMT(false)
624 {
625 if ( m_Min > m_Max ) {
626 NCBI_THROW(CCoreException, eInvalidArg, "Minimum value cannot exceed maximum value");
627 }
628 if ( !m_NumBins ) {
629 NCBI_THROW(CCoreException, eInvalidArg, "Number of bins cannot be zero");
630 }
631 // Allocate memory
632 m_Starts.reset(new TScale[m_NumBins]);
633 m_Counters.reset(new TCounter[m_NumBins]);
634
635 x_CalculateBins(m_Min, m_Max, 0, m_NumBins, scale_type, scale_view);
636 // Reset counters
637 x_Reset();
638 }
639
640
641 template <typename TValue, typename TScale, typename TCounter>
642 void
AddLeftScale(TValue min_value,unsigned n_bins,EScaleType scale_type)643 CHistogram<TValue, TScale, TCounter>::AddLeftScale(
644 TValue min_value, unsigned n_bins, EScaleType scale_type)
645 {
646 MT_Lock();
647
648 if ( min_value >= m_Min ) {
649 MT_Unlock();
650 NCBI_THROW(CCoreException, eInvalidArg, "New minimum value cannot exceed minimum value for the histogram");
651 }
652 if ( !n_bins ) {
653 MT_Unlock();
654 NCBI_THROW(CCoreException, eInvalidArg, "Number of bins cannot be zero");
655 }
656 if (m_Count + m_LowerAnomalyCount + m_UpperAnomalyCount) {
657 MT_Unlock();
658 NCBI_THROW(CCoreException, eInvalidArg, "Please call AddLeftScale() before Add()");
659 }
660 unsigned n_prev = m_NumBins;
661 m_NumBins += n_bins;
662
663 // Reallocate memory for starting positions and counters
664
665 std::unique_ptr<TScale[]> tmp_starts(new TScale[m_NumBins]);
666 memcpy(tmp_starts.get() + n_bins, m_Starts.get(), sizeof(TScale) * n_prev);
667 m_Starts.swap(tmp_starts);
668 m_Counters.reset(new TCounter[m_NumBins]);
669 memset(m_Counters.get(), 0, m_NumBins * sizeof(TCounter));
670
671 // Calculate scale for newly added bins: from right to left
672 x_CalculateBins(m_Min, min_value, n_bins-1, n_bins, scale_type, eMonotonic);
673 m_Min = min_value;
674
675 MT_Unlock();
676 }
677
678
679 template <typename TValue, typename TScale, typename TCounter>
680 void
AddRightScale(TValue max_value,unsigned n_bins,EScaleType scale_type)681 CHistogram<TValue, TScale, TCounter>::AddRightScale(
682 TValue max_value, unsigned n_bins, EScaleType scale_type)
683 {
684 MT_Lock();
685
686 if ( max_value <= m_Max ) {
687 MT_Unlock();
688 NCBI_THROW(CCoreException, eInvalidArg, "New maximum value cannot be less than a maximum value for the histogram");
689 }
690 if ( !n_bins ) {
691 MT_Unlock();
692 NCBI_THROW(CCoreException, eInvalidArg, "Number of bins cannot be zero");
693 }
694 if (m_Count + m_LowerAnomalyCount + m_UpperAnomalyCount) {
695 MT_Unlock();
696 NCBI_THROW(CCoreException, eInvalidArg, "Please call AddRightScale() before Add()");
697 }
698 unsigned n_prev = m_NumBins;
699 m_NumBins += n_bins;
700
701 // Reallocate memory for starting positions and counters
702
703 std::unique_ptr<TScale[]> tmp_starts(new TScale[m_NumBins]);
704 memcpy(tmp_starts.get(), m_Starts.get(), sizeof(TScale) * n_prev);
705 m_Starts.swap(tmp_starts);
706 m_Counters.reset(new TCounter[m_NumBins]);
707 memset(m_Counters.get(), 0, m_NumBins * sizeof(TCounter));
708
709 // Calculate scale for newly added bins: from left to right
710 x_CalculateBins(m_Max, max_value, n_prev, n_bins, scale_type, eMonotonic);
711 m_Max = max_value;
712
713 MT_Unlock();
714 }
715
716
717 template <typename TValue, typename TScale, typename TCounter>
718 unsigned
EstimateNumberOfBins(size_t n,EEstimateNumberOfBinsRule rule)719 CHistogram<TValue, TScale, TCounter>::EstimateNumberOfBins(size_t n, EEstimateNumberOfBinsRule rule)
720 {
721 switch (rule) {
722 case eSquareRoot:
723 return (unsigned) ceil(sqrt(n));
724 case eJuran:
725 // It have no info for n < 20, but 5 is a reasonable minimum
726 if (n < 20) return 5;
727 if (n <= 50) return 6;
728 if (n <= 100) return 7;
729 if (n <= 200) return 8;
730 if (n <= 500) return 9;
731 if (n <= 1000) return 10;
732 return 11; // handbook: 11-20 for 1000+ observations
733 case eSturge:
734 return 1 + (unsigned) ceil(log2(n));
735 case eRice:
736 return (unsigned) ceil(pow(n, double(1)/3));
737 }
738 _TROUBLE;
739 // unreachable, just to avoid warning
740 return 0;
741 }
742
743
744 template <typename TValue, typename TScale, typename TCounter>
745 void
Reset(void)746 CHistogram<TValue, TScale, TCounter>::Reset(void)
747 {
748 MT_Lock();
749 x_Reset();
750 MT_Unlock();
751 }
752
753
754 template <typename TValue, typename TScale, typename TCounter>
755 void
x_Reset(void)756 CHistogram<TValue, TScale, TCounter>::x_Reset(void)
757 {
758 // Reset counters
759 m_Count = m_LowerAnomalyCount = m_UpperAnomalyCount = 0;
760 // Reset bins
761 memset(m_Counters.get(), 0, m_NumBins * sizeof(TCounter));
762 // Reset sum (it can be any type, so use default constructor)
763 m_Sum = {};
764 }
765
766
767 template <typename TValue, typename TScale, typename TCounter>
768 void
x_Add(TValue value)769 CHistogram<TValue, TScale, TCounter>::x_Add(TValue value)
770 {
771 if (value < m_Min) {
772 m_LowerAnomalyCount++;
773 return;
774 }
775 if (value > m_Max) {
776 m_UpperAnomalyCount++;
777 return;
778 }
779 if (m_NumBins < 50) {
780 x_AddLinear(value);
781 return;
782 }
783 x_AddBisection(value);
784 }
785
786
787 template <typename TValue, typename TScale, typename TCounter>
788 void
x_AddLinear(TValue value)789 CHistogram<TValue, TScale, TCounter>::x_AddLinear(TValue value)
790 {
791 size_t i = 1;
792 while (i < m_NumBins) {
793 if (value < m_Starts[i]) {
794 break;
795 }
796 i++;
797 }
798 // The current bin with index 'i' have a greater value, so put value into the previous bin
799 m_Counters[i-1]++;
800 m_Count++;
801 }
802
803
804 template <typename TValue, typename TScale, typename TCounter>
805 void
x_AddBisection(TValue value)806 CHistogram<TValue, TScale, TCounter>::x_AddBisection(TValue value)
807 {
808 // Bisection search
809
810 size_t left = 0, right = m_NumBins;
811 size_t i = 0, d;
812
813 while (d = (right - left), d > 1) {
814 i = left + d / 2;
815 if (value < m_Starts[i]) {
816 right = i;
817 } else {
818 left = i;
819 }
820 }
821 // Algorithm can finish on the current or next bin, depending on even/odd number of bins
822 if (value < m_Starts[i]) {
823 i--;
824 }
825 m_Counters[i]++;
826 m_Count++;
827 }
828
829
830 template <typename TValue, typename TScale, typename TCounter>
831 void
x_CalculateBins(TScale start_value,TScale end_value,unsigned pos,unsigned n,EScaleType scale_type,EScaleView scale_view)832 CHistogram<TValue, TScale, TCounter>::x_CalculateBins
833 (
834 TScale start_value,
835 TScale end_value,
836 unsigned pos,
837 unsigned n,
838 EScaleType scale_type,
839 EScaleView scale_view
840 )
841 {
842 const char* errmsg_step = "Impossible to calculate scale step, please change TScale type, range, or number of bins";
843 const char* errmsg_dup = "Impossible to calculate scales bin starting position, please change TScale type, range, or number of bins";
844
845 TScale* arr = m_Starts.get();
846
847 if (scale_type == eLinear) {
848 // Special processing for linear scales to account for
849 // an integer TScale type and calculation truncation.
850 TScale step = (max(start_value, end_value) - min(start_value, end_value)) / n;
851 if ( step == 0 ) {
852 NCBI_THROW(CCoreException, eCore, errmsg_step);
853 }
854 x_CalculateBinsLinear(start_value, end_value, step, arr, pos, n, scale_view);
855 return;
856 }
857
858 // Log scales only
859 _ASSERT( scale_type == eLog ||
860 scale_type == eLog2 ||
861 scale_type == eLog10 );
862
863 x_CalculateBinsLog(start_value, end_value, arr, pos, n, scale_type, scale_view);
864
865 // Check that we don't have bins with the same starting positions.
866 // This can happens mostly if TScale is an integer type due truncation.
867 // Linear scale doesn't need this, because we check 'step' instead (see above).
868 // Checking whole combined scale.
869 //
870 for (unsigned i = 1; i < m_NumBins; i++) {
871 if ( arr[i] <= arr[i-1] ) {
872 NCBI_THROW(CCoreException, eCore, errmsg_dup);
873 }
874 }
875 return;
876 }
877
878
879 template <typename TValue, typename TScale, typename TCounter>
880 void
x_CalculateBinsLinear(TScale start_value,TScale end_value,TScale step,TScale * arr,unsigned pos,unsigned n,EScaleView scale_view)881 CHistogram<TValue, TScale, TCounter>::x_CalculateBinsLinear
882 (
883 TScale start_value,
884 TScale end_value,
885 TScale step,
886 TScale* arr,
887 unsigned pos,
888 unsigned n,
889 EScaleView scale_view
890 )
891 {
892 if (scale_view == eSymmetrical) {
893 // Account for an integer TScale type and calculation truncation.
894 // If TScale type doesn't allow integer division to the specified
895 // number of bins without residue, some bins will be larger than others.
896 // For monotonic view this will be always a very last bin at the end of
897 // the scale, but for the symmetrical view it depends on parity of 'n'.
898 // For even number of bins most left and right bins can be larger.
899 // For odd number of bins the center bin can be larger than other.
900
901 if (n % 2 == 0) {
902 // Calculate from the center to both sides with step 'step'
903 TScale median = start_value + (end_value - start_value)/2;
904 x_CalculateBinsLinear(median, start_value, step, arr, pos + n/2 - 1, n/2, eMonotonic);
905 x_CalculateBinsLinear(median, end_value, step, arr, pos + n/2, n/2, eMonotonic);
906 // We already know value for the most left bin, assign it
907 // implicitly for such symmetrical scales to have bigger
908 // starting bin. We got bigger ending bin automatically.
909 arr[pos] = start_value;
910
911 } else {
912 // Calculate from both sides to the center with step 'step'
913 // 2nd parameter doesn't matter, it just show direction.
914 x_CalculateBinsLinear(start_value, end_value, step, arr, pos, n/2 + 1, eMonotonic);
915 x_CalculateBinsLinear(end_value, start_value, step, arr, n-1, n/2, eMonotonic);
916 }
917 return;
918 }
919
920 // Calculate from left to right
921 if (start_value <= end_value) {
922 for (unsigned i = 0; i < n; i++) {
923 arr[pos+i] = start_value + step*i;
924 }
925 }
926 // Calculate from right to left
927 else {
928 for (unsigned i = 1; i <= n; i++) {
929 arr[pos+1-i] = start_value - step*i;
930 }
931 }
932 }
933
934
935 template <typename TValue, typename TScale, typename TCounter>
936 void
x_CalculateBinsLog(TScale start_value,TScale end_value,TScale * arr,unsigned pos,unsigned n,EScaleType scale_type,EScaleView scale_view)937 CHistogram<TValue, TScale, TCounter>::x_CalculateBinsLog
938 (
939 TScale start_value,
940 TScale end_value,
941 TScale* arr,
942 unsigned pos,
943 unsigned n,
944 EScaleType scale_type,
945 EScaleView scale_view
946 )
947 {
948 if (scale_view == eSymmetrical) {
949 // Account for an integer TScale type and calculation truncation.
950 // If TScale type doesn't allow integer division to the specified
951 // number of bins without residue, some bins will be larger than others.
952 // For monotonic view this will be always a very last bin at the end of
953 // the scale, but for the symmetrical view it depends on parity of 'n'.
954 // For even number of bins most left and right bins can be larger.
955 // For odd number of bins the center bin can be larger than other.
956
957 TScale median = start_value + (end_value - start_value)/2;
958 // Set most left bin directly, we already know its value
959 arr[pos] = start_value;
960 unsigned n2 = n/2;
961
962 if (n % 2 == 0) {
963 // Calculate from the center to both sides
964 x_CalculateBinsLog(median, start_value, arr, pos + n2 - 1, n2, scale_type, eMonotonic);
965 x_CalculateBinsLog(median, end_value, arr, pos + n2 , n2, scale_type, eMonotonic);
966 } else {
967 if (n == 1) {
968 return;
969 }
970 _ASSERT(n > 1);
971
972 // Median is located in the middle of the center bin.
973 // Too calculate starting point for this center bin end its ending point,
974 // by the way also a starting point for the next bin, we double the number of bins
975 // on each side of the median, and calculate values for each half-mark on a scale.
976 // After that copy values for needed half-marks only.
977
978 std::unique_ptr<TScale[]> tmp(new TScale[n*2]);
979 TScale* tmp_arr = tmp.get();
980 // Calculate from the center to both sides (n % 2 == 0 case)
981 x_CalculateBinsLog(median, start_value, tmp_arr, n - 1, n, scale_type, eMonotonic);
982 x_CalculateBinsLog(median, end_value, tmp_arr, n , n, scale_type, eMonotonic);
983
984 // For central bin: copy values for 2 half-marks from both sides of the center
985 arr[pos + n2] = tmp_arr[n - 1];
986 arr[pos + n2 + 1] = tmp_arr[n + 1];
987
988 // For other bins: copy values for every second half-mark from 2 half-marks above
989 for (unsigned i = 0; i < n2; i++) {
990 arr[pos + i] = tmp_arr[i*2];
991 }
992 for (unsigned i=2, j=n+3; i <= n2; i++, j+=2) {
993 arr[pos + n2 + i] = tmp_arr[j];
994 }
995 }
996 return;
997 }
998
999 // Logarithm functions cannot operate on a negative values or 0.
1000 // so it will be necessary to transform data range. One of the methods
1001 // is too shift data to start it from 1. This method also helps to normalize
1002 // logarithmic scale, and we will use it for all data, even positive.
1003 // So, we calculate logarithms scale for a range of values [1:D+1],
1004 // where D is a distance between starting and ending points.
1005 // Because x^0 = 1, we need calculate only the maximum scale value log(D+1),
1006 // And every scale mark in between with the step log(D+1)/N.
1007 // Inverse function helps to convert every scale mark back to a bin
1008 // starting position.
1009
1010 // Calculate from left to right
1011 if (start_value <= end_value) {
1012 TScale step = x_Func(scale_type, end_value - start_value + 1) / n;
1013 for (unsigned i = 1; i < n; i++) {
1014 // Calculate data value for a given scale mark
1015 arr[pos+i] = start_value + x_FuncInverse(scale_type, step*i) - 1;
1016 }
1017 // Set most left bin directly, we already know its value,
1018 // to avoid possible rounding errors.
1019 arr[pos] = start_value;
1020 }
1021 // Calculate from right to left
1022 else {
1023 TScale step = x_Func(scale_type, start_value - end_value + 1) / n;
1024 for (unsigned i = 1; i < n; i++) {
1025 // Calculate data value for a given scale mark
1026 arr[pos+1-i] = start_value - x_FuncInverse(scale_type, step*i) + 1;
1027 }
1028 // Set most left bin directly, we already know its value,
1029 // to avoid possible rounding errors.
1030 arr[pos+1-n] = end_value;
1031 }
1032 }
1033
1034
1035 template <typename TValue, typename TScale, typename TCounter>
1036 TScale
1037 CHistogram<TValue, TScale, TCounter>::x_Func(EScaleType scale_type, TScale value)
1038 {
1039 switch (scale_type) {
1040 case eLog:
1041 return (TScale)log(value);
1042 case eLog2:
1043 return (TScale)log2(value);
1044 case eLog10:
1045 return (TScale)log10(value);
1046 case eLinear:
1047 // It is last, because eLinear has served by x_CalculateBinsLinear() directly
1048 return value;
1049 }
1050 _TROUBLE;
1051 // unreachable, just to avoid warning
1052 return value;
1053 }
1054
1055
1056 template <typename TValue, typename TScale, typename TCounter>
1057 TScale
1058 CHistogram<TValue, TScale, TCounter>::x_FuncInverse(EScaleType scale_type, TScale scale_value)
1059 {
1060 switch (scale_type) {
1061 case eLog:
1062 return (TScale)pow(M_E, scale_value);
1063 case eLog2:
1064 return (TScale)pow(2, scale_value);
1065 case eLog10:
1066 return (TScale)pow(10, scale_value);
1067 case eLinear:
1068 // It is last, because eLinear has served by x_CalculateBinsLinear() directly
1069 return scale_value;
1070 }
1071 _TROUBLE;
1072 // unreachable, just to avoid warning
1073 return scale_value;
1074 }
1075
1076
1077 // Move semantics
1078
1079 template <typename TValue, typename TScale, typename TCounter>
CHistogram()1080 CHistogram<TValue, TScale, TCounter>::CHistogram()
1081 : m_NumBins(0), m_IsMT(false)
1082 {
1083 // Reset counters
1084 x_Reset();
1085 }
1086
1087
1088 template <typename TValue, typename TScale, typename TCounter>
CHistogram(CHistogram && other)1089 CHistogram<TValue, TScale, TCounter>::CHistogram(CHistogram&& other)
1090 {
1091 if (this == &other) return;
1092 other.MT_Lock();
1093 x_MoveFrom(other);
1094 other.MT_Unlock();
1095 };
1096
1097
1098 template <typename TValue, typename TScale, typename TCounter>
1099 CHistogram<TValue, TScale, TCounter>&
operator =(CHistogram && other)1100 CHistogram<TValue, TScale, TCounter>::operator=(CHistogram&& other)
1101 {
1102 if (this == &other) return *this;
1103 other.MT_Lock();
1104 x_MoveFrom(other);
1105 other.MT_Unlock();
1106 return *this;
1107 };
1108
1109
1110 template <typename TValue, typename TScale, typename TCounter>
1111 CHistogram<TValue, TScale, TCounter>
Clone(EClone how) const1112 CHistogram<TValue, TScale, TCounter>::Clone(EClone how) const
1113 {
1114 MT_Lock();
1115
1116 CHistogram h;
1117 h.m_Min = m_Min;
1118 h.m_Max = m_Max;
1119 h.m_NumBins = m_NumBins;
1120 h.m_IsMT = m_IsMT;
1121
1122 h.m_Starts.reset(new TScale[m_NumBins]);
1123 h.m_Counters.reset(new TCounter[m_NumBins]);
1124 memcpy(h.m_Starts.get(), m_Starts.get(), sizeof(TScale) * m_NumBins);
1125
1126 switch (how) {
1127 case eCloneStructureOnly:
1128 // Reset counters
1129 h.m_Sum = 0;
1130 h.m_Count = 0;
1131 h.m_LowerAnomalyCount = 0;
1132 h.m_UpperAnomalyCount = 0;
1133 memset(h.m_Counters.get(), 0, m_NumBins * sizeof(TCounter));
1134 break;
1135 case eCloneAll:
1136 // Copy counters
1137 h.m_Sum = m_Sum;
1138 h.m_Count = m_Count;
1139 h.m_LowerAnomalyCount = m_LowerAnomalyCount;
1140 h.m_UpperAnomalyCount = m_UpperAnomalyCount;
1141 memcpy(h.m_Counters.get(), m_Counters.get(), sizeof(TCounter) * m_NumBins);
1142 break;
1143 default:
1144 MT_Unlock();
1145 _TROUBLE;
1146 }
1147 MT_Unlock();
1148 return h;
1149 }
1150
1151
1152 template <typename TValue, typename TScale, typename TCounter>
1153 void
x_MoveFrom(CHistogram & other)1154 CHistogram<TValue, TScale, TCounter>::x_MoveFrom(CHistogram& other)
1155 {
1156 m_Min = other.m_Min;
1157 m_Max = other.m_Max;
1158 m_NumBins = other.m_NumBins;
1159 m_Sum = other.m_Sum;
1160 m_Count = other.m_Count;
1161 m_LowerAnomalyCount = other.m_LowerAnomalyCount;
1162 m_UpperAnomalyCount = other.m_UpperAnomalyCount;
1163 m_IsMT = other.m_IsMT;
1164
1165 m_Starts.reset(other.m_Starts.release());
1166 m_Counters.reset(other.m_Counters.release());
1167 other.m_NumBins = 0;
1168 other.m_Sum = 0;
1169
1170 return;
1171 }
1172
1173
1174 template <typename TValue, typename TScale, typename TCounter>
1175 bool
x_IsEqual(TScale a,TScale b)1176 CHistogram<TValue, TScale, TCounter>::x_IsEqual(TScale a, TScale b)
1177 {
1178 if (std::numeric_limits<TScale>::is_integer) {
1179 return a == b;
1180 }
1181 // Approximately check for floating numbers
1182 return std::fabs(a - b) < 0.0001;
1183 }
1184
1185
1186 template <typename TValue, typename TScale, typename TCounter>
1187 void
AddCountersFrom(const CHistogram & other)1188 CHistogram<TValue, TScale, TCounter>::AddCountersFrom(const CHistogram& other)
1189 {
1190 if (this == &other) return;
1191
1192 MT_Lock();
1193 other.MT_Lock();
1194 try {
1195 x_AddCountersFrom(const_cast<CHistogram&>(other));
1196 }
1197 catch (...) {
1198 MT_Unlock();
1199 other.MT_Unlock();
1200 throw;
1201 }
1202 MT_Unlock();
1203 other.MT_Unlock();
1204 }
1205
1206
1207 template <typename TValue, typename TScale, typename TCounter>
1208 void
StealCountersFrom(CHistogram & other)1209 CHistogram<TValue, TScale, TCounter>::StealCountersFrom(CHistogram& other)
1210 {
1211 if (this == &other) return;
1212
1213 MT_Lock();
1214 other.MT_Lock();
1215 try {
1216 x_AddCountersFrom(other);
1217 other.x_Reset();
1218 }
1219 catch (...) {
1220 MT_Unlock();
1221 other.MT_Unlock();
1222 throw;
1223 }
1224 MT_Unlock();
1225 other.MT_Unlock();
1226 }
1227
1228
1229 template <typename TValue, typename TScale, typename TCounter>
1230 void
x_AddCountersFrom(CHistogram & other)1231 CHistogram<TValue, TScale, TCounter>::x_AddCountersFrom(CHistogram& other)
1232 {
1233 // Check structure
1234 if ( m_NumBins != other.m_NumBins ||
1235 !x_IsEqual(m_Min, other.m_Min) ||
1236 !x_IsEqual(m_Max, other.m_Max)
1237 ) {
1238 NCBI_THROW(CCoreException, eInvalidArg, "Histograms have different structure");
1239 }
1240 // Check scale
1241 TScale* starts_cur = m_Starts.get();
1242 TScale* starts_other = other.m_Starts.get();
1243 for (unsigned i = 0; i < m_NumBins; i++) {
1244 if (!x_IsEqual(starts_cur[i], starts_other[i])) {
1245 NCBI_THROW(CCoreException, eInvalidArg, "Histograms have different starting positions");
1246 }
1247 }
1248 // Add counters
1249 TCounter* counters_cur = m_Counters.get();
1250 TCounter* counters_other = other.m_Counters.get();
1251 for (unsigned i = 0; i < m_NumBins; i++) {
1252 counters_cur[i] += counters_other[i];
1253 }
1254 m_Count += other.m_Count;
1255 m_LowerAnomalyCount += other.m_LowerAnomalyCount;
1256 m_UpperAnomalyCount += other.m_UpperAnomalyCount;
1257 m_Sum += other.m_Sum;
1258 }
1259
1260
1261
1262 //============================================================================
1263 //
1264 // CHistogramTimeSeries
1265 //
1266 //============================================================================
1267
1268 template <typename TValue, typename TScale, typename TCounter>
CHistogramTimeSeries(THistogram & model_histogram)1269 CHistogramTimeSeries<TValue, TScale, TCounter>::CHistogramTimeSeries(THistogram& model_histogram)
1270 : m_CurrentTick(0)
1271 {
1272 // Need to create the first item in the list
1273 x_AppendBin(model_histogram, 1);
1274 }
1275
1276
1277 template <typename TValue, typename TScale, typename TCounter>
1278 void
Add(TValue value)1279 CHistogramTimeSeries<TValue, TScale, TCounter>::Add(TValue value)
1280 {
1281 // The first bin always exists
1282 m_TimeBins.front().histogram.Add(value);
1283 }
1284
1285
1286 template <typename TValue, typename TScale, typename TCounter>
1287 void
Rotate()1288 CHistogramTimeSeries<TValue, TScale, TCounter>::Rotate()
1289 {
1290 ++m_CurrentTick;
1291
1292 m_Mutex.lock();
1293 x_Shift(0, m_TimeBins.begin());
1294 m_Mutex.unlock();
1295 }
1296
1297
1298 template <typename TValue, typename TScale, typename TCounter>
1299 void
Reset()1300 CHistogramTimeSeries<TValue, TScale, TCounter>::Reset()
1301 {
1302 m_Mutex.lock();
1303 m_CurrentTick = 0;
1304 while (m_TimeBins.size() > 1)
1305 m_TimeBins.pop_back();
1306 m_TimeBins.front().histogram.Reset();
1307 m_TimeBins.front().n_ticks = 1; // Just in case
1308 m_Mutex.unlock();
1309 }
1310
1311
1312 template <typename TValue, typename TScale, typename TCounter>
1313 void
x_Shift(size_t index,typename ncbi::CHistogramTimeSeries<TValue,TScale,TCounter>::TTimeBins::iterator current_it)1314 CHistogramTimeSeries<TValue, TScale, TCounter>::x_Shift(size_t index,
1315 typename ncbi::CHistogramTimeSeries<TValue, TScale, TCounter>::TTimeBins::iterator current_it)
1316 {
1317 if (m_TimeBins.size() <= index + 1) {
1318 // There is not enough bins. Need to add one
1319 x_AppendBin(m_TimeBins.front().histogram, TTicks(1) << index);
1320
1321 // Move from index to index + 1
1322 auto next_it = current_it;
1323 ++next_it;
1324 next_it->histogram.StealCountersFrom(current_it->histogram);
1325
1326 if (index != 0)
1327 current_it->n_ticks /= 2;
1328 return;
1329 }
1330
1331 auto next_it = current_it;
1332 ++next_it;
1333
1334 if (next_it->n_ticks == TTicks(1) << index) {
1335 // Half full bin; make it full and just accumulate
1336 next_it->n_ticks *= 2;
1337 next_it->histogram.StealCountersFrom(current_it->histogram);
1338 if (index != 0)
1339 current_it->n_ticks /= 2;
1340 return;
1341 }
1342
1343 // The next bin is full; need to move further
1344 x_Shift(index + 1, next_it);
1345 // Move to the vacant next
1346 next_it->histogram.StealCountersFrom(current_it->histogram);
1347 // The current one is a half now
1348 if (index != 0)
1349 current_it->n_ticks /= 2;
1350 }
1351
1352
1353 template <typename TValue, typename TScale, typename TCounter>
1354 typename CHistogramTimeSeries<TValue, TScale, TCounter>::TTimeBins
GetHistograms() const1355 CHistogramTimeSeries<TValue, TScale, TCounter>::GetHistograms() const
1356 {
1357 m_Mutex.lock();
1358 TTimeBins ret = m_TimeBins;
1359 m_Mutex.unlock();
1360 return ret;
1361 }
1362
1363
1364 template <typename TValue, typename TScale, typename TCounter>
1365 void
x_AppendBin(const THistogram & model_histogram,TTicks n_ticks)1366 CHistogramTimeSeries<TValue, TScale, TCounter>::x_AppendBin(
1367 const THistogram& model_histogram, TTicks n_ticks)
1368 {
1369 m_TimeBins.push_back(
1370 STimeBin{model_histogram.Clone(THistogram::eCloneStructureOnly),
1371 n_ticks} );
1372 }
1373
1374
1375 END_NCBI_SCOPE
1376
1377 #endif /* DATA_HISTOGRAM__HPP */
1378