1 /*
2  *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef API_NUMERICS_RUNNING_STATISTICS_H_
12 #define API_NUMERICS_RUNNING_STATISTICS_H_
13 
14 #include <algorithm>
15 #include <cmath>
16 #include <limits>
17 
18 #include "absl/types/optional.h"
19 #include "rtc_base/checks.h"
20 #include "rtc_base/numerics/math_utils.h"
21 
22 namespace webrtc {
23 namespace webrtc_impl {
24 
25 // tl;dr: Robust and efficient online computation of statistics,
26 //        using Welford's method for variance. [1]
27 //
28 // This should be your go-to class if you ever need to compute
29 // min, max, mean, variance and standard deviation.
30 // If you need to get percentiles, please use webrtc::SamplesStatsCounter.
31 //
32 // Please note RemoveSample() won't affect min and max.
33 // If you want a full-fledged moving window over N last samples,
34 // please use webrtc::RollingAccumulator.
35 //
36 // The measures return absl::nullopt if no samples were fed (Size() == 0),
37 // otherwise the returned optional is guaranteed to contain a value.
38 //
39 // [1]
40 // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
41 
42 // The type T is a scalar which must be convertible to double.
43 // Rationale: we often need greater precision for measures
44 //            than for the samples themselves.
45 template <typename T>
46 class RunningStatistics {
47  public:
48   // Update stats ////////////////////////////////////////////
49 
50   // Add a value participating in the statistics in O(1) time.
AddSample(T sample)51   void AddSample(T sample) {
52     max_ = std::max(max_, sample);
53     min_ = std::min(min_, sample);
54     ++size_;
55     // Welford's incremental update.
56     const double delta = sample - mean_;
57     mean_ += delta / size_;
58     const double delta2 = sample - mean_;
59     cumul_ += delta * delta2;
60   }
61 
62   // Remove a previously added value in O(1) time.
63   // Nb: This doesn't affect min or max.
64   // Calling RemoveSample when Size()==0 is incorrect.
RemoveSample(T sample)65   void RemoveSample(T sample) {
66     RTC_DCHECK_GT(Size(), 0);
67     // In production, just saturate at 0.
68     if (Size() == 0) {
69       return;
70     }
71     // Since samples order doesn't matter, this is the
72     // exact reciprocal of Welford's incremental update.
73     --size_;
74     const double delta = sample - mean_;
75     mean_ -= delta / size_;
76     const double delta2 = sample - mean_;
77     cumul_ -= delta * delta2;
78   }
79 
80   // Merge other stats, as if samples were added one by one, but in O(1).
MergeStatistics(const RunningStatistics<T> & other)81   void MergeStatistics(const RunningStatistics<T>& other) {
82     if (other.size_ == 0) {
83       return;
84     }
85     max_ = std::max(max_, other.max_);
86     min_ = std::min(min_, other.min_);
87     const int64_t new_size = size_ + other.size_;
88     const double new_mean =
89         (mean_ * size_ + other.mean_ * other.size_) / new_size;
90     // Each cumulant must be corrected.
91     //   * from: sum((x_i - mean_)²)
92     //   * to:   sum((x_i - new_mean)²)
93     auto delta = [new_mean](const RunningStatistics<T>& stats) {
94       return stats.size_ * (new_mean * (new_mean - 2 * stats.mean_) +
95                             stats.mean_ * stats.mean_);
96     };
97     cumul_ = cumul_ + delta(*this) + other.cumul_ + delta(other);
98     mean_ = new_mean;
99     size_ = new_size;
100   }
101 
102   // Get Measures ////////////////////////////////////////////
103 
104   // Returns number of samples involved via AddSample() or MergeStatistics(),
105   // minus number of times RemoveSample() was called.
Size()106   int64_t Size() const { return size_; }
107 
108   // Returns minimum among all seen samples, in O(1) time.
109   // This isn't affected by RemoveSample().
GetMin()110   absl::optional<T> GetMin() const {
111     if (size_ == 0) {
112       return absl::nullopt;
113     }
114     return min_;
115   }
116 
117   // Returns maximum among all seen samples, in O(1) time.
118   // This isn't affected by RemoveSample().
GetMax()119   absl::optional<T> GetMax() const {
120     if (size_ == 0) {
121       return absl::nullopt;
122     }
123     return max_;
124   }
125 
126   // Returns mean in O(1) time.
GetMean()127   absl::optional<double> GetMean() const {
128     if (size_ == 0) {
129       return absl::nullopt;
130     }
131     return mean_;
132   }
133 
134   // Returns unbiased sample variance in O(1) time.
GetVariance()135   absl::optional<double> GetVariance() const {
136     if (size_ == 0) {
137       return absl::nullopt;
138     }
139     return cumul_ / size_;
140   }
141 
142   // Returns unbiased standard deviation in O(1) time.
GetStandardDeviation()143   absl::optional<double> GetStandardDeviation() const {
144     if (size_ == 0) {
145       return absl::nullopt;
146     }
147     return std::sqrt(*GetVariance());
148   }
149 
150  private:
151   int64_t size_ = 0;  // Samples seen.
152   T min_ = infinity_or_max<T>();
153   T max_ = minus_infinity_or_min<T>();
154   double mean_ = 0;
155   double cumul_ = 0;  // Variance * size_, sometimes noted m2.
156 };
157 
158 }  // namespace webrtc_impl
159 }  // namespace webrtc
160 
161 #endif  // API_NUMERICS_RUNNING_STATISTICS_H_
162