1 #[cfg(feature = "serde1")] use serde::{Serialize, Deserialize};
2 use super::{MeanWithError, Estimate, Merge};
3 
4 /// Estimate the weighted and unweighted arithmetic mean of a sequence of
5 /// numbers ("population").
6 ///
7 ///
8 /// ## Example
9 ///
10 /// ```
11 /// use average::WeightedMean;
12 ///
13 /// let a: WeightedMean = (1..6).zip(1..6)
14 ///     .map(|(x, w)| (f64::from(x), f64::from(w))).collect();
15 /// println!("The weighted mean is {}.", a.mean());
16 /// ```
17 #[derive(Debug, Clone)]
18 #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
19 pub struct WeightedMean  {
20     /// Sum of the weights.
21     weight_sum: f64,
22     /// Weighted mean value.
23     weighted_avg: f64,
24 }
25 
26 impl WeightedMean {
27     /// Create a new weighted and unweighted mean estimator.
new() -> WeightedMean28     pub fn new() -> WeightedMean {
29         WeightedMean {
30             weight_sum: 0., weighted_avg: 0.,
31         }
32     }
33 
34     /// Add an observation sampled from the population.
35     #[inline]
add(&mut self, sample: f64, weight: f64)36     pub fn add(&mut self, sample: f64, weight: f64) {
37         // The algorithm for the unweighted mean was suggested by Welford in 1962.
38         //
39         // See
40         // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
41         // and
42         // http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
43         self.weight_sum += weight;
44 
45         let prev_avg = self.weighted_avg;
46         self.weighted_avg = prev_avg + (weight / self.weight_sum) * (sample - prev_avg);
47     }
48 
49     /// Determine whether the sample is empty.
50     ///
51     /// Might be a false positive if the sum of weights is zero.
52     #[inline]
is_empty(&self) -> bool53     pub fn is_empty(&self) -> bool {
54         self.weight_sum == 0.
55     }
56 
57     /// Return the sum of the weights.
58     ///
59     /// Returns 0 for an empty sample.
60     #[inline]
sum_weights(&self) -> f6461     pub fn sum_weights(&self) -> f64 {
62         self.weight_sum
63     }
64 
65     /// Estimate the weighted mean of the population.
66     ///
67     /// Returns 0 for an empty sample.
68     #[inline]
mean(&self) -> f6469     pub fn mean(&self) -> f64 {
70         self.weighted_avg
71     }
72 }
73 
74 impl core::default::Default for WeightedMean {
default() -> WeightedMean75     fn default() -> WeightedMean {
76         WeightedMean::new()
77     }
78 }
79 
80 impl core::iter::FromIterator<(f64, f64)> for WeightedMean {
from_iter<T>(iter: T) -> WeightedMean where T: IntoIterator<Item=(f64, f64)>81     fn from_iter<T>(iter: T) -> WeightedMean
82         where T: IntoIterator<Item=(f64, f64)>
83     {
84         let mut a = WeightedMean::new();
85         for (i, w) in iter {
86             a.add(i, w);
87         }
88         a
89     }
90 }
91 
92 impl<'a> core::iter::FromIterator<&'a (f64, f64)> for WeightedMean {
from_iter<T>(iter: T) -> WeightedMean where T: IntoIterator<Item=&'a (f64, f64)>93     fn from_iter<T>(iter: T) -> WeightedMean
94         where T: IntoIterator<Item=&'a (f64, f64)>
95     {
96         let mut a = WeightedMean::new();
97         for &(i, w) in iter {
98             a.add(i, w);
99         }
100         a
101     }
102 }
103 
104 impl Merge for WeightedMean {
105     /// Merge another sample into this one.
106     ///
107     ///
108     /// ## Example
109     ///
110     /// ```
111     /// use average::{WeightedMean, Merge};
112     ///
113     /// let weighted_sequence: &[(f64, f64)] = &[
114     ///     (1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5),
115     ///     (6., 0.6), (7., 0.7), (8., 0.8), (9., 0.9)];
116     /// let (left, right) = weighted_sequence.split_at(3);
117     /// let avg_total: WeightedMean = weighted_sequence.iter().collect();
118     /// let mut avg_left: WeightedMean = left.iter().collect();
119     /// let avg_right: WeightedMean = right.iter().collect();
120     /// avg_left.merge(&avg_right);
121     /// assert!((avg_total.mean() - avg_left.mean()).abs() < 1e-15);
122     /// ```
123     #[inline]
merge(&mut self, other: &WeightedMean)124     fn merge(&mut self, other: &WeightedMean) {
125         let total_weight_sum = self.weight_sum + other.weight_sum;
126         self.weighted_avg = (self.weight_sum * self.weighted_avg
127                              + other.weight_sum * other.weighted_avg)
128                             / total_weight_sum;
129         self.weight_sum = total_weight_sum;
130     }
131 }
132 
133 /// Estimate the weighted and unweighted arithmetic mean and the unweighted
134 /// variance of a sequence of numbers ("population").
135 ///
136 /// This can be used to estimate the standard error of the weighted mean.
137 ///
138 ///
139 /// ## Example
140 ///
141 /// ```
142 /// use average::WeightedMeanWithError;
143 ///
144 /// let a: WeightedMeanWithError = (1..6).zip(1..6)
145 ///     .map(|(x, w)| (f64::from(x), f64::from(w))).collect();
146 /// println!("The weighted mean is {} ± {}.", a.weighted_mean(), a.error());
147 /// ```
148 #[derive(Debug, Clone)]
149 #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
150 pub struct WeightedMeanWithError {
151     /// Sum of the squares of the weights.
152     weight_sum_sq: f64,
153     /// Estimator of the weighted mean.
154     weighted_avg: WeightedMean,
155     /// Estimator of unweighted mean and its variance.
156     unweighted_avg: MeanWithError,
157 }
158 
159 impl WeightedMeanWithError {
160     /// Create a new weighted and unweighted mean estimator.
161     #[inline]
new() -> WeightedMeanWithError162     pub fn new() -> WeightedMeanWithError {
163         WeightedMeanWithError {
164             weight_sum_sq: 0.,
165             weighted_avg: WeightedMean::new(),
166             unweighted_avg: MeanWithError::new(),
167         }
168     }
169 
170     /// Add an observation sampled from the population.
171     #[inline]
add(&mut self, sample: f64, weight: f64)172     pub fn add(&mut self, sample: f64, weight: f64) {
173         // The algorithm for the unweighted mean was suggested by Welford in 1962.
174         // The algorithm for the weighted mean was suggested by West in 1979.
175         //
176         // See
177         // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
178         // and
179         // http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
180         self.weight_sum_sq += weight*weight;
181         self.weighted_avg.add(sample, weight);
182         self.unweighted_avg.add(sample);
183     }
184 
185     /// Determine whether the sample is empty.
186     #[inline]
is_empty(&self) -> bool187     pub fn is_empty(&self) -> bool {
188         self.unweighted_avg.is_empty()
189     }
190 
191     /// Return the sum of the weights.
192     ///
193     /// Returns 0 for an empty sample.
194     #[inline]
sum_weights(&self) -> f64195     pub fn sum_weights(&self) -> f64 {
196         self.weighted_avg.sum_weights()
197     }
198 
199     /// Return the sum of the squared weights.
200     ///
201     /// Returns 0 for an empty sample.
202     #[inline]
sum_weights_sq(&self) -> f64203     pub fn sum_weights_sq(&self) -> f64 {
204         self.weight_sum_sq
205     }
206 
207     /// Estimate the weighted mean of the population.
208     ///
209     /// Returns 0 for an empty sample.
210     #[inline]
weighted_mean(&self) -> f64211     pub fn weighted_mean(&self) -> f64 {
212         self.weighted_avg.mean()
213     }
214 
215     /// Estimate the unweighted mean of the population.
216     ///
217     /// Returns 0 for an empty sample.
218     #[inline]
unweighted_mean(&self) -> f64219     pub fn unweighted_mean(&self) -> f64 {
220         self.unweighted_avg.mean()
221     }
222 
223     /// Return the sample size.
224     #[inline]
len(&self) -> u64225     pub fn len(&self) -> u64 {
226         self.unweighted_avg.len()
227     }
228 
229     /// Calculate the effective sample size.
230     #[inline]
effective_len(&self) -> f64231     pub fn effective_len(&self) -> f64 {
232         if self.is_empty() {
233             return 0.
234         }
235         let weight_sum = self.weighted_avg.sum_weights();
236         weight_sum * weight_sum / self.weight_sum_sq
237     }
238 
239     /// Calculate the *unweighted* population variance of the sample.
240     ///
241     /// This is a biased estimator of the variance of the population.
242     #[inline]
population_variance(&self) -> f64243     pub fn population_variance(&self) -> f64 {
244         self.unweighted_avg.population_variance()
245     }
246 
247     /// Calculate the *unweighted* sample variance.
248     ///
249     /// This is an unbiased estimator of the variance of the population.
250     #[inline]
sample_variance(&self) -> f64251     pub fn sample_variance(&self) -> f64 {
252         self.unweighted_avg.sample_variance()
253     }
254 
255     /// Estimate the standard error of the *weighted* mean of the population.
256     ///
257     /// Returns 0 if the sum of weights is 0.
258     ///
259     /// This unbiased estimator assumes that the samples were independently
260     /// drawn from the same population with constant variance.
261     #[inline]
variance_of_weighted_mean(&self) -> f64262     pub fn variance_of_weighted_mean(&self) -> f64 {
263         // This uses the same estimate as WinCross, which should provide better
264         // results than the ones used by SPSS or Mentor.
265         //
266         // See http://www.analyticalgroup.com/download/WEIGHTED_VARIANCE.pdf.
267 
268         let weight_sum = self.weighted_avg.sum_weights();
269         if weight_sum == 0. {
270             return 0.;
271         }
272         let inv_effective_len = self.weight_sum_sq / (weight_sum * weight_sum);
273         self.sample_variance() * inv_effective_len
274     }
275 
276     /// Estimate the standard error of the *weighted* mean of the population.
277     ///
278     /// Returns 0 if the sum of weights is 0.
279     ///
280     /// This unbiased estimator assumes that the samples were independently
281     /// drawn from the same population with constant variance.
282     #[cfg(any(feature = "std", feature = "libm"))]
283     #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))]
284     #[inline]
error(&self) -> f64285     pub fn error(&self) -> f64 {
286         num_traits::Float::sqrt(self.variance_of_weighted_mean())
287     }
288 }
289 
290 impl Merge for WeightedMeanWithError {
291     /// Merge another sample into this one.
292     ///
293     ///
294     /// ## Example
295     ///
296     /// ```
297     /// use average::{WeightedMeanWithError, Merge};
298     ///
299     /// let weighted_sequence: &[(f64, f64)] = &[
300     ///     (1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5),
301     ///     (6., 0.6), (7., 0.7), (8., 0.8), (9., 0.9)];
302     /// let (left, right) = weighted_sequence.split_at(3);
303     /// let avg_total: WeightedMeanWithError = weighted_sequence.iter().collect();
304     /// let mut avg_left: WeightedMeanWithError = left.iter().collect();
305     /// let avg_right: WeightedMeanWithError = right.iter().collect();
306     /// avg_left.merge(&avg_right);
307     /// assert!((avg_total.weighted_mean() - avg_left.weighted_mean()).abs() < 1e-15);
308     /// assert!((avg_total.error() - avg_left.error()).abs() < 1e-15);
309     /// ```
310     #[inline]
merge(&mut self, other: &WeightedMeanWithError)311     fn merge(&mut self, other: &WeightedMeanWithError) {
312         self.weight_sum_sq += other.weight_sum_sq;
313         self.weighted_avg.merge(&other.weighted_avg);
314         self.unweighted_avg.merge(&other.unweighted_avg);
315     }
316 }
317 
318 impl core::default::Default for WeightedMeanWithError {
default() -> WeightedMeanWithError319     fn default() -> WeightedMeanWithError {
320         WeightedMeanWithError::new()
321     }
322 }
323 
324 impl core::iter::FromIterator<(f64, f64)> for WeightedMeanWithError {
from_iter<T>(iter: T) -> WeightedMeanWithError where T: IntoIterator<Item=(f64, f64)>325     fn from_iter<T>(iter: T) -> WeightedMeanWithError
326         where T: IntoIterator<Item=(f64, f64)>
327     {
328         let mut a = WeightedMeanWithError::new();
329         for (i, w) in iter {
330             a.add(i, w);
331         }
332         a
333     }
334 }
335 
336 impl<'a> core::iter::FromIterator<&'a (f64, f64)> for WeightedMeanWithError {
from_iter<T>(iter: T) -> WeightedMeanWithError where T: IntoIterator<Item=&'a (f64, f64)>337     fn from_iter<T>(iter: T) -> WeightedMeanWithError
338         where T: IntoIterator<Item=&'a (f64, f64)>
339     {
340         let mut a = WeightedMeanWithError::new();
341         for &(i, w) in iter {
342             a.add(i, w);
343         }
344         a
345     }
346 }
347