1 #[cfg(feature = "serde1")] use serde::{Serialize, Deserialize}; 2 use super::{MeanWithError, Estimate, Merge}; 3 4 /// Estimate the weighted and unweighted arithmetic mean of a sequence of 5 /// numbers ("population"). 6 /// 7 /// 8 /// ## Example 9 /// 10 /// ``` 11 /// use average::WeightedMean; 12 /// 13 /// let a: WeightedMean = (1..6).zip(1..6) 14 /// .map(|(x, w)| (f64::from(x), f64::from(w))).collect(); 15 /// println!("The weighted mean is {}.", a.mean()); 16 /// ``` 17 #[derive(Debug, Clone)] 18 #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] 19 pub struct WeightedMean { 20 /// Sum of the weights. 21 weight_sum: f64, 22 /// Weighted mean value. 23 weighted_avg: f64, 24 } 25 26 impl WeightedMean { 27 /// Create a new weighted and unweighted mean estimator. new() -> WeightedMean28 pub fn new() -> WeightedMean { 29 WeightedMean { 30 weight_sum: 0., weighted_avg: 0., 31 } 32 } 33 34 /// Add an observation sampled from the population. 35 #[inline] add(&mut self, sample: f64, weight: f64)36 pub fn add(&mut self, sample: f64, weight: f64) { 37 // The algorithm for the unweighted mean was suggested by Welford in 1962. 38 // 39 // See 40 // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance 41 // and 42 // http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf. 43 self.weight_sum += weight; 44 45 let prev_avg = self.weighted_avg; 46 self.weighted_avg = prev_avg + (weight / self.weight_sum) * (sample - prev_avg); 47 } 48 49 /// Determine whether the sample is empty. 50 /// 51 /// Might be a false positive if the sum of weights is zero. 52 #[inline] is_empty(&self) -> bool53 pub fn is_empty(&self) -> bool { 54 self.weight_sum == 0. 55 } 56 57 /// Return the sum of the weights. 58 /// 59 /// Returns 0 for an empty sample. 60 #[inline] sum_weights(&self) -> f6461 pub fn sum_weights(&self) -> f64 { 62 self.weight_sum 63 } 64 65 /// Estimate the weighted mean of the population. 66 /// 67 /// Returns 0 for an empty sample. 68 #[inline] mean(&self) -> f6469 pub fn mean(&self) -> f64 { 70 self.weighted_avg 71 } 72 } 73 74 impl core::default::Default for WeightedMean { default() -> WeightedMean75 fn default() -> WeightedMean { 76 WeightedMean::new() 77 } 78 } 79 80 impl core::iter::FromIterator<(f64, f64)> for WeightedMean { from_iter<T>(iter: T) -> WeightedMean where T: IntoIterator<Item=(f64, f64)>81 fn from_iter<T>(iter: T) -> WeightedMean 82 where T: IntoIterator<Item=(f64, f64)> 83 { 84 let mut a = WeightedMean::new(); 85 for (i, w) in iter { 86 a.add(i, w); 87 } 88 a 89 } 90 } 91 92 impl<'a> core::iter::FromIterator<&'a (f64, f64)> for WeightedMean { from_iter<T>(iter: T) -> WeightedMean where T: IntoIterator<Item=&'a (f64, f64)>93 fn from_iter<T>(iter: T) -> WeightedMean 94 where T: IntoIterator<Item=&'a (f64, f64)> 95 { 96 let mut a = WeightedMean::new(); 97 for &(i, w) in iter { 98 a.add(i, w); 99 } 100 a 101 } 102 } 103 104 impl Merge for WeightedMean { 105 /// Merge another sample into this one. 106 /// 107 /// 108 /// ## Example 109 /// 110 /// ``` 111 /// use average::{WeightedMean, Merge}; 112 /// 113 /// let weighted_sequence: &[(f64, f64)] = &[ 114 /// (1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5), 115 /// (6., 0.6), (7., 0.7), (8., 0.8), (9., 0.9)]; 116 /// let (left, right) = weighted_sequence.split_at(3); 117 /// let avg_total: WeightedMean = weighted_sequence.iter().collect(); 118 /// let mut avg_left: WeightedMean = left.iter().collect(); 119 /// let avg_right: WeightedMean = right.iter().collect(); 120 /// avg_left.merge(&avg_right); 121 /// assert!((avg_total.mean() - avg_left.mean()).abs() < 1e-15); 122 /// ``` 123 #[inline] merge(&mut self, other: &WeightedMean)124 fn merge(&mut self, other: &WeightedMean) { 125 let total_weight_sum = self.weight_sum + other.weight_sum; 126 self.weighted_avg = (self.weight_sum * self.weighted_avg 127 + other.weight_sum * other.weighted_avg) 128 / total_weight_sum; 129 self.weight_sum = total_weight_sum; 130 } 131 } 132 133 /// Estimate the weighted and unweighted arithmetic mean and the unweighted 134 /// variance of a sequence of numbers ("population"). 135 /// 136 /// This can be used to estimate the standard error of the weighted mean. 137 /// 138 /// 139 /// ## Example 140 /// 141 /// ``` 142 /// use average::WeightedMeanWithError; 143 /// 144 /// let a: WeightedMeanWithError = (1..6).zip(1..6) 145 /// .map(|(x, w)| (f64::from(x), f64::from(w))).collect(); 146 /// println!("The weighted mean is {} ± {}.", a.weighted_mean(), a.error()); 147 /// ``` 148 #[derive(Debug, Clone)] 149 #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] 150 pub struct WeightedMeanWithError { 151 /// Sum of the squares of the weights. 152 weight_sum_sq: f64, 153 /// Estimator of the weighted mean. 154 weighted_avg: WeightedMean, 155 /// Estimator of unweighted mean and its variance. 156 unweighted_avg: MeanWithError, 157 } 158 159 impl WeightedMeanWithError { 160 /// Create a new weighted and unweighted mean estimator. 161 #[inline] new() -> WeightedMeanWithError162 pub fn new() -> WeightedMeanWithError { 163 WeightedMeanWithError { 164 weight_sum_sq: 0., 165 weighted_avg: WeightedMean::new(), 166 unweighted_avg: MeanWithError::new(), 167 } 168 } 169 170 /// Add an observation sampled from the population. 171 #[inline] add(&mut self, sample: f64, weight: f64)172 pub fn add(&mut self, sample: f64, weight: f64) { 173 // The algorithm for the unweighted mean was suggested by Welford in 1962. 174 // The algorithm for the weighted mean was suggested by West in 1979. 175 // 176 // See 177 // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance 178 // and 179 // http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf. 180 self.weight_sum_sq += weight*weight; 181 self.weighted_avg.add(sample, weight); 182 self.unweighted_avg.add(sample); 183 } 184 185 /// Determine whether the sample is empty. 186 #[inline] is_empty(&self) -> bool187 pub fn is_empty(&self) -> bool { 188 self.unweighted_avg.is_empty() 189 } 190 191 /// Return the sum of the weights. 192 /// 193 /// Returns 0 for an empty sample. 194 #[inline] sum_weights(&self) -> f64195 pub fn sum_weights(&self) -> f64 { 196 self.weighted_avg.sum_weights() 197 } 198 199 /// Return the sum of the squared weights. 200 /// 201 /// Returns 0 for an empty sample. 202 #[inline] sum_weights_sq(&self) -> f64203 pub fn sum_weights_sq(&self) -> f64 { 204 self.weight_sum_sq 205 } 206 207 /// Estimate the weighted mean of the population. 208 /// 209 /// Returns 0 for an empty sample. 210 #[inline] weighted_mean(&self) -> f64211 pub fn weighted_mean(&self) -> f64 { 212 self.weighted_avg.mean() 213 } 214 215 /// Estimate the unweighted mean of the population. 216 /// 217 /// Returns 0 for an empty sample. 218 #[inline] unweighted_mean(&self) -> f64219 pub fn unweighted_mean(&self) -> f64 { 220 self.unweighted_avg.mean() 221 } 222 223 /// Return the sample size. 224 #[inline] len(&self) -> u64225 pub fn len(&self) -> u64 { 226 self.unweighted_avg.len() 227 } 228 229 /// Calculate the effective sample size. 230 #[inline] effective_len(&self) -> f64231 pub fn effective_len(&self) -> f64 { 232 if self.is_empty() { 233 return 0. 234 } 235 let weight_sum = self.weighted_avg.sum_weights(); 236 weight_sum * weight_sum / self.weight_sum_sq 237 } 238 239 /// Calculate the *unweighted* population variance of the sample. 240 /// 241 /// This is a biased estimator of the variance of the population. 242 #[inline] population_variance(&self) -> f64243 pub fn population_variance(&self) -> f64 { 244 self.unweighted_avg.population_variance() 245 } 246 247 /// Calculate the *unweighted* sample variance. 248 /// 249 /// This is an unbiased estimator of the variance of the population. 250 #[inline] sample_variance(&self) -> f64251 pub fn sample_variance(&self) -> f64 { 252 self.unweighted_avg.sample_variance() 253 } 254 255 /// Estimate the standard error of the *weighted* mean of the population. 256 /// 257 /// Returns 0 if the sum of weights is 0. 258 /// 259 /// This unbiased estimator assumes that the samples were independently 260 /// drawn from the same population with constant variance. 261 #[inline] variance_of_weighted_mean(&self) -> f64262 pub fn variance_of_weighted_mean(&self) -> f64 { 263 // This uses the same estimate as WinCross, which should provide better 264 // results than the ones used by SPSS or Mentor. 265 // 266 // See http://www.analyticalgroup.com/download/WEIGHTED_VARIANCE.pdf. 267 268 let weight_sum = self.weighted_avg.sum_weights(); 269 if weight_sum == 0. { 270 return 0.; 271 } 272 let inv_effective_len = self.weight_sum_sq / (weight_sum * weight_sum); 273 self.sample_variance() * inv_effective_len 274 } 275 276 /// Estimate the standard error of the *weighted* mean of the population. 277 /// 278 /// Returns 0 if the sum of weights is 0. 279 /// 280 /// This unbiased estimator assumes that the samples were independently 281 /// drawn from the same population with constant variance. 282 #[cfg(any(feature = "std", feature = "libm"))] 283 #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))] 284 #[inline] error(&self) -> f64285 pub fn error(&self) -> f64 { 286 num_traits::Float::sqrt(self.variance_of_weighted_mean()) 287 } 288 } 289 290 impl Merge for WeightedMeanWithError { 291 /// Merge another sample into this one. 292 /// 293 /// 294 /// ## Example 295 /// 296 /// ``` 297 /// use average::{WeightedMeanWithError, Merge}; 298 /// 299 /// let weighted_sequence: &[(f64, f64)] = &[ 300 /// (1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5), 301 /// (6., 0.6), (7., 0.7), (8., 0.8), (9., 0.9)]; 302 /// let (left, right) = weighted_sequence.split_at(3); 303 /// let avg_total: WeightedMeanWithError = weighted_sequence.iter().collect(); 304 /// let mut avg_left: WeightedMeanWithError = left.iter().collect(); 305 /// let avg_right: WeightedMeanWithError = right.iter().collect(); 306 /// avg_left.merge(&avg_right); 307 /// assert!((avg_total.weighted_mean() - avg_left.weighted_mean()).abs() < 1e-15); 308 /// assert!((avg_total.error() - avg_left.error()).abs() < 1e-15); 309 /// ``` 310 #[inline] merge(&mut self, other: &WeightedMeanWithError)311 fn merge(&mut self, other: &WeightedMeanWithError) { 312 self.weight_sum_sq += other.weight_sum_sq; 313 self.weighted_avg.merge(&other.weighted_avg); 314 self.unweighted_avg.merge(&other.unweighted_avg); 315 } 316 } 317 318 impl core::default::Default for WeightedMeanWithError { default() -> WeightedMeanWithError319 fn default() -> WeightedMeanWithError { 320 WeightedMeanWithError::new() 321 } 322 } 323 324 impl core::iter::FromIterator<(f64, f64)> for WeightedMeanWithError { from_iter<T>(iter: T) -> WeightedMeanWithError where T: IntoIterator<Item=(f64, f64)>325 fn from_iter<T>(iter: T) -> WeightedMeanWithError 326 where T: IntoIterator<Item=(f64, f64)> 327 { 328 let mut a = WeightedMeanWithError::new(); 329 for (i, w) in iter { 330 a.add(i, w); 331 } 332 a 333 } 334 } 335 336 impl<'a> core::iter::FromIterator<&'a (f64, f64)> for WeightedMeanWithError { from_iter<T>(iter: T) -> WeightedMeanWithError where T: IntoIterator<Item=&'a (f64, f64)>337 fn from_iter<T>(iter: T) -> WeightedMeanWithError 338 where T: IntoIterator<Item=&'a (f64, f64)> 339 { 340 let mut a = WeightedMeanWithError::new(); 341 for &(i, w) in iter { 342 a.add(i, w); 343 } 344 a 345 } 346 } 347