1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.math;
22 
23 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
24 
25 /**
26  * Compute the mean using a numerically stable online algorithm.
27  * <p>
28  * This class can repeatedly be fed with data using the put() methods, the
29  * resulting values for mean can be queried at any time using getMean().
30  * <p>
31  * The high-precision function is based on:
32  * <p>
33  * P. M. Neely<br>
34  * Comparison of Several Algorithms for Computation of Means, Standard
35  * Deviations and Correlation Coefficients<br>
36  * Communications of the ACM 9(7), 1966
37  *
38  * @author Erich Schubert
39  * @since 0.2
40  */
41 public class Mean {
42   /**
43    * Sum of all values.
44    */
45   protected double sum;
46 
47   /**
48    * Weight sum (number of samples).
49    */
50   protected double n;
51 
52   /**
53    * Empty constructor
54    */
Mean()55   public Mean() {
56     sum = 0.;
57     n = 0;
58   }
59 
60   /**
61    * Constructor from other instance
62    *
63    * @param other other instance to copy data from.
64    */
Mean(Mean other)65   public Mean(Mean other) {
66     this.sum = other.sum;
67     this.n = other.n;
68   }
69 
70   /**
71    * Add a single value with weight 1.0
72    *
73    * @param val Value
74    */
put(double val)75   public void put(double val) {
76     n += 1.;
77     sum += val;
78   }
79 
80   /**
81    * Add data with a given weight.
82    *
83    * @param val data
84    * @param weight weight
85    */
put(double val, double weight)86   public void put(double val, double weight) {
87     if(weight == 0.) {
88       return;
89     }
90     sum += val * weight;
91     n += weight;
92   }
93 
94   /**
95    * Join the data of another MeanVariance instance.
96    *
97    * @param other Data to join with
98    */
put(Mean other)99   public void put(Mean other) {
100     if(other.n == 0) {
101       return;
102     }
103     this.sum += other.sum;
104     this.n = other.n + this.n;
105   }
106 
107   /**
108    * Add values with weight 1.0
109    *
110    * @param vals Values
111    * @return this
112    */
put(double[] vals)113   public Mean put(double[] vals) {
114     for(double v : vals) {
115       put(v);
116     }
117     return this;
118   }
119 
120   /**
121    * Add values with weight 1.0
122    *
123    * @param vals Values
124    * @return this
125    */
put(double[] vals, double[] weights)126   public Mean put(double[] vals, double[] weights) {
127     assert (vals.length == weights.length);
128     for(int i = 0, end = vals.length; i < end; i++) {
129       put(vals[i], weights[i]);
130     }
131     return this;
132   }
133 
134   /**
135    * Get the number of points the average is based on.
136    *
137    * @return number of data points
138    */
getCount()139   public double getCount() {
140     return n;
141   }
142 
143   /**
144    * Return mean
145    *
146    * @return mean
147    */
getMean()148   public double getMean() {
149     return sum / n;
150   }
151 
152   /**
153    * Create and initialize a new array of MeanVariance
154    *
155    * @param dimensionality Dimensionality
156    * @return New and initialized Array
157    */
newArray(int dimensionality)158   public static Mean[] newArray(int dimensionality) {
159     Mean[] arr = new Mean[dimensionality];
160     for(int i = 0; i < dimensionality; i++) {
161       arr[i] = new Mean();
162     }
163     return arr;
164   }
165 
166   @Override
toString()167   public String toString() {
168     return "Mean(" + getMean() + ",weight=" + getCount() + ")";
169   }
170 
171   /**
172    * Reset the value.
173    */
reset()174   public void reset() {
175     sum = 0;
176     n = 0;
177   }
178 
179   /**
180    * Static helper function.
181    *
182    * @param data Data to compute the mean for.
183    * @return Mean
184    */
of(double... data)185   public static double of(double... data) {
186     double sum = 0.;
187     for(double v : data) {
188       sum += v;
189     }
190     return sum / data.length;
191   }
192 
193   /**
194    * Static helper function, with extra precision
195    *
196    * @param data Data to compute the mean for.
197    * @return Mean
198    */
199   @Reference(authors = "P. M. Neely", //
200       title = "Comparison of Several Algorithms for Computation of Means, Standard Deviations and Correlation Coefficients", //
201       booktitle = "Communications of the ACM 9(7), 1966", //
202       url = "https://doi.org/10.1145/365719.365958", //
203       bibkey = "doi:10.1145/365719.365958")
highPrecision(double... data)204   public static double highPrecision(double... data) {
205     double sum = 0.;
206     for(double v : data) {
207       sum += v;
208     }
209     sum /= data.length;
210     // Perform a second pass to increase precision
211     // In ideal math, this would sum to 0.
212     double err = 0;
213     for(double v : data) {
214       err += v - sum;
215     }
216     return sum + err / data.length;
217   }
218 }
219