1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.math; 22 23 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; 24 25 /** 26 * Compute the mean using a numerically stable online algorithm. 27 * <p> 28 * This class can repeatedly be fed with data using the put() methods, the 29 * resulting values for mean can be queried at any time using getMean(). 30 * <p> 31 * The high-precision function is based on: 32 * <p> 33 * P. M. Neely<br> 34 * Comparison of Several Algorithms for Computation of Means, Standard 35 * Deviations and Correlation Coefficients<br> 36 * Communications of the ACM 9(7), 1966 37 * 38 * @author Erich Schubert 39 * @since 0.2 40 */ 41 public class Mean { 42 /** 43 * Sum of all values. 44 */ 45 protected double sum; 46 47 /** 48 * Weight sum (number of samples). 49 */ 50 protected double n; 51 52 /** 53 * Empty constructor 54 */ Mean()55 public Mean() { 56 sum = 0.; 57 n = 0; 58 } 59 60 /** 61 * Constructor from other instance 62 * 63 * @param other other instance to copy data from. 64 */ Mean(Mean other)65 public Mean(Mean other) { 66 this.sum = other.sum; 67 this.n = other.n; 68 } 69 70 /** 71 * Add a single value with weight 1.0 72 * 73 * @param val Value 74 */ put(double val)75 public void put(double val) { 76 n += 1.; 77 sum += val; 78 } 79 80 /** 81 * Add data with a given weight. 82 * 83 * @param val data 84 * @param weight weight 85 */ put(double val, double weight)86 public void put(double val, double weight) { 87 if(weight == 0.) { 88 return; 89 } 90 sum += val * weight; 91 n += weight; 92 } 93 94 /** 95 * Join the data of another MeanVariance instance. 96 * 97 * @param other Data to join with 98 */ put(Mean other)99 public void put(Mean other) { 100 if(other.n == 0) { 101 return; 102 } 103 this.sum += other.sum; 104 this.n = other.n + this.n; 105 } 106 107 /** 108 * Add values with weight 1.0 109 * 110 * @param vals Values 111 * @return this 112 */ put(double[] vals)113 public Mean put(double[] vals) { 114 for(double v : vals) { 115 put(v); 116 } 117 return this; 118 } 119 120 /** 121 * Add values with weight 1.0 122 * 123 * @param vals Values 124 * @return this 125 */ put(double[] vals, double[] weights)126 public Mean put(double[] vals, double[] weights) { 127 assert (vals.length == weights.length); 128 for(int i = 0, end = vals.length; i < end; i++) { 129 put(vals[i], weights[i]); 130 } 131 return this; 132 } 133 134 /** 135 * Get the number of points the average is based on. 136 * 137 * @return number of data points 138 */ getCount()139 public double getCount() { 140 return n; 141 } 142 143 /** 144 * Return mean 145 * 146 * @return mean 147 */ getMean()148 public double getMean() { 149 return sum / n; 150 } 151 152 /** 153 * Create and initialize a new array of MeanVariance 154 * 155 * @param dimensionality Dimensionality 156 * @return New and initialized Array 157 */ newArray(int dimensionality)158 public static Mean[] newArray(int dimensionality) { 159 Mean[] arr = new Mean[dimensionality]; 160 for(int i = 0; i < dimensionality; i++) { 161 arr[i] = new Mean(); 162 } 163 return arr; 164 } 165 166 @Override toString()167 public String toString() { 168 return "Mean(" + getMean() + ",weight=" + getCount() + ")"; 169 } 170 171 /** 172 * Reset the value. 173 */ reset()174 public void reset() { 175 sum = 0; 176 n = 0; 177 } 178 179 /** 180 * Static helper function. 181 * 182 * @param data Data to compute the mean for. 183 * @return Mean 184 */ of(double... data)185 public static double of(double... data) { 186 double sum = 0.; 187 for(double v : data) { 188 sum += v; 189 } 190 return sum / data.length; 191 } 192 193 /** 194 * Static helper function, with extra precision 195 * 196 * @param data Data to compute the mean for. 197 * @return Mean 198 */ 199 @Reference(authors = "P. M. Neely", // 200 title = "Comparison of Several Algorithms for Computation of Means, Standard Deviations and Correlation Coefficients", // 201 booktitle = "Communications of the ACM 9(7), 1966", // 202 url = "https://doi.org/10.1145/365719.365958", // 203 bibkey = "doi:10.1145/365719.365958") highPrecision(double... data)204 public static double highPrecision(double... data) { 205 double sum = 0.; 206 for(double v : data) { 207 sum += v; 208 } 209 sum /= data.length; 210 // Perform a second pass to increase precision 211 // In ideal math, this would sum to 0. 212 double err = 0; 213 for(double v : data) { 214 err += v - sum; 215 } 216 return sum + err / data.length; 217 } 218 } 219