<?php
/* Copyright (c) 1998-2013 ILIAS open source, Extended GPL, see docs/LICENSE */

/**
 * This class provides mathematical functions for statistics.
 * It works on an array of numeric values.
 *
 * @author Helmut Schottmüller <hschottm@tzi.de>
 * @version $Id$
 */

/**
 * Constants for the handling of elements which are not a number.
 * Guarded with defined() so that loading this file twice does not raise a
 * "constant already defined" warning.
 */
if (!defined("NAN_HANDLING_REMOVE")) {
    define("NAN_HANDLING_REMOVE", 0);
}
if (!defined("NAN_HANDLING_ZERO")) {
    define("NAN_HANDLING_ZERO", 1);
}

class ilStatistics
{
    /**
     * Handling of values which are not a number (NAN)
     *
     * If set to NAN_HANDLING_REMOVE, all elements which are not a number will be removed,
     * if set to NAN_HANDLING_ZERO, all elements which are not a number will be set to zero.
     *
     * @var integer
     */
    public $nan_handling;

    /**
     * Array containing the data. After setData() it is a sorted (ascending),
     * zero-based list.
     *
     * @var array
     */
    public $stat_data;

    /**
     * Constructor of ilStatistics class
     *
     * @param integer $nan_handling A constant defining the handling of elements which are not a number
     * @access public
     */
    public function __construct($nan_handling = NAN_HANDLING_REMOVE)
    {
        $this->nan_handling = $nan_handling;
        $this->stat_data = array();
    }

    /**
     * Set the handling of elements which are not a number
     *
     * If set to NAN_HANDLING_REMOVE, all elements which are not a number will be removed,
     * if set to NAN_HANDLING_ZERO, all elements which are not a number will be set to zero.
     * The new setting only takes effect on the next validate()/setData() call.
     *
     * @param integer $nan_handling A constant defining the handling of elements which are not a number
     * @access public
     */
    public function setNANHandling($nan_handling = NAN_HANDLING_REMOVE)
    {
        $this->nan_handling = $nan_handling;
    }

    /**
     * Get the handling of elements which are not a number
     *
     * Returns NAN_HANDLING_REMOVE if all elements which are not a number will be removed.
     * Returns NAN_HANDLING_ZERO if all elements which are not a number will be set to zero.
     *
     * @return integer A constant defining the handling of elements which are not a number
     * @access public
     */
    public function getNANHandling()
    {
        return $this->nan_handling;
    }

    /**
     * Sets the data and checks for invalid values
     *
     * The keys of the given array are discarded; the values are validated
     * and sorted by validate().
     *
     * @param array $stat_data An array containing the numeric data
     * @access public
     */
    public function setData($stat_data)
    {
        $this->stat_data = array_values($stat_data);
        $this->validate();
    }

    /**
     * Returns the numeric value array containing the data
     *
     * @return array An array containing the sorted numeric data
     * @access public
     */
    public function getData()
    {
        return $this->stat_data;
    }

    /**
     * Calculates the minimum value
     *
     * @return mixed The minimum value or false, if no minimum exists
     * @see max()
     * @access public
     */
    public function min()
    {
        if (count($this->stat_data) === 0) {
            return false;
        }
        return min($this->stat_data);
    }

    /**
     * Calculates the maximum value
     *
     * @return mixed The maximum value or false, if no maximum exists
     * @see min()
     * @access public
     */
    public function max()
    {
        if (count($this->stat_data) === 0) {
            return false;
        }
        return max($this->stat_data);
    }

    /**
     * Calculates number of data values
     *
     * @return integer The number of data values
     * @access public
     */
    public function count()
    {
        return count($this->stat_data);
    }

    /**
     * Calculates the sum of x_1^n + x_2^n + ... + x_i^n
     *
     * @param numeric $n The exponent
     * @return mixed The sum of x_1^n + x_2^n + ... + x_i^n or false, if no values exist
     * @access public
     */
    public function sum_n($n)
    {
        if (count($this->stat_data) === 0) {
            return false;
        }
        $sum_n = 0;
        foreach ($this->stat_data as $value) {
            $sum_n += pow((float) $value, (float) $n);
        }
        return $sum_n;
    }

    /**
     * Calculates the sum of x_1 + x_2 + ... + x_i
     *
     * @return mixed The sum of x_1 + x_2 + ... + x_i or false, if no values exist
     * @access public
     */
    public function sum()
    {
        return $this->sum_n(1);
    }

    /**
     * Calculates the sum of x_1^2 + x_2^2 + ... + x_i^2
     *
     * @return mixed The sum of x_1^2 + x_2^2 + ... + x_i^2 or false, if no values exist
     * @access public
     */
    public function sum2()
    {
        return $this->sum_n(2);
    }

    /**
     * Calculates the product of x_1^n * x_2^n * ... * x_i^n
     *
     * @param numeric $n The exponent
     * @return mixed The product of x_1^n * x_2^n * ... * x_i^n or false, if no values exist
     * @access public
     */
    public function product_n($n)
    {
        if (count($this->stat_data) === 0) {
            return false;
        }
        // Short-circuit kept from the original implementation: it only fires
        // when the smallest value is the integer 0 (not 0.0 or "0") and then
        // yields 0.0 regardless of the exponent.
        if ($this->min() === 0) {
            return 0.0;
        }
        $prod_n = 1.0;
        foreach ($this->stat_data as $value) {
            $prod_n *= pow((float) $value, (float) $n);
        }
        return $prod_n;
    }

    /**
     * Calculates the product of x_1 * x_2 * ... * x_i
     *
     * @param mixed $n Unused. Kept (now optional) for backward compatibility
     *                 with callers that pass an argument; the exponent is always 1.
     * @return mixed The product of x_1 * x_2 * ... * x_i or false, if no values exist
     * @access public
     */
    public function product($n = null)
    {
        return $this->product_n(1);
    }

    /**
     * Arithmetic mean of the data values
     * xbar = (1/n)*∑x_i
     *
     * @return mixed The arithmetic mean or false, if there is an error or no values
     * @access public
     */
    public function arithmetic_mean()
    {
        $sum = $this->sum();
        if ($sum === false) {
            return false;
        }
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        return (float) ($sum / $count);
    }

    /**
     * Geometric mean of the data values
     * geometric_mean = (x_1 * x_2 * ... * x_n)^(1/n)
     *
     * The geometric mean of a set of positive data is defined as the product of all
     * the members of the set, raised to a power equal to the reciprocal of the number
     * of members.
     *
     * @return mixed The geometric mean or false, if there is an error (no values,
     *               or a product that is zero or negative)
     * @access public
     */
    public function geometric_mean()
    {
        $prod = $this->product();
        // product() returns a float, so the zero check must not be a strict
        // comparison against the integer 0. A negative product would make
        // pow() with a fractional exponent return NAN, so it is rejected too.
        if (($prod === false) || ($prod <= 0)) {
            return false;
        }
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        return pow((float) $prod, (float) (1 / $count));
    }

    /**
     * Harmonic mean of the data values
     * harmonic_mean = n/(1/x_1 + 1/x_2 + ... + 1/x_n)
     *
     * @return mixed The harmonic mean or false, if there is an error (no values,
     *               a value equal to zero, or reciprocals summing up to zero)
     * @access public
     */
    public function harmonic_mean()
    {
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        $sum = 0;
        foreach ($this->stat_data as $value) {
            // Any zero (0, 0.0 or "0"), not only a zero minimum, would cause
            // a division by zero.
            if ($value == 0) {
                return false;
            }
            $sum += 1 / $value;
        }
        // Mixed-sign data can cancel out, e.g. 1 and -1.
        if ($sum == 0) {
            return false;
        }
        return $count / $sum;
    }

    /**
     * Median of the data values
     *
     * For an even number of values the median is the arithmetic mean of the
     * two middle values.
     *
     * @return mixed The median or false, if there are no data values
     * @access public
     */
    public function median()
    {
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        $middle = (int) floor($count / 2);
        if (($count % 2) == 0) {
            return ($this->stat_data[$middle - 1] + $this->stat_data[$middle]) / 2;
        }
        return $this->stat_data[$middle];
    }

    /**
     * Returns the rank of a given value
     *
     * The rank is counted from the top of the sorted data: it is computed as
     * count() minus the position of the first matching element, so the last
     * (largest) element has rank 1.
     *
     * @param numeric $value The value to look up
     * @return mixed The rank, if the value exists in the data, otherwise false
     * @access public
     */
    public function rank($value)
    {
        if (!is_numeric($value)) {
            return false;
        }
        // Loose comparison on purpose: the data may hold numeric strings.
        $position = array_search($value, $this->stat_data);
        if ($position === false) {
            return false;
        }
        return $this->count() - $position;
    }

    /**
     * Returns the rank of the median
     *
     * This method is different from the rank method because the median could
     * be the arithmetic mean of the two middle values when the data size is even.
     * In this case the median could be a value which is not part of the data set,
     * and the returned rank lies halfway between two ranks (e.g. 2.5).
     *
     * @return mixed The rank of the median, otherwise false
     * @access public
     */
    public function rank_median()
    {
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        // The formula is the same for even and odd data sizes.
        return ($count + 1) / 2;
    }

    /**
     * n-Quantile of the data values
     *
     * @param double $n A value between 0 and 100 calculating the n-Quantile
     * @return mixed The n-quantile or false, if there are no data values or
     *               $n is not a number between 0 and 100
     * @access public
     */
    public function quantile($n)
    {
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        if (!is_numeric($n) || $n < 0 || $n > 100) {
            return false;
        }
        // Multiply before dividing: ($n / 100) * $count suffers from float
        // rounding (0.07 * 100 is 7.000000000000001) and would then miss the
        // "integral position" branch below.
        $nprod = ($n * $count) / 100;
        if (floor($nprod) == $nprod) {
            $k = (int) $nprod;
            if ($k == 0) {
                return $this->stat_data[0];
            }
            if ($k == $count) {
                return $this->stat_data[$count - 1];
            }
            return ($this->stat_data[$k - 1] + $this->stat_data[$k]) / 2;
        }
        $k = (int) ceil($nprod);
        return $this->stat_data[$k - 1];
    }

    /**
     * Validates the numeric data and handles values which are not a number
     * according to the $nan_handling variable. After validation the data
     * is sorted, which also re-indexes the array.
     *
     * With an unknown $nan_handling setting, values which are not a number
     * are left in the data and false is returned.
     *
     * @return boolean Returns true on success, otherwise false
     * @access public
     */
    public function validate()
    {
        $result = true;
        foreach ($this->stat_data as $key => $value) {
            if (is_numeric($value)) {
                continue;
            }
            switch ($this->nan_handling) {
                case NAN_HANDLING_REMOVE:
                    unset($this->stat_data[$key]);
                    break;
                case NAN_HANDLING_ZERO:
                    $this->stat_data[$key] = 0;
                    break;
                default:
                    $result = false;
            }
        }
        sort($this->stat_data);
        return $result;
    }
}