1<?php
2/* Copyright (c) 1998-2013 ILIAS open source, Extended GPL, see docs/LICENSE */
3
4/**
5* This class provides mathematical functions for statistics.
6* It works on an array of numeric values.
7*
8* @author Helmut Schottmüller <hschottm@tzi.de>
9* @version $Id$
10*/
11
12    /**
13    * Constants for the handling of elements which are not a number
14    */
15    define("NAN_HANDLING_REMOVE", 0);
16    define("NAN_HANDLING_ZERO", 1);
17
class ilStatistics
{
    /**
    * Handling of values which are not a number (NAN)
    *
    * If set to NAN_HANDLING_REMOVE, all elements which are not a number will be removed,
    * if set to NAN_HANDLING_ZERO, all elements which are not a number will be set to zero.
    *
    * @var integer
    */
    public $nan_handling;

    /**
    * Array containing the data. After setData()/validate() it is
    * re-indexed from 0 and sorted in ascending order.
    *
    * @var array
    */
    public $stat_data;

    /**
    * Constructor of ilStatistics class
    *
    * Starts with an empty data set.
    *
    * @param integer $nan_handling A constant defining the handling of elements which are not a number
    * @access public
    */
    public function __construct($nan_handling = NAN_HANDLING_REMOVE)
    {
        $this->nan_handling = $nan_handling;
        $this->stat_data = array();
    }

    /**
    * Set the handling of elements which are not a number
    *
    * If set to NAN_HANDLING_REMOVE, all elements which are not a number will be removed,
    * if set to NAN_HANDLING_ZERO, all elements which are not a number will be set to zero.
    * The setting is applied the next time validate() runs (e.g. through setData());
    * data that is already stored is not re-validated here.
    *
    * @param integer $nan_handling A constant defining the handling of elements which are not a number
    * @access public
    */
    public function setNANHandling($nan_handling = NAN_HANDLING_REMOVE)
    {
        $this->nan_handling = $nan_handling;
    }

    /**
    * Get the handling of elements which are not a number
    *
    * Returns NAN_HANDLING_REMOVE if all elements which are not a number will be removed.
    * Returns NAN_HANDLING_ZERO if all elements which are not a number will be set to zero.
    *
    * @return integer A constant defining the handling of elements which are not a number
    * @access public
    */
    public function getNANHandling()
    {
        return $this->nan_handling;
    }

    /**
    * Sets the data and checks for invalid values
    *
    * The original keys are discarded, then validate() applies the
    * NAN handling and sorts the values.
    *
    * @param array $stat_data An array containing the numeric data
    * @access public
    */
    public function setData($stat_data)
    {
        $this->stat_data = array_values($stat_data);
        $this->validate();
    }

    /**
    * Returns the numeric value array containing the data
    *
    * @return array An array containing the sorted numeric data
    * @access public
    */
    public function getData()
    {
        return $this->stat_data;
    }

    /**
    * Calculates the minimum value
    *
    * @return mixed The minimum value or false, if no minimum exists
    * @see max()
    * @access  public
    */
    public function min()
    {
        if (count($this->stat_data) == 0) {
            return false;
        }
        return min($this->stat_data);
    }

    /**
    * Calculates the maximum value
    *
    * @return mixed The maximum value or false, if no maximum exists
    * @see min()
    * @access  public
    */
    public function max()
    {
        if (count($this->stat_data) == 0) {
            return false;
        }
        return max($this->stat_data);
    }

    /**
    * Calculates number of data values
    *
    * @return integer The number of data values
    * @access  public
    */
    public function count()
    {
        return count($this->stat_data);
    }

    /**
    * Calculates the sum of x_1^n + x_2^n + ... + x_i^n
    *
    * @param numeric $n The exponent
    * @return mixed The sum of x_1^n + x_2^n + ... + x_i^n or false, if no values exist
    * @access  public
    */
    public function sum_n($n)
    {
        if (count($this->stat_data) == 0) {
            return false;
        }
        $sum_n = 0;
        foreach ($this->stat_data as $value) {
            $sum_n += pow((double) $value, (double) $n);
        }
        return $sum_n;
    }

    /**
    * Calculates the sum of x_1 + x_2 + ... + x_i
    *
    * @return mixed The sum of x_1 + x_2 + ... + x_i or false, if no values exist
    * @access  public
    */
    public function sum()
    {
        return $this->sum_n(1);
    }

    /**
    * Calculates the sum of x_1^2 + x_2^2 + ... + x_i^2
    *
    * @return mixed The sum of x_1^2 + x_2^2 + ... + x_i^2 or false, if no values exist
    * @access  public
    */
    public function sum2()
    {
        return $this->sum_n(2);
    }

    /**
    * Calculates the product of x_1^n * x_2^n * ... * x_i^n
    *
    * @param numeric $n The exponent
    * @return mixed The product of x_1^n * x_2^n * ... * x_i^n or false, if no values exist
    * @access  public
    */
    public function product_n($n)
    {
        if (count($this->stat_data) == 0) {
            return false;
        }
        // Shortcut kept from the original implementation: when the smallest
        // value is the integer 0 the result is reported as 0.0 without
        // evaluating the powers, whatever the exponent is.
        if ($this->min() === 0) {
            return 0.0;
        }
        $prod_n = 1.0;
        foreach ($this->stat_data as $value) {
            $prod_n *= pow((double) $value, (double) $n);
        }
        return $prod_n;
    }

    /**
    * Calculates the product of x_1 * x_2 * ... * x_i
    *
    * @param numeric $n Unused; the exponent is always 1. The parameter is kept
    *                   (now optional) so that existing callers passing a value
    *                   keep working and product() can be called without arguments.
    * @return mixed The product of x_1 * x_2 * ... * x_i or false, if no values exist
    * @access  public
    */
    public function product($n = 1)
    {
        return $this->product_n(1);
    }

    /**
    * Arithmetic mean of the data values
    * xbar = (1/n)*∑x_i
    *
    * @return mixed The arithmetic mean or false, if there is an error or no values
    * @access  public
    */
    public function arithmetic_mean()
    {
        $sum = $this->sum();
        if ($sum === false) {
            return false;
        }
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        return (double) ($sum / $count);
    }

    /**
    * Geometric mean of the data values
    * geometric_mean = (x_1 * x_2 * ... * x_n)^(1/n)
    *
    * The geometric mean of a set of positive data is defined as the product of all
    * the members of the set, raised to a power equal to the reciprocal of the number
    * of members.
    *
    * @return mixed The geometric mean or false, if the product is zero or there are no values
    * @access  public
    */
    public function geometric_mean()
    {
        $prod = $this->product();
        // product() yields a float, so the zero test must not be type-strict
        // against the integer 0.
        if (($prod === false) or ($prod == 0)) {
            return false;
        }
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        return pow((double) $prod, (double) (1 / $count));
    }

    /**
    * Harmonic mean of the data values
    * harmonic_mean = n/(1/x_1 + 1/x_2 + ... + 1/x_n)
    *
    * @return mixed The harmonic mean or false, if there are no values, if any
    *               value is zero, or if the reciprocals sum up to zero
    * @access  public
    */
    public function harmonic_mean()
    {
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        $sum = 0;
        foreach ($this->stat_data as $value) {
            // Any zero (0, 0.0, "0"), not only a zero minimum, makes 1/x undefined.
            if ($value == 0) {
                return false;
            }
            $sum += 1 / $value;
        }
        // Reciprocals of mixed-sign data can cancel out, e.g. for (-1, 1).
        if ($sum == 0) {
            return false;
        }
        return $count / $sum;
    }

    /**
    * Median of the data values
    *
    * For an even number of values the mean of the two middle values is
    * returned, otherwise the middle value itself.
    *
    * @return mixed The median or false, if there are no data values
    * @access  public
    */
    public function median()
    {
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        $middle = (int) ($count / 2);
        if (($count % 2) == 0) {
            return ($this->stat_data[$middle - 1] + $this->stat_data[$middle]) / 2;
        }
        return $this->stat_data[$middle];
    }

    /**
    * Returns the rank of a given value
    *
    * The rank is the number of data values minus the position of the first
    * match in the ascending sorted data, so the last element has rank 1.
    * The lookup uses a non-strict comparison.
    *
    * @param numeric $value The value to look up
    * @return mixed The rank, if the value exists in the data, otherwise false
    * @access  public
    */
    public function rank($value)
    {
        if (!is_numeric($value)) {
            return false;
        }
        $rank = array_search($value, $this->stat_data);
        if ($rank !== false) {
            $rank = $this->count() - $rank;
        }
        return $rank;
    }

    /**
    * Returns the rank of the median
    *
    * This method is different from the rank method because the median could
    * be the arithmetic mean of the two middle values when the data size is even.
    * In this case the median could be a value which is not part of the data set.
    * The result is (count + 1) / 2, which is fractional for an even data size.
    *
    * @return mixed The rank of the median, otherwise false
    * @access  public
    */
    public function rank_median()
    {
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        return ($count + 1) / 2;
    }

    /**
    * n-Quantile of the data values
    *
    * With p = n * count / 100: if p is a whole number strictly between 0 and
    * count, the mean of the p-th and (p+1)-th sorted value is returned; p = 0
    * yields the first and p = count the last value; otherwise the ceil(p)-th
    * sorted value is returned.
    *
    * @param double $n A value between 0 and 100 calculating the n-Quantile
    * @return mixed The n-quantile or false, if there are no data values or
    *               $n is not a number between 0 and 100
    * @access  public
    */
    public function quantile($n)
    {
        $count = $this->count();
        if ($count == 0) {
            return false;
        }
        if (!is_numeric($n) || $n < 0 || $n > 100) {
            return false;
        }
        // Multiply before dividing: ($n / 100) * $count accumulates rounding
        // error (0.07 * 100 gives 7.000000000000001) and would then miss the
        // whole-number branch below.
        $position = ($n * $count) / 100;
        if (floor($position) == $position) {
            $k = (int) $position;
            if ($k == 0) {
                return $this->stat_data[0];
            }
            if ($k == $count) {
                return $this->stat_data[$count - 1];
            }
            return ($this->stat_data[$k - 1] + $this->stat_data[$k]) / 2;
        }
        $k = (int) ceil($position);
        return $this->stat_data[$k - 1];
    }

    /**
    * Validates the numeric data and handles values which are not a number
    * according to the $nan_handling variable. After validation the data
    * is sorted (and thereby re-indexed).
    *
    * Intended for internal use (called by setData()); the method stays
    * public for backward compatibility.
    *
    * @return boolean Returns true on success, false if a non-numeric value was
    *                 found while $nan_handling holds an unknown mode (the value
    *                 is then left in the data)
    * @access public
    */
    public function validate()
    {
        $result = true;
        foreach ($this->stat_data as $key => $value) {
            if (!is_numeric($value)) {
                switch ($this->nan_handling) {
                    case NAN_HANDLING_REMOVE:
                        unset($this->stat_data[$key]);
                        break;
                    case NAN_HANDLING_ZERO:
                        $this->stat_data[$key] = 0;
                        break;
                    default:
                        $result = false;
                }
            }
        }
        sort($this->stat_data);
        return $result;
    }
}
411