1<?php
2/**
3 * PSquare
4 *
5 * Represents a running, online estimate of a p-quantile for a series
6 * of observations using the P-squared algorithm, as documented in
7 * "The P-Square Algorithm for Dynamic Calculation of Percentiles and
8 * Histograms without Storing Observations," Communications of the ACM,
9 * October 1985 by R. Jain and I. Chlamtac.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
22 * with this program; if not, write to the Free Software Foundation, Inc.,
23 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
24 * http://www.gnu.org/copyleft/gpl.html
25 *
26 * @file
27 * @author Ori Livneh <ori@wikimedia.org>
28 */
29
30namespace Wikimedia;
31
32/**
33 * Represents a running, online estimate of a p-quantile for a series
34 * of observations using the P-squared algorithm.
35 */
36class PSquare {
37
38	/** @var float Percentile to estimate. **/
39	private $p;
40
41	/** @var float[] Height of each marker. **/
42	private $heights = [];
43
44	/** @var int[] Position of each marker. **/
45	private $positions = [];
46
47	/** @var float[] Desired position of each marker. **/
48	private $desired = [];
49
50	/** @var int Number of observations. **/
51	private $numObservations = 0;
52
53	/**
54	 * Constructor.
55	 *
56	 * @param float $p the percentile (defaults to 0.5, or median).
57	 */
58	public function __construct( $p = 0.5 ) {
59		$this->p = $p;
60		$this->positions = [ 0, 1, 2, 3, 4 ];
61		$this->desired = [ 0, ( 2 * $p ), ( 4 * $p ), 2 + ( 2 * $p ), 4 ];
62		$this->increments = [ 0, ( $p / 2 ), $p, ( ( 1 + $p ) / 2 ), 1 ];
63	}
64
65	/**
66	 * Get the total number of accumulated observations.
67	 *
68	 * @return int
69	 */
70	public function getCount() {
71		return $this->numObservations;
72	}
73
74	/**
75	 * Add an observation.
76	 *
77	 * @param int|float $x Value to add
78	 */
79	public function addObservation( $x ) {
80		$this->numObservations++;
81
82		if ( $this->numObservations <= 5 ) {
83			$this->heights[] = $x;
84			if ( $this->numObservations === 5 ) {
85				sort( $this->heights );
86			}
87			return;
88		}
89
90		if ( $x < $this->heights[0] ) {
91			$this->heights[0] = $x;
92			$k = 0;
93		} elseif ( $x >= $this->heights[4] ) {
94			$this->heights[4] = $x;
95			$k = 3;
96		} else {
97			for ( $i = 1; $i < 5; $i++ ) {
98				if ( $x < $this->heights[$i] ) {
99					$k = $i - 1;
100					break;
101				}
102			}
103		}
104
105		for ( $i = $k + 1; $i < 5; $i++ ) {
106			$this->positions[$i]++;
107		}
108
109		for ( $i = 0; $i < 5; $i++ ) {
110			$this->desired[$i] += $this->increments[$i];
111		}
112
113		for ( $i = 1; $i < 4; $i++ ) {
114			$n     = $this->positions[$i];
115			$nPrev = $this->positions[$i - 1];
116			$nNext = $this->positions[$i + 1];
117
118			$d = $this->desired[$i] - $n;
119
120			if ( ( $d >= 1 && $nNext - $n > 1 ) || ( $d <= -1 && $nPrev - $n < -1 ) ) {
121				$d = ( $d < 0 ) ? -1 : 1;
122
123				$q     = $this->computeParabolic( $i, $d );
124				$qPrev = $this->heights[$i - 1];
125				$qNext = $this->heights[$i + 1];
126
127				if ( $qPrev < $q && $q < $qNext ) {
128					$this->heights[$i] = $q;
129				} else {
130					$this->heights[$i] = $this->computeLinear( $i, $d );
131				}
132
133				$this->positions[$i] += $d;
134			}
135		}
136	}
137
138	/**
139	 * Use piecewise parabolic prediction to predict the ideal
140	 * height of a marker.
141	 *
142	 * @param int $i index of marker to adjust
143	 * @param int $d always -1 or 1
144	 * @return float ideal height of marker
145	 */
146	private function computeParabolic( $i, $d ) {
147		$q     = $this->heights[$i];
148		$qPrev = $this->heights[$i - 1];
149		$qNext = $this->heights[$i + 1];
150
151		$n     = $this->positions[$i];
152		$nPrev = $this->positions[$i - 1];
153		$nNext = $this->positions[$i + 1];
154
155		return ( $q +
156			$d / ( $nNext - $nPrev ) *
157			(
158				( $n - $nPrev + $d ) * ( $qNext - $q ) / ( $nNext - $n ) +
159				( $nNext - $n - $d ) * ( $q - $qPrev ) / ( $n - $nPrev )
160			)
161		);
162	}
163
164	/**
165	 * Linear formula to predict ideal position of a marker.
166	 *
167	 * @param int $i index of marker to adjust
168	 * @param int $d always -1 or 1
169	 * @return float ideal height of marker
170	 */
171	private function computeLinear( $i, $d ) {
172		$q = $this->heights[$i];
173		$n = $this->positions[$i];
174		return ( $q + $d *
175			( $this->heights[$i + $d] - $q ) /
176			( $this->positions[$i + $d] - $n )
177		);
178	}
179
180	/**
181	 * Get the estimated p-quantile value.
182	 *
183	 * @return float
184	 */
185	public function getValue() {
186		// If we have five samples or fewer, fall back to a naive method.
187		if ( $this->getCount() <= 5 ) {
188			sort( $this->heights );
189			$i = $this->p * count( $this->heights );
190			if ( $i === floor( $i ) ) {
191				return ( $this->heights[$i - 1] + $this->heights[$i] ) / 2;
192			} else {
193				return $this->heights[floor( $i )];
194			}
195		}
196
197		return $this->heights[2];
198	}
199}
200