1<?php 2/** 3 * PSquare 4 * 5 * Represents a running, online estimate of a p-quantile for a series 6 * of observations using the P-squared algorithm, as documented in 7 * "The P-Square Algorithm for Dynamic Calculation of Percentiles and 8 * Histograms without Storing Observations," Communications of the ACM, 9 * October 1985 by R. Jain and I. Chlamtac. 10 * 11 * This program is free software; you can redistribute it and/or modify 12 * it under the terms of the GNU General Public License as published by 13 * the Free Software Foundation; either version 2 of the License, or 14 * (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 * GNU General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License along 22 * with this program; if not, write to the Free Software Foundation, Inc., 23 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 24 * http://www.gnu.org/copyleft/gpl.html 25 * 26 * @file 27 * @author Ori Livneh <ori@wikimedia.org> 28 */ 29 30namespace Wikimedia; 31 32/** 33 * Represents a running, online estimate of a p-quantile for a series 34 * of observations using the P-squared algorithm. 35 */ 36class PSquare { 37 38 /** @var float Percentile to estimate. **/ 39 private $p; 40 41 /** @var float[] Height of each marker. **/ 42 private $heights = []; 43 44 /** @var int[] Position of each marker. **/ 45 private $positions = []; 46 47 /** @var float[] Desired position of each marker. **/ 48 private $desired = []; 49 50 /** @var int Number of observations. **/ 51 private $numObservations = 0; 52 53 /** 54 * Constructor. 55 * 56 * @param float $p the percentile (defaults to 0.5, or median). 57 */ 58 public function __construct( $p = 0.5 ) { 59 $this->p = $p; 60 $this->positions = [ 0, 1, 2, 3, 4 ]; 61 $this->desired = [ 0, ( 2 * $p ), ( 4 * $p ), 2 + ( 2 * $p ), 4 ]; 62 $this->increments = [ 0, ( $p / 2 ), $p, ( ( 1 + $p ) / 2 ), 1 ]; 63 } 64 65 /** 66 * Get the total number of accumulated observations. 67 * 68 * @return int 69 */ 70 public function getCount() { 71 return $this->numObservations; 72 } 73 74 /** 75 * Add an observation. 76 * 77 * @param int|float $x Value to add 78 */ 79 public function addObservation( $x ) { 80 $this->numObservations++; 81 82 if ( $this->numObservations <= 5 ) { 83 $this->heights[] = $x; 84 if ( $this->numObservations === 5 ) { 85 sort( $this->heights ); 86 } 87 return; 88 } 89 90 if ( $x < $this->heights[0] ) { 91 $this->heights[0] = $x; 92 $k = 0; 93 } elseif ( $x >= $this->heights[4] ) { 94 $this->heights[4] = $x; 95 $k = 3; 96 } else { 97 for ( $i = 1; $i < 5; $i++ ) { 98 if ( $x < $this->heights[$i] ) { 99 $k = $i - 1; 100 break; 101 } 102 } 103 } 104 105 for ( $i = $k + 1; $i < 5; $i++ ) { 106 $this->positions[$i]++; 107 } 108 109 for ( $i = 0; $i < 5; $i++ ) { 110 $this->desired[$i] += $this->increments[$i]; 111 } 112 113 for ( $i = 1; $i < 4; $i++ ) { 114 $n = $this->positions[$i]; 115 $nPrev = $this->positions[$i - 1]; 116 $nNext = $this->positions[$i + 1]; 117 118 $d = $this->desired[$i] - $n; 119 120 if ( ( $d >= 1 && $nNext - $n > 1 ) || ( $d <= -1 && $nPrev - $n < -1 ) ) { 121 $d = ( $d < 0 ) ? -1 : 1; 122 123 $q = $this->computeParabolic( $i, $d ); 124 $qPrev = $this->heights[$i - 1]; 125 $qNext = $this->heights[$i + 1]; 126 127 if ( $qPrev < $q && $q < $qNext ) { 128 $this->heights[$i] = $q; 129 } else { 130 $this->heights[$i] = $this->computeLinear( $i, $d ); 131 } 132 133 $this->positions[$i] += $d; 134 } 135 } 136 } 137 138 /** 139 * Use piecewise parabolic prediction to predict the ideal 140 * height of a marker. 141 * 142 * @param int $i index of marker to adjust 143 * @param int $d always -1 or 1 144 * @return float ideal height of marker 145 */ 146 private function computeParabolic( $i, $d ) { 147 $q = $this->heights[$i]; 148 $qPrev = $this->heights[$i - 1]; 149 $qNext = $this->heights[$i + 1]; 150 151 $n = $this->positions[$i]; 152 $nPrev = $this->positions[$i - 1]; 153 $nNext = $this->positions[$i + 1]; 154 155 return ( $q + 156 $d / ( $nNext - $nPrev ) * 157 ( 158 ( $n - $nPrev + $d ) * ( $qNext - $q ) / ( $nNext - $n ) + 159 ( $nNext - $n - $d ) * ( $q - $qPrev ) / ( $n - $nPrev ) 160 ) 161 ); 162 } 163 164 /** 165 * Linear formula to predict ideal position of a marker. 166 * 167 * @param int $i index of marker to adjust 168 * @param int $d always -1 or 1 169 * @return float ideal height of marker 170 */ 171 private function computeLinear( $i, $d ) { 172 $q = $this->heights[$i]; 173 $n = $this->positions[$i]; 174 return ( $q + $d * 175 ( $this->heights[$i + $d] - $q ) / 176 ( $this->positions[$i + $d] - $n ) 177 ); 178 } 179 180 /** 181 * Get the estimated p-quantile value. 182 * 183 * @return float 184 */ 185 public function getValue() { 186 // If we have five samples or fewer, fall back to a naive method. 187 if ( $this->getCount() <= 5 ) { 188 sort( $this->heights ); 189 $i = $this->p * count( $this->heights ); 190 if ( $i === floor( $i ) ) { 191 return ( $this->heights[$i - 1] + $this->heights[$i] ) / 2; 192 } else { 193 return $this->heights[floor( $i )]; 194 } 195 } 196 197 return $this->heights[2]; 198 } 199} 200