1 /* 2 * This file is part of ELKI: 3 * Environment for Developing KDD-Applications Supported by Index-Structures 4 * 5 * Copyright (C) 2018 6 * ELKI Development Team 7 * 8 * This program is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU Affero General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU Affero General Public License for more details. 17 * 18 * You should have received a copy of the GNU Affero General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 package de.lmu.ifi.dbs.elki.math.statistics.distribution; 22 23 import java.util.Random; 24 25 import de.lmu.ifi.dbs.elki.math.MathUtil; 26 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference; 27 import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException; 28 import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID; 29 import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints; 30 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization; 31 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter; 32 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter; 33 import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory; 34 import net.jafama.FastMath; 35 36 /** 37 * INCOMPLETE implementation of the poisson distribution. 38 * <p> 39 * TODO: continue implementing, CDF, invcdf and nextRandom are missing 40 * <p> 41 * References: 42 * <p> 43 * Catherine Loader<br> 44 * Fast and Accurate Computation of Binomial Probabilities. 45 * 46 * @author Erich Schubert 47 * @since 0.5.0 48 */ 49 public class PoissonDistribution extends AbstractDistribution { 50 /** 51 * Number of tries 52 */ 53 private int n; 54 55 /** 56 * Success probability 57 */ 58 private double p; 59 60 /** Stirling error constants: 1./12 */ 61 private static final double S0 = 0.08333333333333333333333d; 62 63 /** Stirling error constants: 1./360 */ 64 private static final double S1 = 0.0027777777777777777777777777778d; 65 66 /** Stirling error constants: 1./1260 */ 67 private static final double S2 = 0.00079365079365079365079365d; 68 69 /** Stirling error constants: 1./1680 */ 70 private static final double S3 = 0.000595238095238095238095238095d; 71 72 /** Stirling error constants: 1./1188 */ 73 private static final double S4 = 0.00084175084175084175084175084175d; 74 75 /** 76 * Exact table values for n <= 15 in steps of 0.5 77 * <p> 78 * sfe[n] = ln( (n!*e^n)/((n^n)*sqrt(2*pi*n)) ) 79 */ 80 private static final double[] STIRLING_EXACT_ERROR = { // 81 0.0, // 0.0 82 0.1534264097200273452913848, // 0.5 83 0.0810614667953272582196702, // 1.0 84 0.0548141210519176538961390, // 1.5 85 0.0413406959554092940938221, // 2.0 86 0.03316287351993628748511048, // 2.5 87 0.02767792568499833914878929, // 3.0 88 0.02374616365629749597132920, // 3.5 89 0.02079067210376509311152277, // 4.0 90 0.01848845053267318523077934, // 4.5 91 0.01664469118982119216319487, // 5.0 92 0.01513497322191737887351255, // 5.5 93 0.01387612882307074799874573, // 6.0 94 0.01281046524292022692424986, // 6.5 95 0.01189670994589177009505572, // 7.0 96 0.01110455975820691732662991, // 7.5 97 0.010411265261972096497478567, // 8.0 98 0.009799416126158803298389475, // 8.5 99 0.009255462182712732917728637, // 9.0 100 0.008768700134139385462952823, // 9.5 101 0.008330563433362871256469318, // 10.0 102 0.007934114564314020547248100, // 10.5 103 0.007573675487951840794972024, // 11.0 104 0.007244554301320383179543912, // 11.5 105 0.006942840107209529865664152, // 12.0 106 0.006665247032707682442354394, // 12.5 107 0.006408994188004207068439631, // 13.0 108 0.006171712263039457647532867, // 13.5 109 0.005951370112758847735624416, // 14.0 110 0.005746216513010115682023589, // 14.5 111 0.0055547335519628013710386900 // 15.0 112 }; 113 114 /** 115 * Constructor. 116 * 117 * @param n Number of tries 118 * @param p Success probability 119 */ PoissonDistribution(int n, double p)120 public PoissonDistribution(int n, double p) { 121 this(n, p, (Random) null); 122 } 123 124 /** 125 * Constructor. 126 * 127 * @param n Number of tries 128 * @param p Success probability 129 * @param random Random generator 130 */ PoissonDistribution(int n, double p, Random random)131 public PoissonDistribution(int n, double p, Random random) { 132 super(random); 133 this.n = n; 134 this.p = p; 135 } 136 137 /** 138 * Constructor. 139 * 140 * @param n Number of tries 141 * @param p Success probability 142 * @param random Random generator 143 */ PoissonDistribution(int n, double p, RandomFactory random)144 public PoissonDistribution(int n, double p, RandomFactory random) { 145 super(random); 146 this.n = n; 147 this.p = p; 148 } 149 150 /** 151 * Poisson probability mass function (PMF) for integer values. 152 * 153 * @param x integer values 154 * @return Probability 155 */ pmf(int x)156 public double pmf(int x) { 157 return pmf(x, n, p); 158 } 159 160 @Override pdf(double x)161 public double pdf(double x) { 162 // FIXME: return 0 for non-integer x? 163 return pmf(x, n, p); 164 } 165 166 @Override logpdf(double x)167 public double logpdf(double x) { 168 return FastMath.log(pmf(x, n, p)); 169 } 170 171 /** 172 * Poisson probability mass function (PMF) for integer values. 173 * 174 * @param x integer values 175 * @return Probability 176 */ 177 @Reference(title = "Fast and accurate computation of binomial probabilities", // 178 authors = "C. Loader", booktitle = "", // 179 url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf", // 180 bibkey = "web/Loader00") pmf(double x, int n, double p)181 public static double pmf(double x, int n, double p) { 182 // Invalid values 183 if(x < 0 || x > n) { 184 return 0.; 185 } 186 // Extreme probabilities 187 if(p <= 0.) { 188 return x == 0 ? 1. : 0.; 189 } 190 if(p >= 1.) { 191 return x == n ? 1. : 0.; 192 } 193 final double q = 1 - p; 194 // FIXME: check for x to be integer, return 0 otherwise? 195 196 // Extreme values of x 197 if(x == 0) { 198 if(p < .1) { 199 return FastMath.exp(-devianceTerm(n, n * q) - n * p); 200 } 201 else { 202 return FastMath.exp(n * FastMath.log(q)); 203 } 204 } 205 if(x == n) { 206 if(p > .9) { 207 return FastMath.exp(-devianceTerm(n, n * p) - n * q); 208 } 209 else { 210 return FastMath.exp(n * FastMath.log(p)); 211 } 212 } 213 final double lc = stirlingError(n) - stirlingError(x) - stirlingError(n - x) - devianceTerm(x, n * p) - devianceTerm(n - x, n * q); 214 final double f = (MathUtil.TWOPI * x * (n - x)) / n; 215 return FastMath.exp(lc) / FastMath.sqrt(f); 216 } 217 218 /** 219 * Poisson probability mass function (PMF) for integer values. 220 * 221 * @param x integer values 222 * @return Probability 223 */ logpmf(double x, int n, double p)224 public static double logpmf(double x, int n, double p) { 225 // Invalid values 226 if(x < 0 || x > n) { 227 return Double.NEGATIVE_INFINITY; 228 } 229 // Extreme probabilities 230 if(p <= 0.) { 231 return x == 0 ? 0. : Double.NEGATIVE_INFINITY; 232 } 233 if(p >= 1.) { 234 return x == n ? 0. : Double.NEGATIVE_INFINITY; 235 } 236 final double q = 1 - p; 237 // FIXME: check for x to be integer, return 0 otherwise? 238 239 // Extreme values of x 240 if(x == 0) { 241 if(p < .1) { 242 return -devianceTerm(n, n * q) - n * p; 243 } 244 else { 245 return n * FastMath.log(q); 246 } 247 } 248 if(x == n) { 249 if(p > .9) { 250 return -devianceTerm(n, n * p) - n * q; 251 } 252 else { 253 return n * FastMath.log(p); 254 } 255 } 256 final double lc = stirlingError(n) - stirlingError(x) - stirlingError(n - x) - devianceTerm(x, n * p) - devianceTerm(n - x, n * q); 257 final double f = (MathUtil.TWOPI * x * (n - x)) / n; 258 return lc - .5 * FastMath.log(f); 259 } 260 261 @Override cdf(double val)262 public double cdf(double val) { 263 // FIXME: implement! 264 throw new NotImplementedException(); 265 } 266 267 @Override quantile(double val)268 public double quantile(double val) { 269 // FIXME: implement! 270 throw new NotImplementedException(); 271 } 272 273 @Override nextRandom()274 public double nextRandom() { 275 // FIXME: implement! 276 throw new NotImplementedException(); 277 } 278 279 /** 280 * Compute the poisson distribution PDF with an offset of + 1 281 * <p> 282 * pdf(x_plus_1 - 1, lambda) 283 * 284 * @param x_plus_1 x+1 285 * @param lambda Lambda 286 * @return pdf 287 */ poissonPDFm1(double x_plus_1, double lambda)288 public static double poissonPDFm1(double x_plus_1, double lambda) { 289 if(Double.isInfinite(lambda)) { 290 return 0.; 291 } 292 if(x_plus_1 > 1) { 293 return rawProbability(x_plus_1 - 1, lambda); 294 } 295 if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) { 296 return FastMath.exp(-lambda - GammaDistribution.logGamma(x_plus_1)); 297 } 298 else { 299 return rawProbability(x_plus_1, lambda) * (x_plus_1 / lambda); 300 } 301 } 302 303 /** 304 * Compute the poisson distribution PDF with an offset of + 1 305 * <p> 306 * log pdf(x_plus_1 - 1, lambda) 307 * 308 * @param x_plus_1 x+1 309 * @param lambda Lambda 310 * @return pdf 311 */ logpoissonPDFm1(double x_plus_1, double lambda)312 public static double logpoissonPDFm1(double x_plus_1, double lambda) { 313 if(Double.isInfinite(lambda)) { 314 return Double.NEGATIVE_INFINITY; 315 } 316 if(x_plus_1 > 1) { 317 return rawLogProbability(x_plus_1 - 1, lambda); 318 } 319 if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) { 320 return -lambda - GammaDistribution.logGamma(x_plus_1); 321 } 322 else { 323 return rawLogProbability(x_plus_1, lambda) + FastMath.log(x_plus_1 / lambda); 324 } 325 } 326 327 /** 328 * Calculates the Stirling Error 329 * <p> 330 * stirlerr(n) = ln(n!) - ln(sqrt(2*pi*n)*(n/e)^n) 331 * 332 * @param n Parameter n 333 * @return Stirling error 334 */ 335 @Reference(title = "Fast and accurate computation of binomial probabilities", // 336 authors = "C. Loader", booktitle = "", // 337 url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf", // 338 bibkey = "web/Loader00") stirlingError(int n)339 private static double stirlingError(int n) { 340 // Try to use a table value: 341 if(n < 16) { 342 return STIRLING_EXACT_ERROR[n << 1]; 343 } 344 final double nn = n * n; 345 // Use the appropriate number of terms 346 if(n > 500) { 347 return (S0 - S1 / nn) / n; 348 } 349 if(n > 80) { 350 return ((S0 - (S1 - S2 / nn)) / nn) / n; 351 } 352 if(n > 35) { 353 return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n); 354 } 355 return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n); 356 } 357 358 /** 359 * Calculates the Stirling Error 360 * <p> 361 * stirlerr(n) = ln(n!) - ln(sqrt(2*pi*n)*(n/e)^n) 362 * 363 * @param n Parameter n 364 * @return Stirling error 365 */ 366 @Reference(title = "Fast and accurate computation of binomial probabilities", // 367 authors = "C. Loader", booktitle = "", // 368 url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf", // 369 bibkey = "web/Loader00") stirlingError(double n)370 private static double stirlingError(double n) { 371 if(n < 16.0) { 372 // Our table has a step size of 0.5 373 final double n2 = 2.0 * n; 374 if(FastMath.floor(n2) == n2) { // Exact match 375 return STIRLING_EXACT_ERROR[(int) n2]; 376 } 377 else { 378 return GammaDistribution.logGamma(n + 1.0) - (n + 0.5) * FastMath.log(n) + n - MathUtil.LOGSQRTTWOPI; 379 } 380 } 381 final double nn = n * n; 382 if(n > 500.0) { 383 return (S0 - S1 / nn) / n; 384 } 385 if(n > 80.0) { 386 return ((S0 - (S1 - S2 / nn)) / nn) / n; 387 } 388 if(n > 35.0) { 389 return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n); 390 } 391 return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n); 392 } 393 394 /** 395 * Evaluate the deviance term of the saddle point approximation. 396 * <p> 397 * bd0(x,np) = x*ln(x/np)+np-x 398 * 399 * @param x probability density function position 400 * @param np product of trials and success probability: n*p 401 * @return Deviance term 402 */ 403 @Reference(title = "Fast and accurate computation of binomial probabilities", // 404 authors = "C. Loader", booktitle = "", // 405 url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf", // 406 bibkey = "web/Loader00") devianceTerm(double x, double np)407 private static double devianceTerm(double x, double np) { 408 if(Math.abs(x - np) < 0.1 * (x + np)) { 409 final double v = (x - np) / (x + np); 410 411 double s = (x - np) * v; 412 double ej = 2.0d * x * v; 413 for(int j = 1;; j++) { 414 ej *= v * v; 415 final double s1 = s + ej / (2 * j + 1); 416 if(s1 == s) { 417 return s1; 418 } 419 s = s1; 420 } 421 } 422 return x * FastMath.log(x / np) + np - x; 423 } 424 425 /** 426 * Poisson distribution probability, but also for non-integer arguments. 427 * <p> 428 * lb^x exp(-lb) / x! 429 * 430 * @param x X 431 * @param lambda lambda 432 * @return Poisson distribution probability 433 */ rawProbability(double x, double lambda)434 public static double rawProbability(double x, double lambda) { 435 // Extreme lambda 436 if(lambda == 0) { 437 return ((x == 0) ? 1. : 0.); 438 } 439 // Extreme values 440 if(Double.isInfinite(lambda) || x < 0) { 441 return 0.; 442 } 443 if(x <= lambda * Double.MIN_NORMAL) { 444 return FastMath.exp(-lambda); 445 } 446 if(lambda < x * Double.MIN_NORMAL) { 447 double r = -lambda + x * FastMath.log(lambda) - GammaDistribution.logGamma(x + 1); 448 return FastMath.exp(r); 449 } 450 final double f = MathUtil.TWOPI * x; 451 final double y = -stirlingError(x) - devianceTerm(x, lambda); 452 return FastMath.exp(y) / FastMath.sqrt(f); 453 } 454 455 /** 456 * Poisson distribution probability, but also for non-integer arguments. 457 * <p> 458 * lb^x exp(-lb) / x! 459 * 460 * @param x X 461 * @param lambda lambda 462 * @return Poisson distribution probability 463 */ rawLogProbability(double x, double lambda)464 public static double rawLogProbability(double x, double lambda) { 465 // Extreme lambda 466 if(lambda == 0) { 467 return ((x == 0) ? 1. : Double.NEGATIVE_INFINITY); 468 } 469 // Extreme values 470 if(Double.isInfinite(lambda) || x < 0) { 471 return Double.NEGATIVE_INFINITY; 472 } 473 if(x <= lambda * Double.MIN_NORMAL) { 474 return -lambda; 475 } 476 if(lambda < x * Double.MIN_NORMAL) { 477 return -lambda + x * FastMath.log(lambda) - GammaDistribution.logGamma(x + 1); 478 } 479 final double f = MathUtil.TWOPI * x; 480 final double y = -stirlingError(x) - devianceTerm(x, lambda); 481 return -0.5 * FastMath.log(f) + y; 482 } 483 484 @Override toString()485 public String toString() { 486 return "PoissonDistribution(n=" + n + ", p=" + p + ")"; 487 } 488 489 /** 490 * Parameterization class 491 * 492 * @author Erich Schubert 493 */ 494 public static class Parameterizer extends AbstractDistribution.Parameterizer { 495 /** 496 * Number of trials. 497 */ 498 public static final OptionID N_ID = new OptionID("distribution.poisson.n", "Number of trials."); 499 500 /** 501 * Success probability. 502 */ 503 public static final OptionID PROB_ID = new OptionID("distribution.poisson.probability", "Success probability."); 504 505 /** 506 * Number of trials. 507 */ 508 int n; 509 510 /** 511 * Success probability. 512 */ 513 double p; 514 515 @Override makeOptions(Parameterization config)516 protected void makeOptions(Parameterization config) { 517 super.makeOptions(config); 518 519 IntParameter nP = new IntParameter(N_ID) // 520 .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT); 521 if(config.grab(nP)) { 522 n = nP.intValue(); 523 } 524 525 DoubleParameter probP = new DoubleParameter(PROB_ID) // 526 .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE) // 527 .addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE); 528 if(config.grab(probP)) { 529 p = probP.doubleValue(); 530 } 531 } 532 533 @Override makeInstance()534 protected PoissonDistribution makeInstance() { 535 return new PoissonDistribution(n, p, rnd); 536 } 537 } 538 } 539