1 /*
2  * This file is part of ELKI:
3  * Environment for Developing KDD-Applications Supported by Index-Structures
4  *
5  * Copyright (C) 2018
6  * ELKI Development Team
7  *
8  * This program is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Affero General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
20  */
21 package de.lmu.ifi.dbs.elki.math.statistics.distribution;
22 
23 import java.util.Random;
24 
25 import de.lmu.ifi.dbs.elki.math.MathUtil;
26 import de.lmu.ifi.dbs.elki.utilities.documentation.Reference;
27 import de.lmu.ifi.dbs.elki.utilities.exceptions.NotImplementedException;
28 import de.lmu.ifi.dbs.elki.utilities.optionhandling.OptionID;
29 import de.lmu.ifi.dbs.elki.utilities.optionhandling.constraints.CommonConstraints;
30 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameterization.Parameterization;
31 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.DoubleParameter;
32 import de.lmu.ifi.dbs.elki.utilities.optionhandling.parameters.IntParameter;
33 import de.lmu.ifi.dbs.elki.utilities.random.RandomFactory;
34 import net.jafama.FastMath;
35 
36 /**
37  * INCOMPLETE implementation of the poisson distribution.
38  * <p>
39  * TODO: continue implementing, CDF, invcdf and nextRandom are missing
40  * <p>
41  * References:
42  * <p>
43  * Catherine Loader<br>
44  * Fast and Accurate Computation of Binomial Probabilities.
45  *
46  * @author Erich Schubert
47  * @since 0.5.0
48  */
49 public class PoissonDistribution extends AbstractDistribution {
50   /**
51    * Number of tries
52    */
53   private int n;
54 
55   /**
56    * Success probability
57    */
58   private double p;
59 
60   /** Stirling error constants: 1./12 */
61   private static final double S0 = 0.08333333333333333333333d;
62 
63   /** Stirling error constants: 1./360 */
64   private static final double S1 = 0.0027777777777777777777777777778d;
65 
66   /** Stirling error constants: 1./1260 */
67   private static final double S2 = 0.00079365079365079365079365d;
68 
69   /** Stirling error constants: 1./1680 */
70   private static final double S3 = 0.000595238095238095238095238095d;
71 
72   /** Stirling error constants: 1./1188 */
73   private static final double S4 = 0.00084175084175084175084175084175d;
74 
75   /**
76    * Exact table values for n &lt;= 15 in steps of 0.5
77    * <p>
78    * sfe[n] = ln( (n!*e^n)/((n^n)*sqrt(2*pi*n)) )
79    */
80   private static final double[] STIRLING_EXACT_ERROR = { //
81       0.0, // 0.0
82       0.1534264097200273452913848, // 0.5
83       0.0810614667953272582196702, // 1.0
84       0.0548141210519176538961390, // 1.5
85       0.0413406959554092940938221, // 2.0
86       0.03316287351993628748511048, // 2.5
87       0.02767792568499833914878929, // 3.0
88       0.02374616365629749597132920, // 3.5
89       0.02079067210376509311152277, // 4.0
90       0.01848845053267318523077934, // 4.5
91       0.01664469118982119216319487, // 5.0
92       0.01513497322191737887351255, // 5.5
93       0.01387612882307074799874573, // 6.0
94       0.01281046524292022692424986, // 6.5
95       0.01189670994589177009505572, // 7.0
96       0.01110455975820691732662991, // 7.5
97       0.010411265261972096497478567, // 8.0
98       0.009799416126158803298389475, // 8.5
99       0.009255462182712732917728637, // 9.0
100       0.008768700134139385462952823, // 9.5
101       0.008330563433362871256469318, // 10.0
102       0.007934114564314020547248100, // 10.5
103       0.007573675487951840794972024, // 11.0
104       0.007244554301320383179543912, // 11.5
105       0.006942840107209529865664152, // 12.0
106       0.006665247032707682442354394, // 12.5
107       0.006408994188004207068439631, // 13.0
108       0.006171712263039457647532867, // 13.5
109       0.005951370112758847735624416, // 14.0
110       0.005746216513010115682023589, // 14.5
111       0.0055547335519628013710386900 // 15.0
112   };
113 
114   /**
115    * Constructor.
116    *
117    * @param n Number of tries
118    * @param p Success probability
119    */
PoissonDistribution(int n, double p)120   public PoissonDistribution(int n, double p) {
121     this(n, p, (Random) null);
122   }
123 
124   /**
125    * Constructor.
126    *
127    * @param n Number of tries
128    * @param p Success probability
129    * @param random Random generator
130    */
PoissonDistribution(int n, double p, Random random)131   public PoissonDistribution(int n, double p, Random random) {
132     super(random);
133     this.n = n;
134     this.p = p;
135   }
136 
137   /**
138    * Constructor.
139    *
140    * @param n Number of tries
141    * @param p Success probability
142    * @param random Random generator
143    */
PoissonDistribution(int n, double p, RandomFactory random)144   public PoissonDistribution(int n, double p, RandomFactory random) {
145     super(random);
146     this.n = n;
147     this.p = p;
148   }
149 
150   /**
151    * Poisson probability mass function (PMF) for integer values.
152    *
153    * @param x integer values
154    * @return Probability
155    */
pmf(int x)156   public double pmf(int x) {
157     return pmf(x, n, p);
158   }
159 
160   @Override
pdf(double x)161   public double pdf(double x) {
162     // FIXME: return 0 for non-integer x?
163     return pmf(x, n, p);
164   }
165 
166   @Override
logpdf(double x)167   public double logpdf(double x) {
168     return FastMath.log(pmf(x, n, p));
169   }
170 
171   /**
172    * Poisson probability mass function (PMF) for integer values.
173    *
174    * @param x integer values
175    * @return Probability
176    */
177   @Reference(title = "Fast and accurate computation of binomial probabilities", //
178       authors = "C. Loader", booktitle = "", //
179       url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf", //
180       bibkey = "web/Loader00")
pmf(double x, int n, double p)181   public static double pmf(double x, int n, double p) {
182     // Invalid values
183     if(x < 0 || x > n) {
184       return 0.;
185     }
186     // Extreme probabilities
187     if(p <= 0.) {
188       return x == 0 ? 1. : 0.;
189     }
190     if(p >= 1.) {
191       return x == n ? 1. : 0.;
192     }
193     final double q = 1 - p;
194     // FIXME: check for x to be integer, return 0 otherwise?
195 
196     // Extreme values of x
197     if(x == 0) {
198       if(p < .1) {
199         return FastMath.exp(-devianceTerm(n, n * q) - n * p);
200       }
201       else {
202         return FastMath.exp(n * FastMath.log(q));
203       }
204     }
205     if(x == n) {
206       if(p > .9) {
207         return FastMath.exp(-devianceTerm(n, n * p) - n * q);
208       }
209       else {
210         return FastMath.exp(n * FastMath.log(p));
211       }
212     }
213     final double lc = stirlingError(n) - stirlingError(x) - stirlingError(n - x) - devianceTerm(x, n * p) - devianceTerm(n - x, n * q);
214     final double f = (MathUtil.TWOPI * x * (n - x)) / n;
215     return FastMath.exp(lc) / FastMath.sqrt(f);
216   }
217 
218   /**
219    * Poisson probability mass function (PMF) for integer values.
220    *
221    * @param x integer values
222    * @return Probability
223    */
logpmf(double x, int n, double p)224   public static double logpmf(double x, int n, double p) {
225     // Invalid values
226     if(x < 0 || x > n) {
227       return Double.NEGATIVE_INFINITY;
228     }
229     // Extreme probabilities
230     if(p <= 0.) {
231       return x == 0 ? 0. : Double.NEGATIVE_INFINITY;
232     }
233     if(p >= 1.) {
234       return x == n ? 0. : Double.NEGATIVE_INFINITY;
235     }
236     final double q = 1 - p;
237     // FIXME: check for x to be integer, return 0 otherwise?
238 
239     // Extreme values of x
240     if(x == 0) {
241       if(p < .1) {
242         return -devianceTerm(n, n * q) - n * p;
243       }
244       else {
245         return n * FastMath.log(q);
246       }
247     }
248     if(x == n) {
249       if(p > .9) {
250         return -devianceTerm(n, n * p) - n * q;
251       }
252       else {
253         return n * FastMath.log(p);
254       }
255     }
256     final double lc = stirlingError(n) - stirlingError(x) - stirlingError(n - x) - devianceTerm(x, n * p) - devianceTerm(n - x, n * q);
257     final double f = (MathUtil.TWOPI * x * (n - x)) / n;
258     return lc - .5 * FastMath.log(f);
259   }
260 
261   @Override
cdf(double val)262   public double cdf(double val) {
263     // FIXME: implement!
264     throw new NotImplementedException();
265   }
266 
267   @Override
quantile(double val)268   public double quantile(double val) {
269     // FIXME: implement!
270     throw new NotImplementedException();
271   }
272 
273   @Override
nextRandom()274   public double nextRandom() {
275     // FIXME: implement!
276     throw new NotImplementedException();
277   }
278 
279   /**
280    * Compute the poisson distribution PDF with an offset of + 1
281    * <p>
282    * pdf(x_plus_1 - 1, lambda)
283    *
284    * @param x_plus_1 x+1
285    * @param lambda Lambda
286    * @return pdf
287    */
poissonPDFm1(double x_plus_1, double lambda)288   public static double poissonPDFm1(double x_plus_1, double lambda) {
289     if(Double.isInfinite(lambda)) {
290       return 0.;
291     }
292     if(x_plus_1 > 1) {
293       return rawProbability(x_plus_1 - 1, lambda);
294     }
295     if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
296       return FastMath.exp(-lambda - GammaDistribution.logGamma(x_plus_1));
297     }
298     else {
299       return rawProbability(x_plus_1, lambda) * (x_plus_1 / lambda);
300     }
301   }
302 
303   /**
304    * Compute the poisson distribution PDF with an offset of + 1
305    * <p>
306    * log pdf(x_plus_1 - 1, lambda)
307    *
308    * @param x_plus_1 x+1
309    * @param lambda Lambda
310    * @return pdf
311    */
logpoissonPDFm1(double x_plus_1, double lambda)312   public static double logpoissonPDFm1(double x_plus_1, double lambda) {
313     if(Double.isInfinite(lambda)) {
314       return Double.NEGATIVE_INFINITY;
315     }
316     if(x_plus_1 > 1) {
317       return rawLogProbability(x_plus_1 - 1, lambda);
318     }
319     if(lambda > Math.abs(x_plus_1 - 1) * MathUtil.LOG2 * Double.MAX_EXPONENT / 1e-14) {
320       return -lambda - GammaDistribution.logGamma(x_plus_1);
321     }
322     else {
323       return rawLogProbability(x_plus_1, lambda) + FastMath.log(x_plus_1 / lambda);
324     }
325   }
326 
327   /**
328    * Calculates the Stirling Error
329    * <p>
330    * stirlerr(n) = ln(n!) - ln(sqrt(2*pi*n)*(n/e)^n)
331    *
332    * @param n Parameter n
333    * @return Stirling error
334    */
335   @Reference(title = "Fast and accurate computation of binomial probabilities", //
336       authors = "C. Loader", booktitle = "", //
337       url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf", //
338       bibkey = "web/Loader00")
stirlingError(int n)339   private static double stirlingError(int n) {
340     // Try to use a table value:
341     if(n < 16) {
342       return STIRLING_EXACT_ERROR[n << 1];
343     }
344     final double nn = n * n;
345     // Use the appropriate number of terms
346     if(n > 500) {
347       return (S0 - S1 / nn) / n;
348     }
349     if(n > 80) {
350       return ((S0 - (S1 - S2 / nn)) / nn) / n;
351     }
352     if(n > 35) {
353       return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n);
354     }
355     return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n);
356   }
357 
358   /**
359    * Calculates the Stirling Error
360    * <p>
361    * stirlerr(n) = ln(n!) - ln(sqrt(2*pi*n)*(n/e)^n)
362    *
363    * @param n Parameter n
364    * @return Stirling error
365    */
366   @Reference(title = "Fast and accurate computation of binomial probabilities", //
367       authors = "C. Loader", booktitle = "", //
368       url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf", //
369       bibkey = "web/Loader00")
stirlingError(double n)370   private static double stirlingError(double n) {
371     if(n < 16.0) {
372       // Our table has a step size of 0.5
373       final double n2 = 2.0 * n;
374       if(FastMath.floor(n2) == n2) { // Exact match
375         return STIRLING_EXACT_ERROR[(int) n2];
376       }
377       else {
378         return GammaDistribution.logGamma(n + 1.0) - (n + 0.5) * FastMath.log(n) + n - MathUtil.LOGSQRTTWOPI;
379       }
380     }
381     final double nn = n * n;
382     if(n > 500.0) {
383       return (S0 - S1 / nn) / n;
384     }
385     if(n > 80.0) {
386       return ((S0 - (S1 - S2 / nn)) / nn) / n;
387     }
388     if(n > 35.0) {
389       return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n);
390     }
391     return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n);
392   }
393 
394   /**
395    * Evaluate the deviance term of the saddle point approximation.
396    * <p>
397    * bd0(x,np) = x*ln(x/np)+np-x
398    *
399    * @param x probability density function position
400    * @param np product of trials and success probability: n*p
401    * @return Deviance term
402    */
403   @Reference(title = "Fast and accurate computation of binomial probabilities", //
404       authors = "C. Loader", booktitle = "", //
405       url = "http://projects.scipy.org/scipy/raw-attachment/ticket/620/loader2000Fast.pdf", //
406       bibkey = "web/Loader00")
devianceTerm(double x, double np)407   private static double devianceTerm(double x, double np) {
408     if(Math.abs(x - np) < 0.1 * (x + np)) {
409       final double v = (x - np) / (x + np);
410 
411       double s = (x - np) * v;
412       double ej = 2.0d * x * v;
413       for(int j = 1;; j++) {
414         ej *= v * v;
415         final double s1 = s + ej / (2 * j + 1);
416         if(s1 == s) {
417           return s1;
418         }
419         s = s1;
420       }
421     }
422     return x * FastMath.log(x / np) + np - x;
423   }
424 
425   /**
426    * Poisson distribution probability, but also for non-integer arguments.
427    * <p>
428    * lb^x exp(-lb) / x!
429    *
430    * @param x X
431    * @param lambda lambda
432    * @return Poisson distribution probability
433    */
rawProbability(double x, double lambda)434   public static double rawProbability(double x, double lambda) {
435     // Extreme lambda
436     if(lambda == 0) {
437       return ((x == 0) ? 1. : 0.);
438     }
439     // Extreme values
440     if(Double.isInfinite(lambda) || x < 0) {
441       return 0.;
442     }
443     if(x <= lambda * Double.MIN_NORMAL) {
444       return FastMath.exp(-lambda);
445     }
446     if(lambda < x * Double.MIN_NORMAL) {
447       double r = -lambda + x * FastMath.log(lambda) - GammaDistribution.logGamma(x + 1);
448       return FastMath.exp(r);
449     }
450     final double f = MathUtil.TWOPI * x;
451     final double y = -stirlingError(x) - devianceTerm(x, lambda);
452     return FastMath.exp(y) / FastMath.sqrt(f);
453   }
454 
455   /**
456    * Poisson distribution probability, but also for non-integer arguments.
457    * <p>
458    * lb^x exp(-lb) / x!
459    *
460    * @param x X
461    * @param lambda lambda
462    * @return Poisson distribution probability
463    */
rawLogProbability(double x, double lambda)464   public static double rawLogProbability(double x, double lambda) {
465     // Extreme lambda
466     if(lambda == 0) {
467       return ((x == 0) ? 1. : Double.NEGATIVE_INFINITY);
468     }
469     // Extreme values
470     if(Double.isInfinite(lambda) || x < 0) {
471       return Double.NEGATIVE_INFINITY;
472     }
473     if(x <= lambda * Double.MIN_NORMAL) {
474       return -lambda;
475     }
476     if(lambda < x * Double.MIN_NORMAL) {
477       return -lambda + x * FastMath.log(lambda) - GammaDistribution.logGamma(x + 1);
478     }
479     final double f = MathUtil.TWOPI * x;
480     final double y = -stirlingError(x) - devianceTerm(x, lambda);
481     return -0.5 * FastMath.log(f) + y;
482   }
483 
484   @Override
toString()485   public String toString() {
486     return "PoissonDistribution(n=" + n + ", p=" + p + ")";
487   }
488 
489   /**
490    * Parameterization class
491    *
492    * @author Erich Schubert
493    */
494   public static class Parameterizer extends AbstractDistribution.Parameterizer {
495     /**
496      * Number of trials.
497      */
498     public static final OptionID N_ID = new OptionID("distribution.poisson.n", "Number of trials.");
499 
500     /**
501      * Success probability.
502      */
503     public static final OptionID PROB_ID = new OptionID("distribution.poisson.probability", "Success probability.");
504 
505     /**
506      * Number of trials.
507      */
508     int n;
509 
510     /**
511      * Success probability.
512      */
513     double p;
514 
515     @Override
makeOptions(Parameterization config)516     protected void makeOptions(Parameterization config) {
517       super.makeOptions(config);
518 
519       IntParameter nP = new IntParameter(N_ID) //
520           .addConstraint(CommonConstraints.GREATER_EQUAL_ONE_INT);
521       if(config.grab(nP)) {
522         n = nP.intValue();
523       }
524 
525       DoubleParameter probP = new DoubleParameter(PROB_ID) //
526           .addConstraint(CommonConstraints.GREATER_EQUAL_ZERO_DOUBLE) //
527           .addConstraint(CommonConstraints.LESS_EQUAL_ONE_DOUBLE);
528       if(config.grab(probP)) {
529         p = probP.doubleValue();
530       }
531     }
532 
533     @Override
makeInstance()534     protected PoissonDistribution makeInstance() {
535       return new PoissonDistribution(n, p, rnd);
536     }
537   }
538 }
539