1 ////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (C) 2006-2021 The Octave Project Developers
4 //
5 // See the file COPYRIGHT.md in the top-level directory of this
6 // distribution or <https://octave.org/copyright/>.
7 //
8 // This file is part of Octave.
9 //
10 // Octave is free software: you can redistribute it and/or modify it
11 // under the terms of the GNU General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // Octave is distributed in the hope that it will be useful, but
16 // WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 // GNU General Public License for more details.
19 //
20 // You should have received a copy of the GNU General Public License
21 // along with Octave; see the file COPYING.  If not, see
22 // <https://www.gnu.org/licenses/>.
23 //
24 ////////////////////////////////////////////////////////////////////////
25 
/* Original version written by Paul Kienzle distributed as free
   software in the public domain.  */
28 
29 #if defined (HAVE_CONFIG_H)
30 #  include "config.h"
31 #endif
32 
33 #include <cmath>
34 #include <cstddef>
35 
36 #include "f77-fcn.h"
37 #include "lo-error.h"
38 #include "lo-ieee.h"
39 #include "randmtzig.h"
40 #include "randpoisson.h"
41 
42 namespace octave
43 {
xlgamma(double x)44   static double xlgamma (double x)
45   {
46     return std::lgamma (x);
47   }
48 
49   /* ---- pprsc.c from Stadloeber's winrand --- */
50 
51   /* flogfak(k) = ln(k!) */
flogfak(double k)52   static double flogfak (double k)
53   {
54 #define C0  9.18938533204672742e-01
55 #define C1  8.33333333333333333e-02
56 #define C3 -2.77777777777777778e-03
57 #define C5  7.93650793650793651e-04
58 #define C7 -5.95238095238095238e-04
59 
60     static double logfak[30L] =
61       {
62         0.00000000000000000,   0.00000000000000000,   0.69314718055994531,
63         1.79175946922805500,   3.17805383034794562,   4.78749174278204599,
64         6.57925121201010100,   8.52516136106541430,  10.60460290274525023,
65         12.80182748008146961,  15.10441257307551530,  17.50230784587388584,
66         19.98721449566188615,  22.55216385312342289,  25.19122118273868150,
67         27.89927138384089157,  30.67186010608067280,  33.50507345013688888,
68         36.39544520803305358,  39.33988418719949404,  42.33561646075348503,
69         45.38013889847690803,  48.47118135183522388,  51.60667556776437357,
70         54.78472939811231919,  58.00360522298051994,  61.26170176100200198,
71         64.55753862700633106,  67.88974313718153498,  71.25703896716800901
72       };
73 
74     double r, rr;
75 
76     if (k >= 30.0)
77       {
78         r  = 1.0 / k;
79         rr = r * r;
80         return ((k + 0.5)*std::log (k) - k + C0
81                 + r*(C1 + rr*(C3 + rr*(C5 + rr*C7))));
82       }
83     else
84       return (logfak[static_cast<int> (k)]);
85   }
86 
87   /******************************************************************
88    *                                                                *
89    * Poisson Distribution - Patchwork Rejection/Inversion           *
90    *                                                                *
91    ******************************************************************
92    *                                                                *
93    * For parameter my < 10, Tabulated Inversion is applied.         *
94    * For my >= 10, Patchwork Rejection is employed:                 *
95    * The area below the histogram function f(x) is rearranged in    *
96    * its body by certain point reflections. Within a large center   *
97    * interval variates are sampled efficiently by rejection from    *
98    * uniform hats. Rectangular immediate acceptance regions speed   *
99    * up the generation. The remaining tails are covered by          *
100    * exponential functions.                                         *
101    *                                                                *
102    ******************************************************************
103    *                                                                *
104    * FUNCTION :   - pprsc samples a random number from the Poisson  *
105    *                distribution with parameter my > 0.             *
106    * REFERENCE :  - H. Zechner (1994): Efficient sampling from      *
107    *                continuous and discrete unimodal distributions, *
108    *                Doctoral Dissertation, 156 pp., Technical       *
109    *                University Graz, Austria.                       *
110    * SUBPROGRAM : - drand(seed) ... (0,1)-Uniform generator with    *
111    *                unsigned long integer *seed.                    *
112    *                                                                *
113    * Implemented by H. Zechner, January 1994                        *
114    * Revised by F. Niederl, July 1994                               *
115    *                                                                *
116    ******************************************************************/
117 
f(double k,double l_nu,double c_pm)118   static double f (double k, double l_nu, double c_pm)
119   {
120     return exp (k * l_nu - flogfak (k) - c_pm);
121   }
122 
  /* Draw one Poisson variate with parameter my by patchwork rejection.

     my  Poisson parameter.  The only caller in this file passes
         10 < my <= 1e8.  The set-up divides by k1 and k2 and takes
         log (my), so my has to be large enough for those to be positive
         (NOTE(review): the exact lower bound is not established here).

     Returns the sampled count as a double; every return value is built
     from floor/ceil results, so it holds an integral value.

     All set-up constants live in function-local statics and are
     recomputed only when my differs from the value seen on the previous
     call.  That cache is shared by every caller and no synchronization is
     visible in this file (NOTE(review): presumably not safe to call
     concurrently -- confirm).  */
  static double pprsc (double my)
  {
    /* Cached set-up, valid for the parameter stored in my_last.  The
       initial -1.0 never equals a parameter the caller in this file
       passes, so the first call always runs the set-up.  */
    static double my_last = -1.0;
    static double m,  k2, k4, k1, k5;
    static double dl, dr, r1, r2, r4, r5, ll, lr, l_my, c_pm,
      f1, f2, f4, f5, p1, p2, p3, p4, p5, p6;
    double        Dk, X, Y;
    double        Ds, U, V, W;

    if (my != my_last)
      {                               /* set-up           */
        my_last = my;
        /* approximate deviation of reflection points k2, k4 from my - 1/2 */
        Ds = std::sqrt (my + 0.25);

        /* mode m, reflection points k2 and k4, and points k1 and k5,      */
        /* which delimit the centre region of h(x)                         */
        m  = std::floor (my);
        k2 = ceil (my - 0.5 - Ds);
        k4 = std::floor (my - 0.5 + Ds);
        k1 = k2 + k2 - m + 1L;
        k5 = k4 + k4 - m;

        /* range width of the critical left and right centre region        */
        dl = (k2 - k1);
        dr = (k5 - k4);

        /* recurrence constants r(k)=p(k)/p(k-1) at k = k1, k2, k4+1, k5+1 */
        r1 = my / k1;
        r2 = my / k2;
        r4 = my / (k4 + 1.0);
        r5 = my / (k5 + 1.0);

        /* reciprocal values of the scale parameters of exp. tail envelope */
        ll =  std::log (r1);                        /* expon. tail left */
        lr = -std::log (r5);                        /* expon. tail right*/

        /* Poisson constants, necessary for computing function values f(k) */
        l_my = std::log (my);
        c_pm = m * l_my - flogfak (m);

        /* function values f(k) = p(k)/p(m) at k = k2, k4, k1, k5          */
        f2 = f (k2, l_my, c_pm);
        f4 = f (k4, l_my, c_pm);
        f1 = f (k1, l_my, c_pm);
        f5 = f (k5, l_my, c_pm);

        /* area of the two centre and the two exponential tail regions     */
        /* area of the two immediate acceptance regions between k2, k4     */
        /* p1 ... p6 are running (cumulative) sums, so p6 is the total     */
        p1 = f2 * (dl + 1.0);                            /* immed. left    */
        p2 = f2 * dl         + p1;                       /* centre left    */
        p3 = f4 * (dr + 1.0) + p2;                       /* immed. right   */
        p4 = f4 * dr         + p3;                       /* centre right   */
        p5 = f1 / ll         + p4;                       /* exp. tail left */
        p6 = f5 / lr         + p5;                       /* exp. tail right*/
      }

    /* Repeat until a candidate is accepted; every exit is a return.       */
    for (;;)
      {
        /* generate uniform number U -- U(0, p6)                           */
        /* case distinction corresponding to U                             */
        if ((U = rand_uniform<double> () * p6) < p2)
          {                                            /* centre left      */

            /* immediate acceptance region
               R2 = [k2, m) *[0, f2),  X = k2, ... m -1 */
            if ((V = U - p1) < 0.0)  return (k2 + std::floor (U/f2));
            /* immediate acceptance region
               R1 = [k1, k2)*[0, f1),  X = k1, ... k2-1 */
            if ((W = V / dl) < f1 )  return (k1 + std::floor (V/f1));

            /* computation of candidate X < k2, and its counterpart Y > k2 */
            /* either squeeze-acceptance of X or acceptance-rejection of Y */
            Dk = std::floor (dl * rand_uniform<double> ()) + 1.0;
            if (W <= f2 - Dk * (f2 - f2/r2))
              {                                        /* quick accept of  */
                return (k2 - Dk);                      /* X = k2 - Dk      */
              }
            if ((V = f2 + f2 - W) < 1.0)
              {                                        /* quick reject of Y*/
                Y = k2 + Dk;
                if (V <= f2 + Dk * (1.0 - f2)/(dl + 1.0))
                  {                                    /* quick accept of  */
                    return (Y);                        /* Y = k2 + Dk      */
                  }
                if (V <= f (Y, l_my, c_pm))  return (Y); /* final accept of Y*/
              }
            /* no shortcut applied: X and W go to the final test below     */
            X = k2 - Dk;
          }
        else if (U < p4)
          {                                            /* centre right     */
            /*  immediate acceptance region
                R3 = [m, k4+1)*[0, f4), X = m, ... k4    */
            if ((V = U - p3) < 0.0)  return (k4 - std::floor ((U - p2)/f4));
            /* immediate acceptance region
               R4 = [k4+1, k5+1)*[0, f5)                */
            if ((W = V / dr) < f5 )  return (k5 - std::floor (V/f5));

            /* computation of candidate X > k4, and its counterpart Y < k4 */
            /* either squeeze-acceptance of X or acceptance-rejection of Y */
            Dk = std::floor (dr * rand_uniform<double> ()) + 1.0;
            if (W <= f4 - Dk * (f4 - f4*r4))
              {                                        /* quick accept of  */
                return (k4 + Dk);                      /* X = k4 + Dk      */
              }
            if ((V = f4 + f4 - W) < 1.0)
              {                                        /* quick reject of Y*/
                Y = k4 - Dk;
                if (V <= f4 + Dk * (1.0 - f4)/ dr)
                  {                                    /* quick accept of  */
                    return (Y);                        /* Y = k4 - Dk      */
                  }
                if (V <= f (Y, l_my, c_pm))  return (Y); /* final accept of Y*/
              }
            /* no shortcut applied: X and W go to the final test below     */
            X = k4 + Dk;
          }
        else
          {
            /* exponential tails: a second uniform picks the distance Dk   */
            W = rand_uniform<double> ();
            if (U < p5)
              {                                        /* expon. tail left */
                Dk = std::floor (1.0 - std::log (W)/ll);
                if ((X = k1 - Dk) < 0L)  continue;     /* 0 <= X <= k1 - 1 */
                W *= (U - p4) * ll;                    /* W -- U(0, h(x))  */
                if (W <= f1 - Dk * (f1 - f1/r1))
                  return (X);                          /* quick accept of X*/
              }
            else
              {                                        /* expon. tail right*/
                Dk = std::floor (1.0 - std::log (W)/lr);
                X  = k5 + Dk;                          /* X >= k5 + 1      */
                W *= (U - p5) * lr;                    /* W -- U(0, h(x))  */
                if (W <= f5 - Dk * (f5 - f5*r5))
                  return (X);                          /* quick accept of X*/
              }
          }

        /* acceptance-rejection test of candidate X from the original area */
        /* test, whether  W <= f(k),    with  W = U*h(x)  and  U -- U(0, 1)*/
        /* log f(X) = (X - m)*log(my) - log X! + log m!                    */
        /* a failed test starts the next pass of the loop with a fresh U   */
        if (std::log (W) <= X * l_my - flogfak (X) - c_pm)  return (X);
      }
  }
266   /* ---- pprsc.c end ------ */
267 
268   /* The remainder of the file is by Paul Kienzle */
269 
  /* Table size is predicated on the maximum value of lambda
   * we want to store in the table, and the maximum value
   * returned by the uniform random number generator on [0,1).
   * With lambda==10 and u_max = 1 - 1/(2^32+1), we
   * have poisson_pdf(lambda,36) < 1-u_max.  If instead our
   * generator uses more bits of mantissa or returns a value
   * in the range [0,1], then for lambda==10 we need a table
   * size of 46 instead.  For long doubles, the table size
   * will need to be longer still.  */
#define TABLESIZE 46
280 
281   /* Given uniform u, find x such that CDF(L,x)==u.  Return x. */
282 
283   template <typename T>
284   static void
poisson_cdf_lookup(double lambda,T * p,std::size_t n)285   poisson_cdf_lookup (double lambda, T *p, std::size_t n)
286   {
287     double t[TABLESIZE];
288 
289     /* Precompute the table for the u up to and including 0.458.
290      * We will almost certainly need it. */
291     int intlambda = static_cast<int> (std::floor (lambda));
292     double P;
293     int tableidx;
294     std::size_t i = n;
295 
296     t[0] = P = exp (-lambda);
297     for (tableidx = 1; tableidx <= intlambda; tableidx++)
298       {
299         P = P*lambda/static_cast<double> (tableidx);
300         t[tableidx] = t[tableidx-1] + P;
301       }
302 
303     while (i-- > 0)
304       {
305         double u = rand_uniform<double> ();
306 
307         /* If u > 0.458 we know we can jump to floor(lambda) before
308          * comparing (this observation is based on Stadlober's winrand
309          * code). For lambda >= 1, this will be a win.  Lambda < 1
310          * is already fast, so adding an extra comparison is not a
311          * problem. */
312         int k = (u > 0.458 ? intlambda : 0);
313 
314         /* We aren't using a for loop here because when we find the
315          * right k we want to jump to the next iteration of the
316          * outer loop, and the continue statement will only work for
317          * the inner loop. */
318       nextk:
319         if (u <= t[k])
320           {
321             p[i] = static_cast<T> (k);
322             continue;
323           }
324         if (++k < tableidx)
325           goto nextk;
326 
327         /* We only need high values of the table very rarely so we
328          * don't automatically compute the entire table. */
329         while (tableidx < TABLESIZE)
330           {
331             P = P*lambda/static_cast<double> (tableidx);
332             t[tableidx] = t[tableidx-1] + P;
333             /* Make sure we converge to 1.0 just in case u is uniform
334              * on [0,1] rather than [0,1). */
335             if (t[tableidx] == t[tableidx-1]) t[tableidx] = 1.0;
336             tableidx++;
337             if (u <= t[tableidx-1]) break;
338           }
339 
340         /* We are assuming that the table size is big enough here.
341          * This should be true even if rand_uniform is returning values in
342          * the range [0,1] rather than [0,1). */
343         p[i] = static_cast<T> (tableidx-1);
344       }
345   }
346 
347   /* From Press, et al., Numerical Recipes */
348   template <typename T>
349   static void
poisson_rejection(double lambda,T * p,std::size_t n)350   poisson_rejection (double lambda, T *p, std::size_t n)
351   {
352     double sq = std::sqrt (2.0*lambda);
353     double alxm = std::log (lambda);
354     double g = lambda*alxm - xlgamma (lambda+1.0);
355     std::size_t i;
356 
357     for (i = 0; i < n; i++)
358       {
359         double y, em, t;
360         do
361           {
362             do
363               {
364                 y = tan (M_PI*rand_uniform<double> ());
365                 em = sq * y + lambda;
366               } while (em < 0.0);
367             em = std::floor (em);
368             t = 0.9*(1.0+y*y)*exp (em*alxm-flogfak (em)-g);
369           } while (rand_uniform<double> () > t);
370         p[i] = em;
371       }
372   }
373 
374   /* The cutoff of L <= 1e8 in the following two functions before using
375    * the normal approximation is based on:
376    *   > L=1e8; x=floor(linspace(0,2*L,1000));
377    *   > max(abs(normal_pdf(x,L,L)-poisson_pdf(x,L)))
378    *   ans = 1.1376e-28
379    * For L=1e7, the max is around 1e-9, which is within the step size of
380    * rand_uniform.  For L>1e10 the pprsc function breaks down, as I saw
381    * from the histogram of a large sample, so 1e8 is both small enough
382    * and large enough. */
383 
384   /* Generate a set of poisson numbers with the same distribution */
rand_poisson(T L_arg,octave_idx_type n,T * p)385   template <typename T> void rand_poisson (T L_arg, octave_idx_type n, T *p)
386   {
387     double L = L_arg;
388     octave_idx_type i;
389     if (L < 0.0 || lo_ieee_isinf (L))
390       {
391         for (i=0; i<n; i++)
392           p[i] = numeric_limits<T>::NaN ();
393       }
394     else if (L <= 10.0)
395       {
396         poisson_cdf_lookup<T> (L, p, n);
397       }
398     else if (L <= 1e8)
399       {
400         for (i=0; i<n; i++)
401           p[i] = pprsc (L);
402       }
403     else
404       {
405         /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */
406         const double sqrtL = std::sqrt (L);
407         for (i = 0; i < n; i++)
408           {
409             p[i] = std::floor (rand_normal<T> () * sqrtL + L + 0.5);
410             if (p[i] < 0.0)
411               p[i] = 0.0; /* will probably never happen */
412           }
413       }
414   }
415 
416   template void rand_poisson<double> (double, octave_idx_type, double *);
417   template void rand_poisson<float> (float, octave_idx_type, float *);
418 
419   /* Generate one poisson variate */
rand_poisson(T L_arg)420   template <typename T> T rand_poisson (T L_arg)
421   {
422     double L = L_arg;
423     T ret;
424     if (L < 0.0) ret = numeric_limits<T>::NaN ();
425     else if (L <= 12.0)
426       {
427         /* From Press, et al. Numerical recipes */
428         double g = exp (-L);
429         int em = -1;
430         double t = 1.0;
431         do
432           {
433             ++em;
434             t *= rand_uniform<T> ();
435           } while (t > g);
436         ret = em;
437       }
438     else if (L <= 1e8)
439       {
440         /* numerical recipes */
441         poisson_rejection<T> (L, &ret, 1);
442       }
443     else if (lo_ieee_isinf (L))
444       {
445         /* FIXME: R uses NaN, but the normal approximation suggests that
446          * limit should be Inf.  Which is correct? */
447         ret = numeric_limits<T>::NaN ();
448       }
449     else
450       {
451         /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */
452         ret = std::floor (rand_normal<T> () * std::sqrt (L) + L + 0.5);
453         if (ret < 0.0) ret = 0.0; /* will probably never happen */
454       }
455     return ret;
456   }
457 
458   template double rand_poisson<double> (double);
459   template float rand_poisson<float> (float);
460 }
461