1 /*
2 
3 This software was written by Mike Giles, copyright University of Oxford,
4 and is provided under the terms of the GNU GPLv3 license:
5 http://www.gnu.org/licenses/gpl.html
6 
7 Commercial users who would like to use the software under a more
8 permissive license, such as BSD, should contact the author:
9 mike.giles@maths.ox.ac.uk
10 
11 
12 The code below requires the functions:
13 
14 erfc    defined in the GCC math library
15 erfcinv defined in the next header file
16 
17 Used in OpenTURNS with the written permission of the Author, see COPYING.poissinv.
18 
19 */
20 
21 #ifndef POISSINV_H
22 #define POISSINV_H
23 
24 #ifdef __cplusplus
25 extern "C" {
26 #endif
27 
28 /* Replace the initial implementation of erfinv by some OpenTURNS equivalent */
29 // #include <erfinv.h>
30 #include "openturns/DistFunc.hxx"
31 
32 
33 //
34 // This double precision function computes the inverse
35 // of the Poisson CDF using the vector algorithm
36 //
37 // u   = CDF value in range (0,1)
38 // lam = Poisson rate
39 //
40 // For lam < 1e15,  max |error| no more than 1
41 //  ave |error| < 1e-16*max(4,lam) for lam < 1e9
42 //              < 1e-6             for lam < 1e15
43 //
44 // For lam > 1e15, the errors will be about 1 ulp.
45 //
46 
poissinv_vector(double U,double Lam)47 inline double poissinv_vector(double U, double Lam) {
48   int    i;
49   double X=0.0, Xi, T, Del, Rm, R, R2, S, S2, Eta, B0, B1;
50 
51 // large lam
52 
53   if (Lam > 4.0) {
54 
55     S   = OT::DistFunc::qNormal(U)/sqrt(Lam);
56     //    S   = - sqrt(2.0)*erfcinv(2.0*U)/sqrt(Lam);
57 
58     Del = 2.0e-6;
59 
60 // use polynomial approximations in central region
61 
62     if ( (S>-0.6833501) && (S<1.777993) ) {;
63 
64 //  polynomial approximation to f^{-1}(s) - 1
65 
66       Rm =  2.82298751e-07;
67       Rm = -2.58136133e-06 + Rm*S;
68       Rm =  1.02118025e-05 + Rm*S;
69       Rm = -2.37996199e-05 + Rm*S;
70       Rm =  4.05347462e-05 + Rm*S;
71       Rm = -6.63730967e-05 + Rm*S;
72       Rm =  0.000124762566 + Rm*S;
73       Rm = -0.000256970731 + Rm*S;
74       Rm =  0.000558953132 + Rm*S;
75       Rm =  -0.00133129194 + Rm*S;
76       Rm =   0.00370367937 + Rm*S;
77       Rm =   -0.0138888706 + Rm*S;
78       Rm =     0.166666667 + Rm*S;
79       S +=                S*(Rm*S);
80       Rm = S;
81 
82 //  polynomial approximation to correction c0(r)
83 
84       T  =   1.86386867e-05;
85       T  =  -0.000207319499 + T*Rm;
86       T  =     0.0009689451 + T*Rm;
87       T  =   -0.00247340054 + T*Rm;
88       T  =    0.00379952985 + T*Rm;
89       T  =   -0.00386717047 + T*Rm;
90       T  =    0.00346960934 + T*Rm;
91       T  =   -0.00414125511 + T*Rm;
92       T  =    0.00586752093 + T*Rm;
93       T  =   -0.00838583787 + T*Rm;
94       T  =     0.0132793933 + T*Rm;
95       T  =     -0.027775536 + T*Rm;
96       T  =      0.333333333 + T*Rm;
97 
98 //  O(1/lam) correction
99 
100       X  =   -0.00014585224;
101       X  =    0.00146121529 + X*Rm;
102       X  =   -0.00610328845 + X*Rm;
103       X  =     0.0138117964 + X*Rm;
104       X  =    -0.0186988746 + X*Rm;
105       X  =     0.0168155118 + X*Rm;
106       X  =     -0.013394797 + X*Rm;
107       X  =     0.0135698573 + X*Rm;
108       X  =    -0.0155377333 + X*Rm;
109       X  =     0.0174065334 + X*Rm;
110       X  =    -0.0198011178 + X*Rm;
111       X  =  X / Lam;
112 
113 //    sum from smallest to largest to minimise rounding error
114 //    rounding down final sum is important
115 
116       S = Lam + (((X+Del)+T)+Lam*S);
117     }
118 
119 // otherwise use Newton iteration
120 
121     else if (S > -sqrt(2.0)) {
122 
123       R = 1.0 + S;
124       if (R<0.1) R = 0.1;
125 
126       do {
127         T  = log(R);
128         R2 = R;
129         S2 = sqrt(2.0*(1.0 - R + R*T));
130         if (R<1.0) S2 = -S2;
131         R = R2 - (S2-S)*S2/T;
132         if (R<0.1*R2) R = 0.1*R2;
133       } while (fabs(R-R2)>1e-5);
134 
135       T   = log(R);
136       S   = Lam*R + log(sqrt(2.0*R*(1.0-R+R*T))/fabs(R-1.0)) / T;
137       S   = S - (8.2/405.0)/(S+0.025*Lam);
138       Del = 0.01/S;
139       S   = S + Del;
140     }
141 
142 // if x>10, round down to nearest integer, and check accuracy
143 
144     X = floor(S);
145 
146     if (S>10.0 && S<X+2.0*Del) {
147 
148 // correction procedure based on Temme approximation (double precision)
149 
150       if (X>0.5*Lam && X<2.0*Lam) {
151 
152         Xi = 1.0 / X;
153         Eta = X / Lam;
154         Eta = sqrt(2.0*(1.0-Eta+Eta*log(Eta))/Eta);
155         if (X>Lam) Eta = -Eta;
156 
157         B1 =  8.0995211567045583e-16;              S = B1;
158         B0 = -1.9752288294349411e-15;              S = B0 + S*Eta;
159         B1 = -5.1391118342426808e-16 + 25.0*B1*Xi; S = B1 + S*Eta;
160         B0 =  2.8534893807047458e-14 + 24.0*B0*Xi; S = B0 + S*Eta;
161         B1 = -1.3923887224181616e-13 + 23.0*B1*Xi; S = B1 + S*Eta;
162         B0 =  3.3717632624009806e-13 + 22.0*B0*Xi; S = B0 + S*Eta;
163         B1 =  1.1004392031956284e-13 + 21.0*B1*Xi; S = B1 + S*Eta;
164         B0 = -5.0276692801141763e-12 + 20.0*B0*Xi; S = B0 + S*Eta;
165         B1 =  2.4361948020667402e-11 + 19.0*B1*Xi; S = B1 + S*Eta;
166         B0 = -5.8307721325504166e-11 + 18.0*B0*Xi; S = B0 + S*Eta;
167         B1 = -2.5514193994946487e-11 + 17.0*B1*Xi; S = B1 + S*Eta;
168         B0 =  9.1476995822367933e-10 + 16.0*B0*Xi; S = B0 + S*Eta;
169         B1 = -4.3820360184533521e-09 + 15.0*B1*Xi; S = B1 + S*Eta;
170         B0 =  1.0261809784240299e-08 + 14.0*B0*Xi; S = B0 + S*Eta;
171         B1 =  6.7078535434015332e-09 + 13.0*B1*Xi; S = B1 + S*Eta;
172         B0 = -1.7665952736826086e-07 + 12.0*B0*Xi; S = B0 + S*Eta;
173         B1 =  8.2967113409530833e-07 + 11.0*B1*Xi; S = B1 + S*Eta;
174         B0 = -1.8540622107151585e-06 + 10.0*B0*Xi; S = B0 + S*Eta;
175         B1 = -2.1854485106799979e-06 +  9.0*B1*Xi; S = B1 + S*Eta;
176         B0 =  3.9192631785224383e-05 +  8.0*B0*Xi; S = B0 + S*Eta;
177         B1 = -0.00017875514403292177 +  7.0*B1*Xi; S = B1 + S*Eta;
178         B0 =  0.00035273368606701921 +  6.0*B0*Xi; S = B0 + S*Eta;
179         B1 =   0.0011574074074074078 +  5.0*B1*Xi; S = B1 + S*Eta;
180         B0 =   -0.014814814814814815 +  4.0*B0*Xi; S = B0 + S*Eta;
181         B1 =    0.083333333333333329 +  3.0*B1*Xi; S = B1 + S*Eta;
182         B0 =    -0.33333333333333331 +  2.0*B0*Xi; S = B0 + S*Eta;
183         S  = S / (1.0 + B1*Xi);
184 
185         S = S*exp(-0.5*X*Eta*Eta)/sqrt(2.0*3.141592653589793*X);
186         if (X<Lam) {
187           S += 0.5*erfc(Eta*sqrt(0.5*X));
188           if (S > U) X -= 1.0;
189         }
190         else {
191           S -= 0.5*erfc(-Eta*sqrt(0.5*X));
192           if (S > U-1.0) X -= 1.0;
193         }
194       }
195 
196 // sum downwards or upwards
197 
198       else {
199         Xi = 1.0 / X;
200         S = - (691.0/360360.0);
201         S =   (1.0/1188.0) + S*Xi*Xi;
202         S = - (1.0/1680.0) + S*Xi*Xi;
203         S =   (1.0/1260.0) + S*Xi*Xi;
204         S = - (1.0/360.0)  + S*Xi*Xi;
205         S =   (1.0/12.0)   + S*Xi*Xi;
206         S =                  S*Xi;
207         S = (X - Lam) - X*log(X/Lam) - S;
208 
209         if (X<Lam) {
210           T  = exp(-0.5*S);
211           S  = 1.0 - T*(U*T) * sqrt(2.0*3.141592653589793*Xi) * Lam;
212           T  = 1.0;
213           Xi = X;
214           for (i=1; i<50; i++) {
215             Xi -= 1.0;
216             T  *= Xi/Lam;
217             S  += T;
218           }
219           if (S > 0.0) X -= 1.0;
220         }
221 
222         else {
223           T  = exp(-0.5*S);
224           S  = 1.0 - T*((1.0-U)*T) * sqrt(2.0*3.141592653589793*X);
225           Xi = X;
226           for (i=0; i<50; i++) {
227             Xi += 1.0;
228             S   = S*Xi/Lam + 1.0;
229           }
230           if (S < 0.0) X -= 1.0;
231         }
232       }
233     }
234   }
235 
236 // bottom-up summation
237 
238   if (X<10.0) {
239     X   = 0.0;
240     T   = exp(0.5*Lam);
241     Del = 0.0;
242     if (U>0.5) Del = T*(1e-13*T);
243     S   = 1.0 - T*(U*T) + Del;
244 
245     while (S<0.0) {
246       X  += 1.0;
247       T   = X/Lam;
248       Del = T*Del;
249       S   = T*S + 1.0;
250     }
251 
252 // top-down summation if needed
253 
254     if (S < 2.0*Del) {
255       Del = 1e13*Del;
256       T   = 1e15*Del;
257 
258       while (Del<T) {
259         X   += 1.0;
260         Del *= X/Lam;
261       }
262 
263       S = (1.0-U)*Del;
264       T = 1.0;
265       while (S>0.0) {
266         T *= X/Lam;
267         S -= T;
268         X -= 1.0;
269       }
270     }
271   }
272 
273   return X;
274 }
275 
276 //
277 // This double precision function computes the inverse
278 // of the Poisson CDF using the scalar algorithm
279 //
280 // u   = CDF value in range (0,1)
281 // lam = Poisson rate
282 //
283 // For lam < 1e15,  max |error| no more than 1
284 //  ave |error| < 1e-16*max(4,lam) for lam < 1e9
285 //              < 1e-6             for lam < 1e15
286 //
287 // For lam > 1e15, the errors will be about 1 ulp.
288 //
289 
poissinv_scalar(double U,double Lam)290 inline double poissinv_scalar(double U, double Lam) {
291   int    i;
292   double X=0.0, Xi, W, T, Del, R, R2, S, S2, Eta, B0, B1;
293 
294 // large lam
295 
296   if (Lam > 4.0) {
297 
298     W = OT::DistFunc::qNormal(U);
299     //    W = - sqrt(2.0)*erfcinv(2.0*U);
300 
301 // use polynomial approximations in central region
302 
303     if ( fabs(W)<3.0 ) {;
304       double Lam_root = sqrt(Lam);
305 
306       S = Lam_root*W + (1.0/3.0 + (1.0/6.0)*W*W)*(1.0 - W/(12.0*Lam_root));
307 
308       Del = (1.0 /160.0);
309       Del = (1.0 / 80.0) + Del*(W*W);
310       Del = (1.0 / 40.0) + Del*(W*W);
311       Del = Del / Lam;
312 
313       S = Lam + (S + Del);
314     }
315 
316 // otherwise use Newton iteration
317 
318     else {
319       S = W / sqrt(Lam);
320       R = 1.0 + S;
321       if (R<0.1) R = 0.1;
322 
323       do {
324         T  = log(R);
325         R2 = R;
326         S2 = sqrt(2.0*((1.0-R) + R*T));
327         if (R<1.0) S2 = -S2;
328         R = R2 - (S2-S)*S2/T;
329         if (R<0.1*R2) R = 0.1*R2;
330       } while (fabs(R-R2)>1e-8);
331 
332       T   = log(R);
333       S   = Lam*R + log(sqrt(2.0*R*((1.0-R)+R*T))/fabs(R-1.0)) / T;
334       S   = S - 0.0218/(S+0.065*Lam);
335       Del = 0.01/S;
336       S   = S + Del;
337     }
338 
339 // if x>10, round down to nearest integer, and check accuracy
340 
341     X = floor(S);
342 
343     if (S>10.0 && S<X+2.0*Del) {
344 
345 // correction procedure based on Temme approximation (double precision)
346 
347       if (X>0.5*Lam && X<2.0*Lam) {
348 
349         Xi = 1.0 / X;
350         Eta = X / Lam;
351         Eta = sqrt(2.0*(1.0-Eta+Eta*log(Eta))/Eta);
352         if (X>Lam) Eta = -Eta;
353 
354         B1 =  8.0995211567045583e-16;              S = B1;
355         B0 = -1.9752288294349411e-15;              S = B0 + S*Eta;
356         B1 = -5.1391118342426808e-16 + 25.0*B1*Xi; S = B1 + S*Eta;
357         B0 =  2.8534893807047458e-14 + 24.0*B0*Xi; S = B0 + S*Eta;
358         B1 = -1.3923887224181616e-13 + 23.0*B1*Xi; S = B1 + S*Eta;
359         B0 =  3.3717632624009806e-13 + 22.0*B0*Xi; S = B0 + S*Eta;
360         B1 =  1.1004392031956284e-13 + 21.0*B1*Xi; S = B1 + S*Eta;
361         B0 = -5.0276692801141763e-12 + 20.0*B0*Xi; S = B0 + S*Eta;
362         B1 =  2.4361948020667402e-11 + 19.0*B1*Xi; S = B1 + S*Eta;
363         B0 = -5.8307721325504166e-11 + 18.0*B0*Xi; S = B0 + S*Eta;
364         B1 = -2.5514193994946487e-11 + 17.0*B1*Xi; S = B1 + S*Eta;
365         B0 =  9.1476995822367933e-10 + 16.0*B0*Xi; S = B0 + S*Eta;
366         B1 = -4.3820360184533521e-09 + 15.0*B1*Xi; S = B1 + S*Eta;
367         B0 =  1.0261809784240299e-08 + 14.0*B0*Xi; S = B0 + S*Eta;
368         B1 =  6.7078535434015332e-09 + 13.0*B1*Xi; S = B1 + S*Eta;
369         B0 = -1.7665952736826086e-07 + 12.0*B0*Xi; S = B0 + S*Eta;
370         B1 =  8.2967113409530833e-07 + 11.0*B1*Xi; S = B1 + S*Eta;
371         B0 = -1.8540622107151585e-06 + 10.0*B0*Xi; S = B0 + S*Eta;
372         B1 = -2.1854485106799979e-06 +  9.0*B1*Xi; S = B1 + S*Eta;
373         B0 =  3.9192631785224383e-05 +  8.0*B0*Xi; S = B0 + S*Eta;
374         B1 = -0.00017875514403292177 +  7.0*B1*Xi; S = B1 + S*Eta;
375         B0 =  0.00035273368606701921 +  6.0*B0*Xi; S = B0 + S*Eta;
376         B1 =   0.0011574074074074078 +  5.0*B1*Xi; S = B1 + S*Eta;
377         B0 =   -0.014814814814814815 +  4.0*B0*Xi; S = B0 + S*Eta;
378         B1 =    0.083333333333333329 +  3.0*B1*Xi; S = B1 + S*Eta;
379         B0 =    -0.33333333333333331 +  2.0*B0*Xi; S = B0 + S*Eta;
380         S  = S / (1.0 + B1*Xi);
381 
382         S = S*exp(-0.5*X*Eta*Eta)/sqrt(2.0*3.141592653589793*X);
383         if (X<Lam) {
384           S += 0.5*erfc(Eta*sqrt(0.5*X));
385           if (S > U) X -= 1.0;
386         }
387         else {
388           S -= 0.5*erfc(-Eta*sqrt(0.5*X));
389           if (S > U-1.0) X -= 1.0;
390         }
391       }
392 
393 // sum downwards or upwards
394 
395       else {
396         Xi = 1.0 / X;
397         S = - (691.0/360360.0);
398         S =   (1.0/1188.0) + S*Xi*Xi;
399         S = - (1.0/1680.0) + S*Xi*Xi;
400         S =   (1.0/1260.0) + S*Xi*Xi;
401         S = - (1.0/360.0)  + S*Xi*Xi;
402         S =   (1.0/12.0)   + S*Xi*Xi;
403         S =                  S*Xi;
404         S = (X - Lam) - X*log(X/Lam) - S;
405 
406         if (X<Lam) {
407           T  = exp(-0.5*S);
408           S  = 1.0 - T*(U*T) * sqrt(2.0*3.141592653589793*Xi) * Lam;
409           T  = 1.0;
410           Xi = X;
411           for (i=1; i<50; i++) {
412             Xi -= 1.0;
413             T  *= Xi/Lam;
414             S  += T;
415           }
416           if (S > 0.0) X -= 1.0;
417         }
418 
419         else {
420           T  = exp(-0.5*S);
421           S  = 1.0 - T*((1.0-U)*T) * sqrt(2.0*3.141592653589793*X);
422           Xi = X;
423           for (i=0; i<50; i++) {
424             Xi += 1.0;
425             S   = S*Xi/Lam + 1.0;
426           }
427           if (S < 0.0) X -= 1.0;
428         }
429       }
430     }
431   }
432 
433 // bottom-up summation
434 
435   if (X<10.0) {
436     X   = 0.0;
437     T   = exp(0.5*Lam);
438     Del = 0.0;
439     if (U>0.5) Del = T*(1e-13*T);
440     S   = 1.0 - T*(U*T) + Del;
441 
442     while (S<0.0) {
443       X  += 1.0;
444       T   = X/Lam;
445       Del = T*Del;
446       S   = T*S + 1.0;
447     }
448 
449 // top-down summation if needed
450 
451     if (S < 2.0*Del) {
452       Del = 1e13*Del;
453       T   = 1e15*Del;
454 
455       while (Del<T) {
456         X   += 1.0;
457         Del *= X/Lam;
458       }
459 
460       S = (1.0-U)*Del;
461       T = 1.0;
462       while (S>0.0) {
463         T *= X/Lam;
464         S -= T;
465         X -= 1.0;
466       }
467     }
468   }
469 
470   return X;
471 }
472 
473 #ifdef __cplusplus
474 }
475 #endif
476 
477 #endif
478