1 /*
2
3 This software was written by Mike Giles, copyright University of Oxford,
4 and is provided under the terms of the GNU GPLv3 license:
5 http://www.gnu.org/licenses/gpl.html
6
7 Commercial users who would like to use the software under a more
8 permissive license, such as BSD, should contact the author:
9 mike.giles@maths.ox.ac.uk
10
11
12 The code below requires the functions:
13
14 erfc defined in the GCC math library
15 erfcinv defined in the next header file
16
17 Used in OpenTURNS with the written permission of the Author, see COPYING.poissinv.
18
19 */
20
21 #ifndef POISSINV_H
22 #define POISSINV_H
23
24 #ifdef __cplusplus
25 extern "C" {
26 #endif
27
28 /* Replace the initial implementation of erfinv by some OpenTURNS equivalent */
29 // #include <erfinv.h>
30 #include "openturns/DistFunc.hxx"
31
32
33 //
34 // This double precision function computes the inverse
35 // of the Poisson CDF using the vector algorithm
36 //
37 // u = CDF value in range (0,1)
38 // lam = Poisson rate
39 //
40 // For lam < 1e15, max |error| no more than 1
41 // ave |error| < 1e-16*max(4,lam) for lam < 1e9
42 // < 1e-6 for lam < 1e15
43 //
44 // For lam > 1e15, the errors will be about 1 ulp.
45 //
46
poissinv_vector(double U,double Lam)47 inline double poissinv_vector(double U, double Lam) {
48 int i;
49 double X=0.0, Xi, T, Del, Rm, R, R2, S, S2, Eta, B0, B1;
50
51 // large lam
52
53 if (Lam > 4.0) {
54
55 S = OT::DistFunc::qNormal(U)/sqrt(Lam);
56 // S = - sqrt(2.0)*erfcinv(2.0*U)/sqrt(Lam);
57
58 Del = 2.0e-6;
59
60 // use polynomial approximations in central region
61
62 if ( (S>-0.6833501) && (S<1.777993) ) {;
63
64 // polynomial approximation to f^{-1}(s) - 1
65
66 Rm = 2.82298751e-07;
67 Rm = -2.58136133e-06 + Rm*S;
68 Rm = 1.02118025e-05 + Rm*S;
69 Rm = -2.37996199e-05 + Rm*S;
70 Rm = 4.05347462e-05 + Rm*S;
71 Rm = -6.63730967e-05 + Rm*S;
72 Rm = 0.000124762566 + Rm*S;
73 Rm = -0.000256970731 + Rm*S;
74 Rm = 0.000558953132 + Rm*S;
75 Rm = -0.00133129194 + Rm*S;
76 Rm = 0.00370367937 + Rm*S;
77 Rm = -0.0138888706 + Rm*S;
78 Rm = 0.166666667 + Rm*S;
79 S += S*(Rm*S);
80 Rm = S;
81
82 // polynomial approximation to correction c0(r)
83
84 T = 1.86386867e-05;
85 T = -0.000207319499 + T*Rm;
86 T = 0.0009689451 + T*Rm;
87 T = -0.00247340054 + T*Rm;
88 T = 0.00379952985 + T*Rm;
89 T = -0.00386717047 + T*Rm;
90 T = 0.00346960934 + T*Rm;
91 T = -0.00414125511 + T*Rm;
92 T = 0.00586752093 + T*Rm;
93 T = -0.00838583787 + T*Rm;
94 T = 0.0132793933 + T*Rm;
95 T = -0.027775536 + T*Rm;
96 T = 0.333333333 + T*Rm;
97
98 // O(1/lam) correction
99
100 X = -0.00014585224;
101 X = 0.00146121529 + X*Rm;
102 X = -0.00610328845 + X*Rm;
103 X = 0.0138117964 + X*Rm;
104 X = -0.0186988746 + X*Rm;
105 X = 0.0168155118 + X*Rm;
106 X = -0.013394797 + X*Rm;
107 X = 0.0135698573 + X*Rm;
108 X = -0.0155377333 + X*Rm;
109 X = 0.0174065334 + X*Rm;
110 X = -0.0198011178 + X*Rm;
111 X = X / Lam;
112
113 // sum from smallest to largest to minimise rounding error
114 // rounding down final sum is important
115
116 S = Lam + (((X+Del)+T)+Lam*S);
117 }
118
119 // otherwise use Newton iteration
120
121 else if (S > -sqrt(2.0)) {
122
123 R = 1.0 + S;
124 if (R<0.1) R = 0.1;
125
126 do {
127 T = log(R);
128 R2 = R;
129 S2 = sqrt(2.0*(1.0 - R + R*T));
130 if (R<1.0) S2 = -S2;
131 R = R2 - (S2-S)*S2/T;
132 if (R<0.1*R2) R = 0.1*R2;
133 } while (fabs(R-R2)>1e-5);
134
135 T = log(R);
136 S = Lam*R + log(sqrt(2.0*R*(1.0-R+R*T))/fabs(R-1.0)) / T;
137 S = S - (8.2/405.0)/(S+0.025*Lam);
138 Del = 0.01/S;
139 S = S + Del;
140 }
141
142 // if x>10, round down to nearest integer, and check accuracy
143
144 X = floor(S);
145
146 if (S>10.0 && S<X+2.0*Del) {
147
148 // correction procedure based on Temme approximation (double precision)
149
150 if (X>0.5*Lam && X<2.0*Lam) {
151
152 Xi = 1.0 / X;
153 Eta = X / Lam;
154 Eta = sqrt(2.0*(1.0-Eta+Eta*log(Eta))/Eta);
155 if (X>Lam) Eta = -Eta;
156
157 B1 = 8.0995211567045583e-16; S = B1;
158 B0 = -1.9752288294349411e-15; S = B0 + S*Eta;
159 B1 = -5.1391118342426808e-16 + 25.0*B1*Xi; S = B1 + S*Eta;
160 B0 = 2.8534893807047458e-14 + 24.0*B0*Xi; S = B0 + S*Eta;
161 B1 = -1.3923887224181616e-13 + 23.0*B1*Xi; S = B1 + S*Eta;
162 B0 = 3.3717632624009806e-13 + 22.0*B0*Xi; S = B0 + S*Eta;
163 B1 = 1.1004392031956284e-13 + 21.0*B1*Xi; S = B1 + S*Eta;
164 B0 = -5.0276692801141763e-12 + 20.0*B0*Xi; S = B0 + S*Eta;
165 B1 = 2.4361948020667402e-11 + 19.0*B1*Xi; S = B1 + S*Eta;
166 B0 = -5.8307721325504166e-11 + 18.0*B0*Xi; S = B0 + S*Eta;
167 B1 = -2.5514193994946487e-11 + 17.0*B1*Xi; S = B1 + S*Eta;
168 B0 = 9.1476995822367933e-10 + 16.0*B0*Xi; S = B0 + S*Eta;
169 B1 = -4.3820360184533521e-09 + 15.0*B1*Xi; S = B1 + S*Eta;
170 B0 = 1.0261809784240299e-08 + 14.0*B0*Xi; S = B0 + S*Eta;
171 B1 = 6.7078535434015332e-09 + 13.0*B1*Xi; S = B1 + S*Eta;
172 B0 = -1.7665952736826086e-07 + 12.0*B0*Xi; S = B0 + S*Eta;
173 B1 = 8.2967113409530833e-07 + 11.0*B1*Xi; S = B1 + S*Eta;
174 B0 = -1.8540622107151585e-06 + 10.0*B0*Xi; S = B0 + S*Eta;
175 B1 = -2.1854485106799979e-06 + 9.0*B1*Xi; S = B1 + S*Eta;
176 B0 = 3.9192631785224383e-05 + 8.0*B0*Xi; S = B0 + S*Eta;
177 B1 = -0.00017875514403292177 + 7.0*B1*Xi; S = B1 + S*Eta;
178 B0 = 0.00035273368606701921 + 6.0*B0*Xi; S = B0 + S*Eta;
179 B1 = 0.0011574074074074078 + 5.0*B1*Xi; S = B1 + S*Eta;
180 B0 = -0.014814814814814815 + 4.0*B0*Xi; S = B0 + S*Eta;
181 B1 = 0.083333333333333329 + 3.0*B1*Xi; S = B1 + S*Eta;
182 B0 = -0.33333333333333331 + 2.0*B0*Xi; S = B0 + S*Eta;
183 S = S / (1.0 + B1*Xi);
184
185 S = S*exp(-0.5*X*Eta*Eta)/sqrt(2.0*3.141592653589793*X);
186 if (X<Lam) {
187 S += 0.5*erfc(Eta*sqrt(0.5*X));
188 if (S > U) X -= 1.0;
189 }
190 else {
191 S -= 0.5*erfc(-Eta*sqrt(0.5*X));
192 if (S > U-1.0) X -= 1.0;
193 }
194 }
195
196 // sum downwards or upwards
197
198 else {
199 Xi = 1.0 / X;
200 S = - (691.0/360360.0);
201 S = (1.0/1188.0) + S*Xi*Xi;
202 S = - (1.0/1680.0) + S*Xi*Xi;
203 S = (1.0/1260.0) + S*Xi*Xi;
204 S = - (1.0/360.0) + S*Xi*Xi;
205 S = (1.0/12.0) + S*Xi*Xi;
206 S = S*Xi;
207 S = (X - Lam) - X*log(X/Lam) - S;
208
209 if (X<Lam) {
210 T = exp(-0.5*S);
211 S = 1.0 - T*(U*T) * sqrt(2.0*3.141592653589793*Xi) * Lam;
212 T = 1.0;
213 Xi = X;
214 for (i=1; i<50; i++) {
215 Xi -= 1.0;
216 T *= Xi/Lam;
217 S += T;
218 }
219 if (S > 0.0) X -= 1.0;
220 }
221
222 else {
223 T = exp(-0.5*S);
224 S = 1.0 - T*((1.0-U)*T) * sqrt(2.0*3.141592653589793*X);
225 Xi = X;
226 for (i=0; i<50; i++) {
227 Xi += 1.0;
228 S = S*Xi/Lam + 1.0;
229 }
230 if (S < 0.0) X -= 1.0;
231 }
232 }
233 }
234 }
235
236 // bottom-up summation
237
238 if (X<10.0) {
239 X = 0.0;
240 T = exp(0.5*Lam);
241 Del = 0.0;
242 if (U>0.5) Del = T*(1e-13*T);
243 S = 1.0 - T*(U*T) + Del;
244
245 while (S<0.0) {
246 X += 1.0;
247 T = X/Lam;
248 Del = T*Del;
249 S = T*S + 1.0;
250 }
251
252 // top-down summation if needed
253
254 if (S < 2.0*Del) {
255 Del = 1e13*Del;
256 T = 1e15*Del;
257
258 while (Del<T) {
259 X += 1.0;
260 Del *= X/Lam;
261 }
262
263 S = (1.0-U)*Del;
264 T = 1.0;
265 while (S>0.0) {
266 T *= X/Lam;
267 S -= T;
268 X -= 1.0;
269 }
270 }
271 }
272
273 return X;
274 }
275
276 //
277 // This double precision function computes the inverse
278 // of the Poisson CDF using the scalar algorithm
279 //
280 // u = CDF value in range (0,1)
281 // lam = Poisson rate
282 //
283 // For lam < 1e15, max |error| no more than 1
284 // ave |error| < 1e-16*max(4,lam) for lam < 1e9
285 // < 1e-6 for lam < 1e15
286 //
287 // For lam > 1e15, the errors will be about 1 ulp.
288 //
289
poissinv_scalar(double U,double Lam)290 inline double poissinv_scalar(double U, double Lam) {
291 int i;
292 double X=0.0, Xi, W, T, Del, R, R2, S, S2, Eta, B0, B1;
293
294 // large lam
295
296 if (Lam > 4.0) {
297
298 W = OT::DistFunc::qNormal(U);
299 // W = - sqrt(2.0)*erfcinv(2.0*U);
300
301 // use polynomial approximations in central region
302
303 if ( fabs(W)<3.0 ) {;
304 double Lam_root = sqrt(Lam);
305
306 S = Lam_root*W + (1.0/3.0 + (1.0/6.0)*W*W)*(1.0 - W/(12.0*Lam_root));
307
308 Del = (1.0 /160.0);
309 Del = (1.0 / 80.0) + Del*(W*W);
310 Del = (1.0 / 40.0) + Del*(W*W);
311 Del = Del / Lam;
312
313 S = Lam + (S + Del);
314 }
315
316 // otherwise use Newton iteration
317
318 else {
319 S = W / sqrt(Lam);
320 R = 1.0 + S;
321 if (R<0.1) R = 0.1;
322
323 do {
324 T = log(R);
325 R2 = R;
326 S2 = sqrt(2.0*((1.0-R) + R*T));
327 if (R<1.0) S2 = -S2;
328 R = R2 - (S2-S)*S2/T;
329 if (R<0.1*R2) R = 0.1*R2;
330 } while (fabs(R-R2)>1e-8);
331
332 T = log(R);
333 S = Lam*R + log(sqrt(2.0*R*((1.0-R)+R*T))/fabs(R-1.0)) / T;
334 S = S - 0.0218/(S+0.065*Lam);
335 Del = 0.01/S;
336 S = S + Del;
337 }
338
339 // if x>10, round down to nearest integer, and check accuracy
340
341 X = floor(S);
342
343 if (S>10.0 && S<X+2.0*Del) {
344
345 // correction procedure based on Temme approximation (double precision)
346
347 if (X>0.5*Lam && X<2.0*Lam) {
348
349 Xi = 1.0 / X;
350 Eta = X / Lam;
351 Eta = sqrt(2.0*(1.0-Eta+Eta*log(Eta))/Eta);
352 if (X>Lam) Eta = -Eta;
353
354 B1 = 8.0995211567045583e-16; S = B1;
355 B0 = -1.9752288294349411e-15; S = B0 + S*Eta;
356 B1 = -5.1391118342426808e-16 + 25.0*B1*Xi; S = B1 + S*Eta;
357 B0 = 2.8534893807047458e-14 + 24.0*B0*Xi; S = B0 + S*Eta;
358 B1 = -1.3923887224181616e-13 + 23.0*B1*Xi; S = B1 + S*Eta;
359 B0 = 3.3717632624009806e-13 + 22.0*B0*Xi; S = B0 + S*Eta;
360 B1 = 1.1004392031956284e-13 + 21.0*B1*Xi; S = B1 + S*Eta;
361 B0 = -5.0276692801141763e-12 + 20.0*B0*Xi; S = B0 + S*Eta;
362 B1 = 2.4361948020667402e-11 + 19.0*B1*Xi; S = B1 + S*Eta;
363 B0 = -5.8307721325504166e-11 + 18.0*B0*Xi; S = B0 + S*Eta;
364 B1 = -2.5514193994946487e-11 + 17.0*B1*Xi; S = B1 + S*Eta;
365 B0 = 9.1476995822367933e-10 + 16.0*B0*Xi; S = B0 + S*Eta;
366 B1 = -4.3820360184533521e-09 + 15.0*B1*Xi; S = B1 + S*Eta;
367 B0 = 1.0261809784240299e-08 + 14.0*B0*Xi; S = B0 + S*Eta;
368 B1 = 6.7078535434015332e-09 + 13.0*B1*Xi; S = B1 + S*Eta;
369 B0 = -1.7665952736826086e-07 + 12.0*B0*Xi; S = B0 + S*Eta;
370 B1 = 8.2967113409530833e-07 + 11.0*B1*Xi; S = B1 + S*Eta;
371 B0 = -1.8540622107151585e-06 + 10.0*B0*Xi; S = B0 + S*Eta;
372 B1 = -2.1854485106799979e-06 + 9.0*B1*Xi; S = B1 + S*Eta;
373 B0 = 3.9192631785224383e-05 + 8.0*B0*Xi; S = B0 + S*Eta;
374 B1 = -0.00017875514403292177 + 7.0*B1*Xi; S = B1 + S*Eta;
375 B0 = 0.00035273368606701921 + 6.0*B0*Xi; S = B0 + S*Eta;
376 B1 = 0.0011574074074074078 + 5.0*B1*Xi; S = B1 + S*Eta;
377 B0 = -0.014814814814814815 + 4.0*B0*Xi; S = B0 + S*Eta;
378 B1 = 0.083333333333333329 + 3.0*B1*Xi; S = B1 + S*Eta;
379 B0 = -0.33333333333333331 + 2.0*B0*Xi; S = B0 + S*Eta;
380 S = S / (1.0 + B1*Xi);
381
382 S = S*exp(-0.5*X*Eta*Eta)/sqrt(2.0*3.141592653589793*X);
383 if (X<Lam) {
384 S += 0.5*erfc(Eta*sqrt(0.5*X));
385 if (S > U) X -= 1.0;
386 }
387 else {
388 S -= 0.5*erfc(-Eta*sqrt(0.5*X));
389 if (S > U-1.0) X -= 1.0;
390 }
391 }
392
393 // sum downwards or upwards
394
395 else {
396 Xi = 1.0 / X;
397 S = - (691.0/360360.0);
398 S = (1.0/1188.0) + S*Xi*Xi;
399 S = - (1.0/1680.0) + S*Xi*Xi;
400 S = (1.0/1260.0) + S*Xi*Xi;
401 S = - (1.0/360.0) + S*Xi*Xi;
402 S = (1.0/12.0) + S*Xi*Xi;
403 S = S*Xi;
404 S = (X - Lam) - X*log(X/Lam) - S;
405
406 if (X<Lam) {
407 T = exp(-0.5*S);
408 S = 1.0 - T*(U*T) * sqrt(2.0*3.141592653589793*Xi) * Lam;
409 T = 1.0;
410 Xi = X;
411 for (i=1; i<50; i++) {
412 Xi -= 1.0;
413 T *= Xi/Lam;
414 S += T;
415 }
416 if (S > 0.0) X -= 1.0;
417 }
418
419 else {
420 T = exp(-0.5*S);
421 S = 1.0 - T*((1.0-U)*T) * sqrt(2.0*3.141592653589793*X);
422 Xi = X;
423 for (i=0; i<50; i++) {
424 Xi += 1.0;
425 S = S*Xi/Lam + 1.0;
426 }
427 if (S < 0.0) X -= 1.0;
428 }
429 }
430 }
431 }
432
433 // bottom-up summation
434
435 if (X<10.0) {
436 X = 0.0;
437 T = exp(0.5*Lam);
438 Del = 0.0;
439 if (U>0.5) Del = T*(1e-13*T);
440 S = 1.0 - T*(U*T) + Del;
441
442 while (S<0.0) {
443 X += 1.0;
444 T = X/Lam;
445 Del = T*Del;
446 S = T*S + 1.0;
447 }
448
449 // top-down summation if needed
450
451 if (S < 2.0*Del) {
452 Del = 1e13*Del;
453 T = 1e15*Del;
454
455 while (Del<T) {
456 X += 1.0;
457 Del *= X/Lam;
458 }
459
460 S = (1.0-U)*Del;
461 T = 1.0;
462 while (S>0.0) {
463 T *= X/Lam;
464 S -= T;
465 X -= 1.0;
466 }
467 }
468 }
469
470 return X;
471 }
472
473 #ifdef __cplusplus
474 }
475 #endif
476
477 #endif
478