1 /*
2 * Mathlib : A C Library of Special Functions
3 * Copyright (C) 1998 Ross Ihaka
4 * Copyright (C) 2000-2012 The R Core Team
5 * Copyright (C) 2005 The R Foundation
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, a copy is available at
19 * http://www.r-project.org/Licenses/
20 *
21 * SYNOPSIS
22 *
23 * #include <Rmath.h>
24 * double rhyper(double NR, double NB, double n);
25 *
26 * DESCRIPTION
27 *
28 * Random variates from the hypergeometric distribution.
29 * Returns the number of white balls drawn when kk balls
30 * are drawn at random from an urn containing nn1 white
31 * and nn2 black balls.
32 *
33 * REFERENCE
34 *
35 * V. Kachitvichyanukul and B. Schmeiser (1985).
36 * ``Computer generation of hypergeometric random variates,''
37 * Journal of Statistical Computation and Simulation 22, 127-145.
38 *
39 * The original algorithm had a bug -- R bug report PR#7314 --
40 * giving numbers slightly too small in case III h2pe
41 * where (m < 100 || ix <= 50) , see below.
42 */
43
44 #include "nmath.h"
45
46 /* afc(i) := ln( i! ) [logarithm of the factorial i.
47 * If (i > 7), use Stirling's approximation, otherwise use table lookup.
48 */
49
afc(int i)50 static double afc(int i)
51 {
52 const static double al[9] =
53 {
54 0.0,
55 0.0,/*ln(0!)=ln(1)*/
56 0.0,/*ln(1!)=ln(1)*/
57 0.69314718055994530941723212145817,/*ln(2) */
58 1.79175946922805500081247735838070,/*ln(6) */
59 3.17805383034794561964694160129705,/*ln(24)*/
60 4.78749174278204599424770093452324,
61 6.57925121201010099506017829290394,
62 8.52516136106541430016553103634712
63 /*, 10.60460290274525022841722740072165*/
64 };
65 double di, value;
66
67 if (i < 0) {
68 MATHLIB_WARNING(("rhyper.c: afc(i), i=%d < 0 -- SHOULD NOT HAPPEN!\n"),
69 i);
70 return -1;/* unreached (Wall) */
71 } else if (i <= 7) {
72 value = al[i + 1];
73 } else {
74 di = i;
75 value = (di + 0.5) * log(di) - di + 0.08333333333333 / di
76 - 0.00277777777777 / di / di / di + 0.9189385332;
77 }
78 return value;
79 }
80
rhyper(double nn1in,double nn2in,double kkin)81 double rhyper(double nn1in, double nn2in, double kkin)
82 {
83 const static double con = 57.56462733;
84 const static double deltal = 0.0078;
85 const static double deltau = 0.0034;
86 const static double scale = 1e25;
87
88 /* extern double afc(int); */
89
90 int nn1, nn2, kk;
91 int i, ix;
92 Rboolean reject, setup1, setup2;
93
94 double e, f, g, p, r, t, u, v, y;
95 double de, dg, dr, ds, dt, gl, gu, nk, nm, ub;
96 double xk, xm, xn, y1, ym, yn, yk, alv;
97
98 /* These should become `thread_local globals' : */
99 static int ks = -1;
100 static int n1s = -1, n2s = -1;
101
102 static int k, m;
103 static int minjx, maxjx, n1, n2;
104
105 static double a, d, s, w;
106 static double tn, xl, xr, kl, kr, lamdl, lamdr, p1, p2, p3;
107
108
109 /* check parameter validity */
110
111 if(!R_FINITE(nn1in) || !R_FINITE(nn2in) || !R_FINITE(kkin))
112 ML_ERR_return_NAN;
113
114 nn1 = (int) floor(nn1in+0.5);
115 nn2 = (int) floor(nn2in+0.5);
116 kk = (int) floor(kkin +0.5);
117
118 if (nn1 < 0 || nn2 < 0 || kk < 0 || kk > nn1 + nn2)
119 ML_ERR_return_NAN;
120
121 /* if new parameter values, initialize */
122 reject = TRUE;
123 if (nn1 != n1s || nn2 != n2s) {
124 setup1 = TRUE; setup2 = TRUE;
125 } else if (kk != ks) {
126 setup1 = FALSE; setup2 = TRUE;
127 } else {
128 setup1 = FALSE; setup2 = FALSE;
129 }
130 if (setup1) {
131 n1s = nn1;
132 n2s = nn2;
133 tn = nn1 + nn2;
134 if (nn1 <= nn2) {
135 n1 = nn1;
136 n2 = nn2;
137 } else {
138 n1 = nn2;
139 n2 = nn1;
140 }
141 }
142 if (setup2) {
143 ks = kk;
144 if (kk + kk >= tn) {
145 k = (int)(tn - kk);
146 } else {
147 k = kk;
148 }
149 }
150 if (setup1 || setup2) {
151 m = (int) ((k + 1.0) * (n1 + 1.0) / (tn + 2.0));
152 minjx = imax2(0, k - n2);
153 maxjx = imin2(n1, k);
154 }
155 /* generate random variate --- Three basic cases */
156
157 if (minjx == maxjx) { /* I: degenerate distribution ---------------- */
158 ix = maxjx;
159 /* return ix;
160 No, need to unmangle <TSL>*/
161 /* return appropriate variate */
162
163 if (kk + kk >= tn) {
164 if (nn1 > nn2) {
165 ix = kk - nn2 + ix;
166 } else {
167 ix = nn1 - ix;
168 }
169 } else {
170 if (nn1 > nn2)
171 ix = kk - ix;
172 }
173 return ix;
174
175 } else if (m - minjx < 10) { /* II: inverse transformation ---------- */
176 if (setup1 || setup2) {
177 if (k < n2) {
178 w = exp(con + afc(n2) + afc(n1 + n2 - k)
179 - afc(n2 - k) - afc(n1 + n2));
180 } else {
181 w = exp(con + afc(n1) + afc(k)
182 - afc(k - n2) - afc(n1 + n2));
183 }
184 }
185 L10:
186 p = w;
187 ix = minjx;
188 u = unif_rand() * scale;
189 L20:
190 if (u > p) {
191 u -= p;
192 p *= (n1 - ix) * (k - ix);
193 ix++;
194 p = p / ix / (n2 - k + ix);
195 if (ix > maxjx)
196 goto L10;
197 goto L20;
198 }
199 } else { /* III : h2pe --------------------------------------------- */
200
201 if (setup1 || setup2) {
202 s = sqrt((tn - k) * k * n1 * n2 / (tn - 1) / tn / tn);
203
204 /* remark: d is defined in reference without int. */
205 /* the truncation centers the cell boundaries at 0.5 */
206
207 d = (int) (1.5 * s) + .5;
208 xl = m - d + .5;
209 xr = m + d + .5;
210 a = afc(m) + afc(n1 - m) + afc(k - m) + afc(n2 - k + m);
211 kl = exp(a - afc((int) (xl)) - afc((int) (n1 - xl))
212 - afc((int) (k - xl))
213 - afc((int) (n2 - k + xl)));
214 kr = exp(a - afc((int) (xr - 1))
215 - afc((int) (n1 - xr + 1))
216 - afc((int) (k - xr + 1))
217 - afc((int) (n2 - k + xr - 1)));
218 lamdl = -log(xl * (n2 - k + xl) / (n1 - xl + 1) / (k - xl + 1));
219 lamdr = -log((n1 - xr + 1) * (k - xr + 1) / xr / (n2 - k + xr));
220 p1 = d + d;
221 p2 = p1 + kl / lamdl;
222 p3 = p2 + kr / lamdr;
223 }
224 L30:
225 u = unif_rand() * p3;
226 v = unif_rand();
227 if (u < p1) { /* rectangular region */
228 ix = (int) (xl + u);
229 } else if (u <= p2) { /* left tail */
230 ix = (int) (xl + log(v) / lamdl);
231 if (ix < minjx)
232 goto L30;
233 v = v * (u - p1) * lamdl;
234 } else { /* right tail */
235 ix = (int) (xr - log(v) / lamdr);
236 if (ix > maxjx)
237 goto L30;
238 v = v * (u - p2) * lamdr;
239 }
240
241 /* acceptance/rejection test */
242
243 if (m < 100 || ix <= 50) {
244 /* explicit evaluation */
245 /* The original algorithm (and TOMS 668) have
246 f = f * i * (n2 - k + i) / (n1 - i) / (k - i);
247 in the (m > ix) case, but the definition of the
248 recurrence relation on p134 shows that the +1 is
249 needed. */
250 f = 1.0;
251 if (m < ix) {
252 for (i = m + 1; i <= ix; i++)
253 f = f * (n1 - i + 1) * (k - i + 1) / (n2 - k + i) / i;
254 } else if (m > ix) {
255 for (i = ix + 1; i <= m; i++)
256 f = f * i * (n2 - k + i) / (n1 - i + 1) / (k - i + 1);
257 }
258 if (v <= f) {
259 reject = FALSE;
260 }
261 } else {
262 /* squeeze using upper and lower bounds */
263 y = ix;
264 y1 = y + 1.0;
265 ym = y - m;
266 yn = n1 - y + 1.0;
267 yk = k - y + 1.0;
268 nk = n2 - k + y1;
269 r = -ym / y1;
270 s = ym / yn;
271 t = ym / yk;
272 e = -ym / nk;
273 g = yn * yk / (y1 * nk) - 1.0;
274 dg = 1.0;
275 if (g < 0.0)
276 dg = 1.0 + g;
277 gu = g * (1.0 + g * (-0.5 + g / 3.0));
278 gl = gu - .25 * (g * g * g * g) / dg;
279 xm = m + 0.5;
280 xn = n1 - m + 0.5;
281 xk = k - m + 0.5;
282 nm = n2 - k + xm;
283 ub = y * gu - m * gl + deltau
284 + xm * r * (1. + r * (-0.5 + r / 3.0))
285 + xn * s * (1. + s * (-0.5 + s / 3.0))
286 + xk * t * (1. + t * (-0.5 + t / 3.0))
287 + nm * e * (1. + e * (-0.5 + e / 3.0));
288 /* test against upper bound */
289 alv = log(v);
290 if (alv > ub) {
291 reject = TRUE;
292 } else {
293 /* test against lower bound */
294 dr = xm * (r * r * r * r);
295 if (r < 0.0)
296 dr /= (1.0 + r);
297 ds = xn * (s * s * s * s);
298 if (s < 0.0)
299 ds /= (1.0 + s);
300 dt = xk * (t * t * t * t);
301 if (t < 0.0)
302 dt /= (1.0 + t);
303 de = nm * (e * e * e * e);
304 if (e < 0.0)
305 de /= (1.0 + e);
306 if (alv < ub - 0.25 * (dr + ds + dt + de)
307 + (y + m) * (gl - gu) - deltal) {
308 reject = FALSE;
309 }
310 else {
311 /* * Stirling's formula to machine accuracy
312 */
313 if (alv <= (a - afc(ix) - afc(n1 - ix)
314 - afc(k - ix) - afc(n2 - k + ix))) {
315 reject = FALSE;
316 } else {
317 reject = TRUE;
318 }
319 }
320 }
321 }
322 if (reject)
323 goto L30;
324 }
325
326 /* return appropriate variate */
327
328 if (kk + kk >= tn) {
329 if (nn1 > nn2) {
330 ix = kk - nn2 + ix;
331 } else {
332 ix = nn1 - ix;
333 }
334 } else {
335 if (nn1 > nn2)
336 ix = kk - ix;
337 }
338 return ix;
339 }
340