1 /*
2  *  Mathlib : A C Library of Special Functions
3  *  Copyright (C) 1998 Ross Ihaka
4  *  Copyright (C) 2000-2012 The R Core Team
5  *  Copyright (C) 2005	The R Foundation
6  *
7  *  This program is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  This program is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with this program; if not, a copy is available at
19  *  http://www.r-project.org/Licenses/
20  *
21  *  SYNOPSIS
22  *
23  *    #include <Rmath.h>
24  *    double rhyper(double NR, double NB, double n);
25  *
26  *  DESCRIPTION
27  *
28  *    Random variates from the hypergeometric distribution.
29  *    Returns the number of white balls drawn when kk balls
30  *    are drawn at random from an urn containing nn1 white
31  *    and nn2 black balls.
32  *
33  *  REFERENCE
34  *
35  *    V. Kachitvichyanukul and B. Schmeiser (1985).
36  *    ``Computer generation of hypergeometric random variates,''
37  *    Journal of Statistical Computation and Simulation 22, 127-145.
38  *
39  *    The original algorithm had a bug -- R bug report PR#7314 --
40  *    giving numbers slightly too small in case III h2pe
41  *    where (m < 100 || ix <= 50) , see below.
42  */
43 
44 #include "nmath.h"
45 
46 /* afc(i) :=  ln( i! )	[logarithm of the factorial i.
47  *	   If (i > 7), use Stirling's approximation, otherwise use table lookup.
48 */
49 
afc(int i)50 static double afc(int i)
51 {
52     const static double al[9] =
53     {
54 	0.0,
55 	0.0,/*ln(0!)=ln(1)*/
56 	0.0,/*ln(1!)=ln(1)*/
57 	0.69314718055994530941723212145817,/*ln(2) */
58 	1.79175946922805500081247735838070,/*ln(6) */
59 	3.17805383034794561964694160129705,/*ln(24)*/
60 	4.78749174278204599424770093452324,
61 	6.57925121201010099506017829290394,
62 	8.52516136106541430016553103634712
63 	/*, 10.60460290274525022841722740072165*/
64     };
65     double di, value;
66 
67     if (i < 0) {
68       MATHLIB_WARNING(("rhyper.c: afc(i), i=%d < 0 -- SHOULD NOT HAPPEN!\n"),
69 		      i);
70       return -1;/* unreached (Wall) */
71     } else if (i <= 7) {
72 	value = al[i + 1];
73     } else {
74 	di = i;
75 	value = (di + 0.5) * log(di) - di + 0.08333333333333 / di
76 	    - 0.00277777777777 / di / di / di + 0.9189385332;
77     }
78     return value;
79 }
80 
rhyper(double nn1in,double nn2in,double kkin)81 double rhyper(double nn1in, double nn2in, double kkin)
82 {
83     const static double con = 57.56462733;
84     const static double deltal = 0.0078;
85     const static double deltau = 0.0034;
86     const static double scale = 1e25;
87 
88     /* extern double afc(int); */
89 
90     int nn1, nn2, kk;
91     int i, ix;
92     Rboolean reject, setup1, setup2;
93 
94     double e, f, g, p, r, t, u, v, y;
95     double de, dg, dr, ds, dt, gl, gu, nk, nm, ub;
96     double xk, xm, xn, y1, ym, yn, yk, alv;
97 
98     /* These should become `thread_local globals' : */
99     static int ks = -1;
100     static int n1s = -1, n2s = -1;
101 
102     static int k, m;
103     static int minjx, maxjx, n1, n2;
104 
105     static double a, d, s, w;
106     static double tn, xl, xr, kl, kr, lamdl, lamdr, p1, p2, p3;
107 
108 
109     /* check parameter validity */
110 
111     if(!R_FINITE(nn1in) || !R_FINITE(nn2in) || !R_FINITE(kkin))
112 	ML_ERR_return_NAN;
113 
114     nn1 = (int) floor(nn1in+0.5);
115     nn2 = (int) floor(nn2in+0.5);
116     kk	= (int) floor(kkin +0.5);
117 
118     if (nn1 < 0 || nn2 < 0 || kk < 0 || kk > nn1 + nn2)
119 	ML_ERR_return_NAN;
120 
121     /* if new parameter values, initialize */
122     reject = TRUE;
123     if (nn1 != n1s || nn2 != n2s) {
124 	setup1 = TRUE;	setup2 = TRUE;
125     } else if (kk != ks) {
126 	setup1 = FALSE;	setup2 = TRUE;
127     } else {
128 	setup1 = FALSE;	setup2 = FALSE;
129     }
130     if (setup1) {
131 	n1s = nn1;
132 	n2s = nn2;
133 	tn = nn1 + nn2;
134 	if (nn1 <= nn2) {
135 	    n1 = nn1;
136 	    n2 = nn2;
137 	} else {
138 	    n1 = nn2;
139 	    n2 = nn1;
140 	}
141     }
142     if (setup2) {
143 	ks = kk;
144 	if (kk + kk >= tn) {
145 	    k = (int)(tn - kk);
146 	} else {
147 	    k = kk;
148 	}
149     }
150     if (setup1 || setup2) {
151 	m = (int) ((k + 1.0) * (n1 + 1.0) / (tn + 2.0));
152 	minjx = imax2(0, k - n2);
153 	maxjx = imin2(n1, k);
154     }
155     /* generate random variate --- Three basic cases */
156 
157     if (minjx == maxjx) { /* I: degenerate distribution ---------------- */
158 	ix = maxjx;
159 	/* return ix;
160 	   No, need to unmangle <TSL>*/
161 	/* return appropriate variate */
162 
163 	if (kk + kk >= tn) {
164 	  if (nn1 > nn2) {
165 	    ix = kk - nn2 + ix;
166 	  } else {
167 	    ix = nn1 - ix;
168 	  }
169 	} else {
170 	  if (nn1 > nn2)
171 	    ix = kk - ix;
172 	}
173 	return ix;
174 
175     } else if (m - minjx < 10) { /* II: inverse transformation ---------- */
176 	if (setup1 || setup2) {
177 	    if (k < n2) {
178 		w = exp(con + afc(n2) + afc(n1 + n2 - k)
179 			- afc(n2 - k) - afc(n1 + n2));
180 	    } else {
181 		w = exp(con + afc(n1) + afc(k)
182 			- afc(k - n2) - afc(n1 + n2));
183 	    }
184 	}
185       L10:
186 	p = w;
187 	ix = minjx;
188 	u = unif_rand() * scale;
189       L20:
190 	if (u > p) {
191 	    u -= p;
192 	    p *= (n1 - ix) * (k - ix);
193 	    ix++;
194 	    p = p / ix / (n2 - k + ix);
195 	    if (ix > maxjx)
196 		goto L10;
197 	    goto L20;
198 	}
199     } else { /* III : h2pe --------------------------------------------- */
200 
201 	if (setup1 || setup2) {
202 	    s = sqrt((tn - k) * k * n1 * n2 / (tn - 1) / tn / tn);
203 
204 	    /* remark: d is defined in reference without int. */
205 	    /* the truncation centers the cell boundaries at 0.5 */
206 
207 	    d = (int) (1.5 * s) + .5;
208 	    xl = m - d + .5;
209 	    xr = m + d + .5;
210 	    a = afc(m) + afc(n1 - m) + afc(k - m) + afc(n2 - k + m);
211 	    kl = exp(a - afc((int) (xl)) - afc((int) (n1 - xl))
212 		     - afc((int) (k - xl))
213 		     - afc((int) (n2 - k + xl)));
214 	    kr = exp(a - afc((int) (xr - 1))
215 		     - afc((int) (n1 - xr + 1))
216 		     - afc((int) (k - xr + 1))
217 		     - afc((int) (n2 - k + xr - 1)));
218 	    lamdl = -log(xl * (n2 - k + xl) / (n1 - xl + 1) / (k - xl + 1));
219 	    lamdr = -log((n1 - xr + 1) * (k - xr + 1) / xr / (n2 - k + xr));
220 	    p1 = d + d;
221 	    p2 = p1 + kl / lamdl;
222 	    p3 = p2 + kr / lamdr;
223 	}
224       L30:
225 	u = unif_rand() * p3;
226 	v = unif_rand();
227 	if (u < p1) {		/* rectangular region */
228 	    ix = (int) (xl + u);
229 	} else if (u <= p2) {	/* left tail */
230 	    ix = (int) (xl + log(v) / lamdl);
231 	    if (ix < minjx)
232 		goto L30;
233 	    v = v * (u - p1) * lamdl;
234 	} else {		/* right tail */
235 	    ix = (int) (xr - log(v) / lamdr);
236 	    if (ix > maxjx)
237 		goto L30;
238 	    v = v * (u - p2) * lamdr;
239 	}
240 
241 	/* acceptance/rejection test */
242 
243 	if (m < 100 || ix <= 50) {
244 	    /* explicit evaluation */
245 	    /* The original algorithm (and TOMS 668) have
246 		   f = f * i * (n2 - k + i) / (n1 - i) / (k - i);
247 	       in the (m > ix) case, but the definition of the
248 	       recurrence relation on p134 shows that the +1 is
249 	       needed. */
250 	    f = 1.0;
251 	    if (m < ix) {
252 		for (i = m + 1; i <= ix; i++)
253 		    f = f * (n1 - i + 1) * (k - i + 1) / (n2 - k + i) / i;
254 	    } else if (m > ix) {
255 		for (i = ix + 1; i <= m; i++)
256 		    f = f * i * (n2 - k + i) / (n1 - i + 1) / (k - i + 1);
257 	    }
258 	    if (v <= f) {
259 		reject = FALSE;
260 	    }
261 	} else {
262 	    /* squeeze using upper and lower bounds */
263 	    y = ix;
264 	    y1 = y + 1.0;
265 	    ym = y - m;
266 	    yn = n1 - y + 1.0;
267 	    yk = k - y + 1.0;
268 	    nk = n2 - k + y1;
269 	    r = -ym / y1;
270 	    s = ym / yn;
271 	    t = ym / yk;
272 	    e = -ym / nk;
273 	    g = yn * yk / (y1 * nk) - 1.0;
274 	    dg = 1.0;
275 	    if (g < 0.0)
276 		dg = 1.0 + g;
277 	    gu = g * (1.0 + g * (-0.5 + g / 3.0));
278 	    gl = gu - .25 * (g * g * g * g) / dg;
279 	    xm = m + 0.5;
280 	    xn = n1 - m + 0.5;
281 	    xk = k - m + 0.5;
282 	    nm = n2 - k + xm;
283 	    ub = y * gu - m * gl + deltau
284 		+ xm * r * (1. + r * (-0.5 + r / 3.0))
285 		+ xn * s * (1. + s * (-0.5 + s / 3.0))
286 		+ xk * t * (1. + t * (-0.5 + t / 3.0))
287 		+ nm * e * (1. + e * (-0.5 + e / 3.0));
288 	    /* test against upper bound */
289 	    alv = log(v);
290 	    if (alv > ub) {
291 		reject = TRUE;
292 	    } else {
293 				/* test against lower bound */
294 		dr = xm * (r * r * r * r);
295 		if (r < 0.0)
296 		    dr /= (1.0 + r);
297 		ds = xn * (s * s * s * s);
298 		if (s < 0.0)
299 		    ds /= (1.0 + s);
300 		dt = xk * (t * t * t * t);
301 		if (t < 0.0)
302 		    dt /= (1.0 + t);
303 		de = nm * (e * e * e * e);
304 		if (e < 0.0)
305 		    de /= (1.0 + e);
306 		if (alv < ub - 0.25 * (dr + ds + dt + de)
307 		    + (y + m) * (gl - gu) - deltal) {
308 		    reject = FALSE;
309 		}
310 		else {
311 		    /* * Stirling's formula to machine accuracy
312 		     */
313 		    if (alv <= (a - afc(ix) - afc(n1 - ix)
314 				- afc(k - ix) - afc(n2 - k + ix))) {
315 			reject = FALSE;
316 		    } else {
317 			reject = TRUE;
318 		    }
319 		}
320 	    }
321 	}
322 	if (reject)
323 	    goto L30;
324     }
325 
326     /* return appropriate variate */
327 
328     if (kk + kk >= tn) {
329 	if (nn1 > nn2) {
330 	    ix = kk - nn2 + ix;
331 	} else {
332 	    ix = nn1 - ix;
333 	}
334     } else {
335 	if (nn1 > nn2)
336 	    ix = kk - ix;
337     }
338     return ix;
339 }
340