1 /*
2 Copyright � 1999 CERN - European Organization for Nuclear Research.
3 Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
4 is hereby granted without fee, provided that the above copyright notice appear in all copies and
5 that both that copyright notice and this permission notice appear in supporting documentation.
6 CERN makes no representations about the suitability of this software for any purpose.
7 It is provided "as is" without expressed or implied warranty.
8 */
9 package cern.jet.random;
10
11 import cern.jet.math.Arithmetic;
12 import cern.jet.random.engine.RandomEngine;
13 /**
14 * HyperGeometric distribution; See the <A HREF="http://library.advanced.org/10030/6atpdvah.htm"> math definition</A>
15 *
16 * The hypergeometric distribution with parameters <tt>N</tt>, <tt>n</tt> and <tt>s</tt> is the probability distribution of the random variable X,
17 * whose value is the number of successes in a sample of <tt>n</tt> items from a population of size <tt>N</tt> that has <tt>s</tt> 'success' items and <tt>N - s</tt> 'failure' items.
18 * <p>
19 * <tt>p(k) = C(s,k) * C(N-s,n-k) / C(N,n)</tt> where <tt>C(a,b) = a! / (b! * (a-b)!)</tt>.
20 * <p>
21 * valid for N >= 2, s,n <= N.
22 * <p>
23 * Instance methods operate on a user supplied uniform random number generator; they are unsynchronized.
24 * <dt>
25 * Static methods operate on a default uniform random number generator; they are synchronized.
26 * <p>
27 * <b>Implementation:</b> High performance implementation.
28 * Patchwork Rejection/Inversion method.
29 * <dt>This is a port of <tt>hprsc.c</tt> from the <A HREF="http://www.cis.tu-graz.ac.at/stat/stadl/random.html">C-RAND / WIN-RAND</A> library.
30 * C-RAND's implementation, in turn, is based upon
31 * <p>
32 * H. Zechner (1994): Efficient sampling from continuous and discrete unimodal distributions,
33 * Doctoral Dissertation, 156 pp., Technical University Graz, Austria.
34 *
35 * @author wolfgang.hoschek@cern.ch
36 * @version 1.0, 09/24/99
37 */
38 public class HyperGeometric extends AbstractDiscreteDistribution {
39 protected int my_N;
40 protected int my_s;
41 protected int my_n;
42
43 // cached vars shared by hmdu(...) and hprs(...)
44 private int N_last = -1, M_last = -1, n_last = -1;
45 private int N_Mn, m;
46
47 // cached vars for hmdu(...)
48 private int mp, b;
49 private double Mp, np, fm;
50
51 // cached vars for hprs(...)
52 private int k2, k4, k1, k5;
53 private double dl, dr, r1, r2, r4, r5, ll, lr, c_pm,
54 f1, f2, f4, f5, p1, p2, p3, p4, p5, p6;
55
56
57 // The uniform random number generated shared by all <b>static</b> methods.
58 protected static HyperGeometric shared = new HyperGeometric(1,1,1,makeDefaultGenerator());
59 /**
60 * Constructs a HyperGeometric distribution.
61 */
HyperGeometric(int N, int s, int n, RandomEngine randomGenerator)62 public HyperGeometric(int N, int s, int n, RandomEngine randomGenerator) {
63 setRandomGenerator(randomGenerator);
64 setState(N,s,n);
65 }
fc_lnpk(int k, int N_Mn, int M, int n)66 private static double fc_lnpk(int k, int N_Mn, int M, int n) {
67 return(Arithmetic.logFactorial(k) + Arithmetic.logFactorial(M - k) + Arithmetic.logFactorial(n - k) + Arithmetic.logFactorial(N_Mn + k));
68 }
69 /**
70 * Returns a random number from the distribution.
71 */
hmdu(int N, int M, int n, RandomEngine randomGenerator)72 protected int hmdu(int N, int M, int n, RandomEngine randomGenerator) {
73
74 int I, K;
75 double p, nu, c, d, U;
76
77 if (N != N_last || M != M_last || n != n_last) { // set-up */
78 N_last = N;
79 M_last = M;
80 n_last = n;
81
82 Mp = (double) (M + 1);
83 np = (double) (n + 1); N_Mn = N - M - n;
84
85 p = Mp / (N + 2.0);
86 nu = np * p; /* mode, real */
87 if ((m = (int) nu) == nu && p == 0.5) { /* mode, integer */
88 mp = m--;
89 }
90 else {
91 mp = m + 1; /* mp = m + 1 */
92 }
93
94 /* mode probability, using the external function flogfak(k) = ln(k!) */
95 fm = Math.exp(Arithmetic.logFactorial(N - M) - Arithmetic.logFactorial(N_Mn + m) - Arithmetic.logFactorial(n - m)
96 + Arithmetic.logFactorial(M) - Arithmetic.logFactorial(M - m) - Arithmetic.logFactorial(m)
97 - Arithmetic.logFactorial(N) + Arithmetic.logFactorial(N - n) + Arithmetic.logFactorial(n) );
98
99 /* safety bound - guarantees at least 17 significant decimal digits */
100 /* b = min(n, (long int)(nu + k*c')) */
101 b = (int) (nu + 11.0 * Math.sqrt(nu * (1.0 - p) * (1.0 - n/(double)N) + 1.0));
102 if (b > n) b = n;
103 }
104
105 for (;;) {
106 if ((U = randomGenerator.raw() - fm) <= 0.0) return(m);
107 c = d = fm;
108
109 /* down- and upward search from the mode */
110 for (I = 1; I <= m; I++) {
111 K = mp - I; /* downward search */
112 c *= (double)K/(np - K) * ((double)(N_Mn + K)/(Mp - K));
113 if ((U -= c) <= 0.0) return(K - 1);
114
115 K = m + I; /* upward search */
116 d *= (np - K)/(double)K * ((Mp - K)/(double)(N_Mn + K));
117 if ((U -= d) <= 0.0) return(K);
118 }
119
120 /* upward search from K = 2m + 1 to K = b */
121 for (K = mp + m; K <= b; K++) {
122 d *= (np - K)/(double)K * ((Mp - K)/(double)(N_Mn + K));
123 if ((U -= d) <= 0.0) return(K);
124 }
125 }
126 }
127 /**
128 * Returns a random number from the distribution.
129 */
hprs(int N, int M, int n, RandomEngine randomGenerator)130 protected int hprs(int N, int M, int n, RandomEngine randomGenerator) {
131 int Dk, X, V;
132 double Mp, np, p, nu, U, Y, W; /* (X, Y) <-> (V, W) */
133
134 if (N != N_last || M != M_last || n != n_last) { /* set-up */
135 N_last = N;
136 M_last = M;
137 n_last = n;
138
139 Mp = (double) (M + 1);
140 np = (double) (n + 1); N_Mn = N - M - n;
141
142 p = Mp / (N + 2.0); nu = np * p; // main parameters
143
144 // approximate deviation of reflection points k2, k4 from nu - 1/2
145 U = Math.sqrt(nu * (1.0 - p) * (1.0 - (n + 2.0)/(N + 3.0)) + 0.25);
146
147 // mode m, reflection points k2 and k4, and points k1 and k5, which
148 // delimit the centre region of h(x)
149 // k2 = ceil (nu - 1/2 - U), k1 = 2*k2 - (m - 1 + delta_ml)
150 // k4 = floor(nu - 1/2 + U), k5 = 2*k4 - (m + 1 - delta_mr)
151
152 m = (int) nu;
153 k2 = (int) Math.ceil(nu - 0.5 - U); if (k2 >= m) k2 = m - 1;
154 k4 = (int) (nu - 0.5 + U);
155 k1 = k2 + k2 - m + 1; // delta_ml = 0
156 k5 = k4 + k4 - m; // delta_mr = 1
157
158 // range width of the critical left and right centre region
159 dl = (double) (k2 - k1);
160 dr = (double) (k5 - k4);
161
162 // recurrence constants r(k) = p(k)/p(k-1) at k = k1, k2, k4+1, k5+1
163 r1 = (np/(double) k1 - 1.0) * (Mp - k1)/(double)(N_Mn + k1);
164 r2 = (np/(double) k2 - 1.0) * (Mp - k2)/(double)(N_Mn + k2);
165 r4 = (np/(double)(k4 + 1) - 1.0) * (M - k4)/(double)(N_Mn + k4 + 1);
166 r5 = (np/(double)(k5 + 1) - 1.0) * (M - k5)/(double)(N_Mn + k5 + 1);
167
168 // reciprocal values of the scale parameters of expon. tail envelopes
169 ll = Math.log(r1); // expon. tail left //
170 lr = -Math.log(r5); // expon. tail right //
171
172 // hypergeom. constant, necessary for computing function values f(k)
173 c_pm = fc_lnpk(m, N_Mn, M, n);
174
175 // function values f(k) = p(k)/p(m) at k = k2, k4, k1, k5
176 f2 = Math.exp(c_pm - fc_lnpk(k2, N_Mn, M, n));
177 f4 = Math.exp(c_pm - fc_lnpk(k4, N_Mn, M, n));
178 f1 = Math.exp(c_pm - fc_lnpk(k1, N_Mn, M, n));
179 f5 = Math.exp(c_pm - fc_lnpk(k5, N_Mn, M, n));
180
181 // area of the two centre and the two exponential tail regions
182 // area of the two immediate acceptance regions between k2, k4
183 p1 = f2 * (dl + 1.0); // immed. left
184 p2 = f2 * dl + p1; // centre left
185 p3 = f4 * (dr + 1.0) + p2; // immed. right
186 p4 = f4 * dr + p3; // centre right
187 p5 = f1 / ll + p4; // expon. tail left
188 p6 = f5 / lr + p5; // expon. tail right
189 }
190
191 for (;;) {
192 // generate uniform number U -- U(0, p6)
193 // case distinction corresponding to U
194 if ((U = randomGenerator.raw() * p6) < p2) { // centre left
195
196 // immediate acceptance region R2 = [k2, m) *[0, f2), X = k2, ... m -1
197 if ((W = U - p1) < 0.0) return(k2 + (int)(U/f2));
198 // immediate acceptance region R1 = [k1, k2)*[0, f1), X = k1, ... k2-1
199 if ((Y = W / dl) < f1 ) return(k1 + (int)(W/f1));
200
201 // computation of candidate X < k2, and its counterpart V > k2
202 // either squeeze-acceptance of X or acceptance-rejection of V
203 Dk = (int)(dl * randomGenerator.raw()) + 1;
204 if (Y <= f2 - Dk * (f2 - f2/r2)) { // quick accept of
205 return(k2 - Dk); // X = k2 - Dk
206 }
207 if ((W = f2 + f2 - Y) < 1.0) { // quick reject of V
208 V = k2 + Dk;
209 if (W <= f2 + Dk * (1.0 - f2)/(dl + 1.0)) { // quick accept of
210 return(V); // V = k2 + Dk
211 }
212 if (Math.log(W) <= c_pm - fc_lnpk(V, N_Mn, M, n)) {
213 return(V); // final accept of V
214 }
215 }
216 X = k2 - Dk;
217 }
218 else if (U < p4) { // centre right
219
220 // immediate acceptance region R3 = [m, k4+1)*[0, f4), X = m, ... k4
221 if ((W = U - p3) < 0.0) return(k4 - (int)((U - p2)/f4));
222 // immediate acceptance region R4 = [k4+1, k5+1)*[0, f5)
223 if ((Y = W / dr) < f5 ) return(k5 - (int)(W/f5));
224
225 // computation of candidate X > k4, and its counterpart V < k4
226 // either squeeze-acceptance of X or acceptance-rejection of V
227 Dk = (int)(dr * randomGenerator.raw()) + 1;
228 if (Y <= f4 - Dk * (f4 - f4*r4)) { // quick accept of
229 return(k4 + Dk); // X = k4 + Dk
230 }
231 if ((W = f4 + f4 - Y) < 1.0) { // quick reject of V
232 V = k4 - Dk;
233 if (W <= f4 + Dk * (1.0 - f4)/dr) { // quick accept of
234 return(V); // V = k4 - Dk
235 }
236 if (Math.log(W) <= c_pm - fc_lnpk(V, N_Mn, M, n)) {
237 return(V); // final accept of V
238 }
239 }
240 X = k4 + Dk;
241 }
242 else {
243 Y = randomGenerator.raw();
244 if (U < p5) { // expon. tail left
245 Dk = (int)(1.0 - Math.log(Y)/ll);
246 if ((X = k1 - Dk) < 0) continue; // 0 <= X <= k1 - 1
247 Y *= (U - p4) * ll; // Y -- U(0, h(x))
248 if (Y <= f1 - Dk * (f1 - f1/r1)) {
249 return(X); // quick accept of X
250 }
251 }
252 else { // expon. tail right
253 Dk = (int)(1.0 - Math.log(Y)/lr);
254 if ((X = k5 + Dk) > n ) continue; // k5 + 1 <= X <= n
255 Y *= (U - p5) * lr; // Y -- U(0, h(x)) /
256 if (Y <= f5 - Dk * (f5 - f5*r5)) {
257 return(X); // quick accept of X
258 }
259 }
260 }
261
262 // acceptance-rejection test of candidate X from the original area
263 // test, whether Y <= f(X), with Y = U*h(x) and U -- U(0, 1)
264 // log f(X) = log( m! (M - m)! (n - m)! (N - M - n + m)! )
265 // - log( X! (M - X)! (n - X)! (N - M - n + X)! )
266 // by using an external function for log k!
267 if (Math.log(Y) <= c_pm - fc_lnpk(X, N_Mn, M, n)) return(X);
268 }
269 }
270 /**
271 * Returns a random number from the distribution.
272 */
nextInt()273 public int nextInt() {
274 return nextInt(this.my_N, this.my_s, this.my_n, this.randomGenerator);
275 }
276 /**
277 * Returns a random number from the distribution; bypasses the internal state.
278 */
nextInt(int N, int s, int n)279 public int nextInt(int N, int s, int n) {
280 return nextInt(N,s,n,this.randomGenerator);
281 }
282 /**
283 * Returns a random number from the distribution; bypasses the internal state.
284 */
nextInt(int N, int M, int n, RandomEngine randomGenerator)285 protected int nextInt(int N, int M, int n, RandomEngine randomGenerator) {
286 /******************************************************************
287 * *
288 * Hypergeometric Distribution - Patchwork Rejection/Inversion *
289 * *
290 ******************************************************************
291 * *
292 * The basic algorithms work for parameters 1 <= n <= M <= N/2. *
293 * Otherwise parameters are re-defined in the set-up step and the *
294 * random number K is adapted before delivering. *
295 * For l = m-max(0,n-N+M) < 10 Inversion method hmdu is applied: *
296 * The random numbers are generated via modal down-up search, *
297 * starting at the mode m. The cumulative probabilities *
298 * are avoided by using the technique of chop-down. *
299 * For l >= 10 the Patchwork Rejection method hprs is employed: *
300 * The area below the histogram function f(x) in its *
301 * body is rearranged by certain point reflections. Within a *
302 * large center interval variates are sampled efficiently by *
303 * rejection from uniform hats. Rectangular immediate acceptance *
304 * regions speed up the generation. The remaining tails are *
305 * covered by exponential functions. *
306 * *
307 ******************************************************************
308 * *
309 * FUNCTION : - hprsc samples a random number from the *
310 * Hypergeometric distribution with parameters *
311 * N (number of red and black balls), M (number *
312 * of red balls) and n (number of trials) *
313 * valid for N >= 2, M,n <= N. *
314 * REFERENCE : - H. Zechner (1994): Efficient sampling from *
315 * continuous and discrete unimodal distributions, *
316 * Doctoral Dissertation, 156 pp., Technical *
317 * University Graz, Austria. *
318 * SUBPROGRAMS: - flogfak(k) ... log(k!) with long integer k *
319 * - drand(seed) ... (0,1)-Uniform generator with *
320 * unsigned long integer *seed. *
321 * - hmdu(seed,N,M,n) ... Hypergeometric generator *
322 * for l<10 *
323 * - hprs(seed,N,M,n) ... Hypergeometric generator *
324 * for l>=10 with unsigned long integer *seed, *
325 * long integer N , M , n. *
326 * *
327 ******************************************************************/
328 int Nhalf, n_le_Nhalf, M_le_Nhalf, K;
329
330 Nhalf = N / 2;
331 n_le_Nhalf = (n <= Nhalf) ? n : N - n;
332 M_le_Nhalf = (M <= Nhalf) ? M : N - M;
333
334 if ((n*M/N) < 10) {
335 K = (n_le_Nhalf <= M_le_Nhalf)
336 ? hmdu(N, M_le_Nhalf, n_le_Nhalf, randomGenerator)
337 : hmdu(N, n_le_Nhalf, M_le_Nhalf, randomGenerator);
338 }
339 else {
340 K = (n_le_Nhalf <= M_le_Nhalf)
341 ? hprs(N, M_le_Nhalf, n_le_Nhalf, randomGenerator)
342 : hprs(N, n_le_Nhalf, M_le_Nhalf, randomGenerator);
343 }
344
345 if (n <= Nhalf) {
346 return (M <= Nhalf) ? K : n - K;
347 }
348 else {
349 return (M <= Nhalf) ? M - K : n - N + M + K;
350 }
351 }
352 /**
353 * Returns the probability distribution function.
354 */
pdf(int k)355 public double pdf(int k) {
356 return Arithmetic.binomial(my_s, k) * Arithmetic.binomial(my_N - my_s, my_n - k)
357 / Arithmetic.binomial(my_N, my_n);
358 }
359 /**
360 * Sets the parameters.
361 */
setState(int N, int s, int n)362 public void setState(int N, int s, int n) {
363 this.my_N = N;
364 this.my_s = s;
365 this.my_n = n;
366 }
367 /**
368 * Returns a random number from the distribution.
369 */
staticNextInt(int N, int M, int n)370 public static double staticNextInt(int N, int M, int n) {
371 synchronized (shared) {
372 return shared.nextInt(N,M,n);
373 }
374 }
375 /**
376 * Returns a String representation of the receiver.
377 */
toString()378 public String toString() {
379 return this.getClass().getName()+"("+my_N+","+my_s+","+my_n+")";
380 }
381 /**
382 * Sets the uniform random number generated shared by all <b>static</b> methods.
383 * @param randomGenerator the new uniform random number generator to be shared.
384 */
xstaticSetRandomGenerator(RandomEngine randomGenerator)385 private static void xstaticSetRandomGenerator(RandomEngine randomGenerator) {
386 synchronized (shared) {
387 shared.setRandomGenerator(randomGenerator);
388 }
389 }
390 }
391