1 /*
2 Copyright � 1999 CERN - European Organization for Nuclear Research.
3 Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
4 is hereby granted without fee, provided that the above copyright notice appear in all copies and
5 that both that copyright notice and this permission notice appear in supporting documentation.
6 CERN makes no representations about the suitability of this software for any purpose.
7 It is provided "as is" without expressed or implied warranty.
8 */
9 package cern.jet.random;
10 
11 import cern.jet.math.Arithmetic;
12 import cern.jet.random.engine.RandomEngine;
13 /**
14  * HyperGeometric distribution; See the <A HREF="http://library.advanced.org/10030/6atpdvah.htm"> math definition</A>
15  *
16  * The hypergeometric distribution with parameters <tt>N</tt>, <tt>n</tt> and <tt>s</tt> is the probability distribution of the random variable X,
17  * whose value is the number of successes in a sample of <tt>n</tt> items from a population of size <tt>N</tt> that has <tt>s</tt> 'success' items and <tt>N - s</tt> 'failure' items.
18  * <p>
19  * <tt>p(k) = C(s,k) * C(N-s,n-k) / C(N,n)</tt> where <tt>C(a,b) = a! / (b! * (a-b)!)</tt>.
20  * <p>
21  * valid for N >= 2, s,n <= N.
22  * <p>
23  * Instance methods operate on a user supplied uniform random number generator; they are unsynchronized.
24  * <dt>
25  * Static methods operate on a default uniform random number generator; they are synchronized.
26  * <p>
27  * <b>Implementation:</b> High performance implementation.
28  * Patchwork Rejection/Inversion method.
29  * <dt>This is a port of <tt>hprsc.c</tt> from the <A HREF="http://www.cis.tu-graz.ac.at/stat/stadl/random.html">C-RAND / WIN-RAND</A> library.
30  * C-RAND's implementation, in turn, is based upon
31  * <p>
32  * H. Zechner (1994): Efficient sampling from continuous and discrete unimodal distributions,
33  * Doctoral Dissertation, 156 pp., Technical University Graz, Austria.
34  *
35  * @author wolfgang.hoschek@cern.ch
36  * @version 1.0, 09/24/99
37  */
38 public class HyperGeometric extends AbstractDiscreteDistribution {
39 	protected       int my_N;
40 	protected       int my_s;
41 	protected       int my_n;
42 
43 	// cached vars shared by hmdu(...) and hprs(...)
44 	private int     N_last = -1, M_last = -1, n_last = -1;
45 	private int     N_Mn, m;
46 
47 	// cached vars for hmdu(...)
48 	private int     mp, b;
49 	private double  Mp, np, fm;
50 
51 	// cached vars for hprs(...)
52 	private int     k2, k4, k1, k5;
53 	private double  dl, dr, r1, r2, r4, r5, ll, lr, c_pm,
54 					f1, f2, f4, f5, p1, p2, p3, p4, p5, p6;
55 
56 
57 	// The uniform random number generated shared by all <b>static</b> methods.
58 	protected static HyperGeometric shared = new HyperGeometric(1,1,1,makeDefaultGenerator());
59 /**
60  * Constructs a HyperGeometric distribution.
61  */
HyperGeometric(int N, int s, int n, RandomEngine randomGenerator)62 public HyperGeometric(int N, int s, int n, RandomEngine randomGenerator) {
63 	setRandomGenerator(randomGenerator);
64 	setState(N,s,n);
65 }
fc_lnpk(int k, int N_Mn, int M, int n)66 private static double fc_lnpk(int k, int N_Mn, int M, int n) {
67 	return(Arithmetic.logFactorial(k) + Arithmetic.logFactorial(M - k) + Arithmetic.logFactorial(n - k) + Arithmetic.logFactorial(N_Mn + k));
68 }
69 /**
70  * Returns a random number from the distribution.
71  */
hmdu(int N, int M, int n, RandomEngine randomGenerator)72 protected int hmdu(int N, int M, int n, RandomEngine randomGenerator) {
73 
74   int            I, K;
75   double              p, nu, c, d, U;
76 
77   if (N != N_last || M != M_last || n != n_last) {   // set-up           */
78 		N_last = N;
79 	 M_last = M;
80 	 n_last = n;
81 
82 	 Mp = (double) (M + 1);
83 	 np = (double) (n + 1);  N_Mn = N - M - n;
84 
85 	 p  = Mp / (N + 2.0);
86 	 nu = np * p;                             /* mode, real       */
87 	 if ((m = (int) nu) == nu && p == 0.5) {     /* mode, integer    */
88 		mp = m--;
89 	 }
90 	 else {
91 		mp = m + 1;                           /* mp = m + 1       */
92 		}
93 
94  /* mode probability, using the external function flogfak(k) = ln(k!)    */
95 	 fm = Math.exp(Arithmetic.logFactorial(N - M) - Arithmetic.logFactorial(N_Mn + m) - Arithmetic.logFactorial(n - m)
96 		 + Arithmetic.logFactorial(M)     - Arithmetic.logFactorial(M - m)    - Arithmetic.logFactorial(m)
97 		 - Arithmetic.logFactorial(N)     + Arithmetic.logFactorial(N - n)    + Arithmetic.logFactorial(n)   );
98 
99  /* safety bound  -  guarantees at least 17 significant decimal digits   */
100  /*                  b = min(n, (long int)(nu + k*c')) */
101 		b = (int) (nu + 11.0 * Math.sqrt(nu * (1.0 - p) * (1.0 - n/(double)N) + 1.0));
102 		if (b > n) b = n;
103 	}
104 
105 	for (;;) {
106 		if ((U = randomGenerator.raw() - fm) <= 0.0)  return(m);
107 		c = d = fm;
108 
109  /* down- and upward search from the mode                                */
110 		for (I = 1; I <= m; I++) {
111 			K  = mp - I;                                   /* downward search  */
112 			c *= (double)K/(np - K) * ((double)(N_Mn + K)/(Mp - K));
113 			if ((U -= c) <= 0.0)  return(K - 1);
114 
115 			K  = m + I;                                    /* upward search    */
116 			d *= (np - K)/(double)K * ((Mp - K)/(double)(N_Mn + K));
117 			if ((U -= d) <= 0.0)  return(K);
118 		}
119 
120  /* upward search from K = 2m + 1 to K = b                               */
121 		for (K = mp + m; K <= b; K++) {
122 			d *= (np - K)/(double)K * ((Mp - K)/(double)(N_Mn + K));
123 			if ((U -= d) <= 0.0)  return(K);
124 		}
125 	}
126 }
127 /**
128  * Returns a random number from the distribution.
129  */
hprs(int N, int M, int n, RandomEngine randomGenerator)130 protected int hprs(int N, int M, int n,  RandomEngine randomGenerator) {
131 	int    Dk, X, V;
132 	double Mp, np, p, nu, U, Y, W;       /* (X, Y) <-> (V, W) */
133 
134 	if (N != N_last || M != M_last || n != n_last) {  /* set-up            */
135 		N_last = N;
136 		M_last = M;
137 		n_last = n;
138 
139 		Mp = (double) (M + 1);
140 		np = (double) (n + 1);  N_Mn = N - M - n;
141 
142 		p  = Mp / (N + 2.0);  nu = np * p;              // main parameters
143 
144 		// approximate deviation of reflection points k2, k4 from nu - 1/2
145 		U  = Math.sqrt(nu * (1.0 - p) * (1.0 - (n + 2.0)/(N + 3.0)) + 0.25);
146 
147 		// mode m, reflection points k2 and k4, and points k1 and k5, which
148 		// delimit the centre region of h(x)
149 		// k2 = ceil (nu - 1/2 - U),    k1 = 2*k2 - (m - 1 + delta_ml)
150 		// k4 = floor(nu - 1/2 + U),    k5 = 2*k4 - (m + 1 - delta_mr)
151 
152 		m  = (int) nu;
153 		k2 = (int) Math.ceil(nu - 0.5 - U);  if (k2 >= m)  k2 = m - 1;
154 		k4 = (int)     (nu - 0.5 + U);
155 		k1 = k2 + k2 - m + 1;            				// delta_ml = 0
156 		k5 = k4 + k4 - m;                               // delta_mr = 1
157 
158 		// range width of the critical left and right centre region
159 		dl = (double) (k2 - k1);
160 		dr = (double) (k5 - k4);
161 
162 		// recurrence constants r(k) = p(k)/p(k-1) at k = k1, k2, k4+1, k5+1
163 		r1 = (np/(double) k1       - 1.0) * (Mp - k1)/(double)(N_Mn + k1);
164 		r2 = (np/(double) k2       - 1.0) * (Mp - k2)/(double)(N_Mn + k2);
165 		r4 = (np/(double)(k4 + 1) - 1.0) * (M  - k4)/(double)(N_Mn + k4 + 1);
166 		r5 = (np/(double)(k5 + 1) - 1.0) * (M  - k5)/(double)(N_Mn + k5 + 1);
167 
168 		// reciprocal values of the scale parameters of expon. tail envelopes
169 		ll =  Math.log(r1);                                  // expon. tail left  //
170 		lr = -Math.log(r5);                                  // expon. tail right //
171 
172 		// hypergeom. constant, necessary for computing function values f(k)
173 		c_pm = fc_lnpk(m, N_Mn, M, n);
174 
175 		// function values f(k) = p(k)/p(m)  at  k = k2, k4, k1, k5
176 		f2 = Math.exp(c_pm - fc_lnpk(k2, N_Mn, M, n));
177 		f4 = Math.exp(c_pm - fc_lnpk(k4, N_Mn, M, n));
178 		f1 = Math.exp(c_pm - fc_lnpk(k1, N_Mn, M, n));
179 		f5 = Math.exp(c_pm - fc_lnpk(k5, N_Mn, M, n));
180 
181 		// area of the two centre and the two exponential tail regions
182 		// area of the two immediate acceptance regions between k2, k4
183 		p1 = f2 * (dl + 1.0);                           // immed. left
184 		p2 = f2 * dl         + p1;                      // centre left
185 		p3 = f4 * (dr + 1.0) + p2;                      // immed. right
186 		p4 = f4 * dr         + p3;                      // centre right
187 		p5 = f1 / ll         + p4;                      // expon. tail left
188 		p6 = f5 / lr         + p5;                      // expon. tail right
189 	}
190 
191 	for (;;) {
192 		// generate uniform number U -- U(0, p6)
193 		// case distinction corresponding to U
194 		if ((U = randomGenerator.raw() * p6) < p2) {    // centre left
195 
196 			// immediate acceptance region R2 = [k2, m) *[0, f2),  X = k2, ... m -1
197 			if ((W = U - p1) < 0.0)  return(k2 + (int)(U/f2));
198 			// immediate acceptance region R1 = [k1, k2)*[0, f1),  X = k1, ... k2-1
199 			if ((Y = W / dl) < f1 )  return(k1 + (int)(W/f1));
200 
201 			// computation of candidate X < k2, and its counterpart V > k2
202 			// either squeeze-acceptance of X or acceptance-rejection of V
203 			Dk = (int)(dl * randomGenerator.raw()) + 1;
204 			if (Y <= f2 - Dk * (f2 - f2/r2)) {            // quick accept of
205 				return(k2 - Dk);                          // X = k2 - Dk
206 			}
207 			if ((W = f2 + f2 - Y) < 1.0) {                // quick reject of V
208 				V = k2 + Dk;
209 				if (W <= f2 + Dk * (1.0 - f2)/(dl + 1.0)) { // quick accept of
210 					return(V);                              // V = k2 + Dk
211 				}
212 				if (Math.log(W) <= c_pm - fc_lnpk(V, N_Mn, M, n)) {
213 					return(V);               // final accept of V
214 				}
215 			}
216 			X = k2 - Dk;
217 		}
218 		else if (U < p4) {                              // centre right
219 
220 			// immediate acceptance region R3 = [m, k4+1)*[0, f4), X = m, ... k4
221 			if ((W = U - p3) < 0.0)  return(k4 - (int)((U - p2)/f4));
222 			// immediate acceptance region R4 = [k4+1, k5+1)*[0, f5)
223 			if ((Y = W / dr) < f5 )  return(k5 - (int)(W/f5));
224 
225 			// computation of candidate X > k4, and its counterpart V < k4
226 			// either squeeze-acceptance of X or acceptance-rejection of V
227 			Dk = (int)(dr * randomGenerator.raw()) + 1;
228 			if (Y <= f4 - Dk * (f4 - f4*r4)) {            // quick accept of
229 				return(k4 + Dk);                          // X = k4 + Dk
230 			}
231 			if ((W = f4 + f4 - Y) < 1.0) {                // quick reject of V
232 				V = k4 - Dk;
233 				if (W <= f4 + Dk * (1.0 - f4)/dr) {       // quick accept of
234 					return(V);                            // V = k4 - Dk
235 				}
236 				if (Math.log(W) <= c_pm - fc_lnpk(V, N_Mn, M, n)) {
237 					return(V);                            // final accept of V
238 				}
239 			}
240 			X = k4 + Dk;
241 		}
242 		else {
243 			Y = randomGenerator.raw();
244 			if (U < p5) {                                 // expon. tail left
245 				Dk = (int)(1.0 - Math.log(Y)/ll);
246 				if ((X = k1 - Dk) < 0)  continue;         // 0 <= X <= k1 - 1
247 				Y *= (U - p4) * ll;                       // Y -- U(0, h(x))
248 				if (Y <= f1 - Dk * (f1 - f1/r1)) {
249 					return(X);                            // quick accept of X
250 				}
251 			}
252 			else {                                        // expon. tail right
253 				Dk = (int)(1.0 - Math.log(Y)/lr);
254 				if ((X = k5 + Dk) > n )  continue;        // k5 + 1 <= X <= n
255 				Y *= (U - p5) * lr;                       // Y -- U(0, h(x))   /
256 				if (Y <= f5 - Dk * (f5 - f5*r5)) {
257 					return(X);                            // quick accept of X
258 				}
259 			}
260 		}
261 
262 	// acceptance-rejection test of candidate X from the original area
263 	// test, whether  Y <= f(X),    with  Y = U*h(x)  and  U -- U(0, 1)
264 	// log f(X) = log( m! (M - m)! (n - m)! (N - M - n + m)! )
265 	//          - log( X! (M - X)! (n - X)! (N - M - n + X)! )
266 	// by using an external function for log k!
267 		if (Math.log(Y) <= c_pm - fc_lnpk(X, N_Mn, M, n))  return(X);
268 	}
269 }
270 /**
271  * Returns a random number from the distribution.
272  */
nextInt()273 public int nextInt() {
274 	return nextInt(this.my_N, this.my_s, this.my_n, this.randomGenerator);
275 }
276 /**
277  * Returns a random number from the distribution; bypasses the internal state.
278  */
nextInt(int N, int s, int n)279 public int nextInt(int N, int s, int n) {
280 	return nextInt(N,s,n,this.randomGenerator);
281 }
282 /**
283  * Returns a random number from the distribution; bypasses the internal state.
284  */
nextInt(int N, int M, int n, RandomEngine randomGenerator)285 protected int nextInt(int N, int M, int n, RandomEngine randomGenerator) {
286 /******************************************************************
287  *                                                                *
288  * Hypergeometric Distribution - Patchwork Rejection/Inversion    *
289  *                                                                *
290  ******************************************************************
291  *                                                                *
292  * The basic algorithms work for parameters 1 <= n <= M <= N/2.   *
293  * Otherwise parameters are re-defined in the set-up step and the *
294  * random number K is adapted before delivering.                  *
295  * For l = m-max(0,n-N+M) < 10  Inversion method hmdu is applied: *
296  * The random numbers are generated via modal down-up search,     *
297  * starting at the mode m. The cumulative probabilities           *
298  * are avoided by using the technique of chop-down.               *
299  * For l >= 10  the Patchwork Rejection method  hprs is employed: *
300  * The area below the histogram function f(x) in its              *
301  * body is rearranged by certain point reflections. Within a      *
302  * large center interval variates are sampled efficiently by      *
303  * rejection from uniform hats. Rectangular immediate acceptance  *
304  * regions speed up the generation. The remaining tails are       *
305  * covered by exponential functions.                              *
306  *                                                                *
307  ******************************************************************
308  *                                                                *
309  * FUNCTION :   - hprsc samples a random number from the          *
310  *                Hypergeometric distribution with parameters     *
311  *                N (number of red and black balls), M (number    *
312  *                of red balls) and n (number of trials)          *
313  *                valid for N >= 2, M,n <= N.                     *
314  * REFERENCE :  - H. Zechner (1994): Efficient sampling from      *
315  *                continuous and discrete unimodal distributions, *
316  *                Doctoral Dissertation, 156 pp., Technical       *
317  *                University Graz, Austria.                       *
318  * SUBPROGRAMS: - flogfak(k)  ... log(k!) with long integer k     *
319  *              - drand(seed) ... (0,1)-Uniform generator with    *
320  *                unsigned long integer *seed.                    *
321  *              - hmdu(seed,N,M,n) ... Hypergeometric generator   *
322  *                for l<10                                        *
323  *              - hprs(seed,N,M,n) ... Hypergeometric generator   *
324  *                for l>=10 with unsigned long integer *seed,     *
325  *                long integer  N , M , n.                        *
326  *                                                                *
327  ******************************************************************/
328 	int Nhalf, n_le_Nhalf, M_le_Nhalf, K;
329 
330 	Nhalf =  N / 2;
331 	n_le_Nhalf = (n <= Nhalf)  ?  n  :  N - n;
332 	M_le_Nhalf = (M <= Nhalf)  ?  M  :  N - M;
333 
334 	if ((n*M/N) < 10) {
335 		K = (n_le_Nhalf <= M_le_Nhalf)
336 			?  hmdu(N, M_le_Nhalf, n_le_Nhalf, randomGenerator)
337 			:  hmdu(N, n_le_Nhalf, M_le_Nhalf, randomGenerator);
338 	}
339 	else {
340 		K = (n_le_Nhalf <= M_le_Nhalf)
341 			?  hprs(N, M_le_Nhalf, n_le_Nhalf, randomGenerator)
342 			:  hprs(N, n_le_Nhalf, M_le_Nhalf, randomGenerator);
343 	}
344 
345 	if (n <= Nhalf) {
346 		return (M <= Nhalf)  ?      K  :  n - K;
347 	}
348 	else {
349 		return (M <= Nhalf)  ?  M - K  :  n - N + M + K;
350 	}
351 }
352 /**
353  * Returns the probability distribution function.
354  */
pdf(int k)355 public double pdf(int k) {
356 	return Arithmetic.binomial(my_s, k) * Arithmetic.binomial(my_N - my_s, my_n - k)
357 		/ Arithmetic.binomial(my_N, my_n);
358 }
359 /**
360  * Sets the parameters.
361  */
setState(int N, int s, int n)362 public void setState(int N, int s, int n) {
363 	this.my_N = N;
364 	this.my_s = s;
365 	this.my_n = n;
366 }
367 /**
368  * Returns a random number from the distribution.
369  */
staticNextInt(int N, int M, int n)370 public static double staticNextInt(int N, int M, int n) {
371 	synchronized (shared) {
372 		return shared.nextInt(N,M,n);
373 	}
374 }
375 /**
376  * Returns a String representation of the receiver.
377  */
toString()378 public String toString() {
379 	return this.getClass().getName()+"("+my_N+","+my_s+","+my_n+")";
380 }
381 /**
382  * Sets the uniform random number generated shared by all <b>static</b> methods.
383  * @param randomGenerator the new uniform random number generator to be shared.
384  */
xstaticSetRandomGenerator(RandomEngine randomGenerator)385 private static void xstaticSetRandomGenerator(RandomEngine randomGenerator) {
386 	synchronized (shared) {
387 		shared.setRandomGenerator(randomGenerator);
388 	}
389 }
390 }
391