1 /*************************** wnchyppr.cpp **********************************
2 * Author:        Agner Fog
3 * Date created:  2002-10-20
4 * Last modified: 2013-12-20
5 * Project:       stocc.zip
6 * Source URL:    www.agner.org/random
7 *
8 * Description:
9 * Calculation of univariate and multivariate Wallenius noncentral
10 * hypergeometric probability distribution.
11 *
12 * This file contains source code for the class CWalleniusNCHypergeometric
13 * and CMultiWalleniusNCHypergeometricMoments defined in stocc.h.
14 *
15 * Documentation:
16 * ==============
17 * The file stocc.h contains class definitions.
18 * The file nchyp.pdf, available from www.agner.org/random/theory
19 * describes the theory of the calculation methods.
20 * The file ran-instructions.pdf contains further documentation and
21 * instructions.
22 *
23 * Copyright 2002-2008 by Agner Fog.
24 * GNU General Public License http://www.gnu.org/licenses/gpl.html
25 *****************************************************************************/
26 
27 #include <string.h>                    // memcpy function
28 #include "stocc.h"                     // class definition
29 #include "erfres.cpp"                  // table of error function residues (Don't precompile this as a header file)
30 
31 /***********************************************************************
32 constants
33 ***********************************************************************/
34 static const double LN2 = 0.693147180559945309417; // log(2)
35 
36 
37 /***********************************************************************
38 Log and Exp functions with special care for small x
39 ***********************************************************************/
40 // These are functions that involve expressions of the types log(1+x)
41 // and exp(x)-1. These functions need special care when x is small to
42 // avoid loss of precision. There are three versions of these functions:
43 // (1) Assembly version in library randomaXX.lib
44 // (2) Use library functions log1p and expm1 if available
45 // (3) Use Taylor expansion if none of the above are available
46 
47 #if defined(__GNUC__) || defined (__INTEL_COMPILER) || defined(HAVE_EXPM1)
48 // Functions log1p(x) = log(1+x) and expm1(x) = exp(x)-1 are available
49 // in the math libraries of Gnu and Intel compilers
50 // and in R.DLL (www.r-project.org).
51 
pow2_1(double q,double * y0=0)52 double pow2_1(double q, double * y0 = 0) {
53    // calculate 2^q and (1-2^q) without loss of precision.
54    // return value is (1-2^q). 2^q is returned in *y0
55    double y, y1;
56    q *= LN2;
57    if (fabs(q) > 0.1) {
58       y = exp(q);                      // 2^q
59       y1 = 1. - y;                     // 1-2^q
60    }
61    else { // Use expm1
62       y1 = expm1(q);                   // 2^q-1
63       y = y1 + 1;                      // 2^q
64       y1 = -y1;                        // 1-2^q
65    }
66    if (y0) *y0 = y;                    // Return y if not void pointer
67    return y1;                          // Return y1
68 }
69 
log1mx(double x,double x1)70 double log1mx(double x, double x1) {
71    // Calculate log(1-x) without loss of precision when x is small.
72    // Parameter x1 must be = 1-x.
73    if (fabs(x) > 0.03) {
74       return log(x1);
75    }
76    else { // use log1p(x) = log(1+x)
77       return log1p(-x);
78    }
79 }
80 
log1pow(double q,double x)81 double log1pow(double q, double x) {
82    // calculate log((1-e^q)^x) without loss of precision.
83    // Combines the methods of the above two functions.
84    double y, y1;
85 
86    if (fabs(q) > 0.1) {
87       y = exp(q);                      // e^q
88       y1 = 1. - y;                     // 1-e^q
89    }
90    else { // Use expm1
91       y1 = expm1(q);                   // e^q-1
92       y = y1 + 1;                      // e^q
93       y1 = -y1;                        // 1-e^q
94    }
95 
96    if (y > 0.1) { // (1-y)^x calculated without problem
97       return x * log(y1);
98    }
99    else { // Use log1p
100       return x * log1p(-y);
101    }
102 }
103 
104 #else
105 // (3)
106 // Functions log1p and expm1 are not available in MS and Borland compiler
107 // libraries. Use explicit Taylor expansion when needed.
108 
pow2_1(double q,double * y0=0)109 double pow2_1(double q, double * y0 = 0) {
110    // calculate 2^q and (1-2^q) without loss of precision.
111    // return value is (1-2^q). 2^q is returned in *y0
112    double y, y1, y2, qn, i, ifac;
113    q *= LN2;
114    if (fabs(q) > 0.1) {
115       y = exp(q);
116       y1 = 1. - y;
117    }
118    else { // expand 1-e^q = -summa(q^n/n!) to avoid loss of precision
119       y1 = 0;  qn = i = ifac = 1;
120       do {
121          y2 = y1;
122          qn *= q;  ifac *= i++;
123          y1 -= qn / ifac;
124       }
125       while (y1 != y2);
126       y = 1.-y1;
127    }
128    if (y0) *y0 = y;
129    return y1;
130 }
131 
log1mx(double x,double x1)132 double log1mx(double x, double x1) {
133    // Calculate log(1-x) without loss of precision when x is small.
134    // Parameter x1 must be = 1-x.
135    if (fabs(x) > 0.03) {
136       return log(x1);
137    }
138    else { // expand ln(1-x) = -summa(x^n/n)
139       double y, z1, z2, i;
140       y = i = 1.;  z1 = 0;
141       do {
142          z2 = z1;
143          y *= x;
144          z1 -= y / i++;
145       }
146       while (z1 != z2);
147       return z1;
148    }
149 }
150 
log1pow(double q,double x)151 double log1pow(double q, double x) {
152    // calculate log((1-e^q)^x) without loss of precision
153    // Uses various Taylor expansions to avoid loss of precision
154    double y, y1, y2, z1, z2, qn, i, ifac;
155 
156    if (fabs(q) > 0.1) {
157       y = exp(q);  y1 = 1. - y;
158    }
159    else { // expand 1-e^q = -summa(q^n/n!) to avoid loss of precision
160       y1 = 0;  qn = i = ifac = 1;
161       do {
162          y2 = y1;
163          qn *= q;  ifac *= i++;
164          y1 -= qn / ifac;
165       }
166       while (y1 != y2);
167       y = 1. - y1;
168    }
169    if (y > 0.1) { // (1-y)^x calculated without problem
170       return x * log(y1);
171    }
172    else { // expand ln(1-y) = -summa(y^n/n)
173       y1 = i = 1.;  z1 = 0;
174       do {
175          z2 = z1;
176          y1 *= y;
177          z1 -= y1 / i++;
178       }
179       while (z1 != z2);
180       return x * z1;
181    }
182 }
183 
184 #endif
185 
186 /***********************************************************************
187 Other shared functions
188 ***********************************************************************/
189 
LnFacr(double x)190 double LnFacr(double x) {
191    // log factorial of non-integer x
192    int32_t ix = (int32_t)(x);
193    if (x == ix) return LnFac(ix);      // x is integer
194    double r, r2, D = 1., f;
195    static const double
196       C0 =  0.918938533204672722,      // ln(sqrt(2*pi))
197       C1 =  1./12.,
198       C3 = -1./360.,
199       C5 =  1./1260.,
200       C7 = -1./1680.;
201    if (x < 6.) {
202       if (x == 0 || x == 1) return 0;
203       while (x < 6) D *= ++x;
204    }
205    r  = 1. / x;  r2 = r*r;
206    f = (x + 0.5)*log(x) - x + C0 + r*(C1 + r2*(C3 + r2*(C5 + r2*C7)));
207    if (D != 1.) f -= log(D);
208    return f;
209 }
210 
211 
FallingFactorial(double a,double b)212 double FallingFactorial(double a, double b) {
213    // calculates ln(a*(a-1)*(a-2)* ... * (a-b+1))
214 
215    if (b < 30 && int(b) == b && a < 1E10) {
216       // direct calculation
217       double f = 1.;
218       for (int i = 0; i < b; i++) f *= a--;
219       return log(f);
220    }
221 
222    if (a > 100.*b && b > 1.) {
223       // combine Stirling formulas for a and (a-b) to avoid loss of precision
224       double ar = 1./a;
225       double cr = 1./(a-b);
226       // calculate -log(1-b/a) by Taylor expansion
227       double s = 0., lasts, n = 1., ba = b*ar, f = ba;
228       do {
229          lasts = s;
230          s += f/n;
231          f *= ba;
232          n++;
233       }
234       while (s != lasts);
235       return (a+0.5)*s + b*log(a-b) - b + (1./12.)*(ar-cr)
236          //- (1./360.)*(ar*ar*ar-cr*cr*cr)
237          ;
238    }
239    // use LnFacr function
240    return LnFacr(a)-LnFacr(a-b);
241 }
242 
Erf(double x)243 double Erf (double x) {
244    // Calculates the error function erf(x) as a series expansion or
245    // continued fraction expansion.
246    // This function may be available in math libraries as erf(x)
247    static const double rsqrtpi  = 0.564189583547756286948; // 1/sqrt(pi)
248    static const double rsqrtpi2 = 1.12837916709551257390;  // 2/sqrt(pi)
249    if (x < 0.) return -Erf(-x);
250    if (x > 6.) return 1.;
251    if (x < 2.4) {
252       // use series expansion
253       double term;                     // term in summation
254       double j21;                      // 2j+1
255       double sum = 0;                  // summation
256       double xx2 = x*x*2.;             // 2x^2
257       int j;
258       term = x;  j21 = 1.;
259       for (j=0; j<=50; j++) {          // summation loop
260          sum += term;
261          if (term <= 1.E-13) break;
262          j21 += 2.;
263          term *= xx2 / j21;
264       }
265       return exp(-x*x) * sum * rsqrtpi2;
266    }
267    else {
268       // use continued fraction expansion
269       double a, f;
270       int n = int(2.25f*x*x - 23.4f*x + 60.84f); // predict expansion degree
271       if (n < 1) n = 1;
272       a = 0.5 * n;  f = x;
273       for (; n > 0; n--) {             // continued fraction loop
274          f = x + a / f;
275          a -= 0.5;
276       }
277       return 1. - exp(-x*x) * rsqrtpi / f;
278    }
279 }
280 
281 
FloorLog2(float x)282 int32_t FloorLog2(float x) {
283    // This function calculates floor(log2(x)) for positive x.
284    // The return value is <= -127 for x <= 0.
285 
286    union UfloatInt {  // Union for extracting bits from a float
287       float   f;
288       int32_t i;
289       UfloatInt(float ff) {f = ff;}  // constructor
290    };
291 
292 #if defined(_M_IX86) || defined(__INTEL__) || defined(_M_X64) || defined(__IA64__) || defined(__POWERPC__)
293    // Running on a platform known to use IEEE-754 floating point format
294    //int32_t n = *(int32_t*)&x;
295    int32_t n = UfloatInt(x).i;
296    return (n >> 23) - 0x7F;
297 #else
298    // Check if floating point format is IEEE-754
299    static const UfloatInt check(1.0f);
300    if (check.i == 0x3F800000) {
301       // We have the standard IEEE floating point format
302       int32_t n = UfloatInt(x).i;
303       return (n >> 23) - 0x7F;
304    }
305    else {
306       // Unknown floating point format
307       if (x <= 0.f) return -127;
308       return (int32_t)floor(log(x)*(1./LN2));
309    }
310 #endif
311 }
312 
313 
NumSD(double accuracy)314 int NumSD (double accuracy) {
315    // Gives the length of the integration interval necessary to achieve
316    // the desired accuracy when integrating/summating a probability
317    // function, relative to the standard deviation
318    // Returns an integer approximation to 2*NormalDistrFractile(accuracy/2)
319    static const double fract[] = {
320       2.699796e-03, 4.652582e-04, 6.334248e-05, 6.795346e-06, 5.733031e-07,
321       3.797912e-08, 1.973175e-09, 8.032001e-11, 2.559625e-12, 6.381783e-14
322    };
323    int i;
324    for (i = 0; i < (int)(sizeof(fract)/sizeof(*fract)); i++) {
325       if (accuracy >= fract[i]) break;
326    }
327    return i + 6;
328 }
329 
330 
331 /***********************************************************************
332 Methods for class CWalleniusNCHypergeometric
333 ***********************************************************************/
334 
CWalleniusNCHypergeometric(int32_t n_,int32_t m_,int32_t N_,double odds_,double accuracy_)335 CWalleniusNCHypergeometric::CWalleniusNCHypergeometric(int32_t n_, int32_t m_, int32_t N_, double odds_, double accuracy_) {
336    // constructor
337    accuracy = accuracy_;
338    SetParameters(n_, m_, N_, odds_);
339 }
340 
341 
SetParameters(int32_t n_,int32_t m_,int32_t N_,double odds)342 void CWalleniusNCHypergeometric::SetParameters(int32_t n_, int32_t m_, int32_t N_, double odds) {
343    // change parameters
344    if (n_ < 0 || n_ > N_ || m_ < 0 || m_ > N_ || odds < 0) FatalError("Parameter out of range in CWalleniusNCHypergeometric");
345    n = n_; m = m_; N = N_; omega = odds;          // set parameters
346    xmin = m + n - N;  if (xmin < 0) xmin = 0;     // calculate xmin
347    xmax = n;  if (xmax > m) xmax = m;             // calculate xmax
348    xLastBico = xLastFindpars = -99;               // indicate last x is invalid
349    r = 1.;                                        // initialize
350 }
351 
352 
mean(void)353 double CWalleniusNCHypergeometric::mean(void) {
354    // find approximate mean
355    int iter;                            // number of iterations
356    double a, b;                         // temporaries in calculation of first guess
357    double mean, mean1;                  // iteration value of mean
358    double m1r, m2r;                     // 1/m, 1/m2
359    double e1, e2;                       // temporaries
360    double g;                            // function to find root of
361    double gd;                           // derivative of g
362    double omegar;                       // 1/omega
363 
364    if (omega == 1.) { // simple hypergeometric
365       return (double)(m)*n/N;
366    }
367 
368    if (omega == 0.) {
369       if (n > N-m) FatalError("Not enough items with nonzero weight in CWalleniusNCHypergeometric::mean");
370       return 0.;
371    }
372 
373    if (xmin == xmax) return xmin;
374 
375    // calculate Cornfield mean of Fisher noncentral hypergeometric distribution as first guess
376    a = (m+n)*omega + (N-m-n);
377    b = a*a - 4.*omega*(omega-1.)*m*n;
378    b = b > 0. ? sqrt(b) : 0.;
379    mean = (a-b)/(2.*(omega-1.));
380    if (mean < xmin) mean = xmin;
381    if (mean > xmax) mean = xmax;
382 
383    m1r = 1./m;  m2r = 1./(N-m);
384    iter = 0;
385 
386    if (omega > 1.) {
387       do { // Newton Raphson iteration
388          mean1 = mean;
389          e1 = 1.-(n-mean)*m2r;
390          if (e1 < 1E-14) {
391             e2 = 0.;     // avoid underflow
392          }
393          else {
394             e2 = pow(e1,omega-1.);
395          }
396          g = e2*e1 + (mean-m)*m1r;
397          gd = e2*omega*m2r + m1r;
398          mean -= g / gd;
399          if (mean < xmin) mean = xmin;
400          if (mean > xmax) mean = xmax;
401          if (++iter > 40) {
402             FatalError("Search for mean failed in function CWalleniusNCHypergeometric::mean");
403          }
404       }
405       while (fabs(mean1 - mean) > 2E-6);
406    }
407    else { // omega < 1
408       omegar = 1./omega;
409       do { // Newton Raphson iteration
410          mean1 = mean;
411          e1 = 1.-mean*m1r;
412          if (e1 < 1E-14) {
413             e2 = 0.;   // avoid underflow
414          }
415          else {
416             e2 = pow(e1,omegar-1.);
417          }
418          g = 1.-(n-mean)*m2r-e2*e1;
419          gd = e2*omegar*m1r + m2r;
420          mean -= g / gd;
421          if (mean < xmin) mean = xmin;
422          if (mean > xmax) mean = xmax;
423          if (++iter > 40) {
424             FatalError("Search for mean failed in function CWalleniusNCHypergeometric::mean");
425          }
426       }
427       while (fabs(mean1 - mean) > 2E-6);
428    }
429    return mean;
430 }
431 
432 
variance(void)433 double CWalleniusNCHypergeometric::variance(void) {
434    // find approximate variance (poor approximation)
435    double my = mean(); // approximate mean
436    // find approximate variance from Fisher's noncentral hypergeometric approximation
437    double r1 = my * (m-my); double r2 = (n-my)*(my+N-n-m);
438    if (r1 <= 0. || r2 <= 0.) return 0.;
439    double var = N*r1*r2/((N-1)*(m*r2+(N-m)*r1));
440    if (var < 0.) var = 0.;
441    return var;
442 }
443 
444 
moments(double * mean_,double * var_)445 double CWalleniusNCHypergeometric::moments(double * mean_, double * var_) {
446    // calculate exact mean and variance
447    // return value = sum of f(x), expected = 1.
448    double y, sy=0, sxy=0, sxxy=0, me1;
449    int32_t x, xm, x1;
450    const double accuracy = 1E-10f;  // accuracy of calculation
451    xm = (int32_t)mean();  // approximation to mean
452    for (x = xm; x <= xmax; x++) {
453       y = probability(x);
454       x1 = x - xm;  // subtract approximate mean to avoid loss of precision in sums
455       sy += y; sxy += x1 * y; sxxy += x1 * x1 * y;
456       if (y < accuracy && x != xm) break;
457    }
458    for (x = xm-1; x >= xmin; x--) {
459       y = probability(x);
460       x1 = x - xm;  // subtract approximate mean to avoid loss of precision in sums
461       sy += y; sxy += x1 * y; sxxy += x1 * x1 * y;
462       if (y < accuracy) break;
463    }
464 
465    me1 = sxy / sy;
466    *mean_ = me1 + xm;
467    y = sxxy / sy - me1 * me1;
468    if (y < 0) y=0;
469    *var_ = y;
470    return sy;
471 }
472 
473 
mode(void)474 int32_t CWalleniusNCHypergeometric::mode(void) {
475    // find mode
476    int32_t Mode;                       // mode
477 
478    if (omega == 1.) {
479       // simple hypergeometric
480       int32_t L  = m + n - N;
481       int32_t m1 = m + 1, n1 = n + 1;
482       Mode = int32_t((double)m1*n1*omega/((m1+n1)*omega-L));
483    }
484    else {
485       // find mode
486       double f, f2 = 0.; // f2 = -1.;
487       int32_t xi, x2;
488       int32_t xmin = m + n - N;  if (xmin < 0) xmin = 0;  // calculate xmin
489       int32_t xmax = n;  if (xmax > m) xmax = m;          // calculate xmax
490 
491       Mode = (int32_t)mean();                             // floor(mean)
492       if (omega < 1.) {
493         if (Mode < xmax) Mode++;                        // ceil(mean)
494         x2 = xmin;                                      // lower limit
495         if (omega > 0.294 && N <= 10000000) {
496           x2 = Mode - 1;}                    // search for mode can be limited
497         for (xi = Mode; xi >= x2; xi--) {
498           f = probability(xi);
499           if (f <= f2) break;
500           Mode = xi;  f2 = f;
501         }
502       }
503       else {
504         if (Mode < xmin) Mode++;
505         x2 = xmax;                           // upper limit
506         if (omega < 3.4 && N <= 10000000) {
507           x2 = Mode + 1;}                    // search for mode can be limited
508         for (xi = Mode; xi <= x2; xi++) {
509           f = probability(xi);
510           if (f <= f2) break;
511           Mode = xi; f2 = f;
512         }
513       }
514    }
515    return Mode;
516 }
517 
518 
lnbico()519 double CWalleniusNCHypergeometric::lnbico() {
520    // natural log of binomial coefficients.
521    // returns lambda = log(m!*x!/(m-x)!*m2!*x2!/(m2-x2)!)
522    int32_t x2 = n-x, m2 = N-m;
523    if (xLastBico < 0) { // m, n, N have changed
524       mFac = LnFac(m) + LnFac(m2);
525    }
526    if (m < FAK_LEN && m2 < FAK_LEN)  goto DEFLT;
527    switch (x - xLastBico) {
528   case 0: // x unchanged
529      break;
530   case 1: // x incremented. calculate from previous value
531      xFac += log (double(x) * (m2-x2) / (double(x2+1)*(m-x+1)));
532      break;
533   case -1: // x decremented. calculate from previous value
534      xFac += log (double(x2) * (m-x) / (double(x+1)*(m2-x2+1)));
535      break;
536   default: DEFLT: // calculate all
537      xFac = LnFac(x) + LnFac(x2) + LnFac(m-x) + LnFac(m2-x2);
538    }
539    xLastBico = x;
540    return bico = mFac - xFac;
541 }
542 
543 
findpars()544 void CWalleniusNCHypergeometric::findpars() {
545    // calculate d, E, r, w
546    if (x == xLastFindpars) {
547       return;    // all values are unchanged since last call
548    }
549 
550    // find r to center peak of integrand at 0.5
551    double dd, d1, z, zd, rr, lastr, rrc, rt, r2, r21, a, b, dummy;
552    double oo[2];
553    double xx[2] = {static_cast<double>(x), static_cast<double>(n-x)};
554    int i, j = 0;
555    if (omega > 1.) { // make both omegas <= 1 to avoid overflow
556       oo[0] = 1.;  oo[1] = 1./omega;
557    }
558    else {
559       oo[0] = omega;  oo[1] = 1.;
560    }
561    dd = oo[0]*(m-x) + oo[1]*(N-m-xx[1]);
562    d1 = 1./dd;
563    E = (oo[0]*m + oo[1]*(N-m)) * d1;
564    rr = r;
565    if (rr <= d1) rr = 1.2*d1;           // initial guess
566    // Newton-Raphson iteration to find r
567    do {
568       lastr = rr;
569       rrc = 1. / rr;
570       z = dd - rrc;
571       zd = rrc * rrc;
572       for (i=0; i<2; i++) {
573          rt = rr * oo[i];
574          if (rt < 100.) {                  // avoid overflow if rt big
575             r21 = pow2_1(rt, &r2);         // r2=2^r, r21=1.-2^r
576             a = oo[i] / r21;               // omegai/(1.-2^r)
577             b = xx[i] * a;                 // x*omegai/(1.-2^r)
578             z  += b;
579             zd += b * a * LN2 * r2;
580          }
581       }
582       if (zd == 0) FatalError("can't find r in function CWalleniusNCHypergeometric::findpars");
583       rr -= z / zd;
584       if (rr <= d1) rr = lastr * 0.125 + d1*0.875;
585       if (++j == 70) FatalError("convergence problem searching for r in function CWalleniusNCHypergeometric::findpars");
586    }
587    while (fabs(rr-lastr) > rr * 1.E-6);
588    if (omega > 1) {
589       dd *= omega;  rr *= oo[1];
590    }
591    r = rr;  rd = rr * dd;
592 
593    // find peak width
594    double ro, k1, k2;
595    ro = r * omega;
596    if (ro < 300) {                      // avoid overflow
597       k1 = pow2_1(ro, &dummy);
598       k1 = -1. / k1;
599       k1 = omega*omega*(k1+k1*k1);
600    }
601    else k1 = 0.;
602    if (r < 300) {                       // avoid overflow
603       k2 = pow2_1(r, &dummy);
604       k2 = -1. / k2;
605       k2 = (k2+k2*k2);
606    }
607    else k2 = 0.;
608    phi2d = -4.*r*r*(x*k1 + (n-x)*k2);
609    if (phi2d >= 0.) {
610       FatalError("peak width undefined in function CWalleniusNCHypergeometric::findpars");
611       /* wr = r = 0.; */
612    }
613    else {
614       wr = sqrt(-phi2d); w = 1./wr;
615    }
616    xLastFindpars = x;
617 }
618 
619 
BernouilliH(int32_t x_,double h,double rh,StochasticLib1 * sto)620 int CWalleniusNCHypergeometric::BernouilliH(int32_t x_, double h, double rh, StochasticLib1 *sto) {
621    // This function generates a Bernouilli variate with probability proportional
622    // to the univariate Wallenius' noncentral hypergeometric distribution.
623    // The return value will be 1 with probability f(x_)/h and 0 with probability
624    // 1-f(x_)/h.
625    // This is equivalent to calling sto->Bernouilli(probability(x_)/h),
626    // but this method is faster. The method used here avoids calculating the
627    // Wallenius probability by sampling in the t-domain.
628    // rh is a uniform random number in the interval 0 <= rh < h. The function
629    // uses additional random numbers generated from sto.
630    // This function is intended for use in rejection methods for sampling from
631    // the Wallenius distribution. It is called from
632    // StochasticLib3::WalleniusNCHypRatioOfUnifoms in the file stoc3.cpp
633    double f0;                      // Lambda*Phi(.5)
634    double phideri0;                // phi(.5)/rd
635    double qi;                      // 2^(-r*omega[i])
636    double qi1;                     // 1-qi
637    double omegai[2] = {omega,1.};  // weights for each color
638    double romegi;                  // r*omega[i]
639    double xi[2] =                  // number of each color sampled
640        {static_cast<double>(x_), static_cast<double>(n-x_)};
641    double k;                       // adjusted width for majorizing function Ypsilon(t)
642    double erfk;                    // erf correction
643    double rdm1;                    // rd - 1
644    double G_integral;              // integral of majorizing function Ypsilon(t)
645    double ts;                      // t sampled from Ypsilon(t) distribution
646    double logts;                   // log(ts)
647    double rlogts;                  // r*log(ts)
648    double fts;                     // Phi(ts)/rd
649    double rgts;                    // 1/(Ypsilon(ts)/rd)
650    double t2;                      // temporary in calculation of Ypsilon(ts)
651    int i, j;                       // loop counters
652    static const double rsqrt8 = 0.3535533905932737622; // 1/sqrt(8)
653    static const double sqrt2pi = 2.506628274631000454; // sqrt(2*pi)
654 
655    x = x_;                         // save x in class object
656    lnbico();                       // calculate bico = log(Lambda)
657    findpars();                     // calculate r, d, rd, w, E
658    if (E > 0.) {
659       k = log(E);                  // correction for majorizing function
660       k = 1. + 0.0271 * (k * sqrt(k));
661    }
662    else k = 1.;
663    k *= w;                         // w * k
664    rdm1 = rd - 1.;
665 
666    // calculate phi(.5)/rd
667    phideri0 = -LN2 * rdm1;
668    for (i=0; i<2; i++) {
669       romegi = r * omegai[i];
670       if (romegi > 40.) {
671          qi=0.;  qi1 = 1.;           // avoid underflow
672       }
673       else {
674          qi1 = pow2_1(-romegi, &qi);
675       }
676       phideri0 += xi[i] * log1mx(qi, qi1);
677    }
678 
679    erfk = Erf(rsqrt8 / k);
680    f0 = rd * exp(phideri0 + bico);
681    G_integral = f0 * sqrt2pi * k * erfk;
682 
683    if (G_integral <= h) {          // G fits under h-hat
684       do {
685          ts = sto->Normal(0,k);      // sample ts from normal distribution
686       }
687       while (fabs(ts) >= 0.5);      // reject values outside interval, and avoid ts = 0
688       ts += 0.5;                    // ts = normal distributed in interval (0,1)
689 
690       for (fts=0., j=0; j<2; j++) { // calculate (Phi(ts)+Phi(1-ts))/2
691          logts = log(ts);  rlogts = r * logts; // (ts = 0 avoided above)
692          fts += exp(log1pow(rlogts*omega,xi[0]) + log1pow(rlogts,xi[1]) + rdm1*logts + bico);
693          ts = 1. - ts;
694       }
695       fts *= 0.5;
696 
697       t2 = (ts-0.5) / k;            // calculate 1/Ypsilon(ts)
698       rgts = exp(-(phideri0 + bico - 0.5 * t2*t2));
699       return rh < G_integral * fts * rgts;   // Bernouilli variate
700    }
701 
702    else { // G > h: can't use sampling in t-domain
703       return rh < probability(x);
704    }
705 }
706 
707 
708 /***********************************************************************
709 methods for calculating probability in class CWalleniusNCHypergeometric
710 ***********************************************************************/
711 
recursive()712 double CWalleniusNCHypergeometric::recursive() {
713    // recursive calculation
714    // Wallenius noncentral hypergeometric distribution by recursion formula
715    // Approximate by ignoring probabilities < accuracy and minimize storage requirement
716    const int BUFSIZE = 512;            // buffer size
717    double p[BUFSIZE+2];                // probabilities
718    double * p1, * p2;                  // offset into p
719    double mxo;                         // (m-x)*omega
720    double Nmnx;                        // N-m-nu+x
721    double y, y1;                       // save old p[x] before it is overwritten
722    double d1, d2, dcom;                // divisors in probability formula
723    double accuracya;                   // absolute accuracy
724    int32_t xi, nu;                     // xi, nu = recursion values of x, n
725    int32_t x1, x2;                     // xi_min, xi_max
726 
727    accuracya = 0.005f * accuracy;      // absolute accuracy
728    p1 = p2 = p + 1;                    // make space for p1[-1]
729    p1[-1] = 0.;  p1[0]  = 1.;          // initialize for recursion
730    x1 = x2 = 0;
731    for (nu=1; nu<=n; nu++) {
732       if (n - nu < x - x1 || p1[x1] < accuracya) {
733          x1++;                        // increase lower limit when breakpoint passed or probability negligible
734          p2--;                        // compensate buffer offset in order to reduce storage space
735       }
736       if (x2 < x && p1[x2] >= accuracya) {
737          x2++;  y1 = 0.;               // increase upper limit until x has been reached
738       }
739       else {
740          y1 = p1[x2];
741       }
742       if (x1 > x2) return 0.;
743       if (p2+x2-p > BUFSIZE) FatalError("buffer overrun in function CWalleniusNCHypergeometric::recursive");
744 
745       mxo = (m-x2)*omega;
746       Nmnx = N-m-nu+x2+1;
747       for (xi = x2; xi >= x1; xi--) {  // backwards loop
748          d2 = mxo + Nmnx;
749          mxo += omega; Nmnx--;
750          d1 = mxo + Nmnx;
751          dcom = 1. / (d1 * d2);        // save a division by making common divisor
752          y  = p1[xi-1]*mxo*d2*dcom + y1*(Nmnx+1)*d1*dcom;
753          y1 = p1[xi-1];                // (warning: pointer alias, can't swap instruction order)
754          p2[xi] = y;
755       }
756       p1 = p2;
757    }
758 
759    if (x < x1 || x > x2) return 0.;
760    return p1[x];
761 }
762 
763 
binoexpand()764 double CWalleniusNCHypergeometric::binoexpand() {
765    // calculate by binomial expansion of integrand
766    // only for x < 2 or n-x < 2 (not implemented for higher x because of loss of precision)
767    int32_t x1, m1, m2;
768    double o;
769    if (x > n/2) { // invert
770       x1 = n-x; m1 = N-m; m2 = m; o = 1./omega;
771    }
772    else {
773       x1 = x; m1 = m; m2 = N-m; o = omega;
774    }
775    if (x1 == 0) {
776       return exp(FallingFactorial(m2,n) - FallingFactorial(m2+o*m1,n));
777    }
778    if (x1 == 1) {
779       double d, e, q, q0, q1;
780       q = FallingFactorial(m2,n-1);
781       e = o*m1+m2;
782       q1 = q - FallingFactorial(e,n);
783       e -= o;
784       q0 = q - FallingFactorial(e,n);
785       d = e - (n-1);
786       return m1*d*(exp(q0) - exp(q1));
787    }
788 
789    FatalError("x > 1 not supported by function CWalleniusNCHypergeometric::binoexpand");
790    return 0;
791 }
792 
793 
laplace()794 double CWalleniusNCHypergeometric::laplace() {
795    // Laplace's method with narrow integration interval,
796    // using error function residues table, defined in erfres.cpp
797    // Note that this function can only be used when the integrand peak is narrow.
798    // findpars() must be called before this function.
799 
800    const int COLORS = 2;         // number of colors
801    const int MAXDEG = 40;        // arraysize, maximum expansion degree
802    int degree;                   // max expansion degree
803    double accur;                 // stop expansion when terms below this threshold
804    double omegai[COLORS] = {omega, 1.}; // weights for each color
805    double xi[COLORS] =           // number of each color sampled
806        {static_cast<double>(x), static_cast<double>(n-x)};
807    double f0;                    // factor outside integral
808    double rho[COLORS];           // r*omegai
809    double qi;                    // 2^(-rho)
810    double qi1;                   // 1-qi
811    double qq[COLORS];            // qi / qi1
812    double eta[COLORS+1][MAXDEG+1]; // eta coefficients
813    double phideri[MAXDEG+1];     // derivatives of phi
814    double PSIderi[MAXDEG+1];     // derivatives of PSI
815    double * erfresp;             // pointer to table of error function residues
816 
817    // variables in asymptotic summation
818    static const double sqrt8  = 2.828427124746190098; // sqrt(8)
819    double qqpow;                 // qq^j
820    double pow2k;                 // 2^k
821    double bino;                  // binomial coefficient
822    double vr;                    // 1/v, v = integration interval
823    double v2m2;                  // (2*v)^(-2)
824    double v2mk1;                 // (2*v)^(-k-1)
825    double s;                     // summation term
826    double sum;                   // Taylor sum
827 
828    int i;                        // loop counter for color
829    int j;                        // loop counter for derivative
830    int k;                        // loop counter for expansion degree
831    int ll;                       // k/2
832    int converg = 0;              // number of consequtive terms below accuracy
833    int PrecisionIndex;           // index into ErfRes table according to desired precision
834 
835    // initialize
836    for (k = 0; k <= 2; k++)  phideri[k] = PSIderi[k] = 0;
837 
838    // find rho[i], qq[i], first eta coefficients, and zero'th derivative of phi
839    for (i = 0; i < COLORS; i++) {
840       rho[i] = r * omegai[i];
841       if (rho[i] > 40.) {
842          qi=0.;  qi1 = 1.;}                 // avoid underflow
843       else {
844          qi1 = pow2_1(-rho[i], &qi);}       // qi=2^(-rho), qi1=1.-2^(-rho)
845       qq[i] = qi / qi1;                     // 2^(-r*omegai)/(1.-2^(-r*omegai))
846       // peak = zero'th derivative
847       phideri[0] += xi[i] * log1mx(qi, qi1);
848       // eta coefficients
849       eta[i][0] = 0.;
850       eta[i][1] = eta[i][2] = rho[i]*rho[i];
851    }
852 
853    // r, rd, and w must be calculated by findpars()
854    // zero'th derivative
855    phideri[0] -= (rd - 1.) * LN2;
856    // scaled factor outside integral
857    f0 = rd * exp(phideri[0] + lnbico());
858 
859    vr = sqrt8 * w;
860    phideri[2] = phi2d;
861 
862    // get table according to desired precision
863    PrecisionIndex = (-FloorLog2((float)accuracy) - ERFRES_B + ERFRES_S - 1) / ERFRES_S;
864    if (PrecisionIndex < 0) PrecisionIndex = 0;
865    if (PrecisionIndex > ERFRES_N-1) PrecisionIndex = ERFRES_N-1;
866    while (w * NumSDev[PrecisionIndex] > 0.3) {
867       // check if integration interval is too wide
868       if (PrecisionIndex == 0) {
869          FatalError("Laplace method failed. Peak width too high in function CWalleniusNCHypergeometric::laplace");
870          break;}
871       PrecisionIndex--;                     // reduce precision to keep integration interval narrow
872    }
873    erfresp = ErfRes[PrecisionIndex];        // choose desired table
874 
875    degree = MAXDEG;                         // max expansion degree
876    if (degree >= ERFRES_L*2) degree = ERFRES_L*2-2;
877 
878    // set up for starting loop at k=3
879    v2m2 = 0.25 * vr * vr;                   // (2*v)^(-2)
880    PSIderi[0] = 1.;
881    pow2k = 8.;
882    sum = 0.5 * vr * erfresp[0];
883    v2mk1 = 0.5 * vr * v2m2 * v2m2;
884    accur = accuracy * sum;
885 
886    // summation loop
887    for (k = 3; k <= degree; k++) {
888       phideri[k] = 0.;
889 
890       // loop for all (2) colors
891       for (i = 0; i < COLORS; i++) {
892          eta[i][k] = 0.;
893          // backward loop for all powers
894          for (j = k; j > 0; j--) {
895             // find coefficients recursively from previous coefficients
896             eta[i][j]  =  eta[i][j]*(j*rho[i]-(k-2)) +  eta[i][j-1]*rho[i]*(j-1);
897          }
898          qqpow = 1.;
899          // forward loop for all powers
900          for (j=1; j<=k; j++) {
901             qqpow *= qq[i];                 // qq^j
902             // contribution to derivative
903             phideri[k] += xi[i] * eta[i][j] * qqpow;
904          }
905       }
906 
907       // finish calculation of derivatives
908       phideri[k] = -pow2k*phideri[k] + 2*(1-k)*phideri[k-1];
909 
910       pow2k *= 2.;    // 2^k
911 
912       // loop to calculate derivatives of PSI from derivatives of psi.
913       // terms # 0, 1, 2, k-2, and k-1 are zero and not included in loop.
914       // The j'th derivatives of psi are identical to the derivatives of phi for j>2, and
915       // zero for j=1,2. Hence we are using phideri[j] for j>2 here.
916       PSIderi[k] = phideri[k];              // this is term # k
917       bino = 0.5 * (k-1) * (k-2);           // binomial coefficient for term # 3
918       for (j = 3; j < k-2; j++) {           // loop for remaining nonzero terms (if k>5)
919          PSIderi[k] += PSIderi[k-j] * phideri[j] * bino;
920          bino *= double(k-j)/double(j);
921       }
922       if ((k & 1) == 0) {                   // only for even k
923          ll = k/2;
924          s = PSIderi[k] * v2mk1 * erfresp[ll];
925          sum += s;
926 
927          // check for convergence of Taylor expansion
928          if (fabs(s) < accur) converg++; else converg = 0;
929          if (converg > 1) break;
930 
931          // update recursive expressions
932          v2mk1 *= v2m2;
933       }
934    }
935    // multiply by terms outside integral
936    return f0 * sum;
937 }
938 
939 
integrate()940 double CWalleniusNCHypergeometric::integrate() {
941    // Wallenius non-central hypergeometric distribution function
942    // calculation by numerical integration with variable-length steps
943    // NOTE: findpars() must be called before this function.
944    double s;                           // result of integration step
945    double sum;                         // integral
946    double ta, tb;                      // subinterval for integration step
947 
948    lnbico();                           // compute log of binomial coefficients
949 
950    // choose method:
951    if (w < 0.02 || (w < 0.1 && (x==m || n-x==N-m) && accuracy > 1E-6)) {
952       // normal method. Step length determined by peak width w
953       double delta, s1;
954       s1 = accuracy < 1E-9 ? 0.5 : 1.;
955       delta = s1 * w;                       // integration steplength
956       ta = 0.5 + 0.5 * delta;
957       sum = integrate_step(1.-ta, ta);      // first integration step around center peak
958       do {
959          tb = ta + delta;
960          if (tb > 1.) tb = 1.;
961          s  = integrate_step(ta, tb);       // integration step to the right of peak
962          s += integrate_step(1.-tb,1.-ta);  // integration step to the left of peak
963          sum += s;
964          if (s < accuracy * sum) break;     // stop before interval finished if accuracy reached
965          ta = tb;
966          if (tb > 0.5 + w) delta *= 2.;     // increase step length far from peak
967       }
968       while (tb < 1.);
969    }
970    else {
971       // difficult situation. Step length determined by inflection points
972       double t1, t2, tinf, delta, delta1;
973       sum = 0.;
974       // do left and right half of integration interval separately:
975       for (t1=0., t2=0.5; t1 < 1.; t1+=0.5, t2+=0.5) {
976          // integrate from 0 to 0.5 or from 0.5 to 1
977          tinf = search_inflect(t1, t2);     // find inflection point
978          delta = tinf - t1; if (delta > t2 - tinf) delta = t2 - tinf; // distance to nearest endpoint
979          delta *= 1./7.;                    // 1/7 will give 3 steps to nearest endpoint
980          if (delta < 1E-4) delta = 1E-4;
981          delta1 = delta;
982          // integrate from tinf forwards to t2
983          ta = tinf;
984          do {
985             tb = ta + delta1;
986             if (tb > t2 - 0.25*delta1) tb = t2; // last step of this subinterval
987             s = integrate_step(ta, tb);         // integration step
988             sum += s;
989             delta1 *= 2;                        // double steplength
990             if (s < sum * 1E-4) delta1 *= 8.;   // large step when s small
991             ta = tb;
992          }
993          while (tb < t2);
994          if (tinf) {
995             // integrate from tinf backwards to t1
996             tb = tinf;
997             do {
998                ta = tb - delta;
999                if (ta < t1 + 0.25*delta) ta = t1; // last step of this subinterval
1000                s = integrate_step(ta, tb);        // integration step
1001                sum += s;
1002                delta *= 2;                        // double steplength
1003                if (s < sum * 1E-4) delta *= 8.;   // large step when s small
1004                tb = ta;}
1005             while (ta > t1);
1006          }
1007       }
1008    }
1009    return sum * rd;
1010 }
1011 
1012 
integrate_step(double ta,double tb)1013 double CWalleniusNCHypergeometric::integrate_step(double ta, double tb) {
1014    // integration subprocedure used by integrate()
1015    // makes one integration step from ta to tb using Gauss-Legendre method.
1016    // result is scaled by multiplication with exp(bico)
1017    double ab, delta, tau, ltau, y, sum, taur, rdm1;
1018    int i;
1019 
1020    // define constants for Gauss-Legendre integration with IPOINTS points
1021 #define IPOINTS  8  // number of points in each integration step
1022 
1023 #if   IPOINTS == 3
1024    static const double xval[3]    = {-.774596669241,0,0.774596668241};
1025    static const double weights[3] = {.5555555555555555,.88888888888888888,.55555555555555};
1026 #elif IPOINTS == 4
1027    static const double xval[4]    = {-0.861136311594,-0.339981043585,0.339981043585,0.861136311594},
1028       static const double weights[4] = {0.347854845137,0.652145154863,0.652145154863,0.347854845137};
1029 #elif IPOINTS == 5
1030    static const double xval[5]    = {-0.906179845939,-0.538469310106,0,0.538469310106,0.906179845939};
1031    static const double weights[5] = {0.236926885056,0.478628670499,0.568888888889,0.478628670499,0.236926885056};
1032 #elif IPOINTS == 6
1033    static const double xval[6]    = {-0.932469514203,-0.661209386466,-0.238619186083,0.238619186083,0.661209386466,0.932469514203};
1034    static const double weights[6] = {0.171324492379,0.360761573048,0.467913934573,0.467913934573,0.360761573048,0.171324492379};
1035 #elif IPOINTS == 8
1036    static const double xval[8]    = {-0.960289856498,-0.796666477414,-0.525532409916,-0.183434642496,0.183434642496,0.525532409916,0.796666477414,0.960289856498};
1037    static const double weights[8] = {0.10122853629,0.222381034453,0.313706645878,0.362683783378,0.362683783378,0.313706645878,0.222381034453,0.10122853629};
1038 #elif IPOINTS == 12
1039    static const double xval[12]   = {-0.981560634247,-0.90411725637,-0.769902674194,-0.587317954287,-0.367831498998,-0.125233408511,0.125233408511,0.367831498998,0.587317954287,0.769902674194,0.90411725637,0.981560634247};
1040    static const double weights[12]= {0.0471753363866,0.106939325995,0.160078328543,0.203167426723,0.233492536538,0.249147045813,0.249147045813,0.233492536538,0.203167426723,0.160078328543,0.106939325995,0.0471753363866};
1041 #elif IPOINTS == 16
1042    static const double xval[16]   = {-0.989400934992,-0.944575023073,-0.865631202388,-0.755404408355,-0.617876244403,-0.458016777657,-0.281603550779,-0.0950125098376,0.0950125098376,0.281603550779,0.458016777657,0.617876244403,0.755404408355,0.865631202388,0.944575023073,0.989400934992};
1043    static const double weights[16]= {0.027152459411,0.0622535239372,0.0951585116838,0.124628971256,0.149595988817,0.169156519395,0.182603415045,0.189450610455,0.189450610455,0.182603415045,0.169156519395,0.149595988817,0.124628971256,0.0951585116838,0.0622535239372,0.027152459411};
1044 #else
1045 #error // IPOINTS must be a value for which the tables are defined
1046 #endif
1047 
1048    delta = 0.5 * (tb - ta);
1049    ab = 0.5 * (ta + tb);
1050    rdm1 = rd - 1.;
1051    sum = 0;
1052 
1053    for (i = 0; i < IPOINTS; i++) {
1054       tau = ab + delta * xval[i];
1055       ltau = log(tau);
1056       taur = r * ltau;
1057       // possible loss of precision due to subtraction here:
1058       y = log1pow(taur*omega,x) + log1pow(taur,n-x) + rdm1*ltau + bico;
1059       if (y > -50.) sum += weights[i] * exp(y);
1060    }
1061    return delta * sum;
1062 }
1063 
1064 
search_inflect(double t_from,double t_to)1065 double CWalleniusNCHypergeometric::search_inflect(double t_from, double t_to) {
1066    // search for an inflection point of the integrand PHI(t) in the interval
1067    // t_from < t < t_to
1068    const int COLORS = 2;                // number of colors
1069    double t, t1;                        // independent variable
1070    double rho[COLORS];                  // r*omega[i]
1071    double q;                            // t^rho[i] / (1-t^rho[i])
1072    double q1;                           // 1-t^rho[i]
1073    double xx[COLORS];                   // x[i]
1074    double zeta[COLORS][4][4];           // zeta[i,j,k] coefficients
1075    double phi[4];                       // derivatives of phi(t) = log PHI(t)
1076    double Z2;                           // PHI''(t)/PHI(t)
1077    double Zd;                           // derivative in Newton Raphson iteration
1078    double rdm1;                         // r * d - 1
1079    double tr;                           // 1/t
1080    double log2t;                        // log2(t)
1081    double method;                       // 0 for z2'(t) method, 1 for z3(t) method
1082    int i;                               // color
1083    int iter;                            // count iterations
1084 
1085    rdm1 = rd - 1.;
1086    if (t_from == 0 && rdm1 <= 1.) return 0.;//no inflection point
1087    rho[0] = r*omega;  rho[1] = r;
1088    xx[0] = x;  xx[1] = n - x;
1089    t = 0.5 * (t_from + t_to);
1090    for (i = 0; i < COLORS; i++) {           // calculate zeta coefficients
1091       zeta[i][1][1] = rho[i];
1092       zeta[i][1][2] = rho[i] * (rho[i] - 1.);
1093       zeta[i][2][2] = rho[i] * rho[i];
1094       zeta[i][1][3] = zeta[i][1][2] * (rho[i] - 2.);
1095       zeta[i][2][3] = zeta[i][1][2] * rho[i] * 3.;
1096       zeta[i][3][3] = zeta[i][2][2] * rho[i] * 2.;
1097    }
1098    iter = 0;
1099 
1100    do {
1101       t1 = t;
1102       tr = 1. / t;
1103       log2t = log(t)*(1./LN2);
1104       phi[1] = phi[2] = phi[3] = 0.;
1105       for (i=0; i<COLORS; i++) {            // calculate first 3 derivatives of phi(t)
1106          q1 = pow2_1(rho[i]*log2t,&q);
1107          q /= q1;
1108          phi[1] -= xx[i] * zeta[i][1][1] * q;
1109          phi[2] -= xx[i] * q * (zeta[i][1][2] + q * zeta[i][2][2]);
1110          phi[3] -= xx[i] * q * (zeta[i][1][3] + q * (zeta[i][2][3] + q * zeta[i][3][3]));
1111       }
1112       phi[1] += rdm1;
1113       phi[2] -= rdm1;
1114       phi[3] += 2. * rdm1;
1115       phi[1] *= tr;
1116       phi[2] *= tr * tr;
1117       phi[3] *= tr * tr * tr;
1118       method = (iter & 2) >> 1;        // alternate between the two methods
1119       Z2 = phi[1]*phi[1] + phi[2];
1120       Zd = method*phi[1]*phi[1]*phi[1] + (2.+method)*phi[1]*phi[2] + phi[3];
1121 
1122       if (t < 0.5) {
1123          if (Z2 > 0) {
1124             t_from = t;
1125          }
1126          else {
1127             t_to = t;
1128          }
1129          if (Zd >= 0) {
1130             // use binary search if Newton-Raphson iteration makes problems
1131             t = (t_from ? 0.5 : 0.2) * (t_from + t_to);
1132          }
1133          else {
1134             // Newton-Raphson iteration
1135             t -= Z2 / Zd;
1136          }
1137       }
1138       else {
1139          if (Z2 < 0) {
1140             t_from = t;
1141          }
1142          else {
1143             t_to = t;
1144          }
1145          if (Zd <= 0) {
1146             // use binary search if Newton-Raphson iteration makes problems
1147             t = 0.5 * (t_from + t_to);
1148          }
1149          else {
1150             // Newton-Raphson iteration
1151             t -= Z2 / Zd;
1152          }
1153       }
1154       if (t >= t_to) t = (t1 + t_to) * 0.5;
1155       if (t <= t_from) t = (t1 + t_from) * 0.5;
1156       if (++iter > 20) FatalError("Search for inflection point failed in function CWalleniusNCHypergeometric::search_inflect");
1157    }
1158    while (fabs(t - t1) > 1E-5);
1159    return t;
1160 }
1161 
1162 
probability(int32_t x_)1163 double CWalleniusNCHypergeometric::probability(int32_t x_) {
1164    // calculate probability function. choosing best method
1165    x = x_;
1166    if (x < xmin || x > xmax) return 0.;
1167    if (xmin == xmax) return 1.;
1168 
1169    if (omega == 1.) { // hypergeometric
1170       return exp(lnbico() + LnFac(n) + LnFac(N-n) - LnFac(N));
1171    }
1172 
1173    if (omega == 0.) {
1174       if (n > N-m) FatalError("Not enough items with nonzero weight in CWalleniusNCHypergeometric::probability");
1175       return x == 0;
1176    }
1177 
1178    int32_t x2 = n - x;
1179    int32_t x0 = x < x2 ? x : x2;
1180    int em = (x == m || x2 == N-m);
1181 
1182    if (x0 == 0 && n > 500) {
1183       return binoexpand();
1184    }
1185 
1186    if (double(n)*x0 < 1000 || (double(n)*x0 < 10000 && (N > 1000.*n || em))) {
1187       return recursive();
1188    }
1189 
1190    if (x0 <= 1 && N-n <= 1) {
1191       return binoexpand();
1192    }
1193 
1194    findpars();
1195 
1196    if (w < 0.04 && E < 10 && (!em || w > 0.004)) {
1197       return laplace();
1198    }
1199 
1200    return integrate();
1201 }
1202 
1203 
MakeTable(double * table,int32_t MaxLength,int32_t * xfirst,int32_t * xlast,double cutoff)1204 int32_t CWalleniusNCHypergeometric::MakeTable(double * table, int32_t MaxLength, int32_t * xfirst, int32_t * xlast, double cutoff) {
1205    // Makes a table of Wallenius noncentral hypergeometric probabilities
1206    // table must point to an array of length MaxLength.
1207    // The function returns 1 if table is long enough. Otherwise it fills
1208    // the table with as many correct values as possible and returns 0.
1209    // The tails are cut off where the values are < cutoff, so that
1210    // *xfirst may be > xmin and *xlast may be < xmax.
1211    // The value of cutoff will be 0.01 * accuracy if not specified.
1212    // The first and last x value represented in the table are returned in
1213    // *xfirst and *xlast. The resulting probability values are returned in
1214    // the first (*xfirst - *xlast + 1) positions of table. Any unused part
1215    // of table may be overwritten with garbage.
1216    //
1217    // The function will return the following information when MaxLength = 0:
1218    // The return value is the desired length of table.
1219    // *xfirst is 1 if it will be more efficient to call MakeTable than to call
1220    // probability repeatedly, even if only some of the table values are needed.
1221    // *xfirst is 0 if it is more efficient to call probability repeatedly.
1222 
1223    double * p1, * p2;                  // offset into p
1224    double mxo;                         // (m-x)*omega
1225    double Nmnx;                        // N-m-nu+x
1226    double y, y1;                       // probability. Save old p[x] before it is overwritten
1227    double d1, d2, dcom;                // divisors in probability formula
1228    double area;                        // estimate of area needed for recursion method
1229    int32_t xi, nu;                     // xi, nu = recursion values of x, n
1230    int32_t x1, x2;                     // lowest and highest x or xi
1231    int32_t i1, i2;                     // index into table
1232    int32_t UseTable;                   // 1 if table method used
1233    int32_t LengthNeeded;               // Necessary table length
1234 
1235    // special cases
1236    if (n == 0 || m == 0) {x1 = 0; goto DETERMINISTIC;}
1237    if (n == N)           {x1 = m; goto DETERMINISTIC;}
1238    if (m == N)           {x1 = n; goto DETERMINISTIC;}
1239    if (omega <= 0.) {
1240       if (n > N-m) FatalError("Not enough items with nonzero weight in  CWalleniusNCHypergeometric::MakeTable");
1241       x1 = 0;
1242       DETERMINISTIC:
1243       if (MaxLength == 0) {
1244          if (xfirst) *xfirst = 1;
1245          return 1;
1246       }
1247       *xfirst = *xlast = x1;
1248       *table = 1.;
1249       return 1;
1250    }
1251 
1252    if (cutoff <= 0. || cutoff > 0.1) cutoff = 0.01 * accuracy;
1253 
1254    LengthNeeded = N - m;               // m2
1255    if (m < LengthNeeded) LengthNeeded = m;
1256    if (n < LengthNeeded) LengthNeeded = n; // LengthNeeded = min(m1,m2,n)
1257    area = double(n)*LengthNeeded;      // Estimate calculation time for table method
1258    UseTable = area < 5000. || (area < 10000. && N > 1000. * n);
1259 
1260    if (MaxLength <= 0) {
1261       // Return UseTable and LengthNeeded
1262       if (xfirst) *xfirst = UseTable;
1263       i1 = LengthNeeded + 2;           // Necessary table length
1264       if (!UseTable && i1 > 200) {
1265          // Calculate necessary table length from standard deviation
1266          double sd = sqrt(variance()); // calculate approximate standard deviation
1267          // estimate number of standard deviations to include from normal distribution
1268          i2 = (int32_t)(NumSD(accuracy) * sd + 0.5);
1269          if (i1 > i2) i1 = i2;
1270       }
1271       return i1;
1272    }
1273 
1274    if (UseTable && MaxLength > LengthNeeded) {
1275       // use recursion table method
1276       p1 = p2 = table + 1;             // make space for p1[-1]
1277       p1[-1] = 0.;  p1[0] = 1.;        // initialize for recursion
1278       x1 = x2 = 0;
1279       for (nu = 1; nu <= n; nu++) {
1280          if (n - nu < xmin - x1 || p1[x1] < cutoff) {
1281             x1++;                      // increase lower limit when breakpoint passed or probability negligible
1282             p2--;                      // compensate buffer offset in order to reduce storage space
1283          }
1284          if (x2 < xmax && p1[x2] >= cutoff) {
1285             x2++;  y1 = 0.;            // increase upper limit until x has been reached
1286          }
1287          else {
1288             y1 = p1[x2];
1289          }
1290          if (p2 - table + x2 >= MaxLength || x1 > x2) {
1291             goto ONE_BY_ONE;           // Error: table length exceeded. Use other method
1292          }
1293 
1294          mxo = (m-x2)*omega;
1295          Nmnx = N-m-nu+x2+1;
1296          for (xi = x2; xi >= x1; xi--) { // backwards loop
1297             d2 = mxo + Nmnx;
1298             mxo += omega; Nmnx--;
1299             d1 = mxo + Nmnx;
1300             dcom = 1. / (d1 * d2);     // save a division by making common divisor
1301             y  = p1[xi-1]*mxo*d2*dcom + y1*(Nmnx+1)*d1*dcom;
1302             y1 = p1[xi-1];             // (warning: pointer alias, can't swap instruction order)
1303             p2[xi] = y;
1304          }
1305          p1 = p2;
1306       }
1307 
1308       // return results
1309       i1 = i2 = x2 - x1 + 1;              // desired table length
1310       if (i2 > MaxLength) i2 = MaxLength; // limit table length
1311       *xfirst = x1;  *xlast = x1 + i2 - 1;
1312       if (i2 > 0) memmove(table, table+1, i2*sizeof(table[0]));// copy to start of table
1313       return i1 == i2;                    // true if table size not reduced
1314    }
1315 
1316    else {
1317       // Recursion method would take too much time
1318       // Calculate values one by one
1319       ONE_BY_ONE:
1320 
1321       // Start to fill table from the end and down. start with x = floor(mean)
1322       x2 = (int32_t)mean();
1323       x1 = x2 + 1;  i1 = MaxLength;
1324       while (x1 > xmin) {              // loop for left tail
1325          x1--;  i1--;
1326          y = probability(x1);
1327          table[i1] = y;
1328          if (y < cutoff) break;
1329          if (i1 == 0) break;
1330       }
1331       *xfirst = x1;
1332       i2 = x2 - x1 + 1;
1333       if (i1 > 0 && i2 > 0) { // move numbers down to beginning of table
1334          memmove(table, table+i1, i2*sizeof(table[0]));
1335       }
1336       // Fill rest of table from mean and up
1337       i2--;
1338       while (x2 < xmax) {              // loop for right tail
1339          if (i2 == MaxLength-1) {
1340             *xlast = x2; return 0;     // table full
1341          }
1342          x2++;  i2++;
1343          y = probability(x2);
1344          table[i2] = y;
1345          if (y < cutoff) break;
1346       }
1347       *xlast = x2;
1348       return 1;
1349    }
1350 }
1351 
1352 
1353 /***********************************************************************
1354 calculation methods in class CMultiWalleniusNCHypergeometric
1355 ***********************************************************************/
1356 
CMultiWalleniusNCHypergeometric(int32_t n_,int32_t * m_,double * odds_,int colors_,double accuracy_)1357 CMultiWalleniusNCHypergeometric::CMultiWalleniusNCHypergeometric(int32_t n_, int32_t * m_, double * odds_, int colors_, double accuracy_) {
1358    // constructor
1359    accuracy = accuracy_;
1360    SetParameters(n_, m_, odds_, colors_);
1361 }
1362 
1363 
SetParameters(int32_t n_,int32_t * m_,double * odds_,int colors_)1364 void CMultiWalleniusNCHypergeometric::SetParameters(int32_t n_, int32_t * m_, double * odds_, int colors_) {
1365    // change parameters
1366    int32_t N1;
1367    int i;
1368    n = n_;  m = m_;  omega = odds_;  colors = colors_;
1369    r = 1.;
1370    for (N = N1 = 0, i = 0; i < colors; i++) {
1371       if (m[i] < 0 || omega[i] < 0) FatalError("Parameter negative in constructor for CMultiWalleniusNCHypergeometric");
1372       N += m[i];
1373       if (omega[i]) N1 += m[i];
1374    }
1375    if (N < n) FatalError("Not enough items in constructor for CMultiWalleniusNCHypergeometric");
1376    if (N1< n) FatalError("Not enough items with nonzero weight in constructor for CMultiWalleniusNCHypergeometric");
1377 }
1378 
1379 
mean(double * mu)1380 void CMultiWalleniusNCHypergeometric::mean(double * mu) {
1381    // calculate approximate mean of multivariate Wallenius noncentral hypergeometric
1382    // distribution. Result is returned in mu[0..colors-1]
1383    double omeg[MAXCOLORS];              // scaled weights
1384    double omr;                          // reciprocal mean weight
1385    double t, t1;                        // independent variable in iteration
1386    double To, To1;                      // exp(t*omega[i]), 1-exp(t*omega[i])
1387    double H;                            // function to find root of
1388    double HD;                           // derivative of H
1389    double dummy;                        // unused return
1390    int i;                               // color index
1391    int iter;                            // number of iterations
1392 
1393    if (n == 0) {
1394       // needs special case
1395       for (i = 0; i < colors; i++) {
1396          mu[i] = 0.;
1397       }
1398       return;
1399    }
1400 
1401    // calculate mean weight
1402    for (omr=0., i=0; i < colors; i++) omr += omega[i] * m[i];
1403    omr = N / omr;
1404    // scale weights to make mean = 1
1405    for (i = 0; i < colors; i++) omeg[i] = omega[i] * omr;
1406    // Newton Raphson iteration
1407    iter = 0;  t = -1.;                  // first guess
1408    do {
1409       t1 = t;
1410       H = HD = 0.;
1411       // calculate H and HD
1412       for (i = 0; i < colors; i++) {
1413          if (omeg[i] != 0.) {
1414             To1 = pow2_1(t * (1./LN2) * omeg[i], &To);
1415             H += m[i] * To1;
1416             HD -= m[i] * omeg[i] * To;
1417          }
1418       }
1419       t -= (H-n) / HD;
1420       if (t >= 0) t = 0.5 * t1;
1421       if (++iter > 20) {
1422          FatalError("Search for mean failed in function CMultiWalleniusNCHypergeometric::mean");
1423       }
1424    }
1425    while (fabs(H - n) > 1E-3);
1426    // finished iteration. Get all mu[i]
1427    for (i = 0; i < colors; i++) {
1428       if (omeg[i] != 0.) {
1429          To1 = pow2_1(t * (1./LN2) * omeg[i], &dummy);
1430          mu[i] = m[i] * To1;
1431       }
1432       else {
1433          mu[i] = 0.;
1434       }
1435    }
1436 }
1437 /*
1438 void CMultiWalleniusNCHypergeometric::variance(double * var, double * mean_) {
1439    // calculates approximate variance and mean of multivariate
1440    // Wallenius' noncentral hypergeometric distribution
1441    // (accuracy is not too good).
1442    // Variance is returned in variance[0..colors-1].
1443    // Mean is returned in mean_[0..colors-1] if not NULL.
1444    // The calculation is reasonably fast.
1445    double r1, r2;
1446    double mu[MAXCOLORS];
1447    int i;
1448 
1449    // Store mean in array mu if mean_ is NULL
1450    if (mean_ == 0) mean_ = mu;
1451 
1452    // Calculate mean
1453    mean(mean_);
1454 
1455    // Calculate variance
1456    for (i = 0; i < colors; i++) {
1457       r1 = mean_[i] * (m[i]-mean_[i]);
1458       r2 = (n-mean_[i])*(mean_[i]+N-n-m[i]);
1459       if (r1 <= 0. || r2 <= 0.) {
1460          var[i] = 0.;
1461       }
1462       else {
1463          var[i] = N*r1*r2/((N-1)*(m[i]*r2+(N-m[i])*r1));
1464       }
1465    }
1466 }
1467 */
1468 
1469 // implementations of different calculation methods
binoexpand(void)1470 double CMultiWalleniusNCHypergeometric::binoexpand(void) {
1471    // binomial expansion of integrand
1472    // only implemented for x[i] = 0 for all but one i
1473    int i, j, k;
1474    double W = 0.;                       // total weight
1475    for (i=j=k=0; i<colors; i++) {
1476       W += omega[i] * m[i];
1477       if (x[i]) {
1478          j=i; k++;                      // find the nonzero x[i]
1479       }
1480    }
1481    if (k > 1) FatalError("More than one x[i] nonzero in CMultiWalleniusNCHypergeometric::binoexpand");
1482    return exp(FallingFactorial(m[j],n) - FallingFactorial(W/omega[j],n));
1483 }
1484 
1485 
lnbico(void)1486 double CMultiWalleniusNCHypergeometric::lnbico(void) {
1487    // natural log of binomial coefficients
1488    bico = 0.;
1489    int i;
1490    for (i=0; i<colors; i++) {
1491       if (x[i] < m[i] && omega[i]) {
1492          bico += LnFac(m[i]) - LnFac(x[i]) - LnFac(m[i]-x[i]);
1493       }
1494    }
1495    return bico;
1496 }
1497 
1498 
findpars(void)1499 void CMultiWalleniusNCHypergeometric::findpars(void) {
1500    // calculate r, w, E
1501    // calculate d, E, r, w
1502 
1503    // find r to center peak of integrand at 0.5
1504    double dd;                           // scaled d
1505    double dr;                           // 1/d
1506 
1507    double z, zd, rr, lastr, rrc, rt, r2, r21, a, b, ro, k1, dummy;
1508    double omax;                         // highest omega
1509    double omaxr;                        // 1/omax
1510    double omeg[MAXCOLORS];              // scaled weights
1511    int i, j = 0;
1512 
1513    // find highest omega
1514    for (omax=0., i=0; i < colors; i++) {
1515       if (omega[i] > omax) omax = omega[i];
1516    }
1517    omaxr = 1. / omax;
1518    dd = E = 0.;
1519    for (i = 0; i < colors; i++) {
1520       // scale weights to make max = 1
1521       omeg[i] = omega[i] * omaxr;
1522       // calculate d and E
1523       dd += omeg[i] * (m[i]-x[i]);
1524       E  += omeg[i] * m[i];
1525    }
1526    dr = 1. / dd;
1527    E *= dr;
1528    rr = r * omax;
1529    if (rr <= dr) rr = 1.2 * dr;  // initial guess
1530    // Newton-Raphson iteration to find r
1531    do {
1532       lastr = rr;
1533       rrc = 1. / rr;
1534       z = dd - rrc;                    // z(r)
1535       zd = rrc * rrc;                  // z'(r)
1536       for (i=0; i<colors; i++) {
1537          rt = rr * omeg[i];
1538          if (rt < 100. && rt > 0.) {   // avoid overflow and division by 0
1539             r21 = pow2_1(rt, &r2);     // r2=2^r, r21=1.-2^r
1540             a = omeg[i] / r21;         // omegai/(1.-2^r)
1541             b = x[i] * a;              // x*omegai/(1.-2^r)
1542             z  += b;
1543             zd += b * a * r2 * LN2;
1544          }
1545       }
1546       if (zd == 0) FatalError("can't find r in function CMultiWalleniusNCHypergeometric::findpars");
1547       rr -= z / zd;                    // next r
1548       if (rr <= dr) rr = lastr * 0.125 + dr * 0.875;
1549       if (++j == 70) FatalError("convergence problem searching for r in function CMultiWalleniusNCHypergeometric::findpars");
1550    }
1551    while (fabs(rr-lastr) > rr * 1.E-5);
1552    rd = rr * dd;
1553    r = rr * omaxr;
1554 
1555    // find peak width
1556    phi2d = 0.;
1557    for (i=0; i<colors; i++) {
1558       ro = rr * omeg[i];
1559       if (ro < 300 && ro > 0.) {       // avoid overflow and division by 0
1560          k1 = pow2_1(ro, &dummy);
1561          k1 = -1. / k1;
1562          k1 = omeg[i] * omeg[i] * (k1 + k1*k1);
1563       }
1564       else k1 = 0.;
1565       phi2d += x[i] * k1;
1566    }
1567    phi2d *= -4. * rr * rr;
1568    if (phi2d > 0.) FatalError("peak width undefined in function CMultiWalleniusNCHypergeometric::findpars");
1569    wr = sqrt(-phi2d);  w = 1. / wr;
1570 }
1571 
1572 
laplace(void)1573 double CMultiWalleniusNCHypergeometric::laplace(void) {
1574    // Laplace's method with narrow integration interval,
1575    // using error function residues table, defined in erfres.cpp
1576    // Note that this function can only be used when the integrand peak is narrow.
1577    // findpars() must be called before this function.
1578 
1579    const int MAXDEG = 40;              // arraysize
1580    int degree;                         // max expansion degree
1581    double accur;                       // stop expansion when terms below this threshold
1582    double f0;                          // factor outside integral
1583    double rho[MAXCOLORS];              // r*omegai
1584    double qi;                          // 2^(-rho)
1585    double qi1;                         // 1-qi
1586    double qq[MAXCOLORS];               // qi / qi1
1587    double eta[MAXCOLORS+1][MAXDEG+1];  // eta coefficients
1588    double phideri[MAXDEG+1];           // derivatives of phi
1589    double PSIderi[MAXDEG+1];           // derivatives of PSI
1590    double * erfresp;                   // pointer to table of error function residues
1591 
1592    // variables in asymptotic summation
1593    static const double sqrt8  = 2.828427124746190098; // sqrt(8)
1594    double qqpow;                       // qq^j
1595    double pow2k;                       // 2^k
1596    double bino;                        // binomial coefficient
1597    double vr;                          // 1/v, v = integration interval
1598    double v2m2;                        // (2*v)^(-2)
1599    double v2mk1;                       // (2*v)^(-k-1)
1600    double s;                           // summation term
1601    double sum;                         // Taylor sum
1602 
1603    int i;                              // loop counter for color
1604    int j;                              // loop counter for derivative
1605    int k;                              // loop counter for expansion degree
1606    int ll;                             // k/2
1607    int converg = 0;                    // number of consequtive terms below accuracy
1608    int PrecisionIndex;                 // index into ErfRes table according to desired precision
1609 
1610    // initialize
1611    for (k = 0; k <= 2; k++)  phideri[k] = PSIderi[k] = 0;
1612 
1613    // find rho[i], qq[i], first eta coefficients, and zero'th derivative of phi
1614    for (i = 0; i < colors; i++) {
1615       rho[i] = r * omega[i];
1616       if (rho[i] == 0.) continue;
1617       if (rho[i] > 40.) {
1618          qi=0.;  qi1 = 1.;             // avoid underflow
1619       }
1620       else {
1621          qi1 = pow2_1(-rho[i], &qi);   // qi=2^(-rho), qi1=1.-2^(-rho)
1622       }
1623       qq[i] = qi / qi1;                // 2^(-r*omegai)/(1.-2^(-r*omegai))
1624       // peak = zero'th derivative
1625       phideri[0] += x[i] * log1mx(qi, qi1);
1626       // eta coefficients
1627       eta[i][0] = 0.;
1628       eta[i][1] = eta[i][2] = rho[i]*rho[i];
1629    }
1630 
1631    // d, r, and w must be calculated by findpars()
1632    // zero'th derivative
1633    phideri[0] -= (rd - 1.) * LN2;
1634    // scaled factor outside integral
1635    f0 = rd * exp(phideri[0] + lnbico());
1636    // calculate narrowed integration interval
1637    vr = sqrt8 * w;
1638    phideri[2] = phi2d;
1639 
1640    // get table according to desired precision
1641    PrecisionIndex = (-FloorLog2((float)accuracy) - ERFRES_B + ERFRES_S - 1) / ERFRES_S;
1642    if (PrecisionIndex < 0) PrecisionIndex = 0;
1643    if (PrecisionIndex > ERFRES_N-1) PrecisionIndex = ERFRES_N-1;
1644    while (w * NumSDev[PrecisionIndex] > 0.3) {
1645       // check if integration interval is too wide
1646       if (PrecisionIndex == 0) {
1647          FatalError("Laplace method failed. Peak width too high in function CWalleniusNCHypergeometric::laplace");
1648          break;
1649       }
1650       PrecisionIndex--;                // reduce precision to keep integration interval narrow
1651    }
1652    erfresp = ErfRes[PrecisionIndex];   // choose desired table
1653 
1654    degree = MAXDEG;                    // max expansion degree
1655    if (degree >= ERFRES_L*2) degree = ERFRES_L*2-2;
1656 
1657    // set up for starting loop at k=3
1658    v2m2 = 0.25 * vr * vr;              // (2*v)^(-2)
1659    PSIderi[0] = 1.;
1660    pow2k = 8.;
1661    sum = 0.5 * vr * erfresp[0];
1662    v2mk1 = 0.5 * vr * v2m2 * v2m2;
1663    accur = accuracy * sum;
1664 
1665    // summation loop
1666    for (k = 3; k <= degree; k++) {
1667       phideri[k] = 0.;
1668 
1669       // loop for all colors
1670       for (i = 0; i < colors; i++) {
1671          if (rho[i] == 0.) continue;
1672          eta[i][k] = 0.;
1673          // backward loop for all powers
1674          for (j = k; j > 0; j--) {
1675             // find coefficients recursively from previous coefficients
1676             eta[i][j]  =  eta[i][j]*(j*rho[i]-(k-2)) +  eta[i][j-1]*rho[i]*(j-1);
1677          }
1678          qqpow = 1.;
1679          // forward loop for all powers
1680          for (j = 1; j <= k; j++) {
1681             qqpow *= qq[i];   // qq^j
1682             // contribution to derivative
1683             phideri[k] += x[i] * eta[i][j] * qqpow;
1684          }
1685       }
1686 
1687       // finish calculation of derivatives
1688       phideri[k] = -pow2k * phideri[k] + 2*(1-k)*phideri[k-1];
1689 
1690       pow2k *= 2.;                     // 2^k
1691 
1692       // loop to calculate derivatives of PSI from derivatives of psi.
1693       // terms # 0, 1, 2, k-2, and k-1 are zero and not included in loop.
1694       // The j'th derivatives of psi are identical to the derivatives of phi for j>2, and
1695       // zero for j=1,2. Hence we are using phideri[j] for j>2 here.
1696       PSIderi[k] = phideri[k];         // this is term # k
1697       bino = 0.5 * (k-1) * (k-2);      // binomial coefficient for term # 3
1698       for (j=3; j < k-2; j++) { // loop for remaining nonzero terms (if k>5)
1699          PSIderi[k] += PSIderi[k-j] * phideri[j] * bino;
1700          bino *= double(k-j)/double(j);
1701       }
1702 
1703       if ((k & 1) == 0) { // only for even k
1704          ll = k/2;
1705          s = PSIderi[k] * v2mk1 * erfresp[ll];
1706          sum += s;
1707 
1708          // check for convergence of Taylor expansion
1709          if (fabs(s) < accur) converg++; else converg = 0;
1710          if (converg > 1) break;
1711 
1712          // update recursive expressions
1713          v2mk1 *= v2m2;
1714       }
1715    }
1716 
1717    // multiply by terms outside integral
1718    return f0 * sum;
1719 }
1720 
1721 
integrate(void)1722 double CMultiWalleniusNCHypergeometric::integrate(void) {
1723    // Wallenius non-central hypergeometric distribution function
1724    // calculation by numerical integration with variable-length steps
1725    // NOTE: findpars() must be called before this function.
1726    double s;                           // result of integration step
1727    double sum;                         // integral
1728    double ta, tb;                      // subinterval for integration step
1729 
1730    lnbico();                           // compute log of binomial coefficients
1731 
1732    // choose method:
1733    if (w < 0.02) {
1734       // normal method. Step length determined by peak width w
1735       double delta, s1;
1736       s1 = accuracy < 1E-9 ? 0.5 : 1.;
1737       delta = s1 * w;                            // integration steplength
1738       ta = 0.5 + 0.5 * delta;
1739       sum = integrate_step(1.-ta, ta);           // first integration step around center peak
1740       do {
1741          tb = ta + delta;
1742          if (tb > 1.) tb = 1.;
1743          s  = integrate_step(ta, tb);            // integration step to the right of peak
1744          s += integrate_step(1.-tb,1.-ta);       // integration step to the left of peak
1745          sum += s;
1746          if (s < accuracy * sum) break;          // stop before interval finished if accuracy reached
1747          ta = tb;
1748          if (tb > 0.5 + w) delta *= 2.;          // increase step length far from peak
1749       }
1750       while (tb < 1.);
1751    }
1752 
1753    else {
1754       // difficult situation. Step length determined by inflection points
1755       double t1, t2, tinf, delta, delta1;
1756       sum = 0.;
1757       // do left and right half of integration interval separately:
1758       for (t1=0., t2=0.5; t1 < 1.; t1+=0.5, t2+=0.5) {
1759          // integrate from 0 to 0.5 or from 0.5 to 1
1760          tinf = search_inflect(t1, t2);          // find inflection point
1761          delta = tinf - t1; if (delta > t2 - tinf) delta = t2 - tinf; // distance to nearest endpoint
1762          delta *= 1./7.;                         // 1/7 will give 3 steps to nearest endpoint
1763          if (delta < 1E-4) delta = 1E-4;
1764          delta1 = delta;
1765          // integrate from tinf forwards to t2
1766          ta = tinf;
1767          do {
1768             tb = ta + delta1;
1769             if (tb > t2 - 0.25*delta1) tb = t2;  // last step of this subinterval
1770             s = integrate_step(ta, tb);          // integration step
1771             sum += s;
1772             delta1 *= 2;                         // double steplength
1773             if (s < sum * 1E-4) delta1 *= 8.;    // large step when s small
1774             ta = tb;
1775          }
1776          while (tb < t2);
1777          if (tinf) {
1778             // integrate from tinf backwards to t1
1779             tb = tinf;
1780             do {
1781                ta = tb - delta;
1782                if (ta < t1 + 0.25*delta) ta = t1; // last step of this subinterval
1783                s = integrate_step(ta, tb);        // integration step
1784                sum += s;
1785                delta *= 2;                       // double steplength
1786                if (s < sum * 1E-4) delta *= 8.;  // large step when s small
1787                tb = ta;
1788             }
1789             while (ta > t1);
1790          }
1791       }
1792    }
1793    return sum * rd;
1794 }
1795 
1796 
integrate_step(double ta,double tb)1797 double CMultiWalleniusNCHypergeometric::integrate_step(double ta, double tb) {
1798    // integration subprocedure used by integrate()
1799    // makes one integration step from ta to tb using Gauss-Legendre method.
1800    // result is scaled by multiplication with exp(bico)
1801    double ab, delta, tau, ltau, y, sum, taur, rdm1;
1802    int i, j;
1803 
1804    // define constants for Gauss-Legendre integration with IPOINTS points
1805 #define IPOINTS  8  // number of points in each integration step
1806 
1807 #if   IPOINTS == 3
1808    static const double xval[3]    = {-.774596669241,0,0.774596668241};
1809    static const double weights[3] = {.5555555555555555,.88888888888888888,.55555555555555};
1810 #elif IPOINTS == 4
1811    static const double xval[4]    = {-0.861136311594,-0.339981043585,0.339981043585,0.861136311594},
1812       static const double weights[4] = {0.347854845137,0.652145154863,0.652145154863,0.347854845137};
1813 #elif IPOINTS == 5
1814    static const double xval[5]    = {-0.906179845939,-0.538469310106,0,0.538469310106,0.906179845939};
1815    static const double weights[5] = {0.236926885056,0.478628670499,0.568888888889,0.478628670499,0.236926885056};
1816 #elif IPOINTS == 6
1817    static const double xval[6]    = {-0.932469514203,-0.661209386466,-0.238619186083,0.238619186083,0.661209386466,0.932469514203};
1818    static const double weights[6] = {0.171324492379,0.360761573048,0.467913934573,0.467913934573,0.360761573048,0.171324492379};
1819 #elif IPOINTS == 8
1820    static const double xval[8]    = {-0.960289856498,-0.796666477414,-0.525532409916,-0.183434642496,0.183434642496,0.525532409916,0.796666477414,0.960289856498};
1821    static const double weights[8] = {0.10122853629,0.222381034453,0.313706645878,0.362683783378,0.362683783378,0.313706645878,0.222381034453,0.10122853629};
1822 #elif IPOINTS == 12
1823    static const double xval[12]   = {-0.981560634247,-0.90411725637,-0.769902674194,-0.587317954287,-0.367831498998,-0.125233408511,0.125233408511,0.367831498998,0.587317954287,0.769902674194,0.90411725637,0.981560634247};
1824    static const double weights[12]= {0.0471753363866,0.106939325995,0.160078328543,0.203167426723,0.233492536538,0.249147045813,0.249147045813,0.233492536538,0.203167426723,0.160078328543,0.106939325995,0.0471753363866};
1825 #elif IPOINTS == 16
1826    static const double xval[16]   = {-0.989400934992,-0.944575023073,-0.865631202388,-0.755404408355,-0.617876244403,-0.458016777657,-0.281603550779,-0.0950125098376,0.0950125098376,0.281603550779,0.458016777657,0.617876244403,0.755404408355,0.865631202388,0.944575023073,0.989400934992};
1827    static const double weights[16]= {0.027152459411,0.0622535239372,0.0951585116838,0.124628971256,0.149595988817,0.169156519395,0.182603415045,0.189450610455,0.189450610455,0.182603415045,0.169156519395,0.149595988817,0.124628971256,0.0951585116838,0.0622535239372,0.027152459411};
1828 #else
1829 #error // IPOINTS must be a value for which the tables are defined
1830 #endif
1831 
1832    delta = 0.5 * (tb - ta);
1833    ab = 0.5 * (ta + tb);
1834    rdm1 = rd - 1.;
1835    sum = 0;
1836 
1837    for (j = 0; j < IPOINTS; j++) {
1838       tau = ab + delta * xval[j];
1839       ltau = log(tau);
1840       taur = r * ltau;
1841       y = 0.;
1842       for (i = 0; i < colors; i++) {
1843          // possible loss of precision due to subtraction here:
1844          if (omega[i]) {
1845             y += log1pow(taur*omega[i],x[i]);   // ln((1-e^taur*omegai)^xi)
1846          }
1847       }
1848       y += rdm1*ltau + bico;
1849       if (y > -50.) sum += weights[j] * exp(y);
1850    }
1851    return delta * sum;
1852 }
1853 
1854 
search_inflect(double t_from,double t_to)1855 double CMultiWalleniusNCHypergeometric::search_inflect(double t_from, double t_to) {
1856    // search for an inflection point of the integrand PHI(t) in the interval
1857    // t_from < t < t_to
1858    double t, t1;                       // independent variable
1859    double rho[MAXCOLORS];              // r*omega[i]
1860    double q;                           // t^rho[i] / (1-t^rho[i])
1861    double q1;                          // 1-t^rho[i]
1862    double zeta[MAXCOLORS][4][4];       // zeta[i,j,k] coefficients
1863    double phi[4];                      // derivatives of phi(t) = log PHI(t)
1864    double Z2;                          // PHI''(t)/PHI(t)
1865    double Zd;                          // derivative in Newton Raphson iteration
1866    double rdm1;                        // r * d - 1
1867    double tr;                          // 1/t
1868    double log2t;                       // log2(t)
1869    double method;                      // 0 for z2'(t) method, 1 for z3(t) method
1870    int i;                              // color
1871    int iter;                           // count iterations
1872 
1873    rdm1 = rd - 1.;
1874    if (t_from == 0 && rdm1 <= 1.) return 0.;     //no inflection point
1875    t = 0.5 * (t_from + t_to);
1876    for (i = 0; i < colors; i++) {                // calculate zeta coefficients
1877       rho[i] = r * omega[i];
1878       zeta[i][1][1] = rho[i];
1879       zeta[i][1][2] = rho[i] * (rho[i] - 1.);
1880       zeta[i][2][2] = rho[i] * rho[i];
1881       zeta[i][1][3] = zeta[i][1][2] * (rho[i] - 2.);
1882       zeta[i][2][3] = zeta[i][1][2] * rho[i] * 3.;
1883       zeta[i][3][3] = zeta[i][2][2] * rho[i] * 2.;
1884    }
1885    iter = 0;
1886 
1887    do {
1888       t1 = t;
1889       tr = 1. / t;
1890       log2t = log(t)*(1./LN2);
1891       phi[1] = phi[2] = phi[3] = 0.;
1892       for (i=0; i<colors; i++) {                 // calculate first 3 derivatives of phi(t)
1893          if (rho[i] == 0.) continue;
1894          q1 = pow2_1(rho[i]*log2t,&q);
1895          q /= q1;
1896          phi[1] -= x[i] * zeta[i][1][1] * q;
1897          phi[2] -= x[i] * q * (zeta[i][1][2] + q * zeta[i][2][2]);
1898          phi[3] -= x[i] * q * (zeta[i][1][3] + q * (zeta[i][2][3] + q * zeta[i][3][3]));
1899       }
1900       phi[1] += rdm1;
1901       phi[2] -= rdm1;
1902       phi[3] += 2. * rdm1;
1903       phi[1] *= tr;
1904       phi[2] *= tr * tr;
1905       phi[3] *= tr * tr * tr;
1906       method = (iter & 2) >> 1;                  // alternate between the two methods
1907       Z2 = phi[1]*phi[1] + phi[2];
1908       Zd = method*phi[1]*phi[1]*phi[1] + (2.+method)*phi[1]*phi[2] + phi[3];
1909 
1910       if (t < 0.5) {
1911          if (Z2 > 0) {
1912             t_from = t;
1913          }
1914          else {
1915             t_to = t;
1916          }
1917          if (Zd >= 0) {
1918             // use binary search if Newton-Raphson iteration makes problems
1919             t = (t_from ? 0.5 : 0.2) * (t_from + t_to);
1920          }
1921          else {
1922             // Newton-Raphson iteration
1923             t -= Z2 / Zd;
1924          }
1925       }
1926       else {
1927          if (Z2 < 0) {
1928             t_from = t;
1929          }
1930          else {
1931             t_to = t;
1932          }
1933          if (Zd <= 0) {
1934             // use binary search if Newton-Raphson iteration makes problems
1935             t = 0.5 * (t_from + t_to);
1936          }
1937          else {
1938             // Newton-Raphson iteration
1939             t -= Z2 / Zd;
1940          }
1941       }
1942       if (t >= t_to) t = (t1 + t_to) * 0.5;
1943       if (t <= t_from) t = (t1 + t_from) * 0.5;
1944       if (++iter > 20) FatalError("Search for inflection point failed in function CMultiWalleniusNCHypergeometric::search_inflect");
1945    }
1946    while (fabs(t - t1) > 1E-5);
1947    return t;
1948 }
1949 
1950 
probability(int32_t * x_)1951 double CMultiWalleniusNCHypergeometric::probability(int32_t * x_) {
1952    // calculate probability function. choosing best method
1953    int i, j, em;
1954    int central;
1955    int32_t xsum;
1956    x = x_;
1957 
1958    for (xsum = i = 0; i < colors; i++)  xsum += x[i];
1959    if (xsum != n) {
1960       FatalError("sum of x values not equal to n in function CMultiWalleniusNCHypergeometric::probability");
1961    }
1962 
1963    if (colors < 3) {
1964       if (colors <= 0) return 1.;
1965       if (colors == 1) return x[0] == m[0];
1966       // colors = 2
1967       if (omega[1] == 0.) return x[0] == m[0];
1968       return CWalleniusNCHypergeometric(n,m[0],N,omega[0]/omega[1],accuracy).probability(x[0]);
1969    }
1970 
1971    central = 1;
1972    for (i = j = em = 0; i < colors; i++) {
1973       if (x[i] > m[i] || x[i] < 0 || x[i] < n - N + m[i]) return 0.;
1974       if (x[i] > 0) j++;
1975       if (omega[i] == 0. && x[i]) return 0.;
1976       if (x[i] == m[i] || omega[i] == 0.) em++;
1977       if (i > 0 && omega[i] != omega[i-1]) central = 0;
1978    }
1979 
1980    if (n == 0 || em == colors) return 1.;
1981 
1982    if (central) {
1983       // All omega's are equal.
1984       // This is multivariate central hypergeometric distribution
1985       int32_t sx = n,  sm = N;
1986       double p = 1.;
1987       for (i = 0; i < colors - 1; i++) {
1988          // Use univariate hypergeometric (usedcolors-1) times
1989          p *= CWalleniusNCHypergeometric(sx, m[i], sm, 1.).probability(x[i]);
1990          sx -= x[i];  sm -= m[i];
1991       }
1992       return p;
1993    }
1994 
1995 
1996    if (j == 1) {
1997       return binoexpand();
1998    }
1999 
2000    findpars();
2001    if (w < 0.04 && E < 10 && (!em || w > 0.004)) {
2002       return laplace();
2003    }
2004 
2005    return integrate();
2006 }
2007 
2008 
2009 /***********************************************************************
2010 Methods for CMultiWalleniusNCHypergeometricMoments
2011 ***********************************************************************/
2012 
moments(double * mu,double * variance,int32_t * combinations)2013 double CMultiWalleniusNCHypergeometricMoments::moments(double * mu, double * variance, int32_t * combinations) {
2014    // calculates mean and variance of multivariate Wallenius noncentral
2015    // hypergeometric distribution by calculating all combinations of x-values.
2016    // Return value = sum of all probabilities. The deviation of this value
2017    // from 1 is a measure of the accuracy.
2018    // Returns the mean to mean[0...colors-1]
2019    // Returns the variance to variance[0...colors-1]
2020    double sumf;                        // sum of all f(x) values
2021    int32_t msum;                       // temporary sum
2022    int i;                              // loop counter
2023 
2024    // get approximate mean
2025    mean(sx);
2026    // round mean to integers
2027    for (i=0; i < colors; i++) {
2028       xm[i] = (int32_t)(sx[i]+0.4999999);
2029    }
2030 
2031    // set up for recursive loops
2032    for (i=colors-1, msum=0; i >= 0; i--) {
2033       remaining[i] = msum;  msum += m[i];
2034    }
2035    for (i=0; i<colors; i++)  sx[i] = sxx[i] = 0.;
2036    sn = 0;
2037 
2038    // recursive loops to calculate sums
2039    sumf = loop(n, 0);
2040 
2041    // calculate mean and variance
2042    for (i = 0; i < colors; i++) {
2043       mu[i] = sx[i]/sumf;
2044       variance[i] = sxx[i]/sumf - sx[i]*sx[i]/(sumf*sumf);
2045    }
2046 
2047    // return combinations and sum
2048    if (combinations) *combinations = sn;
2049    return sumf;
2050 }
2051 
2052 
loop(int32_t n,int c)2053 double CMultiWalleniusNCHypergeometricMoments::loop(int32_t n, int c) {
2054    // recursive function to loop through all combinations of x-values.
2055    // used by moments()
2056    int32_t x, x0;                      // x of color c
2057    int32_t xmin, xmax;                 // min and max of x[c]
2058    double s1, s2, sum = 0.;            // sum of f(x) values
2059    int i;                              // loop counter
2060 
2061    if (c < colors-1) {
2062       // not the last color
2063       // calculate min and max of x[c] for given x[0]..x[c-1]
2064       xmin = n - remaining[c];  if (xmin < 0) xmin = 0;
2065       xmax = m[c];  if (xmax > n) xmax = n;
2066       x0 = xm[c];  if (x0 < xmin) x0 = xmin;  if (x0 > xmax) x0 = xmax;
2067       // loop for all x[c] from mean and up
2068       for (x = x0, s2 = 0.; x <= xmax; x++) {
2069          xi[c] = x;
2070          sum += s1 = loop(n-x, c+1);             // recursive loop for remaining colors
2071          if (s1 < accuracy && s1 < s2) break;    // stop when values become negligible
2072          s2 = s1;
2073       }
2074       // loop for all x[c] from mean and down
2075       for (x = x0-1; x >= xmin; x--) {
2076          xi[c] = x;
2077          sum += s1 = loop(n-x, c+1);             // recursive loop for remaining colors
2078          if (s1 < accuracy && s1 < s2) break;    // stop when values become negligible
2079          s2 = s1;
2080       }
2081    }
2082    else {
2083       // last color
2084       xi[c] = n;
2085       s1 = probability(xi);
2086       for (i=0; i < colors; i++) {
2087          sx[i]  += s1 * xi[i];
2088          sxx[i] += s1 * xi[i] * xi[i];
2089       }
2090       sn++;
2091       sum = s1;
2092    }
2093    return sum;
2094 }
2095