1 /*
2  *  Mathlib : A C Library of Special Functions
3  *  Copyright (C) 1999-2020  The R Core Team
4  *  Copyright (C) 1998       Ross Ihaka
5  *  Copyright (C) 2004	     Morten Welinder
6  *  Copyright (C) 2004	     The R Foundation
7  *
8  *  This program is free software; you can redistribute it and/or modify
9  *  it under the terms of the GNU General Public License as published by
10  *  the Free Software Foundation; either version 2 of the License, or
11  *  (at your option) any later version.
12  *
13  *  This program is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *  GNU General Public License for more details.
17  *
18  *  You should have received a copy of the GNU General Public License
19  *  along with this program; if not, a copy is available at
20  *  https://www.R-project.org/Licenses/
21  *
22  *  DESCRIPTION
23  *
24  *	The distribution function of the hypergeometric distribution.
25  *
26  * Current implementation based on posting
27  * From: Morten Welinder <terra@gnome.org>
28  * Cc: R-bugs@biostat.ku.dk
29  * Subject: [Rd] phyper accuracy and efficiency (PR#6772)
30  * Date: Thu, 15 Apr 2004 18:06:37 +0200 (CEST)
31  ......
32 
33  The current version has very serious cancellation issues.  For example,
34  if you ask for a small right-tail you are likely to get total cancellation.
35  For example,  phyper(59, 150, 150, 60, FALSE, FALSE) gives 6.372680161e-14.
36  The right answer is dhyper(0, 150, 150, 60, FALSE) which is 5.111204798e-22.
37 
38  phyper is also really slow for large arguments.
39 
40  Therefore, I suggest using the code below. This is a sniplet from Gnumeric ...
41  The code isn't perfect.  In fact, if  x*(NR+NB)  is close to	n*NR,
42  then this code can take a while. Not longer than the old code, though.
43 
44  -- Thanks to Ian Smith for ideas.
45 */
46 
47 #include "nmath.h"
48 #include "dpq.h"
49 
pdhyper(double x,double NR,double NB,double n,int log_p)50 static double pdhyper (double x, double NR, double NB, double n, int log_p)
51 {
52 /*
53  * Calculate
54  *
55  *	    phyper (x, NR, NB, n, TRUE, FALSE)
56  *   [log]  ----------------------------------
57  *	       dhyper (x, NR, NB, n, FALSE)
58  *
59  * without actually calling phyper.  This assumes that
60  *
61  *     x * (NR + NB) <= n * NR
62  *
63  */
64     LDOUBLE sum = 0;
65     LDOUBLE term = 1;
66 
67     while (x > 0 && term >= DBL_EPSILON * sum) {
68 	term *= x * (NB - n + x) / (n + 1 - x) / (NR + 1 - x);
69 	sum += term;
70 	x--;
71     }
72 
73     double ss = (double) sum;
74     return log_p ? log1p(ss) : 1 + ss;
75 }
76 
77 
78 /* FIXME: The old phyper() code was basically used in ./qhyper.c as well
79  * -----  We need to sync this again!
80                       q         m           n         k   */
phyper(double x,double NR,double NB,double n,int lower_tail,int log_p)81 double phyper (double x, double NR, double NB, double n,
82 	       int lower_tail, int log_p)
83 {
84 /* Sample of  n balls from  NR red  and	 NB black ones;	 x are red */
85 
86     double d, pd;
87 
88 #ifdef IEEE_754
89     if(ISNAN(x) || ISNAN(NR) || ISNAN(NB) || ISNAN(n))
90 	return x + NR + NB + n;
91 #endif
92 
93     x = floor (x + 1e-7);
94     NR = R_forceint(NR);
95     NB = R_forceint(NB);
96     n  = R_forceint(n);
97 
98     if (NR < 0 || NB < 0 || !R_FINITE(NR + NB) || n < 0 || n > NR + NB)
99 	ML_WARN_return_NAN;
100 
101     if (x * (NR + NB) > n * NR) {
102 	/* Swap tails.	*/
103 	double oldNB = NB;
104 	NB = NR;
105 	NR = oldNB;
106 	x = n - x - 1;
107 	lower_tail = !lower_tail;
108     }
109 
110     /* support of dhyper() as a function of its parameters
111      * R:  .suppHyper <- function(m,n,k) max(0, k-n) : min(k, m)
112      * --  where R's (m,n, k) == (NR,NB, n)  here */
113     if (x < 0 || x < n - NB)
114 	return R_DT_0;
115     if (x >= NR || x >= n)
116 	return R_DT_1;
117     d  = dhyper (x, NR, NB, n, log_p);
118     // dhyper(.., log_p=FALSE) > 0 mathematically, but not always numerically :
119     if((!log_p && d == 0.) ||
120         (log_p && d == ML_NEGINF))
121 	return R_DT_0;
122     pd = pdhyper(x, NR, NB, n, log_p);
123 
124     return log_p ? R_DT_Log(d + pd) : R_D_Lval(d * pd);
125 }
126 
127 // NB: MM has code for  AS 152 (Lund, 1980) >> R_77 (Shea, 1989) >> R_86 (Berger, 1991)
128