1 /*
2 * Mathlib : A C Library of Special Functions
3 * Copyright (C) 1999-2020 The R Core Team
4 * Copyright (C) 1998 Ross Ihaka
5 * Copyright (C) 2004 Morten Welinder
6 * Copyright (C) 2004 The R Foundation
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, a copy is available at
20 * https://www.R-project.org/Licenses/
21 *
22 * DESCRIPTION
23 *
24 * The distribution function of the hypergeometric distribution.
25 *
26 * Current implementation based on posting
27 * From: Morten Welinder <terra@gnome.org>
28 * Cc: R-bugs@biostat.ku.dk
29 * Subject: [Rd] phyper accuracy and efficiency (PR#6772)
30 * Date: Thu, 15 Apr 2004 18:06:37 +0200 (CEST)
31 ......
32
33 The current version has very serious cancellation issues. For example,
34 if you ask for a small right-tail you are likely to get total cancellation.
35 For example, phyper(59, 150, 150, 60, FALSE, FALSE) gives 6.372680161e-14.
36 The right answer is dhyper(0, 150, 150, 60, FALSE) which is 5.111204798e-22.
37
38 phyper is also really slow for large arguments.
39
40 Therefore, I suggest using the code below. This is a sniplet from Gnumeric ...
41 The code isn't perfect. In fact, if x*(NR+NB) is close to n*NR,
42 then this code can take a while. Not longer than the old code, though.
43
44 -- Thanks to Ian Smith for ideas.
45 */
46
47 #include "nmath.h"
48 #include "dpq.h"
49
pdhyper(double x,double NR,double NB,double n,int log_p)50 static double pdhyper (double x, double NR, double NB, double n, int log_p)
51 {
52 /*
53 * Calculate
54 *
55 * phyper (x, NR, NB, n, TRUE, FALSE)
56 * [log] ----------------------------------
57 * dhyper (x, NR, NB, n, FALSE)
58 *
59 * without actually calling phyper. This assumes that
60 *
61 * x * (NR + NB) <= n * NR
62 *
63 */
64 LDOUBLE sum = 0;
65 LDOUBLE term = 1;
66
67 while (x > 0 && term >= DBL_EPSILON * sum) {
68 term *= x * (NB - n + x) / (n + 1 - x) / (NR + 1 - x);
69 sum += term;
70 x--;
71 }
72
73 double ss = (double) sum;
74 return log_p ? log1p(ss) : 1 + ss;
75 }
76
77
78 /* FIXME: The old phyper() code was basically used in ./qhyper.c as well
79 * ----- We need to sync this again!
80 q m n k */
phyper(double x,double NR,double NB,double n,int lower_tail,int log_p)81 double phyper (double x, double NR, double NB, double n,
82 int lower_tail, int log_p)
83 {
84 /* Sample of n balls from NR red and NB black ones; x are red */
85
86 double d, pd;
87
88 #ifdef IEEE_754
89 if(ISNAN(x) || ISNAN(NR) || ISNAN(NB) || ISNAN(n))
90 return x + NR + NB + n;
91 #endif
92
93 x = floor (x + 1e-7);
94 NR = R_forceint(NR);
95 NB = R_forceint(NB);
96 n = R_forceint(n);
97
98 if (NR < 0 || NB < 0 || !R_FINITE(NR + NB) || n < 0 || n > NR + NB)
99 ML_WARN_return_NAN;
100
101 if (x * (NR + NB) > n * NR) {
102 /* Swap tails. */
103 double oldNB = NB;
104 NB = NR;
105 NR = oldNB;
106 x = n - x - 1;
107 lower_tail = !lower_tail;
108 }
109
110 /* support of dhyper() as a function of its parameters
111 * R: .suppHyper <- function(m,n,k) max(0, k-n) : min(k, m)
112 * -- where R's (m,n, k) == (NR,NB, n) here */
113 if (x < 0 || x < n - NB)
114 return R_DT_0;
115 if (x >= NR || x >= n)
116 return R_DT_1;
117 d = dhyper (x, NR, NB, n, log_p);
118 // dhyper(.., log_p=FALSE) > 0 mathematically, but not always numerically :
119 if((!log_p && d == 0.) ||
120 (log_p && d == ML_NEGINF))
121 return R_DT_0;
122 pd = pdhyper(x, NR, NB, n, log_p);
123
124 return log_p ? R_DT_Log(d + pd) : R_D_Lval(d * pd);
125 }
126
127 // NB: MM has code for AS 152 (Lund, 1980) >> R_77 (Shea, 1989) >> R_86 (Berger, 1991)
128