1 /*
2 * R : A Computer Language for Statistical Data Analysis
3 * Copyright (C) 1999-2016 The R Core Team.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, a copy is available at
17 * https://www.R-project.org/Licenses/
18 */
19
20 /* ks.c
21 Compute the asymptotic distribution of the one- and two-sample
22 two-sided Kolmogorov-Smirnov statistics, and the exact distributions
23 in the two-sided one-sample and two-sample cases.
24 */
25
26 #include <math.h>
27 #include <R.h>
28 #include <Rinternals.h>
29 #include <Rmath.h> /* constants */
30
31 static double K(int n, double d);
32 static void m_multiply(double *A, double *B, double *C, int m);
33 static void m_power(double *A, int eA, double *V, int *eV, int m, int n);
34
35 /* Two-sample two-sided asymptotic distribution */
36 static void
pkstwo(int n,double * x,double tol)37 pkstwo(int n, double *x, double tol)
38 {
39 /* x[1:n] is input and output
40 *
41 * Compute
42 * \sum_{k=-\infty}^\infty (-1)^k e^{-2 k^2 x^2}
43 * = 1 + 2 \sum_{k=1}^\infty (-1)^k e^{-2 k^2 x^2}
44 * = \frac{\sqrt{2\pi}}{x} \sum_{k=1}^\infty \exp(-(2k-1)^2\pi^2/(8x^2))
45 *
46 * See e.g. J. Durbin (1973), Distribution Theory for Tests Based on the
47 * Sample Distribution Function. SIAM.
48 *
49 * The 'standard' series expansion obviously cannot be used close to 0;
50 * we use the alternative series for x < 1, and a rather crude estimate
51 * of the series remainder term in this case, in particular using that
52 * ue^(-lu^2) \le e^(-lu^2 + u) \le e^(-(l-1)u^2 - u^2+u) \le e^(-(l-1))
53 * provided that u and l are >= 1.
54 *
55 * (But note that for reasonable tolerances, one could simply take 0 as
56 * the value for x < 0.2, and use the standard expansion otherwise.)
57 *
58 */
59 double new, old, s, w, z;
60 int i, k, k_max;
61
62 k_max = (int) sqrt(2 - log(tol));
63
64 for(i = 0; i < n; i++) {
65 if(x[i] < 1) {
66 z = - (M_PI_2 * M_PI_4) / (x[i] * x[i]);
67 w = log(x[i]);
68 s = 0;
69 for(k = 1; k < k_max; k += 2) {
70 s += exp(k * k * z - w);
71 }
72 x[i] = s / M_1_SQRT_2PI;
73 }
74 else {
75 z = -2 * x[i] * x[i];
76 s = -1;
77 k = 1;
78 old = 0;
79 new = 1;
80 while(fabs(old - new) > tol) {
81 old = new;
82 new += 2 * s * exp(z * k * k);
83 s *= -1;
84 k++;
85 }
86 x[i] = new;
87 }
88 }
89 }
90
91 /* Two-sided two-sample */
psmirnov2x(double * x,int m,int n)92 static double psmirnov2x(double *x, int m, int n)
93 {
94 double md, nd, q, *u, w;
95 int i, j;
96
97 if(m > n) {
98 i = n; n = m; m = i;
99 }
100 md = (double) m;
101 nd = (double) n;
102 /*
103 q has 0.5/mn added to ensure that rounding error doesn't
104 turn an equality into an inequality, eg abs(1/2-4/5)>3/10
105
106 */
107 q = (0.5 + floor(*x * md * nd - 1e-7)) / (md * nd);
108 u = (double *) R_alloc(n + 1, sizeof(double));
109
110 for(j = 0; j <= n; j++) {
111 u[j] = ((j / nd) > q) ? 0 : 1;
112 }
113 for(i = 1; i <= m; i++) {
114 w = (double)(i) / ((double)(i + n));
115 if((i / md) > q)
116 u[0] = 0;
117 else
118 u[0] = w * u[0];
119 for(j = 1; j <= n; j++) {
120 if(fabs(i / md - j / nd) > q)
121 u[j] = 0;
122 else
123 u[j] = w * u[j] + u[j - 1];
124 }
125 }
126 return u[n];
127 }
128
129 static double
K(int n,double d)130 K(int n, double d)
131 {
132 /* Compute Kolmogorov's distribution.
133 Code published in
134 George Marsaglia and Wai Wan Tsang and Jingbo Wang (2003),
135 "Evaluating Kolmogorov's distribution".
136 Journal of Statistical Software, Volume 8, 2003, Issue 18.
137 URL: http://www.jstatsoft.org/v08/i18/.
138 */
139
140 int k, m, i, j, g, eH, eQ;
141 double h, s, *H, *Q;
142
143 /*
144 The faster right-tail approximation is omitted here.
145 s = d*d*n;
146 if(s > 7.24 || (s > 3.76 && n > 99))
147 return 1-2*exp(-(2.000071+.331/sqrt(n)+1.409/n)*s);
148 */
149 k = (int) (n * d) + 1;
150 m = 2 * k - 1;
151 h = k - n * d;
152 H = (double*) Calloc(m * m, double);
153 Q = (double*) Calloc(m * m, double);
154 for(i = 0; i < m; i++)
155 for(j = 0; j < m; j++)
156 if(i - j + 1 < 0)
157 H[i * m + j] = 0;
158 else
159 H[i * m + j] = 1;
160 for(i = 0; i < m; i++) {
161 H[i * m] -= R_pow_di(h, i + 1);
162 H[(m - 1) * m + i] -= R_pow_di(h, (m - i));
163 }
164 H[(m - 1) * m] += ((2 * h - 1 > 0) ? R_pow_di(2 * h - 1, m) : 0);
165 for(i = 0; i < m; i++)
166 for(j = 0; j < m; j++)
167 if(i - j + 1 > 0)
168 for(g = 1; g <= i - j + 1; g++)
169 H[i * m + j] /= g;
170 eH = 0;
171 m_power(H, eH, Q, &eQ, m, n);
172 s = Q[(k - 1) * m + k - 1];
173 for(i = 1; i <= n; i++) {
174 s = s * i / n;
175 if(s < 1e-140) {
176 s *= 1e140;
177 eQ -= 140;
178 }
179 }
180 s *= R_pow_di(10.0, eQ);
181 Free(H);
182 Free(Q);
183 return(s);
184 }
185
186 static void
m_multiply(double * A,double * B,double * C,int m)187 m_multiply(double *A, double *B, double *C, int m)
188 {
189 /* Auxiliary routine used by K().
190 Matrix multiplication.
191 */
192 int i, j, k;
193 double s;
194 for(i = 0; i < m; i++)
195 for(j = 0; j < m; j++) {
196 s = 0.;
197 for(k = 0; k < m; k++)
198 s+= A[i * m + k] * B[k * m + j];
199 C[i * m + j] = s;
200 }
201 }
202
203 static void
m_power(double * A,int eA,double * V,int * eV,int m,int n)204 m_power(double *A, int eA, double *V, int *eV, int m, int n)
205 {
206 /* Auxiliary routine used by K().
207 Matrix power.
208 */
209 double *B;
210 int eB , i;
211
212 if(n == 1) {
213 for(i = 0; i < m * m; i++)
214 V[i] = A[i];
215 *eV = eA;
216 return;
217 }
218 m_power(A, eA, V, eV, m, n / 2);
219 B = (double*) Calloc(m * m, double);
220 m_multiply(V, V, B, m);
221 eB = 2 * (*eV);
222 if((n % 2) == 0) {
223 for(i = 0; i < m * m; i++)
224 V[i] = B[i];
225 *eV = eB;
226 }
227 else {
228 m_multiply(A, B, V, m);
229 *eV = eA + eB;
230 }
231 if(V[(m / 2) * m + (m / 2)] > 1e140) {
232 for(i = 0; i < m * m; i++)
233 V[i] = V[i] * 1e-140;
234 *eV += 140;
235 }
236 Free(B);
237 }
238
239 /* Two-sided two-sample */
pSmirnov2x(SEXP statistic,SEXP snx,SEXP sny)240 SEXP pSmirnov2x(SEXP statistic, SEXP snx, SEXP sny)
241 {
242 int nx = asInteger(snx), ny = asInteger(sny);
243 double st = asReal(statistic);
244 return ScalarReal(psmirnov2x(&st, nx, ny));
245 }
246
247 /* Two-sample two-sided asymptotic distribution */
pKS2(SEXP statistic,SEXP stol)248 SEXP pKS2(SEXP statistic, SEXP stol)
249 {
250 int n = LENGTH(statistic);
251 double tol = asReal(stol);
252 SEXP ans = duplicate(statistic);
253 pkstwo(n, REAL(ans), tol);
254 return ans;
255 }
256
257
258 /* The two-sided one-sample 'exact' distribution */
pKolmogorov2x(SEXP statistic,SEXP sn)259 SEXP pKolmogorov2x(SEXP statistic, SEXP sn)
260 {
261 int n = asInteger(sn);
262 double st = asReal(statistic), p;
263 p = K(n, st);
264 return ScalarReal(p);
265 }
266