1 /*
2  *  R : A Computer Language for Statistical Data Analysis
3  *  Copyright (C) 1999-2016   The R Core Team.
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, a copy is available at
17  *  https://www.R-project.org/Licenses/
18  */
19 
20 /* ks.c
21    Compute the asymptotic distribution of the one- and two-sample
22    two-sided Kolmogorov-Smirnov statistics, and the exact distributions
23    in the two-sided one-sample and two-sample cases.
24 */
25 
26 #include <math.h>
27 #include <R.h>
28 #include <Rinternals.h>
29 #include <Rmath.h>		/* constants */
30 
31 static double K(int n, double d);
32 static void m_multiply(double *A, double *B, double *C, int m);
33 static void m_power(double *A, int eA, double *V, int *eV, int m, int n);
34 
35 /* Two-sample two-sided asymptotic distribution */
36 static void
pkstwo(int n,double * x,double tol)37 pkstwo(int n, double *x, double tol)
38 {
39 /* x[1:n] is input and output
40  *
41  * Compute
42  *   \sum_{k=-\infty}^\infty (-1)^k e^{-2 k^2 x^2}
43  *   = 1 + 2 \sum_{k=1}^\infty (-1)^k e^{-2 k^2 x^2}
44  *   = \frac{\sqrt{2\pi}}{x} \sum_{k=1}^\infty \exp(-(2k-1)^2\pi^2/(8x^2))
45  *
46  * See e.g. J. Durbin (1973), Distribution Theory for Tests Based on the
47  * Sample Distribution Function.  SIAM.
48  *
49  * The 'standard' series expansion obviously cannot be used close to 0;
50  * we use the alternative series for x < 1, and a rather crude estimate
51  * of the series remainder term in this case, in particular using that
52  * ue^(-lu^2) \le e^(-lu^2 + u) \le e^(-(l-1)u^2 - u^2+u) \le e^(-(l-1))
53  * provided that u and l are >= 1.
54  *
55  * (But note that for reasonable tolerances, one could simply take 0 as
56  * the value for x < 0.2, and use the standard expansion otherwise.)
57  *
58  */
59     double new, old, s, w, z;
60     int i, k, k_max;
61 
62     k_max = (int) sqrt(2 - log(tol));
63 
64     for(i = 0; i < n; i++) {
65 	if(x[i] < 1) {
66 	    z = - (M_PI_2 * M_PI_4) / (x[i] * x[i]);
67 	    w = log(x[i]);
68 	    s = 0;
69 	    for(k = 1; k < k_max; k += 2) {
70 		s += exp(k * k * z - w);
71 	    }
72 	    x[i] = s / M_1_SQRT_2PI;
73 	}
74 	else {
75 	    z = -2 * x[i] * x[i];
76 	    s = -1;
77 	    k = 1;
78 	    old = 0;
79 	    new = 1;
80 	    while(fabs(old - new) > tol) {
81 		old = new;
82 		new += 2 * s * exp(z * k * k);
83 		s *= -1;
84 		k++;
85 	    }
86 	    x[i] = new;
87 	}
88     }
89 }
90 
91 /* Two-sided two-sample */
psmirnov2x(double * x,int m,int n)92 static double psmirnov2x(double *x, int m, int n)
93 {
94     double md, nd, q, *u, w;
95     int i, j;
96 
97     if(m > n) {
98 	i = n; n = m; m = i;
99     }
100     md = (double) m;
101     nd = (double) n;
102     /*
103        q has 0.5/mn added to ensure that rounding error doesn't
104        turn an equality into an inequality, eg abs(1/2-4/5)>3/10
105 
106     */
107     q = (0.5 + floor(*x * md * nd - 1e-7)) / (md * nd);
108     u = (double *) R_alloc(n + 1, sizeof(double));
109 
110     for(j = 0; j <= n; j++) {
111 	u[j] = ((j / nd) > q) ? 0 : 1;
112     }
113     for(i = 1; i <= m; i++) {
114 	w = (double)(i) / ((double)(i + n));
115 	if((i / md) > q)
116 	    u[0] = 0;
117 	else
118 	    u[0] = w * u[0];
119 	for(j = 1; j <= n; j++) {
120 	    if(fabs(i / md - j / nd) > q)
121 		u[j] = 0;
122 	    else
123 		u[j] = w * u[j] + u[j - 1];
124 	}
125     }
126     return u[n];
127 }
128 
129 static double
K(int n,double d)130 K(int n, double d)
131 {
132     /* Compute Kolmogorov's distribution.
133        Code published in
134 	 George Marsaglia and Wai Wan Tsang and Jingbo Wang (2003),
135 	 "Evaluating Kolmogorov's distribution".
136 	 Journal of Statistical Software, Volume 8, 2003, Issue 18.
137 	 URL: http://www.jstatsoft.org/v08/i18/.
138     */
139 
140    int k, m, i, j, g, eH, eQ;
141    double h, s, *H, *Q;
142 
143    /*
144       The faster right-tail approximation is omitted here.
145       s = d*d*n;
146       if(s > 7.24 || (s > 3.76 && n > 99))
147           return 1-2*exp(-(2.000071+.331/sqrt(n)+1.409/n)*s);
148    */
149    k = (int) (n * d) + 1;
150    m = 2 * k - 1;
151    h = k - n * d;
152    H = (double*) Calloc(m * m, double);
153    Q = (double*) Calloc(m * m, double);
154    for(i = 0; i < m; i++)
155        for(j = 0; j < m; j++)
156 	   if(i - j + 1 < 0)
157 	       H[i * m + j] = 0;
158 	   else
159 	       H[i * m + j] = 1;
160    for(i = 0; i < m; i++) {
161        H[i * m] -= R_pow_di(h, i + 1);
162        H[(m - 1) * m + i] -= R_pow_di(h, (m - i));
163    }
164    H[(m - 1) * m] += ((2 * h - 1 > 0) ? R_pow_di(2 * h - 1, m) : 0);
165    for(i = 0; i < m; i++)
166        for(j = 0; j < m; j++)
167 	   if(i - j + 1 > 0)
168 	       for(g = 1; g <= i - j + 1; g++)
169 		   H[i * m + j] /= g;
170    eH = 0;
171    m_power(H, eH, Q, &eQ, m, n);
172    s = Q[(k - 1) * m + k - 1];
173    for(i = 1; i <= n; i++) {
174        s = s * i / n;
175        if(s < 1e-140) {
176 	   s *= 1e140;
177 	   eQ -= 140;
178        }
179    }
180    s *= R_pow_di(10.0, eQ);
181    Free(H);
182    Free(Q);
183    return(s);
184 }
185 
186 static void
m_multiply(double * A,double * B,double * C,int m)187 m_multiply(double *A, double *B, double *C, int m)
188 {
189     /* Auxiliary routine used by K().
190        Matrix multiplication.
191     */
192     int i, j, k;
193     double s;
194     for(i = 0; i < m; i++)
195 	for(j = 0; j < m; j++) {
196 	    s = 0.;
197 	    for(k = 0; k < m; k++)
198 		s+= A[i * m + k] * B[k * m + j];
199 	    C[i * m + j] = s;
200 	}
201 }
202 
203 static void
m_power(double * A,int eA,double * V,int * eV,int m,int n)204 m_power(double *A, int eA, double *V, int *eV, int m, int n)
205 {
206     /* Auxiliary routine used by K().
207        Matrix power.
208     */
209     double *B;
210     int eB , i;
211 
212     if(n == 1) {
213 	for(i = 0; i < m * m; i++)
214 	    V[i] = A[i];
215 	*eV = eA;
216 	return;
217     }
218     m_power(A, eA, V, eV, m, n / 2);
219     B = (double*) Calloc(m * m, double);
220     m_multiply(V, V, B, m);
221     eB = 2 * (*eV);
222     if((n % 2) == 0) {
223 	for(i = 0; i < m * m; i++)
224 	    V[i] = B[i];
225 	*eV = eB;
226     }
227     else {
228 	m_multiply(A, B, V, m);
229 	*eV = eA + eB;
230     }
231     if(V[(m / 2) * m + (m / 2)] > 1e140) {
232 	for(i = 0; i < m * m; i++)
233 	    V[i] = V[i] * 1e-140;
234 	*eV += 140;
235     }
236     Free(B);
237 }
238 
239 /* Two-sided two-sample */
pSmirnov2x(SEXP statistic,SEXP snx,SEXP sny)240 SEXP pSmirnov2x(SEXP statistic, SEXP snx, SEXP sny)
241 {
242     int nx = asInteger(snx), ny = asInteger(sny);
243     double st = asReal(statistic);
244     return ScalarReal(psmirnov2x(&st, nx, ny));
245 }
246 
247 /* Two-sample two-sided asymptotic distribution */
pKS2(SEXP statistic,SEXP stol)248 SEXP pKS2(SEXP statistic, SEXP stol)
249 {
250     int n = LENGTH(statistic);
251     double tol = asReal(stol);
252     SEXP ans = duplicate(statistic);
253     pkstwo(n, REAL(ans), tol);
254     return ans;
255 }
256 
257 
258 /* The two-sided one-sample 'exact' distribution */
pKolmogorov2x(SEXP statistic,SEXP sn)259 SEXP pKolmogorov2x(SEXP statistic, SEXP sn)
260 {
261     int n = asInteger(sn);
262     double st = asReal(statistic), p;
263     p = K(n, st);
264     return ScalarReal(p);
265 }
266