1 /*************************************************************************\
2 *
3 * Package: ProbDist
4 * File: gofs.c
5 * Environment: ANSI C
6 *
7 * Copyright (c) 2002 Pierre L'Ecuyer, DIRO, Université de Montréal.
8 * e-mail: lecuyer@iro.umontreal.ca
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted without a fee for private, research,
13 * academic, or other non-commercial purposes.
14 * Any use of this software in a commercial environment requires a
15 * written licence from the copyright owner.
16 *
17 * Any changes made to this package must be clearly identified as such.
18 *
19 * In scientific publications which used this software, a reference to it
20 * would be appreciated.
21 *
22 * Redistributions of source code must retain this copyright notice
23 * and the following disclaimer.
24 *
25 * THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
26 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
27 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28 *
29 \*************************************************************************/
30
31 #include "util.h"
32 #include "tables.h"
33 #include "num.h"
34 #include "num2.h"
35
36 #include "gofs.h"
37 #include "fdist.h"
38 #include "wdist.h"
39
40 #include <float.h>
41 #include <math.h>
42 #include <stdio.h>
43
44
45 #define TRACE0(x) printf ("*** " #x " ");
46 #define TRACE1(N, form) printf ("*** " #N " = %"#form " ", N);
47
48
49
50 /*---------------------------- extern variables ---------------------------*/
51
52 double gofs_MinExpected = 10.0;
53
54 double gofs_EpsilonAD = DBL_EPSILON / 2.0;
55
56
57
58 /*---------------------------- module variables ---------------------------*/
59
60 /* Used in discontinuous distributions */
61 static double EpsilonD = 1.0E-15;
62
63
64
65
66
67
68 /*-------------------------------- functions ------------------------------*/
69
70
gofs_ContUnifTransform(double V[],long N,wdist_CFUNC F,double par[],double U[])71 void gofs_ContUnifTransform (double V[], long N, wdist_CFUNC F,
72 double par[], double U[])
73 {
74 long i;
75 for (i = 1; i <= N; i++)
76 U[i] = F (par, V[i]);
77 }
78
79 /*-------------------------------------------------------------------------*/
80
gofs_DiscUnifTransform(double V[],long N,wdist_DFUNC F,fmass_INFO W,double U[])81 void gofs_DiscUnifTransform (double V[], long N, wdist_DFUNC F,
82 fmass_INFO W, double U[])
83 {
84 long i;
85 for (i = 1; i <= N; i++)
86 U[i] = F (W, (long) V[i]);
87 }
88
89 /*-------------------------------------------------------------------------*/
90
gofs_DiffD(double U[],double D[],long N1,long N2,double a,double b)91 void gofs_DiffD (double U[], double D[], long N1, long N2,
92 double a, double b)
93 {
94 long i;
95 D[N1 - 1] = U[N1] - a;
96 for (i = N1; i < N2; i++)
97 D[i] = U[i + 1] - U[i];
98 D[N2] = b - U[N2];
99 }
100
101 /*-------------------------------------------------------------------------*/
102 #ifdef USE_LONGLONG
103
gofs_DiffLL(longlong U[],longlong D[],long N1,long N2,longlong a,longlong b)104 void gofs_DiffLL (longlong U[], longlong D[], long N1, long N2,
105 longlong a, longlong b)
106 {
107 long i;
108 D[N1 - 1] = U[N1] - a;
109 for (i = N1; i < N2; i++)
110 D[i] = U[i + 1] - U[i];
111 D[N2] = b - U[N2];
112 }
113
114 /*-------------------------------------------------------------------------*/
115
gofs_DiffULL(ulonglong U[],ulonglong D[],long N1,long N2,ulonglong a,ulonglong b)116 void gofs_DiffULL (ulonglong U[], ulonglong D[], long N1, long N2,
117 ulonglong a, ulonglong b)
118 {
119 long i;
120 D[N1 - 1] = U[N1] - a;
121 for (i = N1; i < N2; i++)
122 D[i] = U[i + 1] - U[i];
123 D[N2] = b - U[N2];
124 }
125
126 #endif
127 /*-------------------------------------------------------------------------*/
128
gofs_DiffL(long U[],long D[],long N1,long N2,long a,long b)129 void gofs_DiffL (long U[], long D[], long N1, long N2, long a, long b)
130 {
131 long i;
132 D[N1 - 1] = U[N1] - a;
133 for (i = N1; i < N2; i++)
134 D[i] = U[i + 1] - U[i];
135 D[N2] = b - U[N2];
136 }
137
138 /*-------------------------------------------------------------------------*/
139
gofs_IterateSpacings(double V[],double S[],long N)140 void gofs_IterateSpacings (double V[], double S[], long N)
141 {
142 long i;
143 tables_QuickSortD (S, 0, N);
144 for (i = 0; i < N; i++)
145 S[N - i] = (i + 1) * (S[N - i] - S[N - i - 1]);
146 S[0] = (N + 1) * S[0];
147 V[1] = S[0];
148 for (i = 2; i <= N; i++)
149 V[i] = V[i - 1] + S[i - 1];
150 }
151
152 /*-------------------------------------------------------------------------*/
153
gofs_PowerRatios(double U[],long N)154 void gofs_PowerRatios (double U[], long N)
155 {
156 long i;
157 /* Assumes that the U[i] are already sorted in increasing order. */
158 for (i = 1; i < N; i++) {
159 if (U[i + 1] == 0.0 || U[i + 1] == -0.0) {
160 /* util_Warning (1, "gofs_PowerRatios: 0 divisor"); */
161 U[i] = 1.0;
162 } else
163 U[i] = pow (U[i] / U[i + 1], (double) i);
164 }
165 U[N] = pow (U[N], (double) N);
166 tables_QuickSortD (U, 1, N);
167 }
168
169 /*-------------------------------------------------------------------------*/
170
gofs_MergeClasses(double NbExp[],long Loc[],long * smin,long * smax,long * NbClasses)171 void gofs_MergeClasses (double NbExp[], long Loc[],
172 long *smin, long *smax, long *NbClasses)
173 {
174 long s0, j, s;
175 double somme;
176
177 *NbClasses = 0;
178 s = *smin;
179 while (s <= *smax) {
180 /* Merge classes to ensure that the number expected in each class is
181 >= gofs_MinExpected. */
182 if (NbExp[s] < gofs_MinExpected) {
183 s0 = s;
184 somme = NbExp[s];
185 while (somme < gofs_MinExpected && s < *smax) {
186 NbExp[s] = 0.0;
187 ++s;
188 somme += NbExp[s];
189 }
190 NbExp[s] = somme;
191 for (j = s0; j <= s; j++)
192 Loc[j] = s;
193 } else {
194 Loc[s] = s;
195 }
196 ++*NbClasses;
197 ++s;
198 }
199 *smin = Loc[*smin];
200
201 /* Special case: the last class, if NbExp < MinExpected */
202 if (NbExp[*smax] < gofs_MinExpected) {
203 if (s0 > *smin)
204 --s0;
205 NbExp[s0] += NbExp[*smax];
206 NbExp[*smax] = 0.0;
207 --*NbClasses;
208 for (j = s0 + 1; j <= *smax; j++)
209 Loc[j] = s0;
210 *smax = s0;
211 }
212 util_Warning (*NbClasses < 2, "gofs_MergeClasses: NumClasses < 2.\n"
213 " The chi-square test is not done.");
214 /*
215 util_Assert (*NbClasses > 1, "gofs_MergeClasses: NumClasses < 2");
216 */
217 }
218
219
220 /*-------------------------------------------------------------------------*/
221
gofs_WriteClasses(double NbExp[],long Loc[],long smin,long smax,long NbClasses)222 void gofs_WriteClasses (double NbExp[], long Loc[],
223 long smin, long smax, long NbClasses)
224 {
225 /* Writes the groupings of cells before or after a merging that has */
226 /* been done by a previous call to gofs_MergeClasses. */
227 long s, s0;
228 double somme;
229 const double epsilon = 5.0E-16;
230
231 /* Before merging classes or cells */
232 if (NbClasses <= 0) {
233 somme = 0.0;
234 printf ("-----------------------------------------------\n"
235 "Expected numbers per class before merging:\n\n"
236 "Class s NumExpected[s]\n");
237
238 /* Don't print classes for which the expected number < epsilon */
239 /* Instead reset smin */
240 s = smin;
241 while (NbExp[s] < epsilon)
242 s++;
243 if (s > smin) {
244 smin = s;
245 s--;
246 printf ("<= %3ld", s);
247 num_WriteD (NbExp[s], 18, 4, 4);
248 printf ("\n");
249 }
250 /* Reset smax also */
251 s0 = s = smax;
252 while (NbExp[s] < epsilon)
253 s--;
254 if (s < smax)
255 smax = s;
256
257 /* Now print the classes with their expected numbers */
258 for (s = smin; s <= smax; s++) {
259 somme += NbExp[s];
260 printf ("%6ld", s);
261 num_WriteD (NbExp[s], 20, 4, 4);
262 printf ("\n");
263 }
264
265 if (s0 > smax) {
266 s = smax + 1;
267 printf (">= %3ld", s);
268 num_WriteD (NbExp[s], 18, 4, 4);
269 printf ("\n");
270 }
271
272 printf ("\n");
273 printf ("Total No. Expected = %18.2f\n\n", somme);
274 return;
275 }
276
277 /* NbClasses > 0: After merging classes */
278 printf ("-----------------------------------------------\n"
279 "Expected numbers per class after merging:\n"
280 "Number of classes: %4ld\n\n", NbClasses);
281 printf ("Class s NumExpected[s]\n");
282
283 somme = 0.0;
284 for (s = smin; s <= smax; s++) {
285 if (Loc[s] == s) {
286 somme += NbExp[s];
287 printf ("%4ld %18.4f\n", s, NbExp[s]);
288 }
289 }
290 printf ("\nTotal NumExpected = %18.2f\n\n", somme);
291 printf ("The groupings :\n Class s Loc[s]\n");
292 for (s = smin; s <= smax; s++) {
293 if (s == smin)
294 printf ("<= ");
295 else if (s == smax)
296 printf (">= ");
297 else
298 printf (" ");
299 printf ("%4ld %12ld\n", s, Loc[s]);
300 }
301 printf ("\n\n");
302 }
303
304
305 /*-------------------------------------------------------------------------*/
306
307 /*******************************\
308
309 Computing EDF test statistics
310
311 \*******************************/
312
313
gofs_Chi2(double NbExp[],long Count[],long smin,long smax)314 double gofs_Chi2 (double NbExp[], long Count[], long smin, long smax)
315 {
316 double Diff, Khi;
317 long s;
318
319 Khi = 0.0;
320 for (s = smin; s <= smax; s++) {
321 if (NbExp[s] <= 0.0) {
322 util_Assert (Count[s] == 0,
323 "gofs_Chi2: NbExp[s] = 0 and Count[s] > 0");
324 } else {
325 Diff = Count[s] - NbExp[s];
326 Khi += Diff * Diff / NbExp[s];
327 }
328 }
329 return Khi;
330 }
331
332 /*-------------------------------------------------------------------------*/
333
gofs_Chi2Equal(double NbExp,long Count[],long smin,long smax)334 double gofs_Chi2Equal (double NbExp, long Count[], long smin, long smax)
335 {
336 double Diff, Khi;
337 long s;
338 Khi = 0.0;
339 for (s = smin; s <= smax; s++) {
340 Diff = Count[s] - NbExp;
341 Khi += Diff * Diff;
342 }
343 return Khi / NbExp;
344 }
345
346 /*-------------------------------------------------------------------------*/
347
gofs_Scan(double U[],long N,double d)348 long gofs_Scan (double U[], long N, double d)
349 {
350 long m, j = 1, i = 0;
351 double High;
352
353 High = 0.0;
354 m = 1;
355 while (j < N && High < 1.0) {
356 ++i;
357 /* Low = U[i]; */
358 High = U[i] + d;
359 while (j <= N && U[j] < High)
360 ++j;
361 /* j is now the index of the first obs. to the right of High. */
362 if (j - i > m)
363 m = j - i;
364 }
365 /* p-value = fbar_Scan (N, d, m); */
366 return m;
367 }
368
369 /*-------------------------------------------------------------------------*/
370
gofs_CramerMises(double U[],long N)371 double gofs_CramerMises (double U[], long N)
372 {
373 long i;
374 double W, W2;
375
376 if (N <= 0) {
377 util_Warning (TRUE, "gofs_CramerMises: N <= 0");
378 return 0.0;
379 }
380
381 W2 = 1.0 / (12 * N);
382 for (i = 1; i <= N; i++) {
383 W = U[i] - (i - 0.5) / N;
384 W2 += W * W;
385 }
386 return W2;
387 /* p-value = fbar_CramerMises (N, W2); */
388 }
389
390 /*-------------------------------------------------------------------------*/
391
gofs_WatsonG(double U[],long N)392 double gofs_WatsonG (double U[], long N)
393 {
394 long i;
395 double SumZ;
396 double D2;
397 double DP, G;
398 double UnSurN = 1.0 / N;
399
400 if (N <= 0) {
401 util_Warning (TRUE, "gofs_WatsonG: N <= 0");
402 return 0.0;
403 }
404
405 /* degenerate case N = 1 */
406 if (N == 1)
407 return 0.0;
408
409 /* We assume that U is already sorted. */
410 DP = SumZ = 0.0;
411 for (i = 1; i <= N; i++) {
412 D2 = i * UnSurN - U[i];
413 if (D2 > DP)
414 DP = D2;
415 SumZ += U[i];
416 }
417 SumZ = SumZ * UnSurN - 0.5;
418 G = sqrt ((double) N) * (DP + SumZ);
419 return G;
420 /* p-value = fbar_WatsonG (N, G); */
421 }
422
423 /*-------------------------------------------------------------------------*/
424
gofs_WatsonU(double U[],long N)425 double gofs_WatsonU (double U[], long N)
426 {
427 long i;
428 double SumZ, W, W2, U2;
429
430 if (N <= 0) {
431 util_Warning (TRUE, "gofs_WatsonU: N <= 0");
432 return 0.0;
433 }
434
435 /* degenerate case N = 1 */
436 if (N == 1) {
437 return 1.0 / 12.0;
438 }
439
440 SumZ = 0.0;
441 W2 = 1.0 / (12 * N);
442 for (i = 1; i <= N; i++) {
443 SumZ += U[i];
444 W = U[i] - (i - 0.5) / N;
445 W2 += W * W;
446 }
447 SumZ = SumZ / N - 0.5;
448 U2 = W2 - SumZ * SumZ * N;
449 return U2;
450 /* p-value = fbar_WatsonU (N, U2); */
451 }
452
453 /*-------------------------------------------------------------------------*/
454
gofs_AndersonDarling(double V[],long N)455 double gofs_AndersonDarling (double V[], long N)
456 {
457 long i;
458 double U1;
459 double U, A2;
460
461 if (N <= 0) {
462 util_Warning (TRUE, "gofs_AndersonDarling: N <= 0");
463 return 0.0;
464 }
465
466 A2 = 0.0;
467 for (i = 1; i <= N; i++) {
468 U1 = U = V[i];
469 if (U <= gofs_EpsilonAD) {
470 U1 = U = gofs_EpsilonAD;
471 } else if (U >= 1 - gofs_EpsilonAD)
472 U1 = 1.0 - gofs_EpsilonAD;
473 A2 += (2 * i - 1) * log (U) + (1 + 2 * (N - i)) * num2_log1p (-U1);
474 }
475 A2 = -N - A2 / N;
476 return A2;
477 /* p-value = fbar_AndersonDarling (N, A2); */
478 }
479
480 /*-------------------------------------------------------------------------*/
481
gofs_KSJumpOne(double U[],long N,double a,double * DP,double * DM)482 void gofs_KSJumpOne (double U[], long N, double a, double *DP, double *DM)
483 /* Statistics KS+ and KS-. Case with 1 jump at a, near the lower tail of
484 the distribution. */
485 {
486 long j, i;
487 double D2, D1, UnSurN;
488
489 if (N <= 0) {
490 *DP = *DM = 0.0;
491 util_Warning (TRUE, "gofs_KSJumpOne: N <= 0");
492 return;
493 }
494
495 *DP = 0.0;
496 *DM = 0.0;
497 UnSurN = 1.0 / N;
498 j = 1;
499 while (j < N && U[j] <= a + EpsilonD)
500 ++j;
501 for (i = j - 1; i <= N; i++) {
502 if (i >= 1) {
503 D1 = i * UnSurN - U[i];
504 if (D1 > *DP)
505 *DP = D1;
506 }
507 if (i >= j) {
508 D2 = U[i] - (i - 1) * UnSurN;
509 if (D2 > *DM)
510 *DM = D2;
511 }
512 }
513 }
514
515 /*-------------------------------------------------------------------------*/
516
gofs_KS(double U[],long N,double * DP,double * DM,double * D)517 void gofs_KS (double U[], long N, double *DP, double *DM, double *D)
518 {
519 if (N <= 0) {
520 *DP = *DM = *D = 0.0;
521 util_Warning (TRUE, "gofs_KS: N <= 0");
522 return;
523 }
524
525 gofs_KSJumpOne (U, N, 0.0, DP, DM);
526 if (*DM > *DP)
527 *D = *DM;
528 else
529 *D = *DP;
530 /* pp = fbar_KSPlus (N, *DP);
531 pm = fbar_KSPlus (N, *DM);
532 p = fbar_KS (N, *D); */
533 }
534
535 /*-------------------------------------------------------------------------*/
536 #if 0
537
538 void gofs_KSJumpsMany (double X[], int N, wdist_CFUNC F, double W[],
539 double *DP, double *DM, int Detail)
540 {
541 int i;
542 double y, UnSurN, D;
543
544 if (N <= 0) {
545 *DP = *DM = 0.0;
546 util_Warning (TRUE, "gofs_KSJumpsMany: N <= 0");
547 return;
548 }
549
550 util_Assert (N > 0, "gofs_KSJumpsMany: N <= 0");
551 UnSurN = 1.0 / N;
552 *DP = 0.0;
553 *DM = 0.0;
554
555 if (Detail > 0) {
556 printf ("-----------------------------------------------\n"
557 "Values of the distribution F(x+0) :\n\n");
558 }
559 /* Assume that the X[i] are already sorted */
560 for (i = 1; i <= N; i++) {
561 /* Compute KS+ */
562 y = F (W, X[i]);
563 D = i * UnSurN - y;
564 if (D > *DP)
565 *DP = D;
566 if (Detail > 0) {
567 printf ("%14.6f %14.6f\n", X[i], y);
568 }
569 /* Compute KS- */
570 y = F (W, X[i] - EpsilonD);
571 D = y - (i - 1) * UnSurN;
572 if (D > *DM)
573 *DM = D;
574 }
575 if (Detail > 0)
576 printf ("\n\n");
577 }
578 #endif
579