1 /*************************************************************************\
2  *
3  * Package:        ProbDist
4  * File:           gofs.c
5  * Environment:    ANSI C
6  *
7  * Copyright (c) 2002 Pierre L'Ecuyer, DIRO, Université de Montréal.
8  * e-mail: lecuyer@iro.umontreal.ca
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted without a fee for private, research,
13  * academic, or other non-commercial purposes.
14  * Any use of this software in a commercial environment requires a
15  * written licence from the copyright owner.
16  *
17  * Any changes made to this package must be clearly identified as such.
18  *
19  * In scientific publications which used this software, a reference to it
20  * would be appreciated.
21  *
22  * Redistributions of source code must retain this copyright notice
23  * and the following disclaimer.
24  *
25  * THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
26  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
27  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28  *
29 \*************************************************************************/
30 
31 #include "util.h"
32 #include "tables.h"
33 #include "num.h"
34 #include "num2.h"
35 
36 #include "gofs.h"
37 #include "fdist.h"
38 #include "wdist.h"
39 
40 #include <float.h>
41 #include <math.h>
42 #include <stdio.h>
43 
44 
45 #define TRACE0(x) printf ("***   " #x "     ");
46 #define TRACE1(N, form) printf ("***   " #N " = %"#form "     ", N);
47 
48 
49 
50 /*---------------------------- extern variables ---------------------------*/
51 
52 double gofs_MinExpected = 10.0;
53 
54 double gofs_EpsilonAD = DBL_EPSILON / 2.0;
55 
56 
57 
58 /*---------------------------- module variables ---------------------------*/
59 
60 /* Used in discontinuous distributions */
61 static double EpsilonD = 1.0E-15;
62 
63 
64 
65 
66 
67 
68 /*-------------------------------- functions ------------------------------*/
69 
70 
gofs_ContUnifTransform(double V[],long N,wdist_CFUNC F,double par[],double U[])71 void gofs_ContUnifTransform (double V[], long N, wdist_CFUNC F,
72                              double par[], double U[])
73 {
74    long i;
75    for (i = 1; i <= N; i++)
76       U[i] = F (par, V[i]);
77 }
78 
79 /*-------------------------------------------------------------------------*/
80 
gofs_DiscUnifTransform(double V[],long N,wdist_DFUNC F,fmass_INFO W,double U[])81 void gofs_DiscUnifTransform (double V[], long N, wdist_DFUNC F,
82                              fmass_INFO W, double U[])
83 {
84    long i;
85    for (i = 1; i <= N; i++)
86       U[i] = F (W, (long) V[i]);
87 }
88 
89 /*-------------------------------------------------------------------------*/
90 
gofs_DiffD(double U[],double D[],long N1,long N2,double a,double b)91 void gofs_DiffD (double U[], double D[], long N1, long N2,
92                  double a, double b)
93 {
94    long i;
95    D[N1 - 1] = U[N1] - a;
96    for (i = N1; i < N2; i++)
97       D[i] = U[i + 1] - U[i];
98    D[N2] = b - U[N2];
99 }
100 
101 /*-------------------------------------------------------------------------*/
102 #ifdef USE_LONGLONG
103 
gofs_DiffLL(longlong U[],longlong D[],long N1,long N2,longlong a,longlong b)104 void gofs_DiffLL (longlong U[], longlong D[], long N1, long N2,
105                   longlong a, longlong b)
106 {
107    long i;
108    D[N1 - 1] = U[N1] - a;
109    for (i = N1; i < N2; i++)
110       D[i] = U[i + 1] - U[i];
111    D[N2] = b - U[N2];
112 }
113 
114 /*-------------------------------------------------------------------------*/
115 
gofs_DiffULL(ulonglong U[],ulonglong D[],long N1,long N2,ulonglong a,ulonglong b)116 void gofs_DiffULL (ulonglong U[], ulonglong D[], long N1, long N2,
117                    ulonglong a, ulonglong b)
118 {
119    long i;
120    D[N1 - 1] = U[N1] - a;
121    for (i = N1; i < N2; i++)
122       D[i] = U[i + 1] - U[i];
123    D[N2] = b - U[N2];
124 }
125 
126 #endif
127 /*-------------------------------------------------------------------------*/
128 
gofs_DiffL(long U[],long D[],long N1,long N2,long a,long b)129 void gofs_DiffL (long U[], long D[], long N1, long N2, long a, long b)
130 {
131    long i;
132    D[N1 - 1] = U[N1] - a;
133    for (i = N1; i < N2; i++)
134       D[i] = U[i + 1] - U[i];
135    D[N2] = b - U[N2];
136 }
137 
138 /*-------------------------------------------------------------------------*/
139 
gofs_IterateSpacings(double V[],double S[],long N)140 void gofs_IterateSpacings (double V[], double S[], long N)
141 {
142    long i;
143    tables_QuickSortD (S, 0, N);
144    for (i = 0; i < N; i++)
145       S[N - i] = (i + 1) * (S[N - i] - S[N - i - 1]);
146    S[0] = (N + 1) * S[0];
147    V[1] = S[0];
148    for (i = 2; i <= N; i++)
149       V[i] = V[i - 1] + S[i - 1];
150 }
151 
152 /*-------------------------------------------------------------------------*/
153 
gofs_PowerRatios(double U[],long N)154 void gofs_PowerRatios (double U[], long N)
155 {
156    long i;
157    /* Assumes that the U[i] are already sorted in increasing order. */
158    for (i = 1; i < N; i++) {
159       if (U[i + 1] == 0.0 || U[i + 1] == -0.0) {
160          /* util_Warning (1, "gofs_PowerRatios: 0 divisor"); */
161          U[i] = 1.0;
162       } else
163          U[i] = pow (U[i] / U[i + 1], (double) i);
164    }
165    U[N] = pow (U[N], (double) N);
166    tables_QuickSortD (U, 1, N);
167 }
168 
169 /*-------------------------------------------------------------------------*/
170 
gofs_MergeClasses(double NbExp[],long Loc[],long * smin,long * smax,long * NbClasses)171 void gofs_MergeClasses (double NbExp[], long Loc[],
172                         long *smin, long *smax, long *NbClasses)
173 {
174    long s0, j, s;
175    double somme;
176 
177    *NbClasses = 0;
178    s = *smin;
179    while (s <= *smax) {
180       /* Merge classes to ensure that the number expected in each class is
181          >= gofs_MinExpected. */
182       if (NbExp[s] < gofs_MinExpected) {
183          s0 = s;
184          somme = NbExp[s];
185          while (somme < gofs_MinExpected && s < *smax) {
186             NbExp[s] = 0.0;
187             ++s;
188             somme += NbExp[s];
189          }
190          NbExp[s] = somme;
191          for (j = s0; j <= s; j++)
192             Loc[j] = s;
193       } else {
194          Loc[s] = s;
195       }
196       ++*NbClasses;
197       ++s;
198    }
199    *smin = Loc[*smin];
200 
201    /* Special case: the last class, if NbExp < MinExpected */
202    if (NbExp[*smax] < gofs_MinExpected) {
203       if (s0 > *smin)
204          --s0;
205       NbExp[s0] += NbExp[*smax];
206       NbExp[*smax] = 0.0;
207       --*NbClasses;
208       for (j = s0 + 1; j <= *smax; j++)
209          Loc[j] = s0;
210       *smax = s0;
211    }
212    util_Warning (*NbClasses < 2, "gofs_MergeClasses:   NumClasses < 2.\n"
213                                  "   The chi-square test is not done.");
214    /*
215    util_Assert (*NbClasses > 1, "gofs_MergeClasses:   NumClasses < 2");
216    */
217 }
218 
219 
220 /*-------------------------------------------------------------------------*/
221 
gofs_WriteClasses(double NbExp[],long Loc[],long smin,long smax,long NbClasses)222 void gofs_WriteClasses (double NbExp[], long Loc[],
223                         long smin, long smax, long NbClasses)
224 {
225    /* Writes the groupings of cells before or after a merging that has */
226    /* been done by a previous call to gofs_MergeClasses.  */
227    long s, s0;
228    double somme;
229    const double epsilon = 5.0E-16;
230 
231    /* Before merging classes or cells */
232    if (NbClasses <= 0) {
233       somme = 0.0;
234       printf ("-----------------------------------------------\n"
235               "Expected numbers per class before merging:\n\n"
236               "Class s        NumExpected[s]\n");
237 
238       /* Don't print classes for which the expected number < epsilon */
239       /* Instead reset smin */
240       s = smin;
241       while (NbExp[s] < epsilon)
242          s++;
243       if (s > smin) {
244          smin = s;
245          s--;
246          printf ("<= %3ld", s);
247          num_WriteD (NbExp[s], 18, 4, 4);
248          printf ("\n");
249       }
250       /* Reset smax also */
251       s0 = s = smax;
252       while (NbExp[s] < epsilon)
253          s--;
254       if (s < smax)
255          smax = s;
256 
257       /* Now print the classes with their expected numbers */
258       for (s = smin; s <= smax; s++) {
259          somme += NbExp[s];
260          printf ("%6ld", s);
261          num_WriteD (NbExp[s], 20, 4, 4);
262          printf ("\n");
263       }
264 
265       if (s0 > smax) {
266          s = smax + 1;
267          printf (">= %3ld", s);
268          num_WriteD (NbExp[s], 18, 4, 4);
269          printf ("\n");
270       }
271 
272       printf ("\n");
273       printf ("Total No. Expected = %18.2f\n\n", somme);
274       return;
275    }
276 
277    /* NbClasses > 0: After merging classes */
278    printf ("-----------------------------------------------\n"
279            "Expected numbers per class after merging:\n"
280            "Number of classes: %4ld\n\n", NbClasses);
281    printf ("Class s     NumExpected[s]\n");
282 
283    somme = 0.0;
284    for (s = smin; s <= smax; s++) {
285       if (Loc[s] == s) {
286          somme += NbExp[s];
287          printf ("%4ld %18.4f\n", s, NbExp[s]);
288       }
289    }
290    printf ("\nTotal NumExpected = %18.2f\n\n", somme);
291    printf ("The groupings :\n Class s        Loc[s]\n");
292    for (s = smin; s <= smax; s++) {
293       if (s == smin)
294          printf ("<= ");
295       else if (s == smax)
296          printf (">= ");
297       else
298          printf ("   ");
299       printf ("%4ld  %12ld\n", s, Loc[s]);
300    }
301    printf ("\n\n");
302 }
303 
304 
305 /*-------------------------------------------------------------------------*/
306 
307 /*******************************\
308 
309   Computing EDF test statistics
310 
311 \*******************************/
312 
313 
gofs_Chi2(double NbExp[],long Count[],long smin,long smax)314 double gofs_Chi2 (double NbExp[], long Count[], long smin, long smax)
315 {
316    double Diff, Khi;
317    long s;
318 
319    Khi = 0.0;
320    for (s = smin; s <= smax; s++) {
321       if (NbExp[s] <= 0.0) {
322          util_Assert (Count[s] == 0,
323             "gofs_Chi2:   NbExp[s] = 0 and Count[s] > 0");
324       } else {
325          Diff = Count[s] - NbExp[s];
326          Khi += Diff * Diff / NbExp[s];
327       }
328    }
329    return Khi;
330 }
331 
332 /*-------------------------------------------------------------------------*/
333 
gofs_Chi2Equal(double NbExp,long Count[],long smin,long smax)334 double gofs_Chi2Equal (double NbExp, long Count[], long smin, long smax)
335 {
336    double Diff, Khi;
337    long s;
338    Khi = 0.0;
339    for (s = smin; s <= smax; s++) {
340       Diff = Count[s] - NbExp;
341       Khi += Diff * Diff;
342    }
343    return Khi / NbExp;
344 }
345 
346 /*-------------------------------------------------------------------------*/
347 
gofs_Scan(double U[],long N,double d)348 long gofs_Scan (double U[], long N, double d)
349 {
350    long m, j = 1, i = 0;
351    double High;
352 
353    High = 0.0;
354    m = 1;
355    while (j < N && High < 1.0) {
356       ++i;
357       /* Low = U[i]; */
358       High = U[i] + d;
359       while (j <= N && U[j] < High)
360          ++j;
361       /* j is now the index of the first obs. to the right of High. */
362       if (j - i > m)
363          m = j - i;
364    }
365    /* p-value = fbar_Scan (N, d, m); */
366    return m;
367 }
368 
369 /*-------------------------------------------------------------------------*/
370 
gofs_CramerMises(double U[],long N)371 double gofs_CramerMises (double U[], long N)
372 {
373    long i;
374    double W, W2;
375 
376    if (N <= 0) {
377       util_Warning (TRUE, "gofs_CramerMises:   N <= 0");
378       return 0.0;
379    }
380 
381    W2 = 1.0 / (12 * N);
382    for (i = 1; i <= N; i++) {
383       W = U[i] - (i - 0.5) / N;
384       W2 += W * W;
385    }
386    return W2;
387    /* p-value = fbar_CramerMises (N, W2); */
388 }
389 
390 /*-------------------------------------------------------------------------*/
391 
gofs_WatsonG(double U[],long N)392 double gofs_WatsonG (double U[], long N)
393 {
394    long i;
395    double SumZ;
396    double D2;
397    double DP, G;
398    double UnSurN = 1.0 / N;
399 
400    if (N <= 0) {
401       util_Warning (TRUE, "gofs_WatsonG:   N <= 0");
402       return 0.0;
403    }
404 
405    /* degenerate case N = 1 */
406    if (N == 1)
407       return 0.0;
408 
409    /* We assume that U is already sorted.  */
410    DP = SumZ = 0.0;
411    for (i = 1; i <= N; i++) {
412       D2 = i * UnSurN - U[i];
413       if (D2 > DP)
414          DP = D2;
415       SumZ += U[i];
416    }
417    SumZ = SumZ * UnSurN - 0.5;
418    G = sqrt ((double) N) * (DP + SumZ);
419    return G;
420    /* p-value = fbar_WatsonG (N, G); */
421 }
422 
423 /*-------------------------------------------------------------------------*/
424 
gofs_WatsonU(double U[],long N)425 double gofs_WatsonU (double U[], long N)
426 {
427    long i;
428    double SumZ, W, W2, U2;
429 
430    if (N <= 0) {
431       util_Warning (TRUE, "gofs_WatsonU:   N <= 0");
432       return 0.0;
433    }
434 
435    /* degenerate case N = 1 */
436    if (N == 1) {
437       return 1.0 / 12.0;
438    }
439 
440    SumZ = 0.0;
441    W2 = 1.0 / (12 * N);
442    for (i = 1; i <= N; i++) {
443       SumZ += U[i];
444       W = U[i] - (i - 0.5) / N;
445       W2 += W * W;
446    }
447    SumZ = SumZ / N - 0.5;
448    U2 = W2 - SumZ * SumZ * N;
449    return U2;
450    /* p-value = fbar_WatsonU (N, U2); */
451 }
452 
453 /*-------------------------------------------------------------------------*/
454 
gofs_AndersonDarling(double V[],long N)455 double gofs_AndersonDarling (double V[], long N)
456 {
457    long i;
458    double U1;
459    double U, A2;
460 
461    if (N <= 0) {
462       util_Warning (TRUE, "gofs_AndersonDarling:   N <= 0");
463       return 0.0;
464    }
465 
466    A2 = 0.0;
467    for (i = 1; i <= N; i++) {
468       U1 = U = V[i];
469       if (U <= gofs_EpsilonAD) {
470          U1 = U = gofs_EpsilonAD;
471       } else if (U >= 1 - gofs_EpsilonAD)
472          U1 = 1.0 - gofs_EpsilonAD;
473       A2 += (2 * i - 1) * log (U) + (1 + 2 * (N - i)) * num2_log1p (-U1);
474    }
475    A2 = -N - A2 / N;
476    return A2;
477    /* p-value = fbar_AndersonDarling (N, A2); */
478 }
479 
480 /*-------------------------------------------------------------------------*/
481 
gofs_KSJumpOne(double U[],long N,double a,double * DP,double * DM)482 void gofs_KSJumpOne (double U[], long N, double a, double *DP, double *DM)
483    /* Statistics KS+ and KS-. Case with 1 jump at a, near the lower tail of
484       the distribution. */
485 {
486    long j, i;
487    double D2, D1, UnSurN;
488 
489    if (N <= 0) {
490       *DP = *DM = 0.0;
491       util_Warning (TRUE, "gofs_KSJumpOne:   N <= 0");
492       return;
493    }
494 
495    *DP = 0.0;
496    *DM = 0.0;
497    UnSurN = 1.0 / N;
498    j = 1;
499    while (j < N && U[j] <= a + EpsilonD)
500       ++j;
501    for (i = j - 1; i <= N; i++) {
502       if (i >= 1) {
503          D1 = i * UnSurN - U[i];
504          if (D1 > *DP)
505             *DP = D1;
506       }
507       if (i >= j) {
508          D2 = U[i] - (i - 1) * UnSurN;
509          if (D2 > *DM)
510             *DM = D2;
511       }
512    }
513 }
514 
515 /*-------------------------------------------------------------------------*/
516 
gofs_KS(double U[],long N,double * DP,double * DM,double * D)517 void gofs_KS (double U[], long N, double *DP, double *DM, double *D)
518 {
519    if (N <= 0) {
520       *DP = *DM = *D = 0.0;
521       util_Warning (TRUE, "gofs_KS:   N <= 0");
522       return;
523    }
524 
525    gofs_KSJumpOne (U, N, 0.0, DP, DM);
526    if (*DM > *DP)
527       *D = *DM;
528    else
529       *D = *DP;
530    /*   pp = fbar_KSPlus (N, *DP);
531         pm = fbar_KSPlus (N, *DM);
532         p  = fbar_KS (N, *D);      */
533 }
534 
535 /*-------------------------------------------------------------------------*/
536 #if 0
537 
538 void gofs_KSJumpsMany (double X[], int N, wdist_CFUNC F, double W[],
539                        double *DP, double *DM, int Detail)
540 {
541    int i;
542    double y, UnSurN, D;
543 
544    if (N <= 0) {
545       *DP = *DM = 0.0;
546       util_Warning (TRUE, "gofs_KSJumpsMany:   N <= 0");
547       return;
548    }
549 
550    util_Assert (N > 0, "gofs_KSJumpsMany:   N <= 0");
551    UnSurN = 1.0 / N;
552    *DP = 0.0;
553    *DM = 0.0;
554 
555    if (Detail > 0) {
556       printf ("-----------------------------------------------\n"
557               "Values of the distribution F(x+0) :\n\n");
558    }
559    /* Assume that the X[i] are already sorted */
560    for (i = 1; i <= N; i++) {
561       /* Compute KS+ */
562       y = F (W, X[i]);
563       D = i * UnSurN - y;
564       if (D > *DP)
565          *DP = D;
566       if (Detail > 0) {
567          printf ("%14.6f  %14.6f\n", X[i], y);
568       }
569       /* Compute KS- */
570       y = F (W, X[i] - EpsilonD);
571       D = y - (i - 1) * UnSurN;
572       if (D > *DM)
573          *DM = D;
574    }
575    if (Detail > 0)
576       printf ("\n\n");
577 }
578 #endif
579