1 /*************************************************************************\
2  *
3  * Package:        TestU01
4  * File:           scomp.c
5  * Environment:    ANSI C
6  *
7  * Copyright (c) 2002 Pierre L'Ecuyer, DIRO, Université de Montréal.
8  * e-mail: lecuyer@iro.umontreal.ca
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted without a fee for private, research,
13  * academic, or other non-commercial purposes.
14  * Any use of this software in a commercial environment requires a
15  * written licence from the copyright owner.
16  *
17  * Any changes made to this package must be clearly identified as such.
18  *
19  * In scientific publications which used this software, a reference to it
20  * would be appreciated.
21  *
22  * Redistributions of source code must retain this copyright notice
23  * and the following disclaimer.
24  *
25  * THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
26  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
27  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28  *
29 \*************************************************************************/
30 
31 #include "util.h"
32 #include "chrono.h"
33 #include "num.h"
34 #include "tables.h"
35 
36 #include "scomp.h"
37 #include "sres.h"
38 #include "swrite.h"
39 #include "unif01.h"
40 
41 #include "fbar.h"
42 #include "wdist.h"
43 #include "gofw.h"
44 #include "gofs.h"
45 #include "statcoll.h"
46 
47 #include <math.h>
48 #include <float.h>
49 #include <stdlib.h>
50 
51 
52 #define LENGTH 100
53 
54 /* Empirical Mean for Lempel-Ziv test for 2^3 <= n <= 2^28. Obtained by
55    simulation with N = 1000  */
56 static const double LZMu[] = {
57    0.0,   0.0,   0.0,   4.44,   7.64,
58    12.5,   20.8,   34.8,   58.9,   101.1,
59    176.0,   310.0,   551.9,   992.3,   1799.,
60    3286.2,   6041.5,   11171.5,   20761.8,   38760.4,
61    72654.,   136677.,   257949.,   488257.,   926658.,
62    1762965.,   3361490.,   6422497.,   12293930.
63 };
64 
65 /* Empirical Standard Deviation for Lempel-Ziv test for 2^3 <= n <= 2^28 */
66 static const double LZSigma[] = {
67    0.0,   0.0,   0.0,   0.49,   0.51,
68    0.62,   0.75,   0.78,   0.86,   0.94,
69    1.03,   1.19,   1.43,   1.68,   2.09,
70    2.46,   3.36,   4.2,   5.4,   6.8,
71    9.1,   10.9,   14.7,   19.1,   25.2,
72    33.5,   44.546,   58.194,   75.513
73 };
74 
75 
76 
77 /*--------------------------------- Types ---------------------------------*/
78 
79 /* Bit trie used in Lempel-Ziv test. If left != NULL, this means a 0 bit.
80    If right != NULL, this means a 1 bit. The word is the sequence obtained
81    by following the tree until a NULL pointer is met. */
82 
83 struct BitTrie_t {
84    struct BitTrie_t *left;
85    struct BitTrie_t *right;
86 };
87 typedef struct BitTrie_t BitTrie_t;
88 
89 
90 
91 
92 /*-------------------------------- Functions ------------------------------*/
93 
DeleteBitTrie(BitTrie_t * tree)94 static void DeleteBitTrie (BitTrie_t *tree)
95 {
96    if (tree == NULL)
97       return;
98    DeleteBitTrie (tree->left);
99    DeleteBitTrie (tree->right);
100    util_Free (tree);
101 }
102 
103 
104 /*=========================================================================*/
105 
106 
InitRes(scomp_Res * res,long N,int jmax,int tmax)107 static void InitRes (
108    scomp_Res *res,            /* Results holder */
109    long N,                    /* Number of replications */
110    int jmax,                  /* Max class index for size of jumps */
111    int tmax                   /* Max class index for linear complexity */
112 )
113 /*
114  * Initializes the scomp_Res structure
115  */
116 {
117    sres_InitBasic (res->JumpNum, N,
118       "scomp_LinearComp:   Number of Jumps");
119    sres_InitChi2 (res->JumpSize, N, jmax,
120       "scomp_LinearComp:   Size of Jumps");
121    sres_InitChi2 (res->LinComp, N, tmax,
122       "scomp_LinearComp:   Linear Complexity");
123 }
124 
125 
126 /*-------------------------------------------------------------------------*/
127 
scomp_CreateRes(void)128 scomp_Res * scomp_CreateRes (void)
129 {
130    scomp_Res *res;
131    res = util_Malloc (sizeof (scomp_Res));
132    res->JumpNum = sres_CreateBasic ();
133    res->JumpSize = sres_CreateChi2 ();
134    res->LinComp = sres_CreateChi2 ();
135    return res;
136 }
137 
138 
139 /*-------------------------------------------------------------------------*/
140 
scomp_DeleteRes(scomp_Res * res)141 void scomp_DeleteRes (scomp_Res *res)
142 {
143    if (res == NULL)
144       return;
145    sres_DeleteBasic (res->JumpNum);
146    sres_DeleteChi2 (res->JumpSize);
147    sres_DeleteChi2 (res->LinComp);
148    util_Free (res);
149 }
150 
151 
152 /*=========================================================================*/
153 
WriteDataJumps(unif01_Gen * gen,char * TestName,long N,long n,int r,int s,double muComp,double mu,double sigma)154 static void WriteDataJumps (unif01_Gen *gen, char *TestName, long N, long n,
155    int r, int s, double muComp, double mu, double sigma)
156 {
157    swrite_Head (gen, TestName, N, n, r);
158    printf (",    s = %1d\n", s);
159    if (swrite_Parameters) {
160       printf ("\n      muComp = ");
161       num_WriteD (muComp, 12, 4, 2);
162       printf ("\n      Mu     = ");
163       num_WriteD (mu, 12, 4, 2);
164       printf ("\n      Sigma  = ");
165       num_WriteD (sigma, 12, 4, 2);
166    }
167    printf ("\n\n");
168 }
169 
170 
171 /*-------------------------------------------------------------------------*/
172 
BerlekampMassey(scomp_Res * res,long n,double * pComp,double * pNumJ,int * Bits,int * Polyb,int * Polyc,int * PolycOld)173 static void BerlekampMassey (
174    scomp_Res *res,
175    long n,                  /* Number of bits */
176    double *pComp,           /* Linear complexity */
177    double *pNumJ,           /* Number of jumps */
178    int *Bits,
179    int *Polyb,
180    int *Polyc,
181    int *PolycOld
182    )
183 /*
184  * Berlekamp-Massey algorithm to calculate the linear complexity.
185  */
186 {
187    int b;
188    long Loc;
189    long i;
190    long m;
191    long k;
192    long L;                  /* Linear complexity */
193    long NumJ;               /* Number of jumps */
194    sres_Chi2 *resl = res->JumpSize;
195 
196    for (k = 0; k <= resl->jmax; k++)
197       resl->Count[k] = 0;
198 
199    Polyc[0] = 1;
200    Polyb[0] = 1;
201    L = 0;
202    NumJ = 0;
203    k = 0;
204    m = -1;
205 
206    while (k < n) {
207       /* Return the value of the current polynomial to see if it can
208          generate the next bit */
209       b = 0;
210       for (i = 1; i <= L; i++)
211          /* b ^= Polyc[i] * Bits[k + 1 - i]; */
212          b = (b + Polyc[i] * Bits[k + 1 - i]) & 1;
213 
214       if (Bits[k + 1] != b) {
215          /* Update c(x)_old and c(x) */
216          for (i = 0; i <= L; i++)
217             PolycOld[i] = Polyc[i];
218 
219          for (i = 0; i <= L; i++) {
220             if (Polyb[i] == 1)
221                Polyc[k - m + i] = (++Polyc[k - m + i]) & 1;
222          }
223          if (2 * L <= k) {
224             L = k + 1 - L;
225             NumJ++;
226             Loc = labs (k + 1 - 2 * L);
227             if (Loc <= resl->jmax)
228                ++resl->Count[Loc];
229             else
230                ++resl->Count[resl->jmax];
231             /* Update B */
232             for (i = 0; i <= L; i++)
233                Polyb[i] = PolycOld[i];
234             m = k;
235          }
236       }
237       ++k;
238    }
239    *pNumJ = NumJ;
240    *pComp = L;
241 }
242 
243 
244 /*=========================================================================*/
245 
scomp_LinearComp(unif01_Gen * gen,scomp_Res * res,long N,long n,int r,int s)246 void scomp_LinearComp (unif01_Gen *gen, scomp_Res *res,
247                        long N, long n, int r, int s)
248 {
249    const double epsilon = 1.0E-10;
250    const int tt = 1 - 0.5 * num_Log2 (3 * epsilon); /* Dimension */
251    const long K0 = n/s;
252    long i, Seq;
253    int j, k;
254    int M0;
255    double NumJ;                   /* Number of Jumps */
256    double sigma, mu;              /* Parameters of number of jumps */
257    double comp;                   /* Linear complexity */
258    double muComp;                 /* Mean of linear complexity */
259    unsigned long Nombre;          /* Random number */
260    int Parite;
261    double *Prob;
262    long *Loca;
263    long tmin, tmax, NbClasses;
264    double X2;
265    double temp;
266    int *Bits;                     /* 4 Arrays of bits */
267    int *Polyb;
268    int *Polyc;
269    int *PolycOld;
270    double Param[1];
271    char str[LENGTH + 1];
272    lebool localRes = FALSE;
273    chrono_Chrono *Timer;
274    char *TestName = "scomp_LinearComp test";
275    sres_Basic *resJN;
276    sres_Chi2 *resJL;
277    sres_Chi2 *resLC;
278    lebool JL_OK = TRUE;          /* If TRUE do the JL test, otherwise not */
279    lebool LC_OK = FALSE;         /* If TRUE do the LC test, otherwise not */
280 
281    Timer = chrono_Create ();
282    n = K0 * s;
283    Parite = n & 1;
284    if (n >= DBL_MAX_EXP)
285       temp = 0.0;
286    else
287       temp = pow (2.0, -(double) n);
288 
289    mu = n / 4.0 + (4 + Parite) / 12.0 - temp / 3.0;
290    sigma = n / 8.0 - (2 - Parite)/(9.0 - Parite) + n * temp / 6.0
291            + (6 + Parite) * temp / 18.0 - temp * temp / 9.0;
292    sigma = sqrt (sigma);
293    muComp = n / 2.0 + (4 + Parite) / 18.0;
294    M0 = num_Log2 (mu / gofs_MinExpected);
295    if (M0 < 2) {
296       /* 0 degree of freedom for the chi2, do not do the test JL. */
297       JL_OK = FALSE;
298    }
299 
300    if (swrite_Basic)
301       WriteDataJumps (gen, TestName, N, n, r, s, muComp, mu, sigma);
302 
303    /* util_Assert (M0 > 1, "scomp_LinearComp:   n*s is too small"); */
304    util_Assert (M0 <= num_MaxTwoExp, "scomp_LinearComp:   M0 > num_MaxTwoExp");
305 
306    Prob = util_Calloc (1 + (size_t) tt, sizeof (double));
307    Bits = util_Calloc ((size_t) n + 1, sizeof (int));
308    Polyb = util_Calloc ((size_t) n + 1, sizeof (int));
309    Polyc = util_Calloc ((size_t) n + 1, sizeof (int));
310    PolycOld = util_Calloc ((size_t) n + 1, sizeof (int));
311 
312    if (res == NULL) {
313       localRes = TRUE;
314       res = scomp_CreateRes ();
315    }
316    M0 = util_Max (M0, 1);
317    InitRes (res, N, M0, tt);
318    resJN = res->JumpNum;
319    resJL = res->JumpSize;
320    resLC = res->LinComp;
321    Loca = resLC->Loc;
322 
323    if (N > 2.0 * gofs_MinExpected) {
324       /* Compute the expected probabilities for the linear complexity. */
325       /* We put in Prob[k] the probabilities for i = k and i = -k of   */
326       /* the statistic defined in the NIST document 800-22, p. 86. */
327       temp = Prob[0] = 0.5;
328       for (k = 1; k < tt; k++) {
329 	 Prob[k] = 1.5 * pow (4.0, -(double) k);
330 	 temp += Prob[k];
331       }
332       Prob[tt] = 1.0 - temp;
333       for (k = 0; k <= tt; k++) {
334 	 resLC->Count[k] = 0;
335 	 resLC->NbExp[k] = N * Prob[k];
336       }
337       tmin = 0;
338       tmax = tt;
339       if (swrite_Classes) {
340 	 printf ("Classes for the linear complexity:\n");
341 	 gofs_WriteClasses (resLC->NbExp, Loca, tmin, tmax, 0);
342       }
343       gofs_MergeClasses (resLC->NbExp, Loca, &tmin, &tmax, &NbClasses);
344       resLC->jmax = tmax;
345       resLC->jmin = tmin;
346       resLC->degFree = NbClasses - 1;
347       if (NbClasses < 2) {
348 	 /* 0 degree of freedom for the chi2, do not do the test LC. */
349 	 LC_OK = FALSE;
350       } else
351 	 LC_OK = TRUE;
352    }
353 
354    statcoll_SetDesc (resJN->sVal1,
355       "The number of jumps: the N statistic values (a standard normal):");
356    sprintf (str, "The jumps size: the N statistic values (a ChiSquare"
357                  " with %1d degrees of freedom):", M0 - 1);
358    statcoll_SetDesc (resJL->sVal1, str);
359 
360    for (Seq = 1; Seq <= N; Seq++) {
361       for (i = 0; i < K0; i++) {
362          Nombre = unif01_StripB (gen, r, s);
363          for (j = s; j >= 1; j--) {
364             Bits[s * i + j] = Nombre & 1;
365             Nombre >>= 1;
366          }
367       }
368 
369       BerlekampMassey (res, n, &comp, &NumJ, Bits, Polyb, Polyc, PolycOld);
370 
371       /* Value of the statistic for the linear complexity */
372       if (LC_OK) {
373 	 comp = comp - muComp;
374 	 if (Parite)
375 	    comp = -comp;
376 	 comp += 2.0 / 9.0;
377          /* comp is now an integer: truncate correctly and avoid off-by-1
378             error because of small floating-point inaccuracies. */
379 	 if (comp >= 0.0)
380 	    k = comp + 0.5;
381 	 else
382 	    k = comp - 0.5;
383 	 if (k < 0)
384 	    k = -k;
385 	 if (k >= tt)
386 	    ++resLC->Count[Loca[tt]];
387 	 else
388 	    ++resLC->Count[Loca[k]];
389       }
390 
391       /* Value of the normal statistic for the number of jumps */
392       statcoll_AddObs (resJN->sVal1, (NumJ - mu) / sigma);
393 
394       /* Value of the statistic for the size of the jumps */
395       if (JL_OK) {
396 	 for (k = 1; k < M0; k++) {
397 	    resJL->NbExp[k] = NumJ / num_TwoExp[k];
398 	    resJL->Loc[k] = k;
399 	 }
400 	 resJL->NbExp[M0] = NumJ / num_TwoExp[M0 - 1];
401 	 resJL->Loc[M0] = M0;
402 	 resJL->jmax = M0;
403 	 resJL->jmin = 1;
404 	 resJL->degFree = M0 - 1;
405 
406 	 X2 = gofs_Chi2 (resJL->NbExp, resJL->Count, 1, M0);
407 	 statcoll_AddObs (resJL->sVal1, X2);
408 	 if (swrite_Classes) {
409 	    printf ("\n\nClasses for the size of the jumps:\n");
410 	    gofs_WriteClasses (resJL->NbExp, (long *) NULL, 1, M0, 0);
411 	 }
412          if (swrite_Counters)
413             tables_WriteTabL (resJL->Count, 1, M0, 5, 10,
414                 "Size of the jumps:   observed numbers");
415       }
416    }
417 
418    gofw_ActiveTests2 (resJN->sVal1->V, resJN->pVal1->V, N, wdist_Normal,
419       (double *) NULL, resJN->sVal2, resJN->pVal2);
420    resJN->pVal1->NObs = N;
421    sres_GetNormalSumStat (resJN);
422 
423    if (JL_OK) {
424       Param[0] = M0 - 1;
425       gofw_ActiveTests2 (resJL->sVal1->V, resJL->pVal1->V, N,
426          wdist_ChiSquare, Param, resJL->sVal2, resJL->pVal2);
427       resJL->pVal1->NObs = N;
428       sres_GetChi2SumStat (resJL);
429    }
430 
431    if (LC_OK) {
432       X2 = gofs_Chi2 (resLC->NbExp, resLC->Count, tmin, tmax);
433       resLC->sVal2[gofw_Mean] = X2;
434       resLC->pVal2[gofw_Mean] = fbar_ChiSquare2 (NbClasses - 1, 8, X2);
435    }
436 
437    if (swrite_Basic) {
438       if (JL_OK) {
439 	 printf ("\n-----------------------------------------------\n");
440 	 if (N == 1) {
441             printf ("Number of degrees of freedom          : %4ld\n",
442                     resJL->degFree);
443 	    printf ("Chi2 statistic for size of jumps      :");
444 	    gofw_Writep2 (resJL->sVal2[gofw_Mean], resJL->pVal2[gofw_Mean]);
445 	 } else {
446 	    printf ("Test results for the size of jumps:\n");
447 	    gofw_WriteActiveTests0 (N, resJL->sVal2, resJL->pVal2);
448             swrite_Chi2SumTest (N, resJL);
449 	 }
450 	 if (swrite_Collectors)
451 	    statcoll_Write (resJL->sVal1, 5, 14, 4, 3);
452       }
453 
454       printf ("\n-----------------------------------------------\n");
455       if (N == 1) {
456          printf ("Normal statistic for number of jumps  :");
457          gofw_Writep2 (resJN->sVal2[gofw_Mean], resJN->pVal2[gofw_Mean]);
458       } else {
459          printf ("Test results for the number of jumps:\n");
460          gofw_WriteActiveTests0 (N, resJN->sVal2, resJN->pVal2);
461          swrite_NormalSumTest (N, resJN);
462       }
463       if (swrite_Collectors)
464          statcoll_Write (resJN->sVal1, 5, 14, 4, 3);
465 
466       if (LC_OK) {
467          printf ("\n-----------------------------------------------\n");
468          printf ("Test results for the linear complexity:\n\n");
469          printf ("Number of degrees of freedom          : %4ld\n",
470                  resLC->degFree);
471          printf ("Chi2 statistic on the N replications  :");
472          gofw_Writep2 (resLC->sVal2[gofw_Mean], resLC->pVal2[gofw_Mean]);
473          if (swrite_Classes)
474 	    gofs_WriteClasses (resLC->NbExp, Loca, tmin, tmax, NbClasses);
475          if (swrite_Counters)
476             tables_WriteTabL (resLC->Count, tmin, tmax, 5, 10,
477                "Linear Complexity:   observed numbers");
478       }
479 
480       printf ("\n\n");
481       swrite_Final (gen, Timer);
482    }
483 
484    util_Free (Prob);
485    util_Free (Bits);
486    util_Free (Polyb);
487    util_Free (Polyc);
488    util_Free (PolycOld);
489    if (localRes)
490       scomp_DeleteRes (res);
491    chrono_Delete (Timer);
492 }
493 
494 
495 /*=========================================================================*/
496 
WriteDataLZ(unif01_Gen * gen,char * Test,long N,int k,int r,int s)497 static void WriteDataLZ (
498    unif01_Gen *gen,      /* generator */
499    char *Test,           /* Test name */
500    long N,               /* Number of replications */
501    int k,                /* Sample size n = 2^k */
502    int r,                /* r first bits of each random number dropped */
503    int s                 /* s bits of each random number used */
504 )
505 {
506    long n;
507    n = num_TwoExp[k];
508    swrite_Head (gen, Test, N, n, r);
509    printf (",   s = %4d,   k = %4d\n\n", s, k);
510 }
511 
512 
513 /*-------------------------------------------------------------------------*/
514 
LZ78(unif01_Gen * gen,long n,int r,int s)515 static long LZ78 (unif01_Gen * gen, long n, int r, int s)
516 /*
517  * The parameters are the same as in scomp_LempelZiv. The trie contains
518  * a left (right) branch if a word with a 0 (1) bit after the prefix has
519  * been seen before. We descend one level in the trie with each bit until
520  * a leaf is met. Add a branch for a new word, and restart at root.
521  */
522 {
523    const unsigned long kMAX = 1UL << (s - 1);
524    unsigned long Y, k;
525    long i;                        /* Count the number of bits overall */
526    long W;                        /* Count the number of words */
527    lebool done = FALSE;          /* Start a new word */
528    BitTrie_t *trie, *root;
529 
530    W = i = 0;
531    trie = root = util_Malloc (sizeof (BitTrie_t));
532    trie->left = trie->right = NULL;
533    Y = unif01_StripB (gen, r, s);
534    k = kMAX;
535 
536    while (i < n) {
537       /* Start a new word: match it as far as possible in the trie */
538       done = FALSE;
539       trie = root;
540       while (!done) {
541          if ((Y & k) == 0) {      /* Bit 0 */
542             if (trie->left) {
543 	       /* We have seen it before: descend in branch */
544                trie = trie->left;
545             } else {
546 	       /* A leaf: this is a new word */
547                W++;
548                done = TRUE;
549                trie->left = util_Malloc (sizeof (BitTrie_t));
550                trie = trie->left;
551                trie->left = trie->right = NULL;
552             }
553 
554          } else {                 /* Bit 1 */
555             if (trie->right) {
556                trie = trie->right;
557             } else {
558                W++;
559                done = TRUE;
560                trie->right = util_Malloc (sizeof (BitTrie_t));
561                trie = trie->right;
562                trie->left = trie->right = NULL;
563             }
564          }
565          i++;
566          if (i >= n) {
567             done = TRUE;
568             if ((trie->left != NULL) || (trie->right != NULL))
569                W++;
570             break;
571 	 }
572          k >>= 1;
573          if (k == 0) {
574 	    /* Have used the s bits in the number; generate a new number */
575             Y = unif01_StripB (gen, r, s);
576             k = kMAX;
577          }
578       }
579    }
580    DeleteBitTrie (root);
581    return W;
582 }
583 
584 
585 /*-------------------------------------------------------------------------*/
586 
scomp_LempelZiv(unif01_Gen * gen,sres_Basic * res,long N,int t,int r,int s)587 void scomp_LempelZiv (unif01_Gen *gen, sres_Basic *res,
588    long N, int t, int r, int s)
589 {
590    long Seq, n;
591    double X;
592    /*   const double lg_n = num_Log2 ((double) n); */
593    long W;
594    lebool localRes = FALSE;
595    chrono_Chrono *Timer;
596    char *TestName = "scomp_LempelZiv test";
597 
598    Timer = chrono_Create ();
599    if (swrite_Basic)
600       WriteDataLZ (gen, TestName, N, t, r, s);
601    util_Assert (r + s <= 32, "scomp_LempelZiv:   r + s > 32");
602    util_Assert (t <= 28, "scomp_LempelZiv:   k > 28");
603    if (res == NULL) {
604       localRes = TRUE;
605       res = sres_CreateBasic ();
606    }
607    n = num_TwoExp[t];
608    sres_InitBasic (res, N, "scomp_LempelZiv");
609    statcoll_SetDesc (res->sVal1, "sVal1:   a standard normal");
610 
611    for (Seq = 1; Seq <= N; Seq++) {
612       W = LZ78 (gen, n, r, s);
613       /*  X = (W - n / lg_n) / sqrt (0.266 * n / (lg_n * lg_n * lg_n)); */
614       X = (W - LZMu[t]) / LZSigma[t];
615       statcoll_AddObs (res->sVal1, X);
616       if (swrite_Counters) {
617          printf ("%12ld ", W);
618          if (Seq % 5 == 0)
619             printf ("\n");
620          if (Seq >= N)
621             printf ("\n\n");
622       }
623    }
624 
625    gofw_ActiveTests2 (res->sVal1->V, res->pVal1->V, N, wdist_Normal,
626       (double *) NULL, res->sVal2, res->pVal2);
627    res->pVal1->NObs = N;
628    sres_GetNormalSumStat (res);
629 
630    if (swrite_Collectors)
631       statcoll_Write (res->sVal1, 5, 12, 4, 3);
632 
633    if (swrite_Basic) {
634       gofw_WriteActiveTests2 (N, res->sVal2, res->pVal2,
635          "Normal statistic                      :");
636       swrite_NormalSumTest (N, res);
637       swrite_Final (gen, Timer);
638    }
639    if (localRes)
640       sres_DeleteBasic (res);
641    chrono_Delete (Timer);
642 }
643