1 /*************************************************************************\
2 *
3 * Package: TestU01
4 * File: scomp.c
5 * Environment: ANSI C
6 *
7 * Copyright (c) 2002 Pierre L'Ecuyer, DIRO, Université de Montréal.
8 * e-mail: lecuyer@iro.umontreal.ca
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted without a fee for private, research,
13 * academic, or other non-commercial purposes.
14 * Any use of this software in a commercial environment requires a
15 * written licence from the copyright owner.
16 *
17 * Any changes made to this package must be clearly identified as such.
18 *
19 * In scientific publications which used this software, a reference to it
20 * would be appreciated.
21 *
22 * Redistributions of source code must retain this copyright notice
23 * and the following disclaimer.
24 *
25 * THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
26 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
27 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
28 *
29 \*************************************************************************/
30
31 #include "util.h"
32 #include "chrono.h"
33 #include "num.h"
34 #include "tables.h"
35
36 #include "scomp.h"
37 #include "sres.h"
38 #include "swrite.h"
39 #include "unif01.h"
40
41 #include "fbar.h"
42 #include "wdist.h"
43 #include "gofw.h"
44 #include "gofs.h"
45 #include "statcoll.h"
46
47 #include <math.h>
48 #include <float.h>
49 #include <stdlib.h>
50
51
52 #define LENGTH 100
53
54 /* Empirical Mean for Lempel-Ziv test for 2^3 <= n <= 2^28. Obtained by
55 simulation with N = 1000 */
56 static const double LZMu[] = {
57 0.0, 0.0, 0.0, 4.44, 7.64,
58 12.5, 20.8, 34.8, 58.9, 101.1,
59 176.0, 310.0, 551.9, 992.3, 1799.,
60 3286.2, 6041.5, 11171.5, 20761.8, 38760.4,
61 72654., 136677., 257949., 488257., 926658.,
62 1762965., 3361490., 6422497., 12293930.
63 };
64
65 /* Empirical Standard Deviation for Lempel-Ziv test for 2^3 <= n <= 2^28 */
66 static const double LZSigma[] = {
67 0.0, 0.0, 0.0, 0.49, 0.51,
68 0.62, 0.75, 0.78, 0.86, 0.94,
69 1.03, 1.19, 1.43, 1.68, 2.09,
70 2.46, 3.36, 4.2, 5.4, 6.8,
71 9.1, 10.9, 14.7, 19.1, 25.2,
72 33.5, 44.546, 58.194, 75.513
73 };
74
75
76
77 /*--------------------------------- Types ---------------------------------*/
78
79 /* Bit trie used in Lempel-Ziv test. If left != NULL, this means a 0 bit.
80 If right != NULL, this means a 1 bit. The word is the sequence obtained
81 by following the tree until a NULL pointer is met. */
82
83 struct BitTrie_t {
84 struct BitTrie_t *left;
85 struct BitTrie_t *right;
86 };
87 typedef struct BitTrie_t BitTrie_t;
88
89
90
91
92 /*-------------------------------- Functions ------------------------------*/
93
DeleteBitTrie(BitTrie_t * tree)94 static void DeleteBitTrie (BitTrie_t *tree)
95 {
96 if (tree == NULL)
97 return;
98 DeleteBitTrie (tree->left);
99 DeleteBitTrie (tree->right);
100 util_Free (tree);
101 }
102
103
104 /*=========================================================================*/
105
106
InitRes(scomp_Res * res,long N,int jmax,int tmax)107 static void InitRes (
108 scomp_Res *res, /* Results holder */
109 long N, /* Number of replications */
110 int jmax, /* Max class index for size of jumps */
111 int tmax /* Max class index for linear complexity */
112 )
113 /*
114 * Initializes the scomp_Res structure
115 */
116 {
117 sres_InitBasic (res->JumpNum, N,
118 "scomp_LinearComp: Number of Jumps");
119 sres_InitChi2 (res->JumpSize, N, jmax,
120 "scomp_LinearComp: Size of Jumps");
121 sres_InitChi2 (res->LinComp, N, tmax,
122 "scomp_LinearComp: Linear Complexity");
123 }
124
125
126 /*-------------------------------------------------------------------------*/
127
scomp_CreateRes(void)128 scomp_Res * scomp_CreateRes (void)
129 {
130 scomp_Res *res;
131 res = util_Malloc (sizeof (scomp_Res));
132 res->JumpNum = sres_CreateBasic ();
133 res->JumpSize = sres_CreateChi2 ();
134 res->LinComp = sres_CreateChi2 ();
135 return res;
136 }
137
138
139 /*-------------------------------------------------------------------------*/
140
scomp_DeleteRes(scomp_Res * res)141 void scomp_DeleteRes (scomp_Res *res)
142 {
143 if (res == NULL)
144 return;
145 sres_DeleteBasic (res->JumpNum);
146 sres_DeleteChi2 (res->JumpSize);
147 sres_DeleteChi2 (res->LinComp);
148 util_Free (res);
149 }
150
151
152 /*=========================================================================*/
153
WriteDataJumps(unif01_Gen * gen,char * TestName,long N,long n,int r,int s,double muComp,double mu,double sigma)154 static void WriteDataJumps (unif01_Gen *gen, char *TestName, long N, long n,
155 int r, int s, double muComp, double mu, double sigma)
156 {
157 swrite_Head (gen, TestName, N, n, r);
158 printf (", s = %1d\n", s);
159 if (swrite_Parameters) {
160 printf ("\n muComp = ");
161 num_WriteD (muComp, 12, 4, 2);
162 printf ("\n Mu = ");
163 num_WriteD (mu, 12, 4, 2);
164 printf ("\n Sigma = ");
165 num_WriteD (sigma, 12, 4, 2);
166 }
167 printf ("\n\n");
168 }
169
170
171 /*-------------------------------------------------------------------------*/
172
BerlekampMassey(scomp_Res * res,long n,double * pComp,double * pNumJ,int * Bits,int * Polyb,int * Polyc,int * PolycOld)173 static void BerlekampMassey (
174 scomp_Res *res,
175 long n, /* Number of bits */
176 double *pComp, /* Linear complexity */
177 double *pNumJ, /* Number of jumps */
178 int *Bits,
179 int *Polyb,
180 int *Polyc,
181 int *PolycOld
182 )
183 /*
184 * Berlekamp-Massey algorithm to calculate the linear complexity.
185 */
186 {
187 int b;
188 long Loc;
189 long i;
190 long m;
191 long k;
192 long L; /* Linear complexity */
193 long NumJ; /* Number of jumps */
194 sres_Chi2 *resl = res->JumpSize;
195
196 for (k = 0; k <= resl->jmax; k++)
197 resl->Count[k] = 0;
198
199 Polyc[0] = 1;
200 Polyb[0] = 1;
201 L = 0;
202 NumJ = 0;
203 k = 0;
204 m = -1;
205
206 while (k < n) {
207 /* Return the value of the current polynomial to see if it can
208 generate the next bit */
209 b = 0;
210 for (i = 1; i <= L; i++)
211 /* b ^= Polyc[i] * Bits[k + 1 - i]; */
212 b = (b + Polyc[i] * Bits[k + 1 - i]) & 1;
213
214 if (Bits[k + 1] != b) {
215 /* Update c(x)_old and c(x) */
216 for (i = 0; i <= L; i++)
217 PolycOld[i] = Polyc[i];
218
219 for (i = 0; i <= L; i++) {
220 if (Polyb[i] == 1)
221 Polyc[k - m + i] = (++Polyc[k - m + i]) & 1;
222 }
223 if (2 * L <= k) {
224 L = k + 1 - L;
225 NumJ++;
226 Loc = labs (k + 1 - 2 * L);
227 if (Loc <= resl->jmax)
228 ++resl->Count[Loc];
229 else
230 ++resl->Count[resl->jmax];
231 /* Update B */
232 for (i = 0; i <= L; i++)
233 Polyb[i] = PolycOld[i];
234 m = k;
235 }
236 }
237 ++k;
238 }
239 *pNumJ = NumJ;
240 *pComp = L;
241 }
242
243
244 /*=========================================================================*/
245
scomp_LinearComp(unif01_Gen * gen,scomp_Res * res,long N,long n,int r,int s)246 void scomp_LinearComp (unif01_Gen *gen, scomp_Res *res,
247 long N, long n, int r, int s)
248 {
249 const double epsilon = 1.0E-10;
250 const int tt = 1 - 0.5 * num_Log2 (3 * epsilon); /* Dimension */
251 const long K0 = n/s;
252 long i, Seq;
253 int j, k;
254 int M0;
255 double NumJ; /* Number of Jumps */
256 double sigma, mu; /* Parameters of number of jumps */
257 double comp; /* Linear complexity */
258 double muComp; /* Mean of linear complexity */
259 unsigned long Nombre; /* Random number */
260 int Parite;
261 double *Prob;
262 long *Loca;
263 long tmin, tmax, NbClasses;
264 double X2;
265 double temp;
266 int *Bits; /* 4 Arrays of bits */
267 int *Polyb;
268 int *Polyc;
269 int *PolycOld;
270 double Param[1];
271 char str[LENGTH + 1];
272 lebool localRes = FALSE;
273 chrono_Chrono *Timer;
274 char *TestName = "scomp_LinearComp test";
275 sres_Basic *resJN;
276 sres_Chi2 *resJL;
277 sres_Chi2 *resLC;
278 lebool JL_OK = TRUE; /* If TRUE do the JL test, otherwise not */
279 lebool LC_OK = FALSE; /* If TRUE do the LC test, otherwise not */
280
281 Timer = chrono_Create ();
282 n = K0 * s;
283 Parite = n & 1;
284 if (n >= DBL_MAX_EXP)
285 temp = 0.0;
286 else
287 temp = pow (2.0, -(double) n);
288
289 mu = n / 4.0 + (4 + Parite) / 12.0 - temp / 3.0;
290 sigma = n / 8.0 - (2 - Parite)/(9.0 - Parite) + n * temp / 6.0
291 + (6 + Parite) * temp / 18.0 - temp * temp / 9.0;
292 sigma = sqrt (sigma);
293 muComp = n / 2.0 + (4 + Parite) / 18.0;
294 M0 = num_Log2 (mu / gofs_MinExpected);
295 if (M0 < 2) {
296 /* 0 degree of freedom for the chi2, do not do the test JL. */
297 JL_OK = FALSE;
298 }
299
300 if (swrite_Basic)
301 WriteDataJumps (gen, TestName, N, n, r, s, muComp, mu, sigma);
302
303 /* util_Assert (M0 > 1, "scomp_LinearComp: n*s is too small"); */
304 util_Assert (M0 <= num_MaxTwoExp, "scomp_LinearComp: M0 > num_MaxTwoExp");
305
306 Prob = util_Calloc (1 + (size_t) tt, sizeof (double));
307 Bits = util_Calloc ((size_t) n + 1, sizeof (int));
308 Polyb = util_Calloc ((size_t) n + 1, sizeof (int));
309 Polyc = util_Calloc ((size_t) n + 1, sizeof (int));
310 PolycOld = util_Calloc ((size_t) n + 1, sizeof (int));
311
312 if (res == NULL) {
313 localRes = TRUE;
314 res = scomp_CreateRes ();
315 }
316 M0 = util_Max (M0, 1);
317 InitRes (res, N, M0, tt);
318 resJN = res->JumpNum;
319 resJL = res->JumpSize;
320 resLC = res->LinComp;
321 Loca = resLC->Loc;
322
323 if (N > 2.0 * gofs_MinExpected) {
324 /* Compute the expected probabilities for the linear complexity. */
325 /* We put in Prob[k] the probabilities for i = k and i = -k of */
326 /* the statistic defined in the NIST document 800-22, p. 86. */
327 temp = Prob[0] = 0.5;
328 for (k = 1; k < tt; k++) {
329 Prob[k] = 1.5 * pow (4.0, -(double) k);
330 temp += Prob[k];
331 }
332 Prob[tt] = 1.0 - temp;
333 for (k = 0; k <= tt; k++) {
334 resLC->Count[k] = 0;
335 resLC->NbExp[k] = N * Prob[k];
336 }
337 tmin = 0;
338 tmax = tt;
339 if (swrite_Classes) {
340 printf ("Classes for the linear complexity:\n");
341 gofs_WriteClasses (resLC->NbExp, Loca, tmin, tmax, 0);
342 }
343 gofs_MergeClasses (resLC->NbExp, Loca, &tmin, &tmax, &NbClasses);
344 resLC->jmax = tmax;
345 resLC->jmin = tmin;
346 resLC->degFree = NbClasses - 1;
347 if (NbClasses < 2) {
348 /* 0 degree of freedom for the chi2, do not do the test LC. */
349 LC_OK = FALSE;
350 } else
351 LC_OK = TRUE;
352 }
353
354 statcoll_SetDesc (resJN->sVal1,
355 "The number of jumps: the N statistic values (a standard normal):");
356 sprintf (str, "The jumps size: the N statistic values (a ChiSquare"
357 " with %1d degrees of freedom):", M0 - 1);
358 statcoll_SetDesc (resJL->sVal1, str);
359
360 for (Seq = 1; Seq <= N; Seq++) {
361 for (i = 0; i < K0; i++) {
362 Nombre = unif01_StripB (gen, r, s);
363 for (j = s; j >= 1; j--) {
364 Bits[s * i + j] = Nombre & 1;
365 Nombre >>= 1;
366 }
367 }
368
369 BerlekampMassey (res, n, &comp, &NumJ, Bits, Polyb, Polyc, PolycOld);
370
371 /* Value of the statistic for the linear complexity */
372 if (LC_OK) {
373 comp = comp - muComp;
374 if (Parite)
375 comp = -comp;
376 comp += 2.0 / 9.0;
377 /* comp is now an integer: truncate correctly and avoid off-by-1
378 error because of small floating-point inaccuracies. */
379 if (comp >= 0.0)
380 k = comp + 0.5;
381 else
382 k = comp - 0.5;
383 if (k < 0)
384 k = -k;
385 if (k >= tt)
386 ++resLC->Count[Loca[tt]];
387 else
388 ++resLC->Count[Loca[k]];
389 }
390
391 /* Value of the normal statistic for the number of jumps */
392 statcoll_AddObs (resJN->sVal1, (NumJ - mu) / sigma);
393
394 /* Value of the statistic for the size of the jumps */
395 if (JL_OK) {
396 for (k = 1; k < M0; k++) {
397 resJL->NbExp[k] = NumJ / num_TwoExp[k];
398 resJL->Loc[k] = k;
399 }
400 resJL->NbExp[M0] = NumJ / num_TwoExp[M0 - 1];
401 resJL->Loc[M0] = M0;
402 resJL->jmax = M0;
403 resJL->jmin = 1;
404 resJL->degFree = M0 - 1;
405
406 X2 = gofs_Chi2 (resJL->NbExp, resJL->Count, 1, M0);
407 statcoll_AddObs (resJL->sVal1, X2);
408 if (swrite_Classes) {
409 printf ("\n\nClasses for the size of the jumps:\n");
410 gofs_WriteClasses (resJL->NbExp, (long *) NULL, 1, M0, 0);
411 }
412 if (swrite_Counters)
413 tables_WriteTabL (resJL->Count, 1, M0, 5, 10,
414 "Size of the jumps: observed numbers");
415 }
416 }
417
418 gofw_ActiveTests2 (resJN->sVal1->V, resJN->pVal1->V, N, wdist_Normal,
419 (double *) NULL, resJN->sVal2, resJN->pVal2);
420 resJN->pVal1->NObs = N;
421 sres_GetNormalSumStat (resJN);
422
423 if (JL_OK) {
424 Param[0] = M0 - 1;
425 gofw_ActiveTests2 (resJL->sVal1->V, resJL->pVal1->V, N,
426 wdist_ChiSquare, Param, resJL->sVal2, resJL->pVal2);
427 resJL->pVal1->NObs = N;
428 sres_GetChi2SumStat (resJL);
429 }
430
431 if (LC_OK) {
432 X2 = gofs_Chi2 (resLC->NbExp, resLC->Count, tmin, tmax);
433 resLC->sVal2[gofw_Mean] = X2;
434 resLC->pVal2[gofw_Mean] = fbar_ChiSquare2 (NbClasses - 1, 8, X2);
435 }
436
437 if (swrite_Basic) {
438 if (JL_OK) {
439 printf ("\n-----------------------------------------------\n");
440 if (N == 1) {
441 printf ("Number of degrees of freedom : %4ld\n",
442 resJL->degFree);
443 printf ("Chi2 statistic for size of jumps :");
444 gofw_Writep2 (resJL->sVal2[gofw_Mean], resJL->pVal2[gofw_Mean]);
445 } else {
446 printf ("Test results for the size of jumps:\n");
447 gofw_WriteActiveTests0 (N, resJL->sVal2, resJL->pVal2);
448 swrite_Chi2SumTest (N, resJL);
449 }
450 if (swrite_Collectors)
451 statcoll_Write (resJL->sVal1, 5, 14, 4, 3);
452 }
453
454 printf ("\n-----------------------------------------------\n");
455 if (N == 1) {
456 printf ("Normal statistic for number of jumps :");
457 gofw_Writep2 (resJN->sVal2[gofw_Mean], resJN->pVal2[gofw_Mean]);
458 } else {
459 printf ("Test results for the number of jumps:\n");
460 gofw_WriteActiveTests0 (N, resJN->sVal2, resJN->pVal2);
461 swrite_NormalSumTest (N, resJN);
462 }
463 if (swrite_Collectors)
464 statcoll_Write (resJN->sVal1, 5, 14, 4, 3);
465
466 if (LC_OK) {
467 printf ("\n-----------------------------------------------\n");
468 printf ("Test results for the linear complexity:\n\n");
469 printf ("Number of degrees of freedom : %4ld\n",
470 resLC->degFree);
471 printf ("Chi2 statistic on the N replications :");
472 gofw_Writep2 (resLC->sVal2[gofw_Mean], resLC->pVal2[gofw_Mean]);
473 if (swrite_Classes)
474 gofs_WriteClasses (resLC->NbExp, Loca, tmin, tmax, NbClasses);
475 if (swrite_Counters)
476 tables_WriteTabL (resLC->Count, tmin, tmax, 5, 10,
477 "Linear Complexity: observed numbers");
478 }
479
480 printf ("\n\n");
481 swrite_Final (gen, Timer);
482 }
483
484 util_Free (Prob);
485 util_Free (Bits);
486 util_Free (Polyb);
487 util_Free (Polyc);
488 util_Free (PolycOld);
489 if (localRes)
490 scomp_DeleteRes (res);
491 chrono_Delete (Timer);
492 }
493
494
495 /*=========================================================================*/
496
WriteDataLZ(unif01_Gen * gen,char * Test,long N,int k,int r,int s)497 static void WriteDataLZ (
498 unif01_Gen *gen, /* generator */
499 char *Test, /* Test name */
500 long N, /* Number of replications */
501 int k, /* Sample size n = 2^k */
502 int r, /* r first bits of each random number dropped */
503 int s /* s bits of each random number used */
504 )
505 {
506 long n;
507 n = num_TwoExp[k];
508 swrite_Head (gen, Test, N, n, r);
509 printf (", s = %4d, k = %4d\n\n", s, k);
510 }
511
512
513 /*-------------------------------------------------------------------------*/
514
LZ78(unif01_Gen * gen,long n,int r,int s)515 static long LZ78 (unif01_Gen * gen, long n, int r, int s)
516 /*
517 * The parameters are the same as in scomp_LempelZiv. The trie contains
518 * a left (right) branch if a word with a 0 (1) bit after the prefix has
519 * been seen before. We descend one level in the trie with each bit until
520 * a leaf is met. Add a branch for a new word, and restart at root.
521 */
522 {
523 const unsigned long kMAX = 1UL << (s - 1);
524 unsigned long Y, k;
525 long i; /* Count the number of bits overall */
526 long W; /* Count the number of words */
527 lebool done = FALSE; /* Start a new word */
528 BitTrie_t *trie, *root;
529
530 W = i = 0;
531 trie = root = util_Malloc (sizeof (BitTrie_t));
532 trie->left = trie->right = NULL;
533 Y = unif01_StripB (gen, r, s);
534 k = kMAX;
535
536 while (i < n) {
537 /* Start a new word: match it as far as possible in the trie */
538 done = FALSE;
539 trie = root;
540 while (!done) {
541 if ((Y & k) == 0) { /* Bit 0 */
542 if (trie->left) {
543 /* We have seen it before: descend in branch */
544 trie = trie->left;
545 } else {
546 /* A leaf: this is a new word */
547 W++;
548 done = TRUE;
549 trie->left = util_Malloc (sizeof (BitTrie_t));
550 trie = trie->left;
551 trie->left = trie->right = NULL;
552 }
553
554 } else { /* Bit 1 */
555 if (trie->right) {
556 trie = trie->right;
557 } else {
558 W++;
559 done = TRUE;
560 trie->right = util_Malloc (sizeof (BitTrie_t));
561 trie = trie->right;
562 trie->left = trie->right = NULL;
563 }
564 }
565 i++;
566 if (i >= n) {
567 done = TRUE;
568 if ((trie->left != NULL) || (trie->right != NULL))
569 W++;
570 break;
571 }
572 k >>= 1;
573 if (k == 0) {
574 /* Have used the s bits in the number; generate a new number */
575 Y = unif01_StripB (gen, r, s);
576 k = kMAX;
577 }
578 }
579 }
580 DeleteBitTrie (root);
581 return W;
582 }
583
584
585 /*-------------------------------------------------------------------------*/
586
scomp_LempelZiv(unif01_Gen * gen,sres_Basic * res,long N,int t,int r,int s)587 void scomp_LempelZiv (unif01_Gen *gen, sres_Basic *res,
588 long N, int t, int r, int s)
589 {
590 long Seq, n;
591 double X;
592 /* const double lg_n = num_Log2 ((double) n); */
593 long W;
594 lebool localRes = FALSE;
595 chrono_Chrono *Timer;
596 char *TestName = "scomp_LempelZiv test";
597
598 Timer = chrono_Create ();
599 if (swrite_Basic)
600 WriteDataLZ (gen, TestName, N, t, r, s);
601 util_Assert (r + s <= 32, "scomp_LempelZiv: r + s > 32");
602 util_Assert (t <= 28, "scomp_LempelZiv: k > 28");
603 if (res == NULL) {
604 localRes = TRUE;
605 res = sres_CreateBasic ();
606 }
607 n = num_TwoExp[t];
608 sres_InitBasic (res, N, "scomp_LempelZiv");
609 statcoll_SetDesc (res->sVal1, "sVal1: a standard normal");
610
611 for (Seq = 1; Seq <= N; Seq++) {
612 W = LZ78 (gen, n, r, s);
613 /* X = (W - n / lg_n) / sqrt (0.266 * n / (lg_n * lg_n * lg_n)); */
614 X = (W - LZMu[t]) / LZSigma[t];
615 statcoll_AddObs (res->sVal1, X);
616 if (swrite_Counters) {
617 printf ("%12ld ", W);
618 if (Seq % 5 == 0)
619 printf ("\n");
620 if (Seq >= N)
621 printf ("\n\n");
622 }
623 }
624
625 gofw_ActiveTests2 (res->sVal1->V, res->pVal1->V, N, wdist_Normal,
626 (double *) NULL, res->sVal2, res->pVal2);
627 res->pVal1->NObs = N;
628 sres_GetNormalSumStat (res);
629
630 if (swrite_Collectors)
631 statcoll_Write (res->sVal1, 5, 12, 4, 3);
632
633 if (swrite_Basic) {
634 gofw_WriteActiveTests2 (N, res->sVal2, res->pVal2,
635 "Normal statistic :");
636 swrite_NormalSumTest (N, res);
637 swrite_Final (gen, Timer);
638 }
639 if (localRes)
640 sres_DeleteBasic (res);
641 chrono_Delete (Timer);
642 }
643