1 /* Chi-squared test for mpfr_nrandom
2 
3 Copyright 2011-2020 Free Software Foundation, Inc.
4 Contributed by Charles Karney <charles@karney.com>, SRI International.
5 
6 This file is part of the GNU MPFR Library.
7 
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12 
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16 License for more details.
17 
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22 
23 #include "mpfr-test.h"
24 
25 /* Return Phi(x) = erf(x / sqrt(2)) / 2, the cumulative probability function
26  * for the normal distribution.  We only take differences of this function so
27  * the offset doesn't matter; here Phi(0) = 0. */
28 static void
normal_cumulative(mpfr_ptr z,mpfr_ptr x,mpfr_rnd_t rnd)29 normal_cumulative (mpfr_ptr z, mpfr_ptr x, mpfr_rnd_t rnd)
30 {
31   mpfr_sqrt_ui (z, 2, rnd);
32   mpfr_div (z, x, z, rnd);
33   mpfr_erf (z, z, rnd);
34   mpfr_div_ui (z, z, 2, rnd);
35 }
36 
37 /* Given nu and chisqp, compute probability that chisq > chisqp.  This uses,
38  * A&S 26.4.16,
39  *
40  * Q(nu,chisqp) =
41  *     erfc( (3/2)*sqrt(nu) * ( cbrt(chisqp/nu) - 1 + 2/(9*nu) ) ) / 2
42  *
43  * which is valid for nu > 30.  This is the basis for the formula in Knuth,
44  * TAOCP, Vol 2, 3.3.1, Table 1.  It more accurate than the similar formula,
45  * DLMF 8.11.10. */
46 static void
chisq_prob(mpfr_ptr q,long nu,mpfr_ptr chisqp)47 chisq_prob (mpfr_ptr q, long nu, mpfr_ptr chisqp)
48 {
49   mpfr_t t;
50   mpfr_rnd_t rnd;
51 
52   rnd = MPFR_RNDN;  /* This uses an approx formula.  Might as well use RNDN. */
53   mpfr_init2 (t, mpfr_get_prec (q));
54 
55   mpfr_div_si (q, chisqp, nu, rnd); /* chisqp/nu */
56   mpfr_cbrt (q, q, rnd);            /* (chisqp/nu)^(1/3) */
57   mpfr_sub_ui (q, q, 1, rnd);       /* (chisqp/nu)^(1/3) - 1 */
58   mpfr_set_ui (t, 2, rnd);
59   mpfr_div_si (t, t, 9*nu, rnd); /* 2/(9*nu) */
60   mpfr_add (q, q, t, rnd);       /* (chisqp/nu)^(1/3) - 1 + 2/(9*nu) */
61   mpfr_sqrt_ui (t, nu, rnd);     /* sqrt(nu) */
62   mpfr_mul_d (t, t, 1.5, rnd);   /* (3/2)*sqrt(nu) */
63   mpfr_mul (q, q, t, rnd);       /* arg to erfc */
64   mpfr_erfc (q, q, rnd);         /* erfc(...) */
65   mpfr_div_ui (q, q, 2, rnd);    /* erfc(...)/2 */
66 
67   mpfr_clear (t);
68 }
69 
70 /* The continuous chi-squared test on with a set of bins of equal width.
71  *
72  * A single precision is picked for sampling and the chi-squared calculation.
73  * This should picked high enough so that binning in test doesn't need to be
74  * accurately aligned with possible values of the deviates.  Also we need the
75  * precision big enough that chi-squared calculation itself is reliable.
76  *
77  * There's no particular benefit is testing with at very higher precisions;
78  * because of the way tnrandom samples, this just adds additional barely
79  * significant random bits to the deviates.  So this chi-squared test with
80  * continuous equal width bins isn't a good tool for finding problems here.
81  *
82  * The testing of low precision normal deviates is done by
83  * test_nrandom_chisq_disc. */
84 static double
test_nrandom_chisq_cont(long num,mpfr_prec_t prec,int nu,double xmin,double xmax,int verbose)85 test_nrandom_chisq_cont (long num, mpfr_prec_t prec, int nu,
86                          double xmin, double xmax, int verbose)
87 {
88   mpfr_t x, a, b, dx, z, pa, pb, ps, t;
89   long *counts;
90   int i, inexact;
91   long k;
92   mpfr_rnd_t rnd, rndd;
93   double Q, chisq;
94 
95   rnd = MPFR_RNDN;              /* For chi-squared calculation */
96   rndd = MPFR_RNDD;             /* For sampling and figuring the bins */
97   mpfr_inits2 (prec, x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);
98 
99   counts = (long *) tests_allocate ((nu + 1) * sizeof (long));
100   for (i = 0; i <= nu; i++)
101     counts[i] = 0;
102 
103   /* a and b are bounds of nu equally spaced bins.  Set dx = (b-a)/nu */
104   mpfr_set_d (a, xmin, rnd);
105   mpfr_set_d (b, xmax, rnd);
106 
107   mpfr_sub (dx, b, a, rnd);
108   mpfr_div_si (dx, dx, nu, rnd);
109 
110   for (k = 0; k < num; ++k)
111     {
112       inexact = mpfr_nrandom (x, RANDS, rndd);
113       if (inexact == 0)
114         {
115           /* one call in the loop pretended to return an exact number! */
116           printf ("Error: mpfr_nrandom() returns a zero ternary value.\n");
117           exit (1);
118         }
119       mpfr_sub (x, x, a, rndd);
120       mpfr_div (x, x, dx, rndd);
121       i = mpfr_get_si (x, rndd);
122       ++counts[i >= 0 && i < nu ? i : nu];
123     }
124 
125   mpfr_set (x, a, rnd);
126   normal_cumulative (pa, x, rnd);
127   mpfr_add_ui (ps, pa, 1, rnd);
128   mpfr_set_zero (t, 1);
129   for (i = 0; i <= nu; ++i)
130     {
131       if (i < nu)
132         {
133           mpfr_add (x, x, dx, rnd);
134           normal_cumulative (pb, x, rnd);
135           mpfr_sub (pa, pb, pa, rnd); /* prob for this bin */
136         }
137       else
138         mpfr_sub (pa, ps, pa, rnd); /* prob for last bin, i = nu */
139 
140       /* Compute z = counts[i] - num * p; t += z * z / (num * p) */
141       mpfr_mul_ui (pa, pa, num, rnd);
142       mpfr_ui_sub (z, counts[i], pa, rnd);
143       mpfr_sqr (z, z, rnd);
144       mpfr_div (z, z, pa, rnd);
145       mpfr_add (t, t, z, rnd);
146       mpfr_swap (pa, pb);       /* i.e., pa = pb */
147     }
148 
149   chisq = mpfr_get_d (t, rnd);
150   chisq_prob (t, nu, t);
151   Q = mpfr_get_d (t, rnd);
152   if (verbose)
153     {
154       printf ("num = %ld, equal bins in [%.2f, %.2f], nu = %d: chisq = %.2f\n",
155               num, xmin, xmax, nu, chisq);
156       if (Q < 0.05)
157         printf ("    WARNING: probability (less than 5%%) = %.2e\n", Q);
158     }
159 
160   tests_free (counts, (nu + 1) * sizeof (long));
161   mpfr_clears (x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);
162   return Q;
163 }
164 
165 /* Return a sequential number for a positive low-precision x.  x is altered by
166  * this function.  low precision means prec = 2, 3, or 4.  High values of
167  * precision will result in integer overflow. */
168 static long
sequential(mpfr_ptr x)169 sequential (mpfr_ptr x)
170 {
171   long expt, prec;
172 
173   prec = mpfr_get_prec (x);
174   expt =  mpfr_get_exp (x);
175   mpfr_mul_2si (x, x, prec - expt, MPFR_RNDN);
176 
177   return expt * (1 << (prec - 1)) + mpfr_get_si (x, MPFR_RNDN);
178 }
179 
180 /* The chi-squared test on low precision normal deviates.  wprec is the working
181  * precision for the chi-squared calculation.  prec is the precision for the
182  * sampling; choose this in [2,5].  The bins consist of all the possible
183  * deviate values in the range [xmin, xmax] coupled with the value of inexact.
184  * Thus with prec = 2, the bins are
185  *   ...
186  *   (7/16, 1/2)  x = 1/2, inexact = +1
187  *   (1/2 , 5/8)  x = 1/2, inexact = -1
188  *   (5/8 , 3/4)  x = 3/4, inexact = +1
189  *   (3/4 , 7/8)  x = 3/4, inexact = -1
190  *   (7/8 , 1  )  x = 1  , inexact = +1
191  *   (1   , 5/4)  x = 1  , inexact = -1
192  *   (5/4 , 3/2)  x = 3/2, inexact = +1
193  *   (3/2 , 7/4)  x = 3/2, inexact = -1
194  *   ...
195  * In addition, two bins are allocated for [0,xmin) and (xmax,inf).
196  *
197  * This test is applied to the absolute values of the deviates.  The sign is
198  * tested by test_nrandom_chisq_cont.  In any case, the way the sign is
199  * assigned in mpfr_nrandom is trivial.  In addition, the sampling is with
200  * MPFR_RNDN.  This is the rounding mode which elicits the most information.
201  * trandom_deviate includes checks on the consistency of the results extracted
202  * from a random_deviate with other rounding modes.  */
203 static double
test_nrandom_chisq_disc(long num,mpfr_prec_t wprec,int prec,double xmin,double xmax,int verbose)204 test_nrandom_chisq_disc (long num, mpfr_prec_t wprec, int prec,
205                          double xmin, double xmax, int verbose)
206 {
207   mpfr_t x, v, pa, pb, z, t;
208   mpfr_rnd_t rnd;
209   int i, inexact, nu;
210   long *counts;
211   long k, seqmin, seqmax, seq;
212   double Q, chisq;
213 
214   rnd = MPFR_RNDN;
215   mpfr_init2 (x, prec);
216   mpfr_init2 (v, prec+1);
217   mpfr_inits2 (wprec, pa, pb, z, t, (mpfr_ptr) 0);
218 
219   mpfr_set_d (x, xmin, rnd);
220   xmin = mpfr_get_d (x, rnd);
221   mpfr_set (v, x, rnd);
222   seqmin = sequential (x);
223   mpfr_set_d (x, xmax, rnd);
224   xmax = mpfr_get_d (x, rnd);
225   seqmax = sequential (x);
226 
227   /* Two bins for each sequential number (for inexact = +/- 1), plus 1 for u <
228    * umin and 1 for u > umax, minus 1 for degrees of freedom */
229   nu = 2 * (seqmax - seqmin + 1) + 2 - 1;
230   counts = (long *) tests_allocate ((nu + 1) * sizeof (long));
231   for (i = 0; i <= nu; i++)
232     counts[i] = 0;
233 
234   for (k = 0; k < num; ++k)
235     {
236       inexact = mpfr_nrandom (x, RANDS, rnd);
237       if (mpfr_signbit (x))
238         {
239           inexact = -inexact;
240           mpfr_setsign (x, x, 0, rnd);
241         }
242       /* Don't call sequential with small args to avoid undefined behavior with
243        * zero and possibility of overflow. */
244       seq = mpfr_greaterequal_p (x, v) ? sequential (x) : seqmin - 1;
245       ++counts[seq < seqmin ? 0 :
246                seq <= seqmax ? 2 * (seq - seqmin) + 1 + (inexact > 0 ? 0 : 1) :
247                nu];
248     }
249 
250   mpfr_set_zero (v, 1);
251   normal_cumulative (pa, v, rnd);
252   /* Cycle through all the bin boundaries using mpfr_nextabove at precision
253    * prec + 1 starting at mpfr_nextbelow (xmin) */
254   mpfr_set_d (x, xmin, rnd);
255   mpfr_set (v, x, rnd);
256   mpfr_nextbelow (v);
257   mpfr_nextbelow (v);
258   mpfr_set_zero (t, 1);
259   for (i = 0; i <= nu; ++i)
260     {
261       if (i < nu)
262         mpfr_nextabove (v);
263       else
264         mpfr_set_inf (v, 1);
265       normal_cumulative (pb, v, rnd);
266       mpfr_sub (pa, pb, pa, rnd);
267 
268       /* Compute z = counts[i] - num * p; t += z * z / (num * p).  2*num to
269        * account for the fact the p needs to be doubled since we are
270        * considering only the absolute value of the deviates. */
271       mpfr_mul_ui (pa, pa, 2*num, rnd);
272       mpfr_ui_sub (z, counts[i], pa, rnd);
273       mpfr_sqr (z, z, rnd);
274       mpfr_div (z, z, pa, rnd);
275       mpfr_add (t, t, z, rnd);
276       mpfr_swap (pa, pb);       /* i.e., pa = pb */
277     }
278 
279   chisq = mpfr_get_d (t, rnd);
280   chisq_prob (t, nu, t);
281   Q = mpfr_get_d (t, rnd);
282   if (verbose)
283     {
284       printf ("num = %ld, discrete (prec = %d) bins in [%.6f, %.2f], "
285               "nu = %d: chisq = %.2f\n", num, prec, xmin, xmax, nu, chisq);
286       if (Q < 0.05)
287         printf ("    WARNING: probability (less than 5%%) = %.2e\n", Q);
288     }
289 
290   tests_free (counts, (nu + 1) * sizeof (long));
291   mpfr_clears (x, v, pa, pb, z, t, (mpfr_ptr) 0);
292   return Q;
293 }
294 
295 static void
run_chisq(double (* f)(long,mpfr_prec_t,int,double,double,int),long num,mpfr_prec_t prec,int bin,double xmin,double xmax,int verbose)296 run_chisq (double (*f)(long, mpfr_prec_t, int, double, double, int),
297            long num, mpfr_prec_t prec, int bin,
298            double xmin, double xmax, int verbose)
299 {
300   double Q, Qcum, Qbad, Qthresh;
301   int i;
302 
303   Qcum = 1;
304   Qbad = 1.e-9;
305   Qthresh = 0.01;
306   for (i = 0; i < 3; ++i)
307     {
308       Q = (*f)(num, prec, bin, xmin, xmax, verbose);
309       Qcum *= Q;
310       if (Q > Qthresh)
311         return;
312       else if (Q < Qbad)
313         {
314           printf ("Error: mpfr_nrandom chi-squared failure "
315                   "(prob = %.2e)\n", Q);
316           exit (1);
317         }
318       num *= 10;
319       Qthresh /= 10;
320     }
321   if (Qcum < Qbad)              /* Presumably this is true */
322     {
323       printf ("Error: mpfr_nrandom combined chi-squared failure "
324               "(prob = %.2e)\n", Qcum);
325       exit (1);
326     }
327 }
328 
329 int
main(int argc,char * argv[])330 main (int argc, char *argv[])
331 {
332   long nbtests;
333   int verbose;
334 
335   tests_start_mpfr ();
336 
337   verbose = 0;
338   nbtests = 100000;
339   if (argc > 1)
340     {
341       long a = atol (argv[1]);
342       verbose = 1;
343       if (a != 0)
344         nbtests = a;
345     }
346 
347   run_chisq (test_nrandom_chisq_cont, nbtests, 64, 60, -4, 4, verbose);
348   run_chisq (test_nrandom_chisq_disc, nbtests, 64, 2, 0.0005, 3, verbose);
349   run_chisq (test_nrandom_chisq_disc, nbtests, 64, 3, 0.002, 4, verbose);
350   run_chisq (test_nrandom_chisq_disc, nbtests, 64, 4, 0.004, 4, verbose);
351 
352   tests_end_mpfr ();
353   return 0;
354 }
355