1 /* Chi-squared test for mpfr_nrandom
2
3 Copyright 2011-2020 Free Software Foundation, Inc.
4 Contributed by Charles Karney <charles@karney.com>, SRI International.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #include "mpfr-test.h"
24
25 /* Return Phi(x) = erf(x / sqrt(2)) / 2, the cumulative probability function
26 * for the normal distribution. We only take differences of this function so
27 * the offset doesn't matter; here Phi(0) = 0. */
28 static void
normal_cumulative(mpfr_ptr z,mpfr_ptr x,mpfr_rnd_t rnd)29 normal_cumulative (mpfr_ptr z, mpfr_ptr x, mpfr_rnd_t rnd)
30 {
31 mpfr_sqrt_ui (z, 2, rnd);
32 mpfr_div (z, x, z, rnd);
33 mpfr_erf (z, z, rnd);
34 mpfr_div_ui (z, z, 2, rnd);
35 }
36
37 /* Given nu and chisqp, compute probability that chisq > chisqp. This uses,
38 * A&S 26.4.16,
39 *
40 * Q(nu,chisqp) =
41 * erfc( (3/2)*sqrt(nu) * ( cbrt(chisqp/nu) - 1 + 2/(9*nu) ) ) / 2
42 *
43 * which is valid for nu > 30. This is the basis for the formula in Knuth,
44 * TAOCP, Vol 2, 3.3.1, Table 1. It more accurate than the similar formula,
45 * DLMF 8.11.10. */
46 static void
chisq_prob(mpfr_ptr q,long nu,mpfr_ptr chisqp)47 chisq_prob (mpfr_ptr q, long nu, mpfr_ptr chisqp)
48 {
49 mpfr_t t;
50 mpfr_rnd_t rnd;
51
52 rnd = MPFR_RNDN; /* This uses an approx formula. Might as well use RNDN. */
53 mpfr_init2 (t, mpfr_get_prec (q));
54
55 mpfr_div_si (q, chisqp, nu, rnd); /* chisqp/nu */
56 mpfr_cbrt (q, q, rnd); /* (chisqp/nu)^(1/3) */
57 mpfr_sub_ui (q, q, 1, rnd); /* (chisqp/nu)^(1/3) - 1 */
58 mpfr_set_ui (t, 2, rnd);
59 mpfr_div_si (t, t, 9*nu, rnd); /* 2/(9*nu) */
60 mpfr_add (q, q, t, rnd); /* (chisqp/nu)^(1/3) - 1 + 2/(9*nu) */
61 mpfr_sqrt_ui (t, nu, rnd); /* sqrt(nu) */
62 mpfr_mul_d (t, t, 1.5, rnd); /* (3/2)*sqrt(nu) */
63 mpfr_mul (q, q, t, rnd); /* arg to erfc */
64 mpfr_erfc (q, q, rnd); /* erfc(...) */
65 mpfr_div_ui (q, q, 2, rnd); /* erfc(...)/2 */
66
67 mpfr_clear (t);
68 }
69
70 /* The continuous chi-squared test on with a set of bins of equal width.
71 *
72 * A single precision is picked for sampling and the chi-squared calculation.
73 * This should picked high enough so that binning in test doesn't need to be
74 * accurately aligned with possible values of the deviates. Also we need the
75 * precision big enough that chi-squared calculation itself is reliable.
76 *
77 * There's no particular benefit is testing with at very higher precisions;
78 * because of the way tnrandom samples, this just adds additional barely
79 * significant random bits to the deviates. So this chi-squared test with
80 * continuous equal width bins isn't a good tool for finding problems here.
81 *
82 * The testing of low precision normal deviates is done by
83 * test_nrandom_chisq_disc. */
84 static double
test_nrandom_chisq_cont(long num,mpfr_prec_t prec,int nu,double xmin,double xmax,int verbose)85 test_nrandom_chisq_cont (long num, mpfr_prec_t prec, int nu,
86 double xmin, double xmax, int verbose)
87 {
88 mpfr_t x, a, b, dx, z, pa, pb, ps, t;
89 long *counts;
90 int i, inexact;
91 long k;
92 mpfr_rnd_t rnd, rndd;
93 double Q, chisq;
94
95 rnd = MPFR_RNDN; /* For chi-squared calculation */
96 rndd = MPFR_RNDD; /* For sampling and figuring the bins */
97 mpfr_inits2 (prec, x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);
98
99 counts = (long *) tests_allocate ((nu + 1) * sizeof (long));
100 for (i = 0; i <= nu; i++)
101 counts[i] = 0;
102
103 /* a and b are bounds of nu equally spaced bins. Set dx = (b-a)/nu */
104 mpfr_set_d (a, xmin, rnd);
105 mpfr_set_d (b, xmax, rnd);
106
107 mpfr_sub (dx, b, a, rnd);
108 mpfr_div_si (dx, dx, nu, rnd);
109
110 for (k = 0; k < num; ++k)
111 {
112 inexact = mpfr_nrandom (x, RANDS, rndd);
113 if (inexact == 0)
114 {
115 /* one call in the loop pretended to return an exact number! */
116 printf ("Error: mpfr_nrandom() returns a zero ternary value.\n");
117 exit (1);
118 }
119 mpfr_sub (x, x, a, rndd);
120 mpfr_div (x, x, dx, rndd);
121 i = mpfr_get_si (x, rndd);
122 ++counts[i >= 0 && i < nu ? i : nu];
123 }
124
125 mpfr_set (x, a, rnd);
126 normal_cumulative (pa, x, rnd);
127 mpfr_add_ui (ps, pa, 1, rnd);
128 mpfr_set_zero (t, 1);
129 for (i = 0; i <= nu; ++i)
130 {
131 if (i < nu)
132 {
133 mpfr_add (x, x, dx, rnd);
134 normal_cumulative (pb, x, rnd);
135 mpfr_sub (pa, pb, pa, rnd); /* prob for this bin */
136 }
137 else
138 mpfr_sub (pa, ps, pa, rnd); /* prob for last bin, i = nu */
139
140 /* Compute z = counts[i] - num * p; t += z * z / (num * p) */
141 mpfr_mul_ui (pa, pa, num, rnd);
142 mpfr_ui_sub (z, counts[i], pa, rnd);
143 mpfr_sqr (z, z, rnd);
144 mpfr_div (z, z, pa, rnd);
145 mpfr_add (t, t, z, rnd);
146 mpfr_swap (pa, pb); /* i.e., pa = pb */
147 }
148
149 chisq = mpfr_get_d (t, rnd);
150 chisq_prob (t, nu, t);
151 Q = mpfr_get_d (t, rnd);
152 if (verbose)
153 {
154 printf ("num = %ld, equal bins in [%.2f, %.2f], nu = %d: chisq = %.2f\n",
155 num, xmin, xmax, nu, chisq);
156 if (Q < 0.05)
157 printf (" WARNING: probability (less than 5%%) = %.2e\n", Q);
158 }
159
160 tests_free (counts, (nu + 1) * sizeof (long));
161 mpfr_clears (x, a, b, dx, z, pa, pb, ps, t, (mpfr_ptr) 0);
162 return Q;
163 }
164
165 /* Return a sequential number for a positive low-precision x. x is altered by
166 * this function. low precision means prec = 2, 3, or 4. High values of
167 * precision will result in integer overflow. */
168 static long
sequential(mpfr_ptr x)169 sequential (mpfr_ptr x)
170 {
171 long expt, prec;
172
173 prec = mpfr_get_prec (x);
174 expt = mpfr_get_exp (x);
175 mpfr_mul_2si (x, x, prec - expt, MPFR_RNDN);
176
177 return expt * (1 << (prec - 1)) + mpfr_get_si (x, MPFR_RNDN);
178 }
179
180 /* The chi-squared test on low precision normal deviates. wprec is the working
181 * precision for the chi-squared calculation. prec is the precision for the
182 * sampling; choose this in [2,5]. The bins consist of all the possible
183 * deviate values in the range [xmin, xmax] coupled with the value of inexact.
184 * Thus with prec = 2, the bins are
185 * ...
186 * (7/16, 1/2) x = 1/2, inexact = +1
187 * (1/2 , 5/8) x = 1/2, inexact = -1
188 * (5/8 , 3/4) x = 3/4, inexact = +1
189 * (3/4 , 7/8) x = 3/4, inexact = -1
190 * (7/8 , 1 ) x = 1 , inexact = +1
191 * (1 , 5/4) x = 1 , inexact = -1
192 * (5/4 , 3/2) x = 3/2, inexact = +1
193 * (3/2 , 7/4) x = 3/2, inexact = -1
194 * ...
195 * In addition, two bins are allocated for [0,xmin) and (xmax,inf).
196 *
197 * This test is applied to the absolute values of the deviates. The sign is
198 * tested by test_nrandom_chisq_cont. In any case, the way the sign is
199 * assigned in mpfr_nrandom is trivial. In addition, the sampling is with
200 * MPFR_RNDN. This is the rounding mode which elicits the most information.
201 * trandom_deviate includes checks on the consistency of the results extracted
202 * from a random_deviate with other rounding modes. */
203 static double
test_nrandom_chisq_disc(long num,mpfr_prec_t wprec,int prec,double xmin,double xmax,int verbose)204 test_nrandom_chisq_disc (long num, mpfr_prec_t wprec, int prec,
205 double xmin, double xmax, int verbose)
206 {
207 mpfr_t x, v, pa, pb, z, t;
208 mpfr_rnd_t rnd;
209 int i, inexact, nu;
210 long *counts;
211 long k, seqmin, seqmax, seq;
212 double Q, chisq;
213
214 rnd = MPFR_RNDN;
215 mpfr_init2 (x, prec);
216 mpfr_init2 (v, prec+1);
217 mpfr_inits2 (wprec, pa, pb, z, t, (mpfr_ptr) 0);
218
219 mpfr_set_d (x, xmin, rnd);
220 xmin = mpfr_get_d (x, rnd);
221 mpfr_set (v, x, rnd);
222 seqmin = sequential (x);
223 mpfr_set_d (x, xmax, rnd);
224 xmax = mpfr_get_d (x, rnd);
225 seqmax = sequential (x);
226
227 /* Two bins for each sequential number (for inexact = +/- 1), plus 1 for u <
228 * umin and 1 for u > umax, minus 1 for degrees of freedom */
229 nu = 2 * (seqmax - seqmin + 1) + 2 - 1;
230 counts = (long *) tests_allocate ((nu + 1) * sizeof (long));
231 for (i = 0; i <= nu; i++)
232 counts[i] = 0;
233
234 for (k = 0; k < num; ++k)
235 {
236 inexact = mpfr_nrandom (x, RANDS, rnd);
237 if (mpfr_signbit (x))
238 {
239 inexact = -inexact;
240 mpfr_setsign (x, x, 0, rnd);
241 }
242 /* Don't call sequential with small args to avoid undefined behavior with
243 * zero and possibility of overflow. */
244 seq = mpfr_greaterequal_p (x, v) ? sequential (x) : seqmin - 1;
245 ++counts[seq < seqmin ? 0 :
246 seq <= seqmax ? 2 * (seq - seqmin) + 1 + (inexact > 0 ? 0 : 1) :
247 nu];
248 }
249
250 mpfr_set_zero (v, 1);
251 normal_cumulative (pa, v, rnd);
252 /* Cycle through all the bin boundaries using mpfr_nextabove at precision
253 * prec + 1 starting at mpfr_nextbelow (xmin) */
254 mpfr_set_d (x, xmin, rnd);
255 mpfr_set (v, x, rnd);
256 mpfr_nextbelow (v);
257 mpfr_nextbelow (v);
258 mpfr_set_zero (t, 1);
259 for (i = 0; i <= nu; ++i)
260 {
261 if (i < nu)
262 mpfr_nextabove (v);
263 else
264 mpfr_set_inf (v, 1);
265 normal_cumulative (pb, v, rnd);
266 mpfr_sub (pa, pb, pa, rnd);
267
268 /* Compute z = counts[i] - num * p; t += z * z / (num * p). 2*num to
269 * account for the fact the p needs to be doubled since we are
270 * considering only the absolute value of the deviates. */
271 mpfr_mul_ui (pa, pa, 2*num, rnd);
272 mpfr_ui_sub (z, counts[i], pa, rnd);
273 mpfr_sqr (z, z, rnd);
274 mpfr_div (z, z, pa, rnd);
275 mpfr_add (t, t, z, rnd);
276 mpfr_swap (pa, pb); /* i.e., pa = pb */
277 }
278
279 chisq = mpfr_get_d (t, rnd);
280 chisq_prob (t, nu, t);
281 Q = mpfr_get_d (t, rnd);
282 if (verbose)
283 {
284 printf ("num = %ld, discrete (prec = %d) bins in [%.6f, %.2f], "
285 "nu = %d: chisq = %.2f\n", num, prec, xmin, xmax, nu, chisq);
286 if (Q < 0.05)
287 printf (" WARNING: probability (less than 5%%) = %.2e\n", Q);
288 }
289
290 tests_free (counts, (nu + 1) * sizeof (long));
291 mpfr_clears (x, v, pa, pb, z, t, (mpfr_ptr) 0);
292 return Q;
293 }
294
295 static void
run_chisq(double (* f)(long,mpfr_prec_t,int,double,double,int),long num,mpfr_prec_t prec,int bin,double xmin,double xmax,int verbose)296 run_chisq (double (*f)(long, mpfr_prec_t, int, double, double, int),
297 long num, mpfr_prec_t prec, int bin,
298 double xmin, double xmax, int verbose)
299 {
300 double Q, Qcum, Qbad, Qthresh;
301 int i;
302
303 Qcum = 1;
304 Qbad = 1.e-9;
305 Qthresh = 0.01;
306 for (i = 0; i < 3; ++i)
307 {
308 Q = (*f)(num, prec, bin, xmin, xmax, verbose);
309 Qcum *= Q;
310 if (Q > Qthresh)
311 return;
312 else if (Q < Qbad)
313 {
314 printf ("Error: mpfr_nrandom chi-squared failure "
315 "(prob = %.2e)\n", Q);
316 exit (1);
317 }
318 num *= 10;
319 Qthresh /= 10;
320 }
321 if (Qcum < Qbad) /* Presumably this is true */
322 {
323 printf ("Error: mpfr_nrandom combined chi-squared failure "
324 "(prob = %.2e)\n", Qcum);
325 exit (1);
326 }
327 }
328
329 int
main(int argc,char * argv[])330 main (int argc, char *argv[])
331 {
332 long nbtests;
333 int verbose;
334
335 tests_start_mpfr ();
336
337 verbose = 0;
338 nbtests = 100000;
339 if (argc > 1)
340 {
341 long a = atol (argv[1]);
342 verbose = 1;
343 if (a != 0)
344 nbtests = a;
345 }
346
347 run_chisq (test_nrandom_chisq_cont, nbtests, 64, 60, -4, 4, verbose);
348 run_chisq (test_nrandom_chisq_disc, nbtests, 64, 2, 0.0005, 3, verbose);
349 run_chisq (test_nrandom_chisq_disc, nbtests, 64, 3, 0.002, 4, verbose);
350 run_chisq (test_nrandom_chisq_disc, nbtests, 64, 4, 0.004, 4, verbose);
351
352 tests_end_mpfr ();
353 return 0;
354 }
355