1// $Id: gaussTables.src,v 1.3 2003/08/13 20:00:12 garren Exp $
2// -*- C++ -*-
3//
4// gaussTables.src
5//
6// This program creates the data tables that RandGauss will use to
7// interpolate from the flat random R to the gaussian unit normal G.
8// The tables are produced in files gaussTables.cdat.  The data is
9// organized into values and derivatives (for a cubic spline interpolation)
10// at 5 ranges of values of R:
11//	by 2.0E-13 up to 4.0E-11
12//	by 4.0E-11 up to 1.0E-8
13//	by 1.0E-8  up to 2.0E-6
14//	by 2.0E-6  up to 5.0E-4
15//	by 5.0E-4  up to .5
16//
17// These are included from RandGauss.cc in the fire() routine, for example,
18//	static const double gaussTables[] = {
19//    include "gaussTables.cdat"
20//      }
21//
22// At the end of this file is a lengthy comment describing just what is going
23// on mathematically.
24//
25// main         - Sets up TableInstructions then opens the output file and
26//		  lets writeTabels do its thing.
27// writeTables 	- Driver to create and write the gaussTables.cdat file by
28//		  calling tabulateCDFvsX and then many calls to interpolate.
29// tabulateCDFvsX - Integrate the pdf to create a table of cumulative density
30//		    function (CDF) vs X with uniform spacing in X.
31// interpolate  - Given the CDF values produced by tabulateCDFvsX, find X for
32//		  a specified value of CDF (not necessarily at a node).
33// pdf          - The gaussian distribution probability density function.
34// approxErrInt - Does an approximation of the error integral.
35//
36// This file need be compiled only once at one location; the gaussTables.cdat
37// generated is completely portable.  Therefore, one time only, gaussTables.src
38// has been renamed gaussTables.cc, and compiled and run to create
39// gaussTables.cdat.  The source gaussTables.src is provided with CLHEP as a
40// mode of concrete documentation of how those numers were computed.
41//
42// This file is stylistically not suitable for distribution as a reusable
43// module.
44//
45// 9/21/99   mf	Created, with traces for seeing intermediate results.
46// 9/24/99   mf Traces removed, the results being valid.  The file
47//		gaussTables.cc-trace still contains those traces.
48// 10/25/99  mf	Having been used (from test area) to create gaussTables.cdat,
49//		gaussTables.cc was renamed .src and moved to Random area.
50// --------------------------------------------------------------
51
52#include "CLHEP/Random/defs.h"
53#include "CLHEP/Units/PhysicalConstants.h"
54#include <iostream>
55#include <fstream>
56#include <iomanip>
57
58#define IOS_OK
59#ifdef  IOS_OK
60#include <ios.h>
61#endif
62
63using std::cout;
64
65class TableInstructions {
66public:
67  double startingX;		// Value of first X in table of CDF vs X
68				//
69  double intervalX;		// (Uniform) spacing of X's in table of CDF
70				//
71  int    NCDFvsX;		// Number of entries in table of CDF vs X
72				//
73  int	 NnextXmin;		// Number of X point matching first X point
74				//   in the next table of CDF vs X
75  double startingCDF;		// Desired first CDF point to fill in table of
76				//    X vs CDF value
77  double intervalCDF;		// (Uniform) spacing in table of X vs CDF
78				//
79  int    NCDF;			// Number of entries in table of X vs CDF
80};
81
82double pdf (double x);
83
84void writeTables ( std::ostream & output, int nTables,
85			const TableInstructions tables[] );
86
87void tabulateCDFvsX (double CDFbelow,
88		     double startingX,
89		     double intervalX,
90		     int N,
91		     double pdf(double x),
92		     double* CDFTable,
93		     int NnextXmin,
94		     double & nextCDFbelow );
95
96double interpolate ( double yStart, double Yspacing, int N, double* xTable,
97			 double pdf(double xx), double x, int fi, int& newfi );
98
99double approxErrInt (double v);
100
101
102int main() {
103
104  TableInstructions tables[6];
105
106  // Set up for computing each table:
107  //
108  // For each table, we will have about 10000 integration steps
109  // in the actual range, and more outside for safety.
110
111  const double Xstep = .0002;
112  const int    Xints =  500;	// number of steps in an interval of .1
113				// 5000 points per unit X is found to give
114				// the best accuracy.
115
116  //	by 2.0E-13 up to 4.0E-11	200 R values
117
118  tables[0].startingX   = -7.5;		// errInt (-7.5) ~ 3.0E-14
119  tables[0].intervalX   = Xstep;
120  tables[0].NCDFvsX     = 12*Xints;	// errInt (-6.3) ~ 1.5E-10
121  tables[0].NnextXmin   =  9*Xints;	// to place it at -6.6
122  tables[0].startingCDF = 2.0E-13;
123  tables[0].intervalCDF = 2.0E-13;
124  tables[0].NCDF        = 200;
125
126  //	by 4.0E-11 up to 1.0E-8		250 R values
127
128  tables[1].startingX   = -6.6;		// errInt (-6.6) ~ 2.1E-11
129  tables[1].intervalX   = Xstep;
130  tables[1].NCDFvsX     = 12*Xints;	// errInt (-5.4) ~ 3.4E-8
131  tables[1].NnextXmin   =  9*Xints;	// to place it at -5.7
132  tables[1].startingCDF = 4.0E-11;
133  tables[1].intervalCDF = 4.0E-11;
134  tables[1].NCDF        = 250;
135
136  //	by 1.0E-8  up to 2.0E-6		200 R values
137
138  tables[2].startingX   = -5.7;		// errInt (-5.7) ~ 6.2E-9
139  tables[2].intervalX   = Xstep;
140  tables[2].NCDFvsX     = 12*Xints;	// errInt (-4.5) ~ 4.0E-6
141  tables[2].NnextXmin   =  9*Xints;	// to place it at -4.8
142  tables[2].startingCDF = 1.0E-8;
143  tables[2].intervalCDF = 1.0E-8;
144  tables[2].NCDF        = 200;
145
146  //	by 2.0E-6  up to 5.0E-4		 250 R values
147
148  tables[3].startingX   = -4.8;		// errInt (-4.8) ~ 8.3E-7
149  tables[3].intervalX   = Xstep;
150  tables[3].NCDFvsX     = 16*Xints;	// errInt (-3.2) ~ 7.4E-4
151  tables[3].NnextXmin   = 12*Xints;	// to place it at -3.6
152  tables[3].startingCDF = 2.0E-6;
153  tables[3].intervalCDF = 2.0E-6;
154  tables[3].NCDF        = 250;
155
156  //	by 5.0E-4  up to .5		1000 R values
157
158  tables[4].startingX   = -3.6;		// errInt (-3.6) ~ 1.7E-4
159  tables[4].intervalX   = Xstep;
160  tables[4].NCDFvsX     = 37*Xints;	// errInt (+0.1) > .5
161  tables[4].NnextXmin   = 0;		// arbitrary, since value won't be used
162  tables[4].startingCDF = 5.0E-4;
163  tables[4].intervalCDF = 5.0E-4;
164  tables[4].NCDF        = 1000;
165
166  //    by 5.0E-4  up to .5 (reverse)	1000 R values
167
168  tables[5].startingX   = 0.0;
169  tables[5].intervalX   = -Xstep;
170  tables[5].NCDFvsX     = 36*Xints;	// errInt (-3.6) ~ 1.7E-4
171  tables[5].NnextXmin   = 0;		// arbitrary, since value won't be used
172  tables[5].startingCDF = 5.0E-4;
173  tables[5].intervalCDF = 5.0E-4;
174  tables[5].NCDF        = 1000;
175
176  // Open the output file
177
178  std::ofstream output ( "gaussTables.cdat", std::ios::out );
179
180  // Write the tables
181
182  writeTables ( output, 5, tables );
183
184  cout << " Tables completed \n";
185
186  return 0;
187}
188
189double pdf (double x) {
190  return exp(-x*x/2.0)/sqrt(CLHEP::twopi);
191}
192
193
194void writeTables ( std::ostream & output, int nTables,
195			const TableInstructions tables[] ) {
196  // Write nTables tables out to output, guided by the tables[] instructions.
197
198  double nextCDFbelow = 0;
199
200  for ( int nt = 0; nt < nTables; nt++ ) {
201
202    TableInstructions insts = tables[nt];
203
204    // **** First, tabulate the integrated cdf - which will be matched to r -
205    //	    vs X (the limit of integration) which will be used as the deviate.
206
207    double CDFbelow;
208    if (nt == 0) {
209      CDFbelow = approxErrInt ( insts.startingX );
210    } else {
211      CDFbelow = nextCDFbelow;
212      cout << "approxErrInt(" << insts.startingX << ") = "
213	   << approxErrInt(insts.startingX) << "   CDFbelow = "
214	   << CDFbelow << "\n";
215    }
216
217    int NCDFvsX = insts.NCDFvsX;
218    double * CDFtable = new double [NCDFvsX];
219    tabulateCDFvsX (CDFbelow, insts.startingX, insts.intervalX,
220		NCDFvsX, pdf, CDFtable, insts.NnextXmin, nextCDFbelow );
221
222    // CDFtable now contains NCDFvsX points, each representing the CDF
223    // at a point X in a uniformly spaced set of X's.  Also, nextCDFbelow
224    // now contains the CDF at the desired point for the next interval.
225
226    // **** From this table of CDF values, fill in the values of inverse CDF
227    //      at each of the specified r points, by interpolation.
228
229	// Two things could go wrong here:
230	// 1- The r points might lie outside the range of f(r) given in the
231	//    instructions).  In that case, we will report it and punt.
232	// 2- The estimated error of interpolation may be unacceptably large.
233	//    In that case, we could go back and halve the spacing in the table.
234 	//    However, we have computed in advance the necessary spacing, so
235	//    we this error will always be acceptably small.
236
237    if ( (insts.startingCDF + (insts.NCDF-1)*insts.intervalCDF) >
238						CDFtable [NCDFvsX-1] ) {
239	cout << "Problem in table " << nt << ": r outside range tabulated\n";
240	exit(1);
241    }
242
243    double * Xtable = new double [insts.NCDF];
244
245    int fi = 0;
246    int newfi;
247
248    int i;
249    double r;
250    double f;
251    for ( i = 0; i < insts.NCDF; i++ ) {
252      r = insts.startingCDF + i * insts.intervalCDF;
253      f = interpolate
254		( insts.startingX, insts.intervalX, insts.NCDFvsX, CDFtable,
255		  pdf, r, fi, newfi );
256      Xtable[i] = f;
257      fi = newfi;
258    }
259
260
261    // **** For just the last table, correct the endpoint by
262    //	    tabulating CDF backward from 0, re-forming the Xtable,
263    //      and takingthe appropriate linear combination of the two.
264    if (nt == nTables-1) {
265      insts = tables[nTables];
266      NCDFvsX = insts.NCDFvsX;
267      tabulateCDFvsX (.5, insts.startingX, insts.intervalX,
268		NCDFvsX, pdf, CDFtable, insts.NnextXmin, nextCDFbelow );
269      // This table is not usable for the interpolation because it is backward:
270      // reverse it!
271      int n1; int n2;
272      double temp;
273      for ( n1 = 0, n2 = NCDFvsX -1; n1 < n2; n1++, n2-- ) {
274	temp = CDFtable[n1];
275	CDFtable[n1] = CDFtable[n2];
276	CDFtable[n2] = temp;
277      }
278      // Form a second Xtable, just the way you did the first
279      double * Xtable2 = new double [insts.NCDF];
280      fi = 0;
281      for ( i = 0; i < insts.NCDF; i++ ) {
282        r = insts.startingCDF + i * insts.intervalCDF;
283	// Note in interpolation that the MEANING of the dependent (X)
284	// points in the CDFtable does not depend on startingX and
285	// intervalX in the same way as before, since these values would
286	// give the reversed table.
287        f = interpolate
288		( insts.startingX+insts.intervalX*(insts.NCDFvsX-1),
289		  -insts.intervalX, insts.NCDFvsX, CDFtable,
290		  pdf, r, fi, newfi );
291        Xtable2[i] = f;
292        fi = newfi;
293      }
294      // Finally, correct the Xtable based on Xtable2 near .5
295      double top    = insts.startingCDF + (insts.NCDF-1) * insts.intervalCDF;
296      double bottom = insts.startingCDF;
297      for ( i = 0; i < insts.NCDF; i++ ) {
298        r = insts.startingCDF + i * insts.intervalCDF;
299        Xtable[i] = ( Xtable2[i] * (r - bottom) +
300		      Xtable[i]  * (top - r) ) / ( top - bottom );
301      }
302      delete Xtable2;
303    } // End of correction for last table.
304
305    // **** Write this table in the proper format
306    //	    The table has value and derivative; the derivative is given by
307    //	    the reciprocal of the pdf (f(r) at each point.
308
309    output << "\n";
310    for ( i = 0; i < insts.NCDF; i++ ) {
311      double deriv;
312      r = insts.startingCDF + i * insts.intervalCDF;
313      f = Xtable[i];
314      deriv = 1.0/pdf(f);
315      output.setf(ios_base::fixed,ios_base::floatfield);
316      output.precision(17);
317      output << std::setw(25) << f << ",      "
318		<< std::setw(25) << deriv;
319      output.setf(0,ios_base::floatfield);
320      output.precision(6);
321      output << ",      // " << r << "\n";
322    }
323
324    output << "\n";
325
326    delete Xtable;
327    delete CDFtable;
328
329  } // Now on to the next table.
330
331} // writeTables
332
333
334
335void tabulateCDFvsX (double CDFbelow,
336		     double startingX,
337		     double intervalX,
338		     int N,
339		     double pdf(double x),
340		     double* CDFtable,
341		     int NnextXmin,
342		     double & nextCDFbelow )
343{
344  // Create a table of integral from -infinity to f of pdf(x) dx, where we
345  // assume the integral from -infinity to the startingF is given by rBelow.
346
347  double f = startingX;
348  double integral = CDFbelow;
349  double halfStep = intervalX/2;
350  const double h_6 = intervalX / 6.0;
351
352  double* table = CDFtable;
353
354  double d1;
355  double d_midpt;
356  double d2;
357
358  d1 = pdf(f);
359  *table = integral;
360  table++;
361
362  for ( int i = 1; i<N; i++ ) {
363    d_midpt = pdf(f+halfStep);
364    f += intervalX;
365    d2 = pdf(f);
366    integral += (d1 + 4*d_midpt + d2) * h_6;
367    *table = integral;
368    table++;
369    d1 = d2;
370    if ( i == NnextXmin ) {
371      nextCDFbelow = integral;
372    }
373  }
374
375} // tabulateCDFvsX
376
377
378
379double interpolate ( double yStart, double Yspacing, int N, double* xTable,
380			 double pdf(double xx), double x, int fi, int& newfi )
381{
382  // Given a table xTable of X values, for which the corresponding Y values
383  // are equal to yS, yS + Yspacing, ... yS + (N-1)*Yspacing; and a function
384  // pdf which represents a derivative
385
386  // Find the highest point j such that x[j] does not exceed x (or
387  // use point N-2 if j is larger than that x[N-2]).  Start from fi; and leave
388  // newfi as that end point.
389
390  if (xTable[fi]  > x) {
391    cout << "xTable[fi] too large?? \n";
392    cout << "fi = " << fi << " xTable[fi] = " << xTable[fi]
393	 << " x = " << x << "\n";
394    exit(1);
395  }
396  int j;
397  for ( j = fi; j < N-1; j++ ) {
398    if ( xTable[j] > x ) break;
399  }
400  j = j-1;
401  newfi = j;
402
403  // Perform the interpolation:
404
405  double  y0 = yStart +   j  *Yspacing;
406  double  y1 = yStart + (j+1)*Yspacing;
407  double  d0 = 1.0/pdf(y0);
408  double  d1 = 1.0/pdf(y1);
409
410  double x0 = xTable[j];
411  double x1 = xTable[j+1];
412
413  double  h = x1 - x0;
414
415  double  dx = (x - x0)/h;
416  double  x2 = dx * dx;
417  double  oneMinusX = 1 - dx;
418  double  oneMinusX2 = oneMinusX * oneMinusX;
419
420  double  f0 = (2. * dx + 1.) * oneMinusX2;
421  double  f1 = (3. - 2. * dx) * x2;
422  double  g0 =  h * dx * oneMinusX2;
423  double  g1 =  - h * oneMinusX * x2;
424
425  double answer;
426
427  answer = f0 * y0 + f1 * y1 + g0 * d0 + g1 * d1 ;
428
429  return answer;
430
431} // interpolate
432
433
434double approxErrInt (double v) {
435
436  // To use this routine, v MUST be negative and for accuracy should be < -4.
437
438  // First approximation is e**(-v**2/2) / ( v* sqrt(2PI) )
439
440  double errInt = exp(-v*v/2);
441  errInt = - errInt / ( v*sqrt(CLHEP::twopi) );
442
443  // correction factor of
444  // 	(1 - 1/v**2 + 3/v**4 - 3*5/v**6 + ... -3*5*7*9*11*13/v**14)
445
446  double correction = 1;
447  for ( int n = 12; n >= 0; n -= 2 ) {
448    correction = 1.0 - correction*(n+1)/(v*v);
449  }
450
451  errInt *= correction;
452
453  // This is accurate to 1 part in 2000 for v < -4, and one part in
454  // 50,000 for v < -5.  Since we are using it at v = -7.5, the accuracy is
455  // a part in 8E-8, but since this is multiplied by 3E-14, the error
456  // introduced into our returned deviates is about a 2E-21 or less.  This
457  // of course is much less than the machine epsilon.
458
459  //  cout << "errInt (" << v << ") = " << errInt << "\n";
460
461  return errInt;
462}
463
464
465#ifdef STRATEGY
466
467What RandGauss will try to do is, given a value r, produce an estimate
468of f such that
469
470  integral (-infinity, f, pdf(x)dx) = r,
471
472where pdf(x) is the gaussian distribution exp(-x**2/2)/srqt(2*PI).
473
474Excising details, the strategy is:
475
476The first step is to build up a table of
477  integral (-infinity, x, pdf(v)dv)  vs  x.
478The table goes in steps of n*some small epsilon.
479(We call it CDFtable, and compute it in tabulateCDFvsX.)
480
481From this table, we can find values of x corresponding to the particular
482values of cdf that appear in the table.  From this, we can find for a given
483r the hypthetical X value that would give that r as the cdf.  (This is computed
484in interpolate().)
485
486Thus we can build a table of inverse_CDF(r) vs r, for r at our desired
487spaced intervals, by interpolation using each r point.  We call this table
488Xtable, since the inverse CDF or r is the number that RandGauss needs to return
489as the deviate when supplied with the uniform random r.
490
491We can also place the derivatives of this function in the table; these are
492needed when the distribution is fired.
493
494When RandGauss is fired, we will find the appropriate realm and do cubic spline
495interpolation there.
496
497
498- - - - -
499
500The first step is to build up a table of
501
502  integral (-infinity, x, pdf(v)dv)  vs  x.
503
504This is done by 4-th order numerical integration, using a fine spacing.
505How do we handle the limit at -infinity?  We will start with some estimate for
506  integral (-infinity, v, pdf(x)dx)
507for a fairly negative value of v, as follows:
508Fortuitously, for reasons of accuracy at small r, we are breaking the
509problem into multiplt realms anyway.  For each realm of calculation
510other than the first, we have the integral as computed in the previous realm.
511For the first realm, we take our largest negative f, and apply the asymptotic
512formulat for the error integral, which is:
513
514  integral (-infinity, v, pdf(x)dx)
515	= exp(-x**2/2)*(1-1/v**2+3/v**4-3*5/v**6...)/srqt(2*PI*v)
516
517In the range of r this small, all we care about is a reasonable relative
518error, since the absolute error will be very small (and the tail will thus
519work properly).
520
521The table goes in steps of n*some small epsilon.  For the main table, the
522step size has a natural limit:  The integration error will go as h**4 in each
523step, but will never be smaller than machine_epsilon times the step
524contribution.  What this tells us is that beyond about some number of (~8000)
525points, your roundoff error is the dominant factor and increasing your
526granularity will probably hurt more than help.  The observed behavor is that
527the accuracy - measured by the difference between values for N and 2N points -
528is best at a step size of about 1/5000.
529
530From this table, we can find values of x corresponding to the particular
531values of cdf that appear in the table.  From this, we can find for a given
532r the hypothetical X that would be give that r value as the cdf.
533The appropriate interpolation scheme is the cubic spline, since that gives
534us full accuracy with the spacing already present.
535
536	[ This step is done in nextInterpolate ]
537
538Thus we can build a table of inverse_CDF(r) vs r, for r at our desired
539spaced intervals, by interpolation using each r point.  We can also place
540the derivatives of this function in the table; these are needed when the
541distribution is fired.  The derivatives are merely 1/pdf(that inverse_CDF).
542In principle, the fire() routine could calculate this; for efficiency, it is
543pre-tabulated.
544
545Although this is a double precision routine, for most values of r, accuracy of
5462**-40 is perfectly OK (in fact, distortion at the level of one part in 2**30
547would be undetectable in several year's running).  However, for very small
548values of r, we need to do a bit better so as to have the correct shape of the
549distribution tail.
550
551So our realms are intervals of roughly 2**-11, -19, -27, -35 and -42.  Since we
552will only need to tabulate up to r = .5, the total data amounts to 32K bytes.
553
554A final subtlety:  We know that when 5=.5, the value of inverse CDF is exactly
555zero.  But the integration leaves it at some small number (3E-12) instead.
556We can fix this without introducing discontinuities by redoing the last table
557starting from the top being exactly 0 and working backward, re-formulating a
558second table of X vs R, and then taking as our final table a linear combination
559which is the backward one at .5 and the forward one at .0005.
560
561When RandGauss is fired, we will find the appropriate realm and do cubic spline
562interpolation there.  If r < 2**-42, we will instead iteratively apply the
563asymptotic formula to solve for v; this is time-consuming but would happen only
564a vanishingly small fraction of the time.
565
566#endif
567