1 // $Id: RandPoissonQ.cc,v 1.7 2010/06/16 17:24:53 garren Exp $
2 // -*- C++ -*-
3 //
4 // -----------------------------------------------------------------------
5 //                             HEP Random
6 //                         --- RandPoissonQ ---
7 //                      class implementation file
8 // -----------------------------------------------------------------------
9 
10 // =======================================================================
11 // M. Fischler    - Implemented new, much faster table-driven algorithm
12 //		    applicable for mu < 100
13 //		  - Implemented "quick()" methods, shich are the same as the
14 //		    new methods for mu < 100 and are a skew-corrected gaussian
15 //		    approximation for large mu.
16 // M. Fischler	  - Removed mean=100 from the table-driven set, since it
17 //		    uses a value just off the end of the table.  (April 2004)
18 // M Fischler     - put and get to/from streams 12/15/04
19 // M Fischler     - Utilize RandGaussQ rather than RandGauss, as clearly
20 //		    intended by the inclusion of RandGaussQ.h.  Using RandGauss
21 //		    introduces a subtle trap in that the state of RandPoissonQ
22 //		    can never be properly captured without also saveing the
23 //		    state of RandGauss!  RandGaussQ is, on the other hand,
24 //		    stateless except for the engine used.
25 // M Fisculer	  - Modified use of wrong engine when shoot (anEngine, mean)
26 //		    is called.  This flaw was preventing any hope of proper
27 //		    saving and restoring in the instance cases.
28 // M Fischler     - fireArray using defaultMean 2/10/05
29 // M Fischler	      - put/get to/from streams uses pairs of ulongs when
30 //			+ storing doubles avoid problems with precision
31 //			4/14/05
32 // M Fisculer	  - Modified use of shoot (mean) instead of
33 //		    shoot(getLocalEngine(), mean) when fire(mean) is called.
34 //		    This flaw was causing bad "cross-talk" between modules
35 //		    in CMS, where one used its own engine, and the other
36 //		    used the static generator.  10/18/07
37 //
38 // =======================================================================
39 
40 #include "CLHEP/Random/defs.h"
41 #include "CLHEP/Random/RandPoissonQ.h"
42 #include "CLHEP/Random/RandGaussQ.h"
43 #include "CLHEP/Random/DoubConv.hh"
44 #include "CLHEP/Random/Stat.h"
45 #include "CLHEP/Utility/thread_local.h"
46 #include <cmath>	// for std::pow()
47 
48 namespace CLHEP {
49 
name() const50 std::string RandPoissonQ::name() const {return "RandPoissonQ";}
engine()51 HepRandomEngine & RandPoissonQ::engine() {return RandPoisson::engine();}
52 
53 // Initialization of static data:  Note that this is all const static data,
54 // so that saveEngineStatus properly saves all needed information.
55 
56   // The following MUST MATCH the corresponding values used (in
57   // poissonTables.cc) when poissonTables.cdat was created.
58 
59 const double RandPoissonQ::FIRST_MU = 10;// lowest mu value in table
60 const double RandPoissonQ::LAST_MU =  95;// highest mu value
61 const double RandPoissonQ::S = 5;        // Spacing between mu values
62 const int RandPoissonQ::BELOW = 30;      // Starting point for N is at mu - BELOW
63 const int RandPoissonQ::ENTRIES = 51;    // Number of entries in each mu row
64 
65 const double RandPoissonQ::MAXIMUM_POISSON_DEVIATE = 2.0E9;
66 	// Careful -- this is NOT the maximum number that can be held in
67 	// a long.  It actually should be some large number of sigma below
68 	// that.
69 
70   // Here comes the big (9K bytes) table, kept in a file of
71   // ENTRIES * (FIRST_MU - LAST_MU + 1)/S doubles
72 
73 static const double poissonTables [ 51 * ( (95-10)/5 + 1 ) ] = {
74 #include "poissonTables.cdat"
75 };
76 
77 
78 //
79 // Constructors and destructors:
80 //
81 
~RandPoissonQ()82 RandPoissonQ::~RandPoissonQ() {
83 }
84 
setupForDefaultMu()85 void RandPoissonQ::setupForDefaultMu() {
86 
87   // The following are useful for quick approximation, for large mu
88 
89   double sig2 = defaultMean * (.9998654 - .08346/defaultMean);
90   sigma = std::sqrt(sig2);
91 	// sigma for the Guassian which approximates the Poisson -- naively
92 	// std::sqrt (defaultMean).
93 	//
94 	// The multiplier corrects for fact that discretization of the form
95 	// [gaussian+.5] increases the second moment by a small amount.
96 
97   double t = 1./(sig2);
98 
99   a2 = t/6 + t*t/324;
100   a1 = std::sqrt (1-2*a2*a2*sig2);
101   a0 = defaultMean + .5 - sig2 * a2;
102 
103   // The formula will be a0 + a1*x + a2*x*x where x has 2nd moment of sigma.
104   // The coeffeicients are chosen to match the first THREE moments of the
105   // true Poisson distribution.
106   //
107   // Actually, if the correction for discretization were not needed, then
108   // a2 could be taken one order higher by adding t*t*t/5832.  However,
109   // the discretization correction is not perfect, leading to inaccuracy
110   // on the order to 1/mu**2, so adding a third term is overkill.
111 
112 } // setupForDefaultMu()
113 
114 
115 //
116 // fire, quick, operator(), and shoot methods:
117 //
118 
shoot(double xm)119 long RandPoissonQ::shoot(double xm) {
120   return shoot(getTheEngine(), xm);
121 }
122 
operator ()()123 double RandPoissonQ::operator()() {
124   return (double) fire();
125 }
126 
operator ()(double mean)127 double RandPoissonQ::operator()( double mean ) {
128   return (double) fire(mean);
129 }
130 
fire(double mean)131 long RandPoissonQ::fire(double mean) {
132   return shoot(getLocalEngine(), mean);
133 }
134 
fire()135 long RandPoissonQ::fire() {
136   if ( defaultMean < LAST_MU + S ) {
137     return poissonDeviateSmall ( getLocalEngine(), defaultMean );
138   } else {
139     return poissonDeviateQuick ( getLocalEngine(), a0, a1, a2, sigma );
140   }
141 } // fire()
142 
shoot(HepRandomEngine * anEngine,double mean)143 long RandPoissonQ::shoot(HepRandomEngine* anEngine, double mean) {
144 
145   // The following variables, static to this method, apply to the
146   // last time a large mean was supplied; they obviate certain calculations
147   // if consecutive calls use the same mean.
148 
149   static CLHEP_THREAD_LOCAL double lastLargeMean = -1.;	// Mean from previous shoot
150 					// requiring poissonDeviateQuick()
151   static CLHEP_THREAD_LOCAL double lastA0;
152   static CLHEP_THREAD_LOCAL double lastA1;
153   static CLHEP_THREAD_LOCAL double lastA2;
154   static CLHEP_THREAD_LOCAL double lastSigma;
155 
156   if ( mean < LAST_MU + S ) {
157     return poissonDeviateSmall ( anEngine, mean );
158   } else {
159     if ( mean != lastLargeMean ) {
160       // Compute the coefficients defining the quadratic transformation from a
161       // Gaussian to a Poisson for this mean.  Also save these for next time.
162       double sig2 = mean * (.9998654 - .08346/mean);
163       lastSigma = std::sqrt(sig2);
164       double t = 1./sig2;
165       lastA2 = t*(1./6.) + t*t*(1./324.);
166       lastA1 = std::sqrt (1-2*lastA2*lastA2*sig2);
167       lastA0 = mean + .5 - sig2 * lastA2;
168     }
169     return poissonDeviateQuick ( anEngine, lastA0, lastA1, lastA2, lastSigma );
170   }
171 
172 } // shoot (anEngine, mean)
173 
shootArray(const int size,long * vect,double m)174 void RandPoissonQ::shootArray(const int size, long* vect, double m) {
175   for( long* v = vect; v != vect + size; ++v )
176     *v = shoot(m);
177      // Note: We could test for m > 100, and if it is, precompute a0, a1, a2,
178      // and sigma and call the appropriate form of poissonDeviateQuick.
179      // But since those are cached anyway, not much time would be saved.
180 }
181 
fireArray(const int size,long * vect,double m)182 void RandPoissonQ::fireArray(const int size, long* vect, double m) {
183   for( long* v = vect; v != vect + size; ++v )
184     *v = fire( m );
185 }
186 
fireArray(const int size,long * vect)187 void RandPoissonQ::fireArray(const int size, long* vect) {
188   for( long* v = vect; v != vect + size; ++v )
189     *v = fire( defaultMean );
190 }
191 
192 
193 // Quick Poisson deviate algorithm used by quick for large mu:
194 
poissonDeviateQuick(HepRandomEngine * e,double mu)195 long RandPoissonQ::poissonDeviateQuick ( HepRandomEngine *e, double mu ) {
196 
197   // Compute the coefficients defining the quadratic transformation from a
198   // Gaussian to a Poisson:
199 
200   double sig2 = mu * (.9998654 - .08346/mu);
201   double sig = std::sqrt(sig2);
202 	// The multiplier corrects for fact that discretization of the form
203 	// [gaussian+.5] increases the second moment by a small amount.
204 
205   double t = 1./sig2;
206 
207   double sa2 = t*(1./6.) + t*t*(1./324.);
208   double sa1 = std::sqrt (1-2*sa2*sa2*sig2);
209   double sa0 = mu + .5 - sig2 * sa2;
210 
211   // The formula will be sa0 + sa1*x + sa2*x*x where x has sigma of sq.
212   // The coeffeicients are chosen to match the first THREE moments of the
213   // true Poisson distribution.
214 
215   return poissonDeviateQuick ( e, sa0, sa1, sa2, sig );
216 }
217 
218 
poissonDeviateQuick(HepRandomEngine * e,double A0,double A1,double A2,double sig)219 long RandPoissonQ::poissonDeviateQuick ( HepRandomEngine *e,
220 		double A0, double A1, double A2, double sig) {
221 //
222 // Quick Poisson deviate algorithm used by quick for large mu:
223 //
224 // The principle:  For very large mu, a poisson distribution can be approximated
225 // by a gaussian: return the integer part of mu + .5 + g where g is a unit
226 // normal.  However, this yelds a miserable approximation at values as
227 // "large" as 100.  The primary problem is that the poisson distribution is
228 // supposed to have a skew of 1/mu**2, and the zero skew of the Guassian
229 // leads to errors of order as big as 1/mu**2.
230 //
231 // We substitute for the gaussian a quadratic function of that gaussian random.
232 // The expression looks very nearly like mu + .5 - 1/6 + g + g**2/(6*mu).
233 // The small positive quadratic term causes the resulting variate to have
234 // a positive skew; the -1/6 constant term is there to correct for this bias
235 // in the mean.  By adjusting these two and the linear term, we can match the
236 // first three moments to high accuracy in 1/mu.
237 //
238 // The sigma used is not precisely std::sqrt(mu) since a rounded-off Gaussian
239 // has a second moment which is slightly larger than that of the Gaussian.
240 // To compensate, sig is multiplied by a factor which is slightly less than 1.
241 
242   //  double g = RandGauss::shootQuick( e );   // TEMPORARY MOD:
243   double g = RandGaussQ::shoot( e );   // Unit normal
244   g *= sig;
245   double p = A2*g*g + A1*g + A0;
246   if ( p < 0 ) return 0;	// Shouldn't ever possibly happen since
247 				// mean should not be less than 100, but
248 				// we check due to paranoia.
249   if ( p > MAXIMUM_POISSON_DEVIATE ) p = MAXIMUM_POISSON_DEVIATE;
250 
251   return long(p);
252 
253 } // poissonDeviateQuick ()
254 
255 
256 
poissonDeviateSmall(HepRandomEngine * e,double mean)257 long RandPoissonQ::poissonDeviateSmall (HepRandomEngine * e, double mean) {
258   long N1;
259   long N2;
260   // The following are for later use to form a secondary random s:
261   double rRange; 	    // This will hold the interval between cdf for the
262 			    // computed N1 and cdf for N1+1.
263   double rRemainder = 0; // This will hold the length into that interval.
264 
265   // Coming in, mean should not be more than LAST_MU + S.  However, we will
266   // be paranoid and test for this:
267 
268   if ( mean > LAST_MU + S ) {
269     return RandPoisson::shoot(e, mean);
270   }
271 
272   if (mean <= 0) {
273     return 0;			// Perhaps we ought to balk harder here!
274   }
275 
276   // >>> 1 <<<
277   // 	Generate the first random, which we always will need.
278 
279   double r = e->flat();
280 
281   // >>> 2 <<<
282   // 	For small mean, below the start of the tables,
283   // 	do the series for cdf directly.
284 
285   // In this case, since we know the series will terminate relatively quickly,
286   // almost alwaye use a precomputed 1/N array without fear of overrunning it.
287 
288   static const double oneOverN[50] =
289   {    0,   1.,    1/2.,  1/3.,  1/4.,  1/5.,  1/6.,  1/7.,  1/8.,  1/9.,
290    1/10.,  1/11.,  1/12., 1/13., 1/14., 1/15., 1/16., 1/17., 1/18., 1/19.,
291    1/20.,  1/21.,  1/22., 1/23., 1/24., 1/25., 1/26., 1/27., 1/28., 1/29.,
292    1/30.,  1/31.,  1/32., 1/33., 1/34., 1/35., 1/36., 1/37., 1/38., 1/39.,
293    1/40.,  1/41.,  1/42., 1/43., 1/44., 1/45., 1/46., 1/47., 1/48., 1/49. };
294 
295 
296   if ( mean < FIRST_MU ) {
297 
298     long N = 0;
299     double term = std::exp(-mean);
300     double cdf = term;
301 
302     if ( r < (1 - 1.0E-9) ) {
303       //
304       // **** This is a normal path: ****
305       //
306       // Except when r is very close to 1, it is certain that we will exceed r
307       // before the 30-th term in the series, so a simple while loop is OK.
308       const double* oneOverNptr = oneOverN;
309       while( cdf <= r ) {
310         N++ ;
311         oneOverNptr++;
312         term *= ( mean * (*oneOverNptr) );
313         cdf  += term;
314       }
315       return N;
316       //
317       // **** ****
318       //
319     } else { // r is almost 1...
320       // For r very near to 1 we would have to check that we don't fall
321       // off the end of the table of 1/N.  Since this is very rare, we just
322       // ignore the table and do the identical while loop, using explicit
323       // division.
324       double cdf0;
325       while ( cdf <= r ) {
326         N++ ;
327         term *= ( mean / N );
328         cdf0 = cdf;
329         cdf  += term;
330 	if (cdf == cdf0) break; // Can't happen, but just in case...
331       }
332       return N;
333     } // end of if ( r compared to (1 - 1.0E-9) )
334 
335   } // End of the code for mean < FIRST_MU
336 
337   // >>> 3 <<<
338   // 	Find the row of the tables corresponding to the highest tabulated mu
339   //	which is no greater than our actual mean.
340 
341   int rowNumber = int((mean - FIRST_MU)/S);
342   const double * cdfs = &poissonTables [rowNumber*ENTRIES];
343   double mu = FIRST_MU + rowNumber*S;
344   double deltaMu = mean - mu;
345   int Nmin = int(mu - BELOW);
346   if (Nmin < 1) Nmin = 1;
347   int Nmax = Nmin + (ENTRIES - 1);
348 
349 
350   // >>> 4 <<<
351   // 	If r is less that the smallest entry in the row, then
352   //	generate the deviate directly from the series.
353 
354   if ( r < cdfs[0] ) {
355 
356     // In this case, we are tempted to use the actual mean, and not
357     // generate a second deviate to account for the leftover part mean - mu.
358     // That would be an error, generating a distribution with enough excess
359     // at Nmin + (mean-mu)/2 to be detectable in 4,000,000 trials.
360 
361     // Since this case is very rare (never more than .2% of the r values)
362     // and can happen where N will be large (up to 65 for the mu=95 row)
363     // we use explicit division so as to avoid having to worry about running
364     // out of oneOverN table.
365 
366     long N = 0;
367     double term = std::exp(-mu);
368     double cdf = term;
369     double cdf0;
370 
371     while(cdf <= r) {
372         N++ ;
373         term *= ( mu / N );
374         cdf0 = cdf;
375         cdf  += term;
376 	if (cdf == cdf0) break; // Can't happen, but just in case...
377     }
378     N1 = N;
379 		// std::cout << r << "   " << N << "   ";
380 		// DBG_small = true;
381     rRange = 0;		// In this case there is always a second r needed
382 
383   } // end of small-r case
384 
385 
386   // >>> 5 <<<
387   // 	Assuming r lies within the scope of the row for this mu, find the
388   //	largest entry not greater than r.  N1 is the N corresponding to the
389   //	index a.
390 
391   else if ( r < cdfs[ENTRIES-1] ) { 		// r is also >= cdfs[0]
392 
393     //
394     // **** This is the normal code path ****
395     //
396 
397     int a = 0;                  // Highest value of index such that cdfs[a]
398 				// is known NOT to be greater than r.
399     int b = ENTRIES - 1;	// Lowest value of index such that cdfs[b] is
400 				// known to exeed r.
401 
402     while (b != (a+1) ) {
403       int c = (a+b+1)>>1;
404       if (r > cdfs[c]) {
405         a = c;
406       } else {
407         b = c;
408       }
409     }
410 
411     N1 = Nmin + a;
412     rRange = cdfs[a+1] - cdfs[a];
413     rRemainder = r - cdfs[a];
414 
415     //
416     // **** ****
417     //
418 
419   } // end of medium-r (normal) case
420 
421 
422   // >>> 6 <<<
423   //	If r exceeds the greatest entry in the table for this mu, then start
424   // 	from that cdf, and use the series to compute from there until r is
425   //	exceeded.
426 
427   else { //		  if ( r >= cdfs[ENTRIES-1] ) {
428 
429     // Here, division must be done explicitly, and we must also protect against
430     // roundoff preventing termination.
431 
432 	//
433 	//+++ cdfs[ENTRIES-1] is std::exp(-mu) sum (mu**m/m! , m=0 to Nmax)
434 	//+++ (where Nmax = mu - BELOW + ENTRIES - 1)
435 	//+++ cdfs[ENTRIES-1]-cdfs[ENTRIES-2] is std::exp(-mu) mu**(Nmax)/(Nmax)!
436 	//+++ If the sum up to k-1 <= r < sum up to k, then N = k-1
437 	//+++ Consider k = Nmax in the above statement:
438 	//+++ If cdfs[ENTRIES-2] <= r < cdfs[ENTRIES-1], N would be Nmax-1
439 	//+++ But here r >= cdfs[ENTRIES-1] so N >= Nmax
440 	//
441 
442 	// Erroneous:
443 	//+++ cdfs[ENTRIES-1] is std::exp(-mu) sum (mu**m/m! , m=0 to Nmax-1)
444 	//+++ cdfs[ENTRIES-1]-cdfs[ENTRIES-2] is std::exp(-mu) mu**(Nmax-1)/(Nmax-1)!
445 	//+++ If a sum up to k-1 <= r < sum up to k, then N = k-1
446 	//+++ So if cdfs[ENTRIES-1] were > r, N would be Nmax-1 (or less)
447 	//+++ But here r >= cdfs[ENTRIES-1] so N >= Nmax
448 	//
449 
450 		//	std::cout << "r = " << r << " mu = " << mu << "\n";
451     long N = Nmax -1;
452     double cdf = cdfs[ENTRIES-1];
453     double term = cdf - cdfs[ENTRIES-2];
454     double cdf0;
455     while(cdf <= r) {
456         N++ ;
457 		//	std::cout << "  N " << N << " term " <<
458 		//	term << " cdf " << cdf << "\n";
459         term *= ( mu / N );
460 	cdf0  = cdf;
461         cdf  += term;
462 	if (cdf == cdf0) break; // If term gets so small cdf stops increasing,
463 				// terminate using that value of N since we
464 				// would never reach r.
465     }
466     N1 = N;
467     rRange = 0; 	// We can't validly omit the second true random
468 
469 	//	    N = Nmax -1;
470 	//	    cdf = cdfs[ENTRIES-1];
471 	//	    term = cdf - cdfs[ENTRIES-2];
472 	//	    for (int isxz=0; isxz < 100; isxz++) {
473 	//	        N++ ;
474 	//	        term *= ( mu / N );
475 	//		cdf0  = cdf;
476 	//	        cdf  += term;
477 	//	    }
478 	//	    std::cout.precision(20);
479 	//	    std::cout << "Final sum is " << cdf << "\n";
480 
481   } // end of large-r case
482 
483 
484 
485   // >>> 7 <<<
486   // 	Form a second random, s, based on the position of r within the range
487   //	of this table entry to the next entry.
488 
489   // However, if this range is very small, then we lose too many bits of
490   // randomness.  In that situation, we generate a second random for s.
491 
492   double s;
493 
494   static const double MINRANGE = .01;  	// Sacrifice up to two digits of
495 				       	// randomness when using r to produce
496 					// a second random s.  Leads to up to
497 					// .09 extra randoms each time.
498 
499   if ( rRange > MINRANGE ) {
500     //
501     // **** This path taken 90% of the time ****
502     //
503     s = rRemainder / rRange;
504   } else {
505     s = e->flat();	// extra true random needed about one time in 10.
506   }
507 
508   // >>> 8 <<<
509   // 	Use the direct summation method to form a second poisson deviate N2
510   //	from deltaMu and s.
511 
512   N2 = 0;
513   double term = std::exp(-deltaMu);
514   double cdf = term;
515 
516   if ( s < (1 - 1.0E-10) ) {
517       //
518       // This is the normal path:
519       //
520       const double* oneOverNptr = oneOverN;
521       while( cdf <= s ) {
522         N2++ ;
523         oneOverNptr++;
524         term *= ( deltaMu * (*oneOverNptr) );
525         cdf  += term;
526       }
527   } else { // s is almost 1...
528       while( cdf <= s ) {
529         N2++ ;
530         term *= ( deltaMu / N2 );
531         cdf  += term;
532       }
533   } // end of if ( s compared to (1 - 1.0E-10) )
534 
535   // >>> 9 <<<
536   // 	The result is the sum of those two deviates
537 
538 		// if (DBG_small) {
539 		//   std::cout << N2 << "   " << N1+N2 << "\n";
540 		//   DBG_small = false;
541 		// }
542 
543   return N1 + N2;
544 
545 } // poissonDeviate()
546 
put(std::ostream & os) const547 std::ostream & RandPoissonQ::put ( std::ostream & os ) const {
548   int pr=os.precision(20);
549   std::vector<unsigned long> t(2);
550   os << " " << name() << "\n";
551   os << "Uvec" << "\n";
552   t = DoubConv::dto2longs(a0);
553   os << a0 << " " << t[0] << " " << t[1] << "\n";
554   t = DoubConv::dto2longs(a1);
555   os << a1 << " " << t[0] << " " << t[1] << "\n";
556   t = DoubConv::dto2longs(a2);
557   os << a2 << " " << t[0] << " " << t[1] << "\n";
558   t = DoubConv::dto2longs(sigma);
559   os << sigma << " " << t[0] << " " << t[1] << "\n";
560   RandPoisson::put(os);
561   os.precision(pr);
562   return os;
563 #ifdef REMOVED
564   int pr=os.precision(20);
565   os << " " << name() << "\n";
566   os << a0 << " " << a1 << " " << a2 << "\n";
567   os << sigma << "\n";
568   RandPoisson::put(os);
569   os.precision(pr);
570   return os;
571 #endif
572 }
573 
get(std::istream & is)574 std::istream & RandPoissonQ::get ( std::istream & is ) {
575   std::string inName;
576   is >> inName;
577   if (inName != name()) {
578     is.clear(std::ios::badbit | is.rdstate());
579     std::cerr << "Mismatch when expecting to read state of a "
580     	      << name() << " distribution\n"
581 	      << "Name found was " << inName
582 	      << "\nistream is left in the badbit state\n";
583     return is;
584   }
585   if (possibleKeywordInput(is, "Uvec", a0)) {
586     std::vector<unsigned long> t(2);
587     is >> a0 >> t[0] >> t[1]; a0 = DoubConv::longs2double(t);
588     is >> a1 >> t[0] >> t[1]; a1 = DoubConv::longs2double(t);
589     is >> a2 >> t[0] >> t[1]; a2 = DoubConv::longs2double(t);
590     is >> sigma >> t[0] >> t[1]; sigma = DoubConv::longs2double(t);
591     RandPoisson::get(is);
592     return is;
593   }
594   // is >> a0 encompassed by possibleKeywordInput
595   is >> a1 >> a2 >> sigma;
596   RandPoisson::get(is);
597   return is;
598 }
599 
600 }  // namespace CLHEP
601 
602