1 /* Statistical routines for gamma distributions.
2  *
3  * Contents:
4  *   1. Routines for evaluating densities and distributions
5  *   2. Generic API routines: for general interface w/ histogram module
6  *   3. Dumping plots for files
7  *   4. Sampling
8  *   5. ML fitting to complete data
9  *   6. Test driver
10  *   7. Example
11  *
12  * Xref:  STL10/65
13  *
14  * To do:
15  *    - Fit*() functions should return eslEINVAL on n=0, eslENORESULT
16  *      on failure due to small n. Compare esl_gumbel. xref J12/93.
17  *      SRE, Wed Nov 27 11:18:19 2013
18  */
19 #include "esl_config.h"
20 
21 #include <stdio.h>
22 #include <math.h>
23 #include <float.h>
24 
25 #include "easel.h"
26 #include "esl_histogram.h"
27 #include "esl_random.h"
28 #include "esl_stats.h"
29 
30 #include "esl_gamma.h"
31 
32 static int    tau_by_moments(double *x, int n, double mu, double *ret_tau,
33 			     double *ret_mean, double *ret_logsum);
34 static int    tau_by_moments_binned(ESL_HISTOGRAM *g, double mu, double *ret_tau,
35 				    double *ret_mean, double *ret_logsum);
36 static double tau_function(double tau, double mean, double logsum);
37 
38 
39 /****************************************************************************
40  * 1. Routines for evaluating densities and distributions
41  ****************************************************************************/
42 
43 /* Function:  esl_gam_pdf()
44  *
45  * Purpose:   Calculates the gamma PDF $P(X=x)$ given value <x>,
46  *            location parameter <mu>, scale parameter <lambda>, and shape
47  *            parameter <tau>.
48  */
49 double
esl_gam_pdf(double x,double mu,double lambda,double tau)50 esl_gam_pdf(double x, double mu, double lambda, double tau)
51 {
52   double y = lambda * (x - mu);
53   double gamtau;
54   double val;
55 
56   if (y < 0.) return 0.;
57 
58   esl_stats_LogGamma(tau, &gamtau);
59   val  = ((tau*log(lambda) + (tau-1.)*log(x-mu)) - gamtau) - y;
60   return exp(val);
61 }
62 
63 /* Function:  esl_gam_logpdf()
64  *
65  * Purpose:   Calculates log of the probability density function
66  *            for the gamma, $\log P(X=x)$, given value <x>,
67  *            location parameter <mu>, scale parameter <lambda>, and
68  *            shape parameter <tau>.
69  */
70 double
esl_gam_logpdf(double x,double mu,double lambda,double tau)71 esl_gam_logpdf(double x, double mu, double lambda, double tau)
72 {
73   double y = lambda * (x - mu);
74   double gamtau;
75   double val;
76 
77   if (x < 0.) return -eslINFINITY;
78 
79   esl_stats_LogGamma(tau, &gamtau);
80   val = ((tau*log(lambda) + (tau-1.)*log(x-mu)) - gamtau) - y;
81   return val;
82 }
83 
84 /* Function:  esl_gam_cdf()
85  *
86  * Purpose:   Calculates the cumulative distribution function
87  *            for the gamma, $P(X \leq x)$, given value <x>,
88  *            location parameter <mu>, scale parameter <lambda>, and
89  *            shape parameter <tau>.
90  *
91  *            (For $\mu=0$, $\lambda = 1$, this is the
92  *            incomplete Gamma function $P(\tau,x)$.)
93  */
94 double
esl_gam_cdf(double x,double mu,double lambda,double tau)95 esl_gam_cdf(double x, double mu, double lambda, double tau)
96 {
97   double y = lambda * (x - mu);
98   double val;
99 
100   if (y <= 0.) return 0.;
101 
102   esl_stats_IncompleteGamma(tau, y, &val, NULL);
103   return val;
104 }
105 
106 /* Function:  esl_gam_logcdf()
107  *
108  * Purpose:   Calculates the log of the cumulative distribution function
109  *            for the gamma, $\log P(X \leq x)$, given value <x>, location
110  *            parameter <mu>, scale parameter <lambda>, and shape
111  *            parameter <tau>.
112  */
113 double
esl_gam_logcdf(double x,double mu,double lambda,double tau)114 esl_gam_logcdf(double x, double mu, double lambda, double tau)
115 {
116   double y = lambda * (x - mu);
117   double val;
118 
119   if (y <= 0.) return -eslINFINITY;
120 
121   esl_stats_IncompleteGamma(tau, y, &val, NULL);
122   return log(val);
123 }
124 
125 /* Function:  esl_gam_surv()
126  *
127  * Purpose:   Calculates the survival function for the gamma, $P(X > x)$,
128  *            given value <x>, location parameter <mu>, scale parameter
129  *            <lambda>, and shape parameter <tau>.
130  */
131 double
esl_gam_surv(double x,double mu,double lambda,double tau)132 esl_gam_surv(double x, double mu, double lambda, double tau)
133 {
134   double y = lambda * (x - mu);
135   double val;
136 
137   if (y <= 0.) return 1.0;
138 
139   esl_stats_IncompleteGamma(tau, y, NULL, &val);
140   return val;
141 }
142 
143 
144 /* Function:  esl_gam_logsurv()
145  *
146  * Purpose:   Calculates the log of the survival function for the gamma,
147  *            $\log P(X > x)$, given value <x>, location parameter <mu>,
148  *            scale parameter <lambda>, and shape parameter <tau>.
149  *
150  *            Relies on <esl_stats_IncompleteGamma()>, which has limited
151  *            dynamic range. Any result of < -700 or so will be -infinity.
152  *            To fix this, we need a log version of <esl_stats_IncompleteGamma()>.
153  */
154 double
esl_gam_logsurv(double x,double mu,double lambda,double tau)155 esl_gam_logsurv(double x, double mu, double lambda, double tau)
156 {
157   double y = lambda * (x - mu);
158   double val;
159 
160   if (y <= 0.) return 0.;
161 
162   esl_stats_IncompleteGamma(tau, y, NULL, &val);
163   return log(val);
164 }
165 
166 
167 /* Function:  esl_gam_invcdf()
168  *
169  * Purpose:   Calculates the inverse CDF for a gamma with location
170  *            parameter <mu>, scale parameter <lambda> and shape
171  *            parameter <tau>, returning the value <x> at which the
172  *            CDF is <p>.
173  *
174  *            This inverse CDF is solved by a computationally expensive,
175  *            brute force bisection search on the CDF of <x>.
176  */
177 double
esl_gam_invcdf(double p,double mu,double lambda,double tau)178 esl_gam_invcdf(double p, double mu, double lambda, double tau)
179 {
180   double x1, x2, xm;		/* low, high guesses at x */
181   double f2, fm;
182   double tol = 1e-6;
183 
184   x1 = 0.;
185   x2 = tau / lambda;
186   do {				/* bracket */
187     x2 = x2*2.;
188     f2 = esl_gam_cdf(x2, mu, lambda, tau);
189   } while (f2 < p);
190 
191   do {				/* bisection */
192     xm = (x1+x2)/ 2.;
193     fm = esl_gam_cdf(xm, mu, lambda, tau);
194 
195     if      (fm > p) x2 = xm;
196     else if (fm < p) x1 = xm;
197     else return xm;		/* unlikely exact fm==p */
198   } while ( (x2-x1)/(x1+x2) > tol);
199 
200   xm = (x1+x2)/2.;
201   return xm;
202 }
203 /*-------------------- end densities & distributions ------------------------*/
204 
205 
206 
207 
208 /****************************************************************************
209  * 2. Generic API routines: for general interface w/ histogram module
210  ****************************************************************************/
211 
212 /* Function:  esl_gam_generic_pdf()
213  *
214  * Purpose:   Generic-API wrapper around <esl_gam_pdf()>, taking
215  *            a void ptr to a double array containing $\mu$, $\lambda$,
216  *            $\tau$ parameters.
217  */
218 double
esl_gam_generic_pdf(double x,void * params)219 esl_gam_generic_pdf(double x, void *params)
220 {
221   double *p = (double *) params;
222   return esl_gam_pdf(x, p[0], p[1], p[2]);
223 }
224 
225 
226 /* Function:  esl_gam_generic_cdf()
227  *
228  * Purpose:   Generic-API wrapper around <esl_gam_cdf()>, taking
229  *            a void ptr to a double array containing $\mu$, $\lambda$,
230  *            $\tau$ parameters.
231  */
232 double
esl_gam_generic_cdf(double x,void * params)233 esl_gam_generic_cdf(double x, void *params)
234 {
235   double *p = (double *) params;
236   return esl_gam_cdf(x, p[0], p[1], p[2]);
237 }
238 
239 
240 /* Function:  esl_gam_generic_surv()
241  *
242  * Purpose:   Generic-API wrapper around <esl_gam_surv()>, taking
243  *            a void ptr to a double array containing $\mu$, $\lambda$,
244  *            $\tau$ parameters.
245  */
246 double
esl_gam_generic_surv(double x,void * params)247 esl_gam_generic_surv(double x, void *params)
248 {
249   double *p = (double *) params;
250   return esl_gam_surv(x, p[0], p[1], p[2]);
251 }
252 
253 
254 /* Function:  esl_gam_generic_invcdf()
255  *
256  * Purpose:   Generic-API wrapper around <esl_gam_invcdf()>, taking
257  *            a void ptr to a double array containing $\mu$, $\lambda$,
258  *            $\tau$ parameters.
259  */
260 double
esl_gam_generic_invcdf(double x,void * params)261 esl_gam_generic_invcdf(double x, void *params)
262 {
263   double *p = (double *) params;
264   return esl_gam_invcdf(x, p[0], p[1], p[2]);
265 }
266 /*------------------------ end generic API ---------------------------------*/
267 
268 
269 
270 /****************************************************************************
271  * 3. Dumping plots for files
272  ****************************************************************************/
273 
274 /* Function:  esl_gam_Plot()
275  *
276  * Purpose:   Plot some gamma distribution function <func> (for instance,
277  *            <esl_gam_pdf()>) for parameters <mu>, <lambda>, and <tau>, for
278  *            a range of values x from <xmin> to <xmax> in steps of <xstep>;
279  *            output to an open stream <fp> in xmgrace XY input format.
280  *
281  * Returns:   <eslOK> on success.
282  *
283  * Throws:    <eslEWRITE> on any system write error, such as a filled disk.
284  */
285 int
esl_gam_Plot(FILE * fp,double mu,double lambda,double tau,double (* func)(double x,double mu,double lambda,double tau),double xmin,double xmax,double xstep)286 esl_gam_Plot(FILE *fp, double mu, double lambda, double tau,
287 	     double (*func)(double x, double mu, double lambda, double tau),
288 	     double xmin, double xmax, double xstep)
289 {
290   double x;
291   for (x = xmin; x <= xmax; x += xstep)
292     if (fprintf(fp, "%f\t%g\n", x, (*func)(x, mu, lambda, tau)) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "gamma plot write failed");
293   if (fprintf(fp, "&\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "gamma plot write failed");
294   return eslOK;
295 }
296 /*-------------------- end plot dumping routines ---------------------------*/
297 
298 
299 /****************************************************************************
300  * 4. Sampling
301  ****************************************************************************/
302 /* Function:  esl_gam_Sample()
303  *
304  * Purpose:   Sample a gamma-distributed random variate.
305  */
306 double
esl_gam_Sample(ESL_RANDOMNESS * r,double mu,double lambda,double tau)307 esl_gam_Sample(ESL_RANDOMNESS *r, double mu, double lambda, double tau)
308 {
309   double x;
310 
311   x = esl_rnd_Gamma(r, tau);
312   return (mu + x / lambda);
313 }
314 /*--------------------------- end sampling ---------------------------------*/
315 
316 
317 
318 /****************************************************************************
319  * 5. ML fitting to complete data
320  ****************************************************************************/
321 
322 /* Function:  esl_gam_FitComplete()
323  *
324  * Purpose:   Given complete data consisting of <n> samples <x[0]..x[n-1]>,
325  *            and a known location parameter <mu>, determine and return
326  *            maximum likelihood parameters <ret_lambda> and <ret_tau>.
327  *
328  * Args:      x          - complete gamma-distributed data [0..n-1]
329  *            n          - number of samples in <x>
330  *            mu         - known location parameter
331  *            ret_lambda - RETURN: ML estimate of lambda
332  *            ret_tau    - RETURN: ML estimate of tau
333  *
334  * Returns:   <eslOK> on success.
335  *
336  * Throws:    <eslENOHALT> if bracketing or bisection fails;
337  *            <eslEINVAL> if data cannot be gamma distributed (some <x[i] < mu>,
338  *            or zero variance in x).
339  *
340  * Xref:      STL10/65.
341  */
342 int
esl_gam_FitComplete(double * x,int n,double mu,double * ret_lambda,double * ret_tau)343 esl_gam_FitComplete(double *x, int n, double mu, double *ret_lambda, double *ret_tau)
344 {
345   double mean, logsum;
346   int    i;
347   double c, fc;
348   double a, fa;
349   double b, fb;
350   int    status;
351 
352   if ((status = tau_by_moments(x, n, mu, &c, &mean, &logsum) != eslOK)) goto ERROR;
353   a = b = c;
354   fc = tau_function(c, mean, logsum);
355 
356   /* Rootfinding, 1.: bracketing the root with points a,b.
357    */
358   if (fc > 0.)			/* fx>0 means tau is too small, search right */
359     {
360       for (i = 0; i < 100; i++)	/* 100 = max iterations */
361 	{
362 	  b = a * 2.;
363 	  fb = tau_function(b, mean, logsum);
364 	  if (fb < 0.) break;	/* a,b now bracket */
365 	  a = b;                /* else fb>0, so b is a better left bracket than a */
366 	}
367       if (i == 100) ESL_XEXCEPTION(eslENOHALT, "failed to bracket");
368     }
369   else if (fc < 0.)		/* fx<0 means tau is too large, search left */
370     {
371       for (i = 0; i < 100; i++)
372 	{
373 	  a = b/2.;
374 	  fa = tau_function(a, mean, logsum);
375 	  if (fa > 0.) break;   /* a,b now bracket */
376 	  b = a;                /* else fa<0, so a is a better right bracket than b */
377 	}
378       if (i == 100) ESL_XEXCEPTION(eslENOHALT, "failed to bracket");
379     }
380 
381   /* Rootfinding, 2.: Bisection search.
382    * We have the root in interval (a,b).
383    */
384   for (i = 0; i < 100; i++)
385     {
386       c  = (a+b)/2.;		/* bisection */
387       fc = tau_function(c, mean, logsum);
388       if      (fc > 0.)  a = c;
389       else if (fc < 0.)  b = c;
390       else    break;		/* unlikely event that we nail it */
391 
392       if ((b-a) <= 2.* DBL_EPSILON) {
393 	c  = (a+b)/2.;
394 	break;
395       }
396     }
397   if (i == 100) ESL_XEXCEPTION(eslENOHALT, "bisection search failed");
398 
399   *ret_lambda = c / mean;
400   *ret_tau    = c;
401   return eslOK;
402 
403 
404  ERROR:
405   *ret_lambda = 0.0;
406   *ret_tau    = 0.0;
407   return status;
408 }
409 
410 /* tau_by_moments()
411  *
412  * Obtain an initial estimate for tau by
413  * matching moments. Also returns mean and
414  * logsum, which we need for ML fitting.
415  * To obtain a lambda estimate, use
416  * lambda = tau / mean.
417  */
418 static int
tau_by_moments(double * x,int n,double mu,double * ret_tau,double * ret_mean,double * ret_logsum)419 tau_by_moments(double *x, int n, double mu, double *ret_tau, double *ret_mean, double *ret_logsum)
420 {
421   int    i;
422   double mean, var, logsum;
423 
424   mean = var = logsum = 0.;
425   for (i = 0; i < n; i++)
426     {
427       if (x[i] < mu) ESL_EXCEPTION(eslEINVAL, "No x[i] can be < mu in gamma data");
428       mean   += x[i] - mu;	   /* mean is temporarily just the sum */
429       logsum += log(x[i] - mu);
430       var  += (x[i]-mu)*(x[i]-mu); /* var is temporarily the sum of squares */
431     }
432   var     = (var - mean*mean/(double)n) / ((double)n-1); /* now var is the variance */
433   mean   /= (double) n;		/* and now mean is the mean */
434   logsum /= (double) n;
435 
436   if (var == 0.)		/* and if mean = 0, var = 0 anyway. */
437     ESL_EXCEPTION(eslEINVAL, "Zero variance in allegedly gamma-distributed dataset");
438 
439   if (ret_tau    != NULL) *ret_tau    = mean * mean / var;
440   if (ret_mean   != NULL) *ret_mean   = mean;
441   if (ret_logsum != NULL) *ret_logsum = logsum;
442   return eslOK;
443 }
444 
445 
446 /* tau_function()
447  *
448  * This is the rootfinding equation for tau...
449  * \ref{eqn:gamma_tau_root} in the documentation.
450  *   mean   is  1/N \sum (x_i - \mu)
451  *   logsum is  1/N \sum \log (x_i - \mu)
452  * These are both independent of tau, and dependent
453  * on all data points, so we require the caller to
454  * precalculate them for us.
455  *
456  * This is a decreasing function of tau:
457  * the return value is > 0 when tau is too small,
458  * and < 0 when tau is too large.
459  */
460 static double
tau_function(double tau,double mean,double logsum)461 tau_function(double tau, double mean, double logsum)
462 {
463   double psitau;
464 
465   esl_stats_Psi(tau, &psitau);
466   return ( ((log(tau) - psitau) - log(mean)) + logsum );
467 }
468 
469 /* Function:  esl_gam_FitCompleteBinned()
470  *
471  * Purpose:   Fit a complete exponential distribution to the observed
472  *            binned data in a histogram <g>, where each
473  *            bin i holds some number of observed samples x with values from
474  *            lower bound l to upper bound u (that is, $l < x \leq u$);
475  *            determine and return maximum likelihood estimates for the
476  *            parameters $\mu, \lambda, \tau$ and
477  *            return them in <*ret_mu>, <*ret_lambda>, <*ret_tau>.
478  *
479  *            Unlike the <esl_exp_FitCompleteBinned()> case where the
480  *            ML fit optimizes $\sum_i n_i \log P(a_i \leq x < b_i)$
481  *            where $a_i \leq b_i$ are the bounds of bin i with
482  *            occupancy $n_i$, here we take the approximation that
483  *            $c_i = a_i + 0.5*(b_i-a_i)$ and optimize $\log P(a_i
484  *            \leq x < b_i) \simeq \log(w) + \log P(x=c_i)$.
485  *
486  *            Since $b_i-a_i = w$ is fixed, optimizing the above
487  *            becomes equivalent to optimizing $\sum_i n_i * log P(x=c_i)$.
488  *
489  *            The optimization is then equivalent to the non-binned case,
490  *            but subsituting in averages such as $\sum_i x(i)$ by
491  *            $\sum_i n_i*c_i i$, and so forth.
492  *
493  *            If the binned data in <g> were set to focus on
494  *            a tail by virtual censoring, the "complete" exponential is
495  *            fitted to this tail. The caller then also needs to
496  *            remember what fraction of the probability mass was in this
497  *            tail.
498  *
499  * Args:      g          - histogram
500  *            ret_mu     - RETURN: given by the histogram
501  *            ret_lambda - RETURN: ML estimate of lambda
502  *            ret_tau    - RETURN: ML estimate of tau
503  *
504  * Returns:   <eslOK> on success.
505  *
506  * Throws:    <eslENOHALT> if bracketing or bisection fails;
507  *            <eslEINVAL> if data cannot be gamma distributed (some <x[i] < mu>,
508  *            or zero variance in x).
509  *
510  *
511  * Returns:   <eslOK> on success.
512  *
513  * Throws:    <eslEINVAL> if dataset is true-censored.
514  */
515 int
esl_gam_FitCompleteBinned(ESL_HISTOGRAM * g,double * ret_mu,double * ret_lambda,double * ret_tau)516 esl_gam_FitCompleteBinned(ESL_HISTOGRAM *g, double *ret_mu, double *ret_lambda, double *ret_tau)
517 {
518   double mu = 0.;
519   double mean, logsum;
520   int    i;
521   double c, fc;
522   double a, fa;
523   double b, fb;
524   double tol = 1e-6;
525   int    maxit = 100;
526   int    status;
527 
528   if (g->dataset_is == COMPLETE)
529     {
530       if   (g->is_rounded) mu = esl_histogram_Bin2LBound(g, g->imin);
531       else                 mu = g->xmin;
532     }
533   else if (g->dataset_is == VIRTUAL_CENSORED) /* i.e., we'll fit to tail */
534     mu = g->phi;
535   else if (g->dataset_is == TRUE_CENSORED)
536     ESL_EXCEPTION(eslEINVAL, "can't fit true censored dataset");
537 
538   if ((status = tau_by_moments_binned(g, mu, &c, &mean, &logsum) != eslOK)) goto ERROR;
539   a = b = c;
540   if (c == 1.0) {
541     *ret_mu     = mu;
542     *ret_lambda = c / mean;
543     *ret_tau    = c;
544     return eslOK;
545   }
546   fc = tau_function(c, mean, logsum);
547 
548   /* Rootfinding, 1.: bracketing the root with points a,b.
549    */
550   if (fc > 0.)			/* fx>0 means tau is too small, search right */
551     {
552       for (i = 0; i < maxit; i++)	/* max iterations */
553 	{
554 	  b = a * 2.;
555 	  fb = tau_function(b, mean, logsum);
556 
557 	  if (fb < 0.) break;	/* a,b now bracket */
558 	  a = b;                /* else fb>0, so b is a better left bracket than a */
559 	}
560       if (i == maxit) ESL_XEXCEPTION(eslENOHALT, "failed to bracket");
561     }
562   else if (fc < 0.)		/* fx<0 means tau is too large, search left */
563     {
564       for (i = 0; i < maxit; i++)
565 	{
566 	  a = b/2.;
567 	  fa = tau_function(a, mean, logsum);
568 	  if (fa > 0.) break;   /* a,b now bracket */
569 	  b = a;                /* else fa<0, so a is a better right bracket than b */
570 	}
571       if (i == maxit) ESL_XEXCEPTION(eslENOHALT, "failed to bracket");
572     }
573 
574   /* Rootfinding, 2.: Bisection search.
575    * We have the root in interval (a,b).
576    */
577   for (i = 0; i < maxit; i++)
578     {
579       c  = (a+b)/2.;		/* bisection */
580       fc = tau_function(c, mean, logsum);
581 
582       if      (fc > 0.)  a = c;
583       else if (fc < 0.)  b = c;
584       else    break;		/* unlikely event that we nail it */
585 
586       if ((b-a) <= tol) {
587 	c  = (a+b)/2.;
588 	break;
589       }
590     }
591   if (i == maxit) ESL_XEXCEPTION(eslENOHALT, "bisection search failed");
592 
593   *ret_mu     = mu;
594   *ret_lambda = (mean > 0.)? c / mean : 0.0;
595   *ret_tau    = c;
596   return eslOK;
597 
598  ERROR:
599   *ret_mu     = 0.;
600   *ret_lambda = 0.;
601   *ret_tau    = 0.;
602   return status;
603 }
604 
605 /* tau_by_moments_binned()
606  *
607  * similar to tau_by_moments()
608  * where mean=\sum_i x_i now becomes mean=\sum_i n(i)*ci, ...
609  *
610  * note: the whole method relies on the property log(sum) >= logsum;
611  * which works if all points are valide, that is positive;
612  * log(0) = -inf is not a valid point,
613  * and the inequality (Jensen's inequality) does not hold.
614  */
615 static int
tau_by_moments_binned(ESL_HISTOGRAM * g,double mu,double * ret_tau,double * ret_mean,double * ret_logsum)616 tau_by_moments_binned(ESL_HISTOGRAM *g, double mu, double *ret_tau, double *ret_mean, double *ret_logsum)
617 {
618   int    i;
619   double ai, bi, ci;
620   double sum, mean, var, logsum;
621   double tol = 1e-6;
622 
623   sum = mean = var = logsum = 0.;
624   for (i = g->cmin+1; i <= g->imax; i++) /* for each occupied bin */
625     {
626       if (g->obs[i] == 0) continue;
627       ai = esl_histogram_Bin2LBound(g,i);
628       bi = esl_histogram_Bin2UBound(g,i);
629       ci = ai + 0.5 * (bi-ai);
630 
631       if (ci < mu) ESL_EXCEPTION(eslEINVAL, "No point can be < mu in gamma data");
632       sum    += (double)g->obs[i];
633       mean   += (double)g->obs[i] * (ci-mu);	                   /* mean is temporarily just the sum */
634       logsum += (ci>mu)? (double)g->obs[i] * log(ci-mu):0.0;
635       var    += (double)g->obs[i] * (ci-mu) * (ci-mu);             /* var is temporarily the sum of squares */
636     }
637 
638   var     = (sum > 1.)? (var - mean*mean/sum) / (sum-1.) : 0.0; /* now var is the variance */
639   mean   /= (sum > 0.)? sum : 1.;	                                        /* and now mean is the mean */
640   logsum /= (sum > 0.)? sum : 1.;
641 
642   if (ret_tau    != NULL) *ret_tau    = (var < tol || mean == 0.)? 1. :  mean * mean / var;
643   if (ret_mean   != NULL) *ret_mean   = mean;
644   if (ret_logsum != NULL) *ret_logsum = logsum;
645   return eslOK;
646 }
647 
648 
649 
650 /****************************************************************************
651  * 6. Test driver
652  ****************************************************************************/
653 #ifdef eslGAMMA_TESTDRIVE
654 /* Compile:
655    gcc -g -Wall -I. -I ~/src/easel -L ~/src/easel -o test -DeslGAMMA_TESTDRIVE\
656     esl_gamma.c -leasel -lm
657 */
658 #include <stdio.h>
659 #include <stdlib.h>
660 #include <string.h>
661 
662 #include "easel.h"
663 #include "esl_random.h"
664 #include "esl_histogram.h"
665 #include "esl_gamma.h"
666 
667 int
main(int argc,char ** argv)668 main(int argc, char **argv)
669 {
670   ESL_HISTOGRAM  *h;
671   ESL_RANDOMNESS *r;
672   double  mu        = -5.0;
673   double  lambda    =  2.0;
674   double  tau       =  0.7;
675   int     n         = 10000;
676   double  binwidth  = 0.0001;
677   double  emu, elambda, etau;
678   int     i;
679   double  x;
680   double *data;
681   int     ndata;
682 
683   int     opti;
684   int     be_verbose   = FALSE;
685   char   *plotfile     = NULL;
686   FILE   *pfp          = stdout;
687   int     plot_pdf     = FALSE;
688   int     plot_logpdf  = FALSE;
689   int     plot_cdf     = FALSE;
690   int     plot_logcdf  = FALSE;
691   int     plot_surv    = FALSE;
692   int     plot_logsurv = FALSE;
693   int     xmin_set     = FALSE;
694   double  xmin;
695   int     xmax_set     = FALSE;
696   double  xmax;
697   int     xstep_set    = FALSE;
698   double  xstep;
699 
700   for (opti = 1; opti < argc && *(argv[opti]) == '-'; opti++)
701     {
702       if      (strcmp(argv[opti], "-m")  == 0) mu           = atof(argv[++opti]);
703       else if (strcmp(argv[opti], "-l")  == 0) lambda       = atof(argv[++opti]);
704       else if (strcmp(argv[opti], "-n")  == 0) n            = atoi(argv[++opti]);
705       else if (strcmp(argv[opti], "-o")  == 0) plotfile     = argv[++opti];
706       else if (strcmp(argv[opti], "-t")  == 0) tau          = atof(argv[++opti]);
707       else if (strcmp(argv[opti], "-v")  == 0) be_verbose   = TRUE;
708       else if (strcmp(argv[opti], "-w")  == 0) binwidth     = atof(argv[++opti]);
709       else if (strcmp(argv[opti], "-C")  == 0) plot_cdf     = TRUE;
710       else if (strcmp(argv[opti], "-LC") == 0) plot_logcdf  = TRUE;
711       else if (strcmp(argv[opti], "-P")  == 0) plot_pdf     = TRUE;
712       else if (strcmp(argv[opti], "-LP") == 0) plot_logpdf  = TRUE;
713       else if (strcmp(argv[opti], "-S")  == 0) plot_surv    = TRUE;
714       else if (strcmp(argv[opti], "-LS") == 0) plot_logsurv = TRUE;
715       else if (strcmp(argv[opti], "-XL") == 0) { xmin_set  = TRUE; xmin  = atof(argv[++opti]); }
716       else if (strcmp(argv[opti], "-XH") == 0) { xmax_set  = TRUE; xmax  = atof(argv[++opti]); }
717       else if (strcmp(argv[opti], "-XS") == 0) { xstep_set = TRUE; xstep = atof(argv[++opti]); }
718       else esl_fatal("bad option");
719     }
720 
721   if (be_verbose)
722     printf("Parametric:  mu = %f   lambda = %f    tau = %f\n", mu, lambda, tau);
723 
724   r = esl_randomness_Create(0);
725   h = esl_histogram_CreateFull(mu, 100., binwidth);
726   if (plotfile != NULL) {
727     if ((pfp = fopen(plotfile, "w")) == NULL) esl_fatal("Failed to open plotfile");
728   }
729   if (! xmin_set)  xmin  = mu;
730   if (! xmax_set)  xmax  = mu+40*(1./lambda);
731   if (! xstep_set) xstep = 0.1;
732 
733   for (i = 0; i < n; i++)
734     {
735       x = esl_gam_Sample(r, mu, lambda, tau);
736       esl_histogram_Add(h, x);
737     }
738   esl_histogram_GetData(h, &data, &ndata);
739 
740   esl_gam_FitComplete(data, ndata, mu, &elambda, &etau);
741   if (be_verbose)
742     printf("Complete data fit:  mu = %f   lambda = %f   tau = %f\n", mu, elambda, etau);
743   if (fabs( (elambda-lambda)/lambda ) > 0.10) esl_fatal("Error in (complete) fitted lambda > 10%%\n");
744   if (fabs( (etau-tau)/tau )          > 0.10) esl_fatal("Error in (complete) fitted tau > 10%%\n");
745 
746   esl_gam_FitCompleteBinned(h, &emu, &elambda, &etau);
747   if (be_verbose) printf("Binned data fit:  mu = %f   lambda = %f  tau = %f\n", emu, elambda, etau);
748   if (fabs( (emu-mu)/mu )             > 0.01) esl_fatal("Error in (binned) fitted mu > 1%%\n");
749   if (fabs( (elambda-lambda)/lambda ) > 0.10) esl_fatal("Error in (binned) fitted lambda > 10%%\n");
750   if (fabs( (etau-tau)/tau )          > 0.10) esl_fatal("Error in (binned) fitted tau > 10%%\n");
751 
752   if (plot_pdf)     esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_pdf,     xmin, xmax, xstep);
753   if (plot_logpdf)  esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_logpdf,  xmin, xmax, xstep);
754   if (plot_cdf)     esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_cdf,     xmin, xmax, xstep);
755   if (plot_logcdf)  esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_logcdf,  xmin, xmax, xstep);
756   if (plot_surv)    esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_surv,    xmin, xmax, xstep);
757   if (plot_logsurv) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_logsurv, xmin, xmax, xstep);
758 
759   if (plotfile != NULL) fclose(pfp);
760 
761   esl_randomness_Destroy(r);
762   esl_histogram_Destroy(h);
763   return 0;
764 }
765 #endif /*eslGAMMA_TESTDRIVE*/
766 
767 /****************************************************************************
768  * Example main()
769  ****************************************************************************/
770 #ifdef eslGAMMA_EXAMPLE
771 /*::cexcerpt::gam_example::begin::*/
772 #include <stdio.h>
773 #include "easel.h"
774 #include "esl_random.h"
775 #include "esl_histogram.h"
776 #include "esl_gamma.h"
777 
778 int
main(int argc,char ** argv)779 main(int argc, char **argv)
780 {
781   double mu         = -5.0;
782   double lambda     = 2.0;
783   double tau        = 0.7;
784   ESL_HISTOGRAM  *h = esl_histogram_CreateFull(mu, 100., 0.1);
785   ESL_RANDOMNESS *r = esl_randomness_Create(0);
786   int    n          = 10000;
787   double elam, etau;
788   int    i;
789   double x;
790   double *data;
791   int     ndata;
792 
793   /* Take <n> gamma-distributed random samples. */
794   for (i = 0; i < n; i++)
795     {
796       x  =  esl_gam_Sample(r, mu, lambda, tau);
797       esl_histogram_Add(h, x);
798     }
799   esl_histogram_GetData(h, &data, &ndata);
800 
801   /* Plot the empirical (sampled) and expected survivals */
802   esl_histogram_PlotSurvival(stdout, h);
803   esl_gam_Plot(stdout, mu, lambda, tau,
804 	       &esl_gam_surv,  h->xmin, h->xmax, 0.1);
805 
806   /* ML fit to complete data, and plot fitted survival curve */
807   esl_gam_FitComplete(data, ndata, mu, &elam, &etau);
808   esl_gam_Plot(stdout, mu, elam, etau,
809 	       &esl_gam_surv,  h->xmin, h->xmax, 0.1);
810 
811   esl_randomness_Destroy(r);
812   esl_histogram_Destroy(h);
813   return 0;
814 }
815 /*::cexcerpt::gam_example::end::*/
816 #endif /*eslGAMMA_EXAMPLE*/
817 
818 
819