1 /* Statistical routines for gamma distributions.
2 *
3 * Contents:
4 * 1. Routines for evaluating densities and distributions
5 * 2. Generic API routines: for general interface w/ histogram module
6 * 3. Dumping plots for files
7 * 4. Sampling
8 * 5. ML fitting to complete data
9 * 6. Test driver
10 * 7. Example
11 *
12 * Xref: STL10/65
13 *
14 * To do:
15 * - Fit*() functions should return eslEINVAL on n=0, eslENORESULT
16 * on failure due to small n. Compare esl_gumbel. xref J12/93.
17 * SRE, Wed Nov 27 11:18:19 2013
18 */
19 #include "esl_config.h"
20
21 #include <stdio.h>
22 #include <math.h>
23 #include <float.h>
24
25 #include "easel.h"
26 #include "esl_histogram.h"
27 #include "esl_random.h"
28 #include "esl_stats.h"
29
30 #include "esl_gamma.h"
31
32 static int tau_by_moments(double *x, int n, double mu, double *ret_tau,
33 double *ret_mean, double *ret_logsum);
34 static int tau_by_moments_binned(ESL_HISTOGRAM *g, double mu, double *ret_tau,
35 double *ret_mean, double *ret_logsum);
36 static double tau_function(double tau, double mean, double logsum);
37
38
39 /****************************************************************************
40 * 1. Routines for evaluating densities and distributions
41 ****************************************************************************/
42
43 /* Function: esl_gam_pdf()
44 *
45 * Purpose: Calculates the gamma PDF $P(X=x)$ given value <x>,
46 * location parameter <mu>, scale parameter <lambda>, and shape
47 * parameter <tau>.
48 */
49 double
esl_gam_pdf(double x,double mu,double lambda,double tau)50 esl_gam_pdf(double x, double mu, double lambda, double tau)
51 {
52 double y = lambda * (x - mu);
53 double gamtau;
54 double val;
55
56 if (y < 0.) return 0.;
57
58 esl_stats_LogGamma(tau, &gamtau);
59 val = ((tau*log(lambda) + (tau-1.)*log(x-mu)) - gamtau) - y;
60 return exp(val);
61 }
62
63 /* Function: esl_gam_logpdf()
64 *
65 * Purpose: Calculates log of the probability density function
66 * for the gamma, $\log P(X=x)$, given value <x>,
67 * location parameter <mu>, scale parameter <lambda>, and
68 * shape parameter <tau>.
69 */
70 double
esl_gam_logpdf(double x,double mu,double lambda,double tau)71 esl_gam_logpdf(double x, double mu, double lambda, double tau)
72 {
73 double y = lambda * (x - mu);
74 double gamtau;
75 double val;
76
77 if (x < 0.) return -eslINFINITY;
78
79 esl_stats_LogGamma(tau, &gamtau);
80 val = ((tau*log(lambda) + (tau-1.)*log(x-mu)) - gamtau) - y;
81 return val;
82 }
83
84 /* Function: esl_gam_cdf()
85 *
86 * Purpose: Calculates the cumulative distribution function
87 * for the gamma, $P(X \leq x)$, given value <x>,
88 * location parameter <mu>, scale parameter <lambda>, and
89 * shape parameter <tau>.
90 *
91 * (For $\mu=0$, $\lambda = 1$, this is the
92 * incomplete Gamma function $P(\tau,x)$.)
93 */
94 double
esl_gam_cdf(double x,double mu,double lambda,double tau)95 esl_gam_cdf(double x, double mu, double lambda, double tau)
96 {
97 double y = lambda * (x - mu);
98 double val;
99
100 if (y <= 0.) return 0.;
101
102 esl_stats_IncompleteGamma(tau, y, &val, NULL);
103 return val;
104 }
105
106 /* Function: esl_gam_logcdf()
107 *
108 * Purpose: Calculates the log of the cumulative distribution function
109 * for the gamma, $\log P(X \leq x)$, given value <x>, location
110 * parameter <mu>, scale parameter <lambda>, and shape
111 * parameter <tau>.
112 */
113 double
esl_gam_logcdf(double x,double mu,double lambda,double tau)114 esl_gam_logcdf(double x, double mu, double lambda, double tau)
115 {
116 double y = lambda * (x - mu);
117 double val;
118
119 if (y <= 0.) return -eslINFINITY;
120
121 esl_stats_IncompleteGamma(tau, y, &val, NULL);
122 return log(val);
123 }
124
125 /* Function: esl_gam_surv()
126 *
127 * Purpose: Calculates the survival function for the gamma, $P(X > x)$,
128 * given value <x>, location parameter <mu>, scale parameter
129 * <lambda>, and shape parameter <tau>.
130 */
131 double
esl_gam_surv(double x,double mu,double lambda,double tau)132 esl_gam_surv(double x, double mu, double lambda, double tau)
133 {
134 double y = lambda * (x - mu);
135 double val;
136
137 if (y <= 0.) return 1.0;
138
139 esl_stats_IncompleteGamma(tau, y, NULL, &val);
140 return val;
141 }
142
143
144 /* Function: esl_gam_logsurv()
145 *
146 * Purpose: Calculates the log of the survival function for the gamma,
147 * $\log P(X > x)$, given value <x>, location parameter <mu>,
148 * scale parameter <lambda>, and shape parameter <tau>.
149 *
150 * Relies on <esl_stats_IncompleteGamma()>, which has limited
151 * dynamic range. Any result of < -700 or so will be -infinity.
152 * To fix this, we need a log version of <esl_stats_IncompleteGamma()>.
153 */
154 double
esl_gam_logsurv(double x,double mu,double lambda,double tau)155 esl_gam_logsurv(double x, double mu, double lambda, double tau)
156 {
157 double y = lambda * (x - mu);
158 double val;
159
160 if (y <= 0.) return 0.;
161
162 esl_stats_IncompleteGamma(tau, y, NULL, &val);
163 return log(val);
164 }
165
166
167 /* Function: esl_gam_invcdf()
168 *
169 * Purpose: Calculates the inverse CDF for a gamma with location
170 * parameter <mu>, scale parameter <lambda> and shape
171 * parameter <tau>, returning the value <x> at which the
172 * CDF is <p>.
173 *
174 * This inverse CDF is solved by a computationally expensive,
175 * brute force bisection search on the CDF of <x>.
176 */
177 double
esl_gam_invcdf(double p,double mu,double lambda,double tau)178 esl_gam_invcdf(double p, double mu, double lambda, double tau)
179 {
180 double x1, x2, xm; /* low, high guesses at x */
181 double f2, fm;
182 double tol = 1e-6;
183
184 x1 = 0.;
185 x2 = tau / lambda;
186 do { /* bracket */
187 x2 = x2*2.;
188 f2 = esl_gam_cdf(x2, mu, lambda, tau);
189 } while (f2 < p);
190
191 do { /* bisection */
192 xm = (x1+x2)/ 2.;
193 fm = esl_gam_cdf(xm, mu, lambda, tau);
194
195 if (fm > p) x2 = xm;
196 else if (fm < p) x1 = xm;
197 else return xm; /* unlikely exact fm==p */
198 } while ( (x2-x1)/(x1+x2) > tol);
199
200 xm = (x1+x2)/2.;
201 return xm;
202 }
203 /*-------------------- end densities & distributions ------------------------*/
204
205
206
207
208 /****************************************************************************
209 * 2. Generic API routines: for general interface w/ histogram module
210 ****************************************************************************/
211
212 /* Function: esl_gam_generic_pdf()
213 *
214 * Purpose: Generic-API wrapper around <esl_gam_pdf()>, taking
215 * a void ptr to a double array containing $\mu$, $\lambda$,
216 * $\tau$ parameters.
217 */
218 double
esl_gam_generic_pdf(double x,void * params)219 esl_gam_generic_pdf(double x, void *params)
220 {
221 double *p = (double *) params;
222 return esl_gam_pdf(x, p[0], p[1], p[2]);
223 }
224
225
226 /* Function: esl_gam_generic_cdf()
227 *
228 * Purpose: Generic-API wrapper around <esl_gam_cdf()>, taking
229 * a void ptr to a double array containing $\mu$, $\lambda$,
230 * $\tau$ parameters.
231 */
232 double
esl_gam_generic_cdf(double x,void * params)233 esl_gam_generic_cdf(double x, void *params)
234 {
235 double *p = (double *) params;
236 return esl_gam_cdf(x, p[0], p[1], p[2]);
237 }
238
239
240 /* Function: esl_gam_generic_surv()
241 *
242 * Purpose: Generic-API wrapper around <esl_gam_surv()>, taking
243 * a void ptr to a double array containing $\mu$, $\lambda$,
244 * $\tau$ parameters.
245 */
246 double
esl_gam_generic_surv(double x,void * params)247 esl_gam_generic_surv(double x, void *params)
248 {
249 double *p = (double *) params;
250 return esl_gam_surv(x, p[0], p[1], p[2]);
251 }
252
253
254 /* Function: esl_gam_generic_invcdf()
255 *
256 * Purpose: Generic-API wrapper around <esl_gam_invcdf()>, taking
257 * a void ptr to a double array containing $\mu$, $\lambda$,
258 * $\tau$ parameters.
259 */
260 double
esl_gam_generic_invcdf(double x,void * params)261 esl_gam_generic_invcdf(double x, void *params)
262 {
263 double *p = (double *) params;
264 return esl_gam_invcdf(x, p[0], p[1], p[2]);
265 }
266 /*------------------------ end generic API ---------------------------------*/
267
268
269
270 /****************************************************************************
271 * 3. Dumping plots for files
272 ****************************************************************************/
273
274 /* Function: esl_gam_Plot()
275 *
276 * Purpose: Plot some gamma distribution function <func> (for instance,
277 * <esl_gam_pdf()>) for parameters <mu>, <lambda>, and <tau>, for
278 * a range of values x from <xmin> to <xmax> in steps of <xstep>;
279 * output to an open stream <fp> in xmgrace XY input format.
280 *
281 * Returns: <eslOK> on success.
282 *
283 * Throws: <eslEWRITE> on any system write error, such as a filled disk.
284 */
285 int
esl_gam_Plot(FILE * fp,double mu,double lambda,double tau,double (* func)(double x,double mu,double lambda,double tau),double xmin,double xmax,double xstep)286 esl_gam_Plot(FILE *fp, double mu, double lambda, double tau,
287 double (*func)(double x, double mu, double lambda, double tau),
288 double xmin, double xmax, double xstep)
289 {
290 double x;
291 for (x = xmin; x <= xmax; x += xstep)
292 if (fprintf(fp, "%f\t%g\n", x, (*func)(x, mu, lambda, tau)) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "gamma plot write failed");
293 if (fprintf(fp, "&\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "gamma plot write failed");
294 return eslOK;
295 }
296 /*-------------------- end plot dumping routines ---------------------------*/
297
298
299 /****************************************************************************
300 * 4. Sampling
301 ****************************************************************************/
302 /* Function: esl_gam_Sample()
303 *
304 * Purpose: Sample a gamma-distributed random variate.
305 */
306 double
esl_gam_Sample(ESL_RANDOMNESS * r,double mu,double lambda,double tau)307 esl_gam_Sample(ESL_RANDOMNESS *r, double mu, double lambda, double tau)
308 {
309 double x;
310
311 x = esl_rnd_Gamma(r, tau);
312 return (mu + x / lambda);
313 }
314 /*--------------------------- end sampling ---------------------------------*/
315
316
317
318 /****************************************************************************
319 * 5. ML fitting to complete data
320 ****************************************************************************/
321
322 /* Function: esl_gam_FitComplete()
323 *
324 * Purpose: Given complete data consisting of <n> samples <x[0]..x[n-1]>,
325 * and a known location parameter <mu>, determine and return
326 * maximum likelihood parameters <ret_lambda> and <ret_tau>.
327 *
328 * Args: x - complete gamma-distributed data [0..n-1]
329 * n - number of samples in <x>
330 * mu - known location parameter
331 * ret_lambda - RETURN: ML estimate of lambda
332 * ret_tau - RETURN: ML estimate of tau
333 *
334 * Returns: <eslOK> on success.
335 *
336 * Throws: <eslENOHALT> if bracketing or bisection fails;
337 * <eslEINVAL> if data cannot be gamma distributed (some <x[i] < mu>,
338 * or zero variance in x).
339 *
340 * Xref: STL10/65.
341 */
342 int
esl_gam_FitComplete(double * x,int n,double mu,double * ret_lambda,double * ret_tau)343 esl_gam_FitComplete(double *x, int n, double mu, double *ret_lambda, double *ret_tau)
344 {
345 double mean, logsum;
346 int i;
347 double c, fc;
348 double a, fa;
349 double b, fb;
350 int status;
351
352 if ((status = tau_by_moments(x, n, mu, &c, &mean, &logsum) != eslOK)) goto ERROR;
353 a = b = c;
354 fc = tau_function(c, mean, logsum);
355
356 /* Rootfinding, 1.: bracketing the root with points a,b.
357 */
358 if (fc > 0.) /* fx>0 means tau is too small, search right */
359 {
360 for (i = 0; i < 100; i++) /* 100 = max iterations */
361 {
362 b = a * 2.;
363 fb = tau_function(b, mean, logsum);
364 if (fb < 0.) break; /* a,b now bracket */
365 a = b; /* else fb>0, so b is a better left bracket than a */
366 }
367 if (i == 100) ESL_XEXCEPTION(eslENOHALT, "failed to bracket");
368 }
369 else if (fc < 0.) /* fx<0 means tau is too large, search left */
370 {
371 for (i = 0; i < 100; i++)
372 {
373 a = b/2.;
374 fa = tau_function(a, mean, logsum);
375 if (fa > 0.) break; /* a,b now bracket */
376 b = a; /* else fa<0, so a is a better right bracket than b */
377 }
378 if (i == 100) ESL_XEXCEPTION(eslENOHALT, "failed to bracket");
379 }
380
381 /* Rootfinding, 2.: Bisection search.
382 * We have the root in interval (a,b).
383 */
384 for (i = 0; i < 100; i++)
385 {
386 c = (a+b)/2.; /* bisection */
387 fc = tau_function(c, mean, logsum);
388 if (fc > 0.) a = c;
389 else if (fc < 0.) b = c;
390 else break; /* unlikely event that we nail it */
391
392 if ((b-a) <= 2.* DBL_EPSILON) {
393 c = (a+b)/2.;
394 break;
395 }
396 }
397 if (i == 100) ESL_XEXCEPTION(eslENOHALT, "bisection search failed");
398
399 *ret_lambda = c / mean;
400 *ret_tau = c;
401 return eslOK;
402
403
404 ERROR:
405 *ret_lambda = 0.0;
406 *ret_tau = 0.0;
407 return status;
408 }
409
410 /* tau_by_moments()
411 *
412 * Obtain an initial estimate for tau by
413 * matching moments. Also returns mean and
414 * logsum, which we need for ML fitting.
415 * To obtain a lambda estimate, use
416 * lambda = tau / mean.
417 */
418 static int
tau_by_moments(double * x,int n,double mu,double * ret_tau,double * ret_mean,double * ret_logsum)419 tau_by_moments(double *x, int n, double mu, double *ret_tau, double *ret_mean, double *ret_logsum)
420 {
421 int i;
422 double mean, var, logsum;
423
424 mean = var = logsum = 0.;
425 for (i = 0; i < n; i++)
426 {
427 if (x[i] < mu) ESL_EXCEPTION(eslEINVAL, "No x[i] can be < mu in gamma data");
428 mean += x[i] - mu; /* mean is temporarily just the sum */
429 logsum += log(x[i] - mu);
430 var += (x[i]-mu)*(x[i]-mu); /* var is temporarily the sum of squares */
431 }
432 var = (var - mean*mean/(double)n) / ((double)n-1); /* now var is the variance */
433 mean /= (double) n; /* and now mean is the mean */
434 logsum /= (double) n;
435
436 if (var == 0.) /* and if mean = 0, var = 0 anyway. */
437 ESL_EXCEPTION(eslEINVAL, "Zero variance in allegedly gamma-distributed dataset");
438
439 if (ret_tau != NULL) *ret_tau = mean * mean / var;
440 if (ret_mean != NULL) *ret_mean = mean;
441 if (ret_logsum != NULL) *ret_logsum = logsum;
442 return eslOK;
443 }
444
445
446 /* tau_function()
447 *
448 * This is the rootfinding equation for tau...
449 * \ref{eqn:gamma_tau_root} in the documentation.
450 * mean is 1/N \sum (x_i - \mu)
451 * logsum is 1/N \sum \log (x_i - \mu)
452 * These are both independent of tau, and dependent
453 * on all data points, so we require the caller to
454 * precalculate them for us.
455 *
456 * This is a decreasing function of tau:
457 * the return value is > 0 when tau is too small,
458 * and < 0 when tau is too large.
459 */
460 static double
tau_function(double tau,double mean,double logsum)461 tau_function(double tau, double mean, double logsum)
462 {
463 double psitau;
464
465 esl_stats_Psi(tau, &psitau);
466 return ( ((log(tau) - psitau) - log(mean)) + logsum );
467 }
468
469 /* Function: esl_gam_FitCompleteBinned()
470 *
471 * Purpose: Fit a complete exponential distribution to the observed
472 * binned data in a histogram <g>, where each
473 * bin i holds some number of observed samples x with values from
474 * lower bound l to upper bound u (that is, $l < x \leq u$);
475 * determine and return maximum likelihood estimates for the
476 * parameters $\mu, \lambda, \tau$ and
477 * return them in <*ret_mu>, <*ret_lambda>, <*ret_tau>.
478 *
479 * Unlike the <esl_exp_FitCompleteBinned()> case where the
480 * ML fit optimizes $\sum_i n_i \log P(a_i \leq x < b_i)$
481 * where $a_i \leq b_i$ are the bounds of bin i with
482 * occupancy $n_i$, here we take the approximation that
483 * $c_i = a_i + 0.5*(b_i-a_i)$ and optimize $\log P(a_i
484 * \leq x < b_i) \simeq \log(w) + \log P(x=c_i)$.
485 *
486 * Since $b_i-a_i = w$ is fixed, optimizing the above
487 * becomes equivalent to optimizing $\sum_i n_i * log P(x=c_i)$.
488 *
489 * The optimization is then equivalent to the non-binned case,
490 * but subsituting in averages such as $\sum_i x(i)$ by
491 * $\sum_i n_i*c_i i$, and so forth.
492 *
493 * If the binned data in <g> were set to focus on
494 * a tail by virtual censoring, the "complete" exponential is
495 * fitted to this tail. The caller then also needs to
496 * remember what fraction of the probability mass was in this
497 * tail.
498 *
499 * Args: g - histogram
500 * ret_mu - RETURN: given by the histogram
501 * ret_lambda - RETURN: ML estimate of lambda
502 * ret_tau - RETURN: ML estimate of tau
503 *
504 * Returns: <eslOK> on success.
505 *
506 * Throws: <eslENOHALT> if bracketing or bisection fails;
507 * <eslEINVAL> if data cannot be gamma distributed (some <x[i] < mu>,
508 * or zero variance in x).
509 *
510 *
511 * Returns: <eslOK> on success.
512 *
513 * Throws: <eslEINVAL> if dataset is true-censored.
514 */
515 int
esl_gam_FitCompleteBinned(ESL_HISTOGRAM * g,double * ret_mu,double * ret_lambda,double * ret_tau)516 esl_gam_FitCompleteBinned(ESL_HISTOGRAM *g, double *ret_mu, double *ret_lambda, double *ret_tau)
517 {
518 double mu = 0.;
519 double mean, logsum;
520 int i;
521 double c, fc;
522 double a, fa;
523 double b, fb;
524 double tol = 1e-6;
525 int maxit = 100;
526 int status;
527
528 if (g->dataset_is == COMPLETE)
529 {
530 if (g->is_rounded) mu = esl_histogram_Bin2LBound(g, g->imin);
531 else mu = g->xmin;
532 }
533 else if (g->dataset_is == VIRTUAL_CENSORED) /* i.e., we'll fit to tail */
534 mu = g->phi;
535 else if (g->dataset_is == TRUE_CENSORED)
536 ESL_EXCEPTION(eslEINVAL, "can't fit true censored dataset");
537
538 if ((status = tau_by_moments_binned(g, mu, &c, &mean, &logsum) != eslOK)) goto ERROR;
539 a = b = c;
540 if (c == 1.0) {
541 *ret_mu = mu;
542 *ret_lambda = c / mean;
543 *ret_tau = c;
544 return eslOK;
545 }
546 fc = tau_function(c, mean, logsum);
547
548 /* Rootfinding, 1.: bracketing the root with points a,b.
549 */
550 if (fc > 0.) /* fx>0 means tau is too small, search right */
551 {
552 for (i = 0; i < maxit; i++) /* max iterations */
553 {
554 b = a * 2.;
555 fb = tau_function(b, mean, logsum);
556
557 if (fb < 0.) break; /* a,b now bracket */
558 a = b; /* else fb>0, so b is a better left bracket than a */
559 }
560 if (i == maxit) ESL_XEXCEPTION(eslENOHALT, "failed to bracket");
561 }
562 else if (fc < 0.) /* fx<0 means tau is too large, search left */
563 {
564 for (i = 0; i < maxit; i++)
565 {
566 a = b/2.;
567 fa = tau_function(a, mean, logsum);
568 if (fa > 0.) break; /* a,b now bracket */
569 b = a; /* else fa<0, so a is a better right bracket than b */
570 }
571 if (i == maxit) ESL_XEXCEPTION(eslENOHALT, "failed to bracket");
572 }
573
574 /* Rootfinding, 2.: Bisection search.
575 * We have the root in interval (a,b).
576 */
577 for (i = 0; i < maxit; i++)
578 {
579 c = (a+b)/2.; /* bisection */
580 fc = tau_function(c, mean, logsum);
581
582 if (fc > 0.) a = c;
583 else if (fc < 0.) b = c;
584 else break; /* unlikely event that we nail it */
585
586 if ((b-a) <= tol) {
587 c = (a+b)/2.;
588 break;
589 }
590 }
591 if (i == maxit) ESL_XEXCEPTION(eslENOHALT, "bisection search failed");
592
593 *ret_mu = mu;
594 *ret_lambda = (mean > 0.)? c / mean : 0.0;
595 *ret_tau = c;
596 return eslOK;
597
598 ERROR:
599 *ret_mu = 0.;
600 *ret_lambda = 0.;
601 *ret_tau = 0.;
602 return status;
603 }
604
605 /* tau_by_moments_binned()
606 *
607 * similar to tau_by_moments()
608 * where mean=\sum_i x_i now becomes mean=\sum_i n(i)*ci, ...
609 *
610 * note: the whole method relies on the property log(sum) >= logsum;
611 * which works if all points are valide, that is positive;
612 * log(0) = -inf is not a valid point,
613 * and the inequality (Jensen's inequality) does not hold.
614 */
615 static int
tau_by_moments_binned(ESL_HISTOGRAM * g,double mu,double * ret_tau,double * ret_mean,double * ret_logsum)616 tau_by_moments_binned(ESL_HISTOGRAM *g, double mu, double *ret_tau, double *ret_mean, double *ret_logsum)
617 {
618 int i;
619 double ai, bi, ci;
620 double sum, mean, var, logsum;
621 double tol = 1e-6;
622
623 sum = mean = var = logsum = 0.;
624 for (i = g->cmin+1; i <= g->imax; i++) /* for each occupied bin */
625 {
626 if (g->obs[i] == 0) continue;
627 ai = esl_histogram_Bin2LBound(g,i);
628 bi = esl_histogram_Bin2UBound(g,i);
629 ci = ai + 0.5 * (bi-ai);
630
631 if (ci < mu) ESL_EXCEPTION(eslEINVAL, "No point can be < mu in gamma data");
632 sum += (double)g->obs[i];
633 mean += (double)g->obs[i] * (ci-mu); /* mean is temporarily just the sum */
634 logsum += (ci>mu)? (double)g->obs[i] * log(ci-mu):0.0;
635 var += (double)g->obs[i] * (ci-mu) * (ci-mu); /* var is temporarily the sum of squares */
636 }
637
638 var = (sum > 1.)? (var - mean*mean/sum) / (sum-1.) : 0.0; /* now var is the variance */
639 mean /= (sum > 0.)? sum : 1.; /* and now mean is the mean */
640 logsum /= (sum > 0.)? sum : 1.;
641
642 if (ret_tau != NULL) *ret_tau = (var < tol || mean == 0.)? 1. : mean * mean / var;
643 if (ret_mean != NULL) *ret_mean = mean;
644 if (ret_logsum != NULL) *ret_logsum = logsum;
645 return eslOK;
646 }
647
648
649
650 /****************************************************************************
651 * 6. Test driver
652 ****************************************************************************/
653 #ifdef eslGAMMA_TESTDRIVE
654 /* Compile:
655 gcc -g -Wall -I. -I ~/src/easel -L ~/src/easel -o test -DeslGAMMA_TESTDRIVE\
656 esl_gamma.c -leasel -lm
657 */
658 #include <stdio.h>
659 #include <stdlib.h>
660 #include <string.h>
661
662 #include "easel.h"
663 #include "esl_random.h"
664 #include "esl_histogram.h"
665 #include "esl_gamma.h"
666
667 int
main(int argc,char ** argv)668 main(int argc, char **argv)
669 {
670 ESL_HISTOGRAM *h;
671 ESL_RANDOMNESS *r;
672 double mu = -5.0;
673 double lambda = 2.0;
674 double tau = 0.7;
675 int n = 10000;
676 double binwidth = 0.0001;
677 double emu, elambda, etau;
678 int i;
679 double x;
680 double *data;
681 int ndata;
682
683 int opti;
684 int be_verbose = FALSE;
685 char *plotfile = NULL;
686 FILE *pfp = stdout;
687 int plot_pdf = FALSE;
688 int plot_logpdf = FALSE;
689 int plot_cdf = FALSE;
690 int plot_logcdf = FALSE;
691 int plot_surv = FALSE;
692 int plot_logsurv = FALSE;
693 int xmin_set = FALSE;
694 double xmin;
695 int xmax_set = FALSE;
696 double xmax;
697 int xstep_set = FALSE;
698 double xstep;
699
700 for (opti = 1; opti < argc && *(argv[opti]) == '-'; opti++)
701 {
702 if (strcmp(argv[opti], "-m") == 0) mu = atof(argv[++opti]);
703 else if (strcmp(argv[opti], "-l") == 0) lambda = atof(argv[++opti]);
704 else if (strcmp(argv[opti], "-n") == 0) n = atoi(argv[++opti]);
705 else if (strcmp(argv[opti], "-o") == 0) plotfile = argv[++opti];
706 else if (strcmp(argv[opti], "-t") == 0) tau = atof(argv[++opti]);
707 else if (strcmp(argv[opti], "-v") == 0) be_verbose = TRUE;
708 else if (strcmp(argv[opti], "-w") == 0) binwidth = atof(argv[++opti]);
709 else if (strcmp(argv[opti], "-C") == 0) plot_cdf = TRUE;
710 else if (strcmp(argv[opti], "-LC") == 0) plot_logcdf = TRUE;
711 else if (strcmp(argv[opti], "-P") == 0) plot_pdf = TRUE;
712 else if (strcmp(argv[opti], "-LP") == 0) plot_logpdf = TRUE;
713 else if (strcmp(argv[opti], "-S") == 0) plot_surv = TRUE;
714 else if (strcmp(argv[opti], "-LS") == 0) plot_logsurv = TRUE;
715 else if (strcmp(argv[opti], "-XL") == 0) { xmin_set = TRUE; xmin = atof(argv[++opti]); }
716 else if (strcmp(argv[opti], "-XH") == 0) { xmax_set = TRUE; xmax = atof(argv[++opti]); }
717 else if (strcmp(argv[opti], "-XS") == 0) { xstep_set = TRUE; xstep = atof(argv[++opti]); }
718 else esl_fatal("bad option");
719 }
720
721 if (be_verbose)
722 printf("Parametric: mu = %f lambda = %f tau = %f\n", mu, lambda, tau);
723
724 r = esl_randomness_Create(0);
725 h = esl_histogram_CreateFull(mu, 100., binwidth);
726 if (plotfile != NULL) {
727 if ((pfp = fopen(plotfile, "w")) == NULL) esl_fatal("Failed to open plotfile");
728 }
729 if (! xmin_set) xmin = mu;
730 if (! xmax_set) xmax = mu+40*(1./lambda);
731 if (! xstep_set) xstep = 0.1;
732
733 for (i = 0; i < n; i++)
734 {
735 x = esl_gam_Sample(r, mu, lambda, tau);
736 esl_histogram_Add(h, x);
737 }
738 esl_histogram_GetData(h, &data, &ndata);
739
740 esl_gam_FitComplete(data, ndata, mu, &elambda, &etau);
741 if (be_verbose)
742 printf("Complete data fit: mu = %f lambda = %f tau = %f\n", mu, elambda, etau);
743 if (fabs( (elambda-lambda)/lambda ) > 0.10) esl_fatal("Error in (complete) fitted lambda > 10%%\n");
744 if (fabs( (etau-tau)/tau ) > 0.10) esl_fatal("Error in (complete) fitted tau > 10%%\n");
745
746 esl_gam_FitCompleteBinned(h, &emu, &elambda, &etau);
747 if (be_verbose) printf("Binned data fit: mu = %f lambda = %f tau = %f\n", emu, elambda, etau);
748 if (fabs( (emu-mu)/mu ) > 0.01) esl_fatal("Error in (binned) fitted mu > 1%%\n");
749 if (fabs( (elambda-lambda)/lambda ) > 0.10) esl_fatal("Error in (binned) fitted lambda > 10%%\n");
750 if (fabs( (etau-tau)/tau ) > 0.10) esl_fatal("Error in (binned) fitted tau > 10%%\n");
751
752 if (plot_pdf) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_pdf, xmin, xmax, xstep);
753 if (plot_logpdf) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_logpdf, xmin, xmax, xstep);
754 if (plot_cdf) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_cdf, xmin, xmax, xstep);
755 if (plot_logcdf) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_logcdf, xmin, xmax, xstep);
756 if (plot_surv) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_surv, xmin, xmax, xstep);
757 if (plot_logsurv) esl_gam_Plot(pfp, mu, lambda, tau, &esl_gam_logsurv, xmin, xmax, xstep);
758
759 if (plotfile != NULL) fclose(pfp);
760
761 esl_randomness_Destroy(r);
762 esl_histogram_Destroy(h);
763 return 0;
764 }
765 #endif /*eslGAMMA_TESTDRIVE*/
766
767 /****************************************************************************
768 * Example main()
769 ****************************************************************************/
770 #ifdef eslGAMMA_EXAMPLE
771 /*::cexcerpt::gam_example::begin::*/
772 #include <stdio.h>
773 #include "easel.h"
774 #include "esl_random.h"
775 #include "esl_histogram.h"
776 #include "esl_gamma.h"
777
778 int
main(int argc,char ** argv)779 main(int argc, char **argv)
780 {
781 double mu = -5.0;
782 double lambda = 2.0;
783 double tau = 0.7;
784 ESL_HISTOGRAM *h = esl_histogram_CreateFull(mu, 100., 0.1);
785 ESL_RANDOMNESS *r = esl_randomness_Create(0);
786 int n = 10000;
787 double elam, etau;
788 int i;
789 double x;
790 double *data;
791 int ndata;
792
793 /* Take <n> gamma-distributed random samples. */
794 for (i = 0; i < n; i++)
795 {
796 x = esl_gam_Sample(r, mu, lambda, tau);
797 esl_histogram_Add(h, x);
798 }
799 esl_histogram_GetData(h, &data, &ndata);
800
801 /* Plot the empirical (sampled) and expected survivals */
802 esl_histogram_PlotSurvival(stdout, h);
803 esl_gam_Plot(stdout, mu, lambda, tau,
804 &esl_gam_surv, h->xmin, h->xmax, 0.1);
805
806 /* ML fit to complete data, and plot fitted survival curve */
807 esl_gam_FitComplete(data, ndata, mu, &elam, &etau);
808 esl_gam_Plot(stdout, mu, elam, etau,
809 &esl_gam_surv, h->xmin, h->xmax, 0.1);
810
811 esl_randomness_Destroy(r);
812 esl_histogram_Destroy(h);
813 return 0;
814 }
815 /*::cexcerpt::gam_example::end::*/
816 #endif /*eslGAMMA_EXAMPLE*/
817
818
819