1 /* Statistical routines for hyperexponential distributions.
2  *
3  * Contents:
4  *   1. The ESL_HYPEREXP object
5  *   2. Evaluating densities and distributions
6  *   3. Generic API routines: for general interface w/ histogram module
7  *   4. Dumping plots for files
8  *   5. Sampling
9  *   6. File input
10  *   7. ML fitting to complete data
11  *   8. ML fitting to binned data
12  *   9. Test driver
13  *  10. Example
14  *
15  * Xrefs:
16  *   STL9/140     :  original implementation
17  *   STL9/143-144 :  ML fitting to binned data
18  *
19  * To-do:
20  *   - Fit*() functions should return eslEINVAL on n=0, eslENORESULT
21  *     on failure due to small n. Compare esl_gumbel. xref J12/93.
22  *     SRE, Wed Nov 27 11:17:59 2013
23  */
24 #include "esl_config.h"
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <math.h>
29 
30 #include "easel.h"
31 #include "esl_exponential.h"
32 #include "esl_fileparser.h"
33 #include "esl_histogram.h"
34 #include "esl_minimizer.h"
35 #include "esl_random.h"
36 #include "esl_stats.h"
37 #include "esl_vectorops.h"
38 
39 #include "esl_hyperexp.h"
40 
41 /****************************************************************************
42  *# 1. The ESL_HYPEREXP object
43  ****************************************************************************/
44 
45 /* Function:  esl_hyperexp_Create()
46  *
47  * Purpose:   Creates an object to hold parameters for a <K>-component
48  *            hyperexponential.
49  *
50  *            Parameters in the object are initialized
51  *            ($q_k = \frac{1}{K}$, $\lambda_k = 1$, $\mu = 0$), but
52  *            the caller will want to set these according to its own
53  *            purposes.
54  *
55  * Args:      K  - number of components in the mixture
56  *
57  * Returns:   ptr to newly allocated/initialized <ESL_HYPEREXP> object.
58  *
59  * Throws:    NULL on allocation failure.
60  */
61 ESL_HYPEREXP *
esl_hyperexp_Create(int K)62 esl_hyperexp_Create(int K)
63 {
64   int           status;
65   ESL_HYPEREXP *h = NULL;
66   int           k;
67 
68   ESL_ALLOC(h, sizeof(ESL_HYPEREXP));
69   h->q = h->lambda = h->wrk = NULL;
70   h->fixlambda = NULL;
71   h->K         = K;
72   h->fixmix    = FALSE;
73 
74   ESL_ALLOC(h->q,         sizeof(double) * K);
75   ESL_ALLOC(h->lambda,    sizeof(double) * K);
76   ESL_ALLOC(h->wrk,       sizeof(double) * K);
77   ESL_ALLOC(h->fixlambda, sizeof(char)   * K);
78 
79   for (k = 0; k < K; k++)
80     {
81       h->q[k]        = 1. / (double) K;
82       h->lambda[k]   = 1.;
83       h->fixlambda[k]= 0;
84     }
85   h->mu = 0.;
86   return h;
87 
88  ERROR:
89   esl_hyperexp_Destroy(h);
90   return NULL;
91 }
92 
93 /* Function:  esl_hyperexp_Destroy()
94  *
95  * Purpose:   Deallocates the hyperexponential parameter object <h>.
96  *
97  * Args:      h  - ptr to the object to be deallocated.
98  *
99  * Returns:   (void).
100  */
101 void
esl_hyperexp_Destroy(ESL_HYPEREXP * h)102 esl_hyperexp_Destroy(ESL_HYPEREXP *h)
103 {
104   if (h == NULL) return;
105 
106   if (h->q        != NULL) free(h->q);
107   if (h->lambda   != NULL) free(h->lambda);
108   if (h->wrk      != NULL) free(h->wrk);
109   if (h->fixlambda!= NULL) free(h->fixlambda);
110   free(h);
111 }
112 
113 
114 /* Function:  esl_hyperexp_Copy()
115  *
116  * Purpose:   Makes a copy of the hyperexponential parameter object <src>
117  *            in <dest>. Caller must have already allocated <dest> to have
118  *            (at least) the same number of components as <src>.
119  *
120  * Args:      src   - object to be copied
121  *            dest  - allocated object to copy <src> into
122  *
123  * Returns:   <eslOK> on success.
124  *
125  * Throws:    <eslEINCOMPAT> if <dest> isn't allocated with enough
126  *            components to hold a copy of <src>.
127  */
128 int
esl_hyperexp_Copy(ESL_HYPEREXP * src,ESL_HYPEREXP * dest)129 esl_hyperexp_Copy(ESL_HYPEREXP *src, ESL_HYPEREXP *dest)
130 {
131   int k;
132 
133   if (dest->K < src->K)
134     ESL_EXCEPTION(eslEINCOMPAT, "hyperexponential too small to copy into");
135 
136   for (k = 0; k < src->K; k++)
137     {
138       dest->q[k]        = src->q[k];
139       dest->lambda[k]   = src->lambda[k];
140       dest->fixlambda[k]= src->fixlambda[k];
141     }
142   dest->mu     = src->mu;
143   dest->K      = src->K;
144   dest->fixmix = src->fixmix;
145   return eslOK;
146 }
147 
148 /* Function:  esl_hyperexp_FixedUniformMixture()
149  *
150  * Purpose:   Set the mixture coeffients to a uniform (1/K) distribution,
151  *            and fix them there so they aren't estimable parameters.
152  */
153 int
esl_hyperexp_FixedUniformMixture(ESL_HYPEREXP * h)154 esl_hyperexp_FixedUniformMixture(ESL_HYPEREXP *h)
155 {
156   int k;
157   for (k = 0; k < h->K; k++) h->q[k] = 1./(double)h->K;
158   h->fixmix = TRUE;
159   return eslOK;
160 }
161 
162 
163 /* Function:  esl_hyperexp_SortComponents()
164  *
165  * Purpose:   Rearrange the components in a hyperexponential in
166  *            order of lambda values, with the highest lambda first.
167  *
168  *            Stupid $O(K^2)$ selection sort algorithm here, because we
169  *            expect $K$ to be small.
170  */
171 int
esl_hyperexp_SortComponents(ESL_HYPEREXP * h)172 esl_hyperexp_SortComponents(ESL_HYPEREXP *h)
173 {
174   int    k, kp;
175   char   ctmp;
176   double dtmp;
177 
178   for (k = 0; k < h->K-1; k++)
179     {
180       kp = k + esl_vec_DArgMax(h->lambda+k, h->K-k);
181       if (k != kp)
182 	{
183 	  dtmp = h->q[k];         h->q[k]         = h->q[kp];         h->q[kp]         = dtmp;
184 	  dtmp = h->lambda[k];    h->lambda[k]    = h->lambda[kp];    h->lambda[kp]    = dtmp;
185 	  ctmp = h->fixlambda[k]; h->fixlambda[k] = h->fixlambda[kp]; h->fixlambda[kp] = ctmp;
186 	}
187     }
188   return eslOK;
189 }
190 
191 
192 /* Function:  esl_hyperexp_Write()
193  *
194  * Purpose:   Write hyperexponential parameters from <hxp> to an open <fp>.
195  *
196  *            The output format is suitable for input by <esl_hyperexp_Read()>.
197  *
198  * Returns:   <eslOK> on success.
199  *
200  * Throws:    <eslEWRITE> on any write error.
201  */
202 int
esl_hyperexp_Write(FILE * fp,ESL_HYPEREXP * hxp)203 esl_hyperexp_Write(FILE *fp, ESL_HYPEREXP *hxp)
204 {
205   int k;
206 
207   if (fprintf(fp, "%8d     # number of components\n", hxp->K)     < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed");
208   if (fprintf(fp, "%8.2f   # mu (for all components)\n", hxp->mu) < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed");
209   for (k = 0; k < hxp->K; k++)
210     if (fprintf(fp, "%8.6f %12.6f  # q[%d], lambda[%d]\n",
211 		hxp->q[k], hxp->lambda[k], k, k)                  < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed");
212   return eslOK;
213 }
214 
215 
216 /* Function:  esl_hyperexp_Dump()
217  *
218  * Purpose:   Dump hyperexponential parameters from <hxp> to an open <fp>,
219  *            all on one line with no comments.
220  *
221  *            The output format is suitable for input by
222  *            <esl_hyperexp_Read()>, like <esl_hyperexp_Write()>,
223  *            though it's intended as a diagnostic dump of the
224  *            contents of the object.
225  *
226  * Returns:   <eslOK> on success.
227  */
228 int
esl_hyperexp_Dump(FILE * fp,ESL_HYPEREXP * hxp)229 esl_hyperexp_Dump(FILE *fp, ESL_HYPEREXP *hxp)
230 {
231   int k;
232 
233   fprintf(fp, "%2d ", hxp->K);
234   fprintf(fp, "%6.2f ", hxp->mu);
235   for (k = 0; k < hxp->K; k++)
236     fprintf(fp, "%5.3f %9.6f ", hxp->q[k], hxp->lambda[k]);
237   fprintf(fp, "\n");
238   return eslOK;
239 }
240 
241 /*----------------- end ESL_HYPEREXP object maintenance --------------------*/
242 
243 
244 
245 /****************************************************************************
246  * 2. Evaluating densities and distributions
247  ****************************************************************************/
248 /* all lambda_k > 0
249  * all q_k are probabilities, \sum_k q_k = 1 [watch out for q_k=0 in log(q_k)].
250  * mu <= x < infinity   [mu=x is not a problem]
251  */
252 
253 /* Function:  esl_hxp_pdf()
254  *
255  * Purpose:   Returns the probability density function $P(X=x)$ for
256  *            quantile <x>, given hyperexponential parameters <h>.
257  */
258 double
esl_hxp_pdf(double x,ESL_HYPEREXP * h)259 esl_hxp_pdf(double x, ESL_HYPEREXP *h)
260 {
261   double pdf = 0.;
262   int    k;
263 
264   if (x < h->mu) return 0.;
265 
266   for (k = 0; k < h->K; k++)
267     pdf += h->q[k] * esl_exp_pdf(x, h->mu, h->lambda[k]);
268   return pdf;
269 }
270 
271 
272 /* Function:  esl_hxp_logpdf()
273  *
274  * Purpose:   Returns the log of the PDF ($\log P(X=x)$) for quantile <x>,
275  *            given hyperexponential parameters <h>.
276  */
277 double
esl_hxp_logpdf(double x,ESL_HYPEREXP * h)278 esl_hxp_logpdf(double x, ESL_HYPEREXP *h)
279 {
280   int    k;
281   double z;
282 
283   if (x < h->mu) return -eslINFINITY;
284 
285   for (k = 0; k < h->K; k++)
286     if (h->q[k] == 0.0)
287       h->wrk[k] = -eslINFINITY;
288     else
289       h->wrk[k] = log(h->q[k]) + esl_exp_logpdf(x, h->mu, h->lambda[k]);
290 
291   z = esl_vec_DLogSum(h->wrk, h->K);
292   return z;
293 }
294 
295 /* Function:  esl_hxp_cdf()
296  *
297  * Purpose:   Returns the cumulative distribution function $P(X \leq x)$
298  *            for quantile <x>, given hyperexponential parameters <h>.
299  */
300 double
esl_hxp_cdf(double x,ESL_HYPEREXP * h)301 esl_hxp_cdf(double x, ESL_HYPEREXP *h)
302 {
303   double cdf = 0.;
304   int    k;
305 
306   if (x < h->mu) return 0.;
307 
308   for (k = 0; k < h->K; k++)
309     cdf += h->q[k] * esl_exp_cdf(x, h->mu, h->lambda[k]);
310   return cdf;
311 }
312 
313 /* Function:  esl_hxp_logcdf()
314  *
315  * Purpose:   Returns the log of the CDF $\log P(X \leq x)$
316  *            for quantile <x>, given hyperexponential parameters <h>.
317  */
318 double
esl_hxp_logcdf(double x,ESL_HYPEREXP * h)319 esl_hxp_logcdf(double x, ESL_HYPEREXP *h)
320 {
321   int k;
322 
323   if (x < h->mu) return -eslINFINITY;
324 
325   for (k = 0; k < h->K; k++)
326     if (h->q[k] == 0.0)
327       h->wrk[k] = -eslINFINITY;
328     else
329       h->wrk[k] = log(h->q[k]) + esl_exp_logcdf(x, h->mu, h->lambda[k]);
330 
331   return esl_vec_DLogSum(h->wrk, h->K);
332 }
333 
334 
335 /* Function:  esl_hxp_surv()
336  *
337  * Purpose:   Returns the survivor function $P(X > x)$ (1-CDF)
338  *            for quantile <x>, given hyperexponential parameters <h>.
339  */
340 double
esl_hxp_surv(double x,ESL_HYPEREXP * h)341 esl_hxp_surv(double x, ESL_HYPEREXP *h)
342 {
343   double srv = 0.;
344   int    k;
345 
346   if (x < h->mu) return 1.0;
347 
348   for (k = 0; k < h->K; k++)
349     srv += h->q[k] * esl_exp_surv(x, h->mu, h->lambda[k]);
350   return srv;
351 }
352 
353 
354 /* Function:  esl_hxp_logsurv()
355  *
356  * Purpose:   Returns the log survivor function $\log P(X > x)$ (log(1-CDF))
357  *            for quantile <x>, given hyperexponential parameters <h>.
358  */
359 double
esl_hxp_logsurv(double x,ESL_HYPEREXP * h)360 esl_hxp_logsurv(double x, ESL_HYPEREXP *h)
361 {
362   int k;
363 
364   if (x < h->mu) return 0.0;
365 
366   for (k = 0; k < h->K; k++)
367     if (h->q[k] == 0.0)
368       h->wrk[k] = -eslINFINITY;
369     else
370       h->wrk[k] = log(h->q[k]) + esl_exp_logsurv(x, h->mu, h->lambda[k]);
371 
372   return esl_vec_DLogSum(h->wrk, h->K);
373 }
374 
375 /* Function:  esl_hxp_invcdf()
376  *
377  * Purpose:   Calculates the inverse CDF for a hyperexponential <h>
378  *            returning the quantile <x> at which the CDF is <p>.
379  *
380  *            The inverse CDF of a mixture model has no
381  *            analytical expression as far as I'm aware. The calculation
382  *            here is a computationally expensive, brute force bisection
383  *            search in <x> using the CDF function. It will suffice for
384  *            a small number of calls (for plotting applications, for example),
385  *            but it is not sufficient for a large number of calls.
386  */
387 double
esl_hxp_invcdf(double p,ESL_HYPEREXP * h)388 esl_hxp_invcdf(double p, ESL_HYPEREXP *h)
389 {
390   double x1, x2, xm;		/* low, high guesses at x */
391   double f2, fm;
392   double tol = 1e-6;
393 
394   x1 = h->mu;
395   x2 = h->mu + 1.;
396   do {				/* bracket */
397     x2 = x2 + 2.*(x2-x1);
398     f2 = esl_hxp_cdf(x2, h);
399   } while (f2 < p);
400 
401   do {				/* bisection */
402     xm = (x1+x2) / 2.;
403     fm = esl_hxp_cdf(xm, h);
404 
405     if      (fm > p) x2 = xm;
406     else if (fm < p) x1 = xm;
407     else return xm;		/* unlikely case of fm==cdf */
408   } while ( (x2-x1)/(x1+x2-2*h->mu) > tol);
409 
410   xm = (x1+x2) / 2.;
411   return xm;
412 
413 }
414 /*-------------------- end densities & distributions ------------------------*/
415 
416 
417 
418 
419 /****************************************************************************
420  * 3. Generic API routines: for general interface w/ histogram module
421  ****************************************************************************/
422 
423 /* Function:  esl_hxp_generic_pdf()
424  *
425  * Purpose:   Generic-API version of PDF call.
426  */
427 double
esl_hxp_generic_pdf(double x,void * params)428 esl_hxp_generic_pdf(double x, void *params)
429 {
430   ESL_HYPEREXP *h = (ESL_HYPEREXP *) params;
431   return esl_hxp_pdf(x, h);
432 }
433 
434 /* Function:  esl_hxp_generic_cdf()
435  *
436  * Purpose:   Generic-API version of CDF call.
437  */
438 double
esl_hxp_generic_cdf(double x,void * params)439 esl_hxp_generic_cdf(double x, void *params)
440 {
441   ESL_HYPEREXP *h = (ESL_HYPEREXP *) params;
442   return esl_hxp_cdf(x, h);
443 }
444 
445 /* Function:  esl_hxp_generic_surv()
446  *
447  * Purpose:   Generic-API version of survivor function.
448  */
449 double
esl_hxp_generic_surv(double x,void * params)450 esl_hxp_generic_surv(double x, void *params)
451 {
452   ESL_HYPEREXP *h = (ESL_HYPEREXP *) params;
453   return esl_hxp_surv(x, h);
454 }
455 
456 /* Function:  esl_hxp_generic_invcdf()
457  *
458  * Purpose:   Generic-API version of inverse CDF.
459  */
460 double
esl_hxp_generic_invcdf(double p,void * params)461 esl_hxp_generic_invcdf(double p, void *params)
462 {
463   ESL_HYPEREXP *h = (ESL_HYPEREXP *) params;
464   return esl_hxp_invcdf(p, h);
465 }
466 /*------------------------ end generic API ---------------------------------*/
467 
468 
469 
470 
471 
472 
473 /****************************************************************************
474  * 4. Dumping plots for files
475  ****************************************************************************/
476 
477 /* Function:  esl_hxp_Plot()
478  *
479  * Purpose:   Plot some function <func> (for instance, <esl_hxp_pdf()>)
480  *            for hyperexponential parameters <h>, for a range of
481  *            quantiles x from <xmin> to <xmax> in steps of <xstep>;
482  *            output to an open stream <fp> in xmgrace XY input format.
483  *
484  * Returns:   <eslOK> on success.
485  *
486  * Throws:    <eslEWRITE> on any system write error.
487  */
488 int
esl_hxp_Plot(FILE * fp,ESL_HYPEREXP * h,double (* func)(double x,ESL_HYPEREXP * h),double xmin,double xmax,double xstep)489 esl_hxp_Plot(FILE *fp, ESL_HYPEREXP *h,
490 	     double (*func)(double x, ESL_HYPEREXP *h),
491 	     double xmin, double xmax, double xstep)
492 {
493   double x;
494   for (x = xmin; x <= xmax; x += xstep)
495     if (fprintf(fp, "%f\t%g\n", x, (*func)(x, h)) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "hyperexp plot write failed");
496   if (fprintf(fp, "&\n")                          < 0) ESL_EXCEPTION_SYS(eslEWRITE, "hyperexp plot write failed");
497   return eslOK;
498 }
499 /*-------------------- end plot dumping routines ---------------------------*/
500 
501 
502 
503 
504 
505 /****************************************************************************
506  * 5. Sampling
507  ****************************************************************************/
508 
509 /* Function:  esl_hxp_Sample()
510  *
511  * Purpose:   Sample a random variate x from a hyperexponential <h>,
512  *            given random number source <r>.
513  */
514 double
esl_hxp_Sample(ESL_RANDOMNESS * r,ESL_HYPEREXP * h)515 esl_hxp_Sample(ESL_RANDOMNESS *r, ESL_HYPEREXP *h)
516 {
517   int k;
518   k = esl_rnd_DChoose(r, h->q, h->K);
519   return esl_exp_Sample(r, h->mu, h->lambda[k]);
520 }
521 
522 /*--------------------------- end sampling ---------------------------------*/
523 
524 
525 
526 /****************************************************************************
527  * 6. File input (mixture models are a little too complex to set on commandline)
528  ****************************************************************************/
529 
530 /* Function:  esl_hyperexp_Read()
531  *
532  * Purpose:   Reads hyperexponential parameters from an open <e>.
533  *            which is an <ESL_FILEPARSER> tokenizer for an open stream.
534  *
535  *            The first token is <K>, the number of mixture components.
536  *            The second token is <mu>, the x offset shared by all components.
537  *            Then for each mixture component <k=1..K>, it reads
538  *            a mixture coefficient <q[k]> and a decay parameter
539  *            <lambda[k]>.
540  *
541  *            The <2K+2> data tokens must occur in this order, but
542  *            they can be grouped into any number of lines, because the
543  *            parser ignores line breaks.
544  *
545  *            Anything after a <\#> character on a line is a comment, and
546  *            is ignored.
547  *
548  * Returns:   <eslOK> on success, and <ret_hxp> points to a new <ESL_HYPEREXP>
549  *            object.
550  *            <eslEFORMAT> on "normal" parse failure caused by a bad file
551  *            format that's likely the user's fault.
552  *
553  * Throws:    <eslEMEM> if allocation of the new <ESL_HYPEREXP> fails.
554  *
555  *
556  * FIXME: All our mixture models (esl_dirichlet, for example) should be
557  *        reconciled w/ identical interfaces & behaviour.
558  */
559 int
esl_hyperexp_Read(ESL_FILEPARSER * e,ESL_HYPEREXP ** ret_hxp)560 esl_hyperexp_Read(ESL_FILEPARSER *e, ESL_HYPEREXP **ret_hxp)
561 {
562   ESL_HYPEREXP   *hxp = NULL;
563   char           *tok;
564   int             status = eslOK;
565   int             nc;
566   int             k;
567   double          sum;
568 
569   esl_fileparser_SetCommentChar(e, '#');
570 
571   if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
572   nc = atoi(tok);
573   if (nc < 1) {
574     sprintf(e->errbuf, "Expected # of components K >= 1 as first token");
575     goto ERROR;
576   }
577 
578   if ((hxp = esl_hyperexp_Create(nc)) == NULL) return eslEMEM; /* percolation */
579 
580   if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
581   hxp->mu = atof(tok);
582 
583   for (k = 0; k < hxp->K; k++)
584     {
585       if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
586       hxp->q[k] = atof(tok);
587 
588       if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
589       hxp->lambda[k] = atof(tok);
590 
591       if (hxp->q[k] < 0. || hxp->q[k] > 1.) {
592 	sprintf(e->errbuf, "Expected a mixture coefficient q[k], 0<=q[k]<=1");
593 	goto ERROR;
594       }
595       if (hxp->lambda[k] <= 0.) {
596 	sprintf(e->errbuf, "Expected a lambda parameter, lambda>0");
597 	goto ERROR;
598       }
599     }
600   sum = esl_vec_DSum(hxp->q, hxp->K);
601   if (fabs(sum-1.0) > 0.05) {
602     sprintf(e->errbuf, "Expected mixture coefficients to sum to 1");
603     goto ERROR;
604   }
605   esl_vec_DNorm(hxp->q, hxp->K);
606   *ret_hxp = hxp;
607   return eslOK;
608 
609  ERROR:
610   esl_hyperexp_Destroy(hxp);
611   return eslEFORMAT;
612 }
613 
614 /* Function:  esl_hyperexp_ReadFile()
615  *
616  * Purpose:   Convenience wrapper around <esl_hyperexp_Read()> that takes
617  *            a filename as an argument, instead of an open <ESL_FILEPARSER>.
618  *
619  *            This lets you quickly read an object from a file, but it
620  *            limits your ability to deal gracefully and flexibly with
621  *            'normal' errors like 'file not found' or 'bad file format'.
622  *            Here, all errors are fatal.
623  *
624  * Returns:   <eslOK> on success.
625  *
626  * Throws:    <eslEMEM> on an allocation failure.
627  *
628  *            <eslEFORMAT> on any parse error. Diagnostic information is
629  *            unavailable, because the <ESL_FILEPARSER> that's holding
630  *            that information is internal to this function.
631  *
632  *            <eslENOTFOUND> on any failure to open the file.
633  */
634 int
esl_hyperexp_ReadFile(char * filename,ESL_HYPEREXP ** ret_hxp)635 esl_hyperexp_ReadFile(char *filename, ESL_HYPEREXP **ret_hxp)
636 {
637   FILE           *fp;
638   ESL_FILEPARSER *e;
639   int             status;
640 
641   if ((fp = fopen(filename, "r")) == NULL)
642     ESL_EXCEPTION(eslENOTFOUND, "file not found");
643 
644   if ((e = esl_fileparser_Create(fp)) == NULL) {
645     fclose(fp);
646     ESL_EXCEPTION(eslEMEM, "failed to create fileparser");
647   }
648   esl_fileparser_SetCommentChar(e, '#');
649 
650   status = esl_hyperexp_Read(e, ret_hxp);
651 
652   esl_fileparser_Destroy(e);
653   fclose(fp);
654   return status;
655 }
656 
657 
658 
659 
660 /****************************************************************************
661  * 7. ML fitting to complete data
662  ****************************************************************************/
663 
664 /* This structure is used to sneak the data into minimizer's generic
665  * (void *) API for all aux data
666  */
667 struct hyperexp_data {
668   double *x;
669   int     n;
670   ESL_HYPEREXP *h;
671 };
672 
673 /* Given hyperexponential parameters in <h>;
674  * do appropriate c.o.v.'s to unconstrained real parameters
675  * and fill in the packed parameter vector <p>.
676  *
677  * <p> must be allocated for at least (2K-1) doubles: K-1 mixture
678  * coefficients and K lambda parameters. (mu is not a free param).
679  *
680  * First K-1 are $Q_1..Q_{K-1}$ mixture coefficient parameters; $Q_0$ implicitly 0;
681  *  cov is $q_k = \frac{e^{Q_k}}{\sum_j e^{Q_j}}$;  $Q_k = \log(q_k) - \log(q_0)$.
682  * Then K lambda params;
683  * lambda cov is $\lambda = e^w$, $w = \log(\lambda)$.
684  */
685 static void
hyperexp_pack_paramvector(double * p,int np,ESL_HYPEREXP * h)686 hyperexp_pack_paramvector(double *p, int np, ESL_HYPEREXP *h)
687 {
688   int    i;			/* counter in parameter vector p */
689   int    k;			/* counter in mixture components */
690   double z;			/* tmp variable */
691 
692   /* mixture coefficients */
693   i = 0;
694   if (! h->fixmix) {
695     z = log(h->q[0]);
696     for (k = 1; k < h->K; k++)
697       p[i++] = log(h->q[k]) - z;
698   }
699 
700   /* exponential parameters */
701   for (k = 0; k < h->K; k++)
702     if (! h->fixlambda[k])
703       p[i++] = log(h->lambda[k]);
704 }
705 
706 /* Same as above but in reverse: given parameter vector <p>,
707  * <np> = 2K-1, do appropriate c.o.v. back to desired parameter space, and
708  * update the hyperexponential <h>.
709  */
710 static void
hyperexp_unpack_paramvector(double * p,int np,ESL_HYPEREXP * h)711 hyperexp_unpack_paramvector(double *p, int np, ESL_HYPEREXP *h)
712 {
713   int    i;			/* counter in parameter vector p */
714   int    k;			/* counter in mixture components */
715   double z;			/* tmp variable  */
716 
717   /* Fetch the params in their c.o.v. space first
718    */
719   i = 0;
720   if (! h->fixmix) {
721     h->q[0] = 0;	/* implicitly */
722     for (k = 1; k < h->K; k++)
723       h->q[k] = p[i++];
724   }
725   for (k = 0; k < h->K; k++)
726     if (! h->fixlambda[k])
727       h->lambda[k] = p[i++];
728 
729   /* Convert mix coefficients back to probabilities;
730    * their  c.o.v. is q_k = e^{Q_k} / \sum_k e^{Q_k}
731    * which rearranges to exp(Q_k - log[\sum_k e^Q_k]),
732    * and we have the DLogSum() function to compute the log sum.
733    */
734   if (! h->fixmix) {
735     z = esl_vec_DLogSum(h->q, h->K);
736     for (k = 0; k < h->K; k++)
737       h->q[k] = exp(h->q[k] - z);
738   }
739 
740   /* lambda c.o.v. is \lambda = e^w */
741   for (k = 0; k < h->K; k++)
742     if (! h->fixlambda[k])
743       h->lambda[k] = exp(h->lambda[k]);
744 }
745 
746 /* The log likelihood function to be optimized by ML fitting:
747  *   This needs to be careful of a case where a lambda = inf.
748  */
749 static double
hyperexp_complete_func(double * p,int np,void * dptr)750 hyperexp_complete_func(double *p, int np, void *dptr)
751 {
752   struct hyperexp_data *data = (struct hyperexp_data *) dptr;
753   ESL_HYPEREXP         *h    = data->h;
754   double logL = 0.;
755   int    i;
756 
757   hyperexp_unpack_paramvector(p, np, h);
758   for (i = 0; i < data->n; i++)
759     logL += esl_hxp_logpdf(data->x[i], h);
760   return -logL;
761 }
762 
763 /* The gradient of the NLL w.r.t. each free parameter in p.
764  */
765 static void
hyperexp_complete_gradient(double * p,int np,void * dptr,double * dp)766 hyperexp_complete_gradient(double *p, int np, void *dptr, double *dp)
767 {
768   struct hyperexp_data *data = (struct hyperexp_data *) dptr;
769   ESL_HYPEREXP         *h    = data->h;
770   double pdf;
771   int i,k;
772   int pidx;
773 
774   hyperexp_unpack_paramvector(p, np, h);
775   esl_vec_DSet(dp, np, 0.);
776   for (i = 0; i < data->n; i++)
777     {
778       /* FIXME: I think the calculation below may need to be done
779        * in log space, to avoid underflow errors; see complete_binned_gradient()
780        */
781       /* Precalculate q_k PDF_k(x) terms, and their sum */
782       for (k = 0; k < h->K; k++)
783 	h->wrk[k] = h->q[k] * esl_exp_pdf(data->x[i], h->mu, h->lambda[k]);
784       pdf = esl_vec_DSum(h->wrk, h->K);
785 
786       pidx = 0;
787       if (! h->fixmix) {
788 	for (k = 1; k < h->K; k++) /* generic d/dQ solution for mixture models */
789 	  dp[pidx++] -= h->wrk[k]/pdf - h->q[k];
790       }
791 
792       for (k = 0; k < h->K; k++)
793 	if (! h->fixlambda[k])
794 	  dp[pidx++] -= (1.-h->lambda[k]*(data->x[i]-h->mu))*h->wrk[k]/pdf; /* d/dw */
795     }
796 }
797 
798 
799 /* Function:  esl_hxp_FitGuess()
800  *
801  * Purpose:   Given a sorted vector of <n> observed data samples <x[]>,
802  *            from smallest <x[0]> to largest <x[n-1]>, calculate a
803  *            very crude guesstimate of a fit -- suitable only as a starting
804  *            point for further optimization -- and return those parameters
805  *            in <h>.
806  *
807  *            Assigns $q_k \propto \frac{1}{k}$ and  $\mu = \min_i x_i$;
808  *            splits $x$ into $K$ roughly equal-sized bins, and
809  *            and assigns $\lambda_k$ as the ML estimate from bin $k$.
810  *            (If $q_k$ coefficients have already been fixed to
811  *            known values, this step is skipped.)
812  */
813 int
esl_hxp_FitGuess(double * x,int n,ESL_HYPEREXP * h)814 esl_hxp_FitGuess(double *x, int n, ESL_HYPEREXP *h)
815 {
816   double tmu;			/* current mu */
817   double mean;			/* mean (x-tmu) in a bin */
818   int    i,k;
819   int    imin, imax;
820 
821   h->mu = x[0];  /* minimum */
822   for (k = 0; k < h->K; k++)
823     {
824       if (! h->fixmix)
825 	h->q[k] = 1 / (double)(k+1); /* priors ~ 1, 1/2, 1/3... */
826 
827       imin = (int) ((double)(k*n)/(double)h->K);
828       imax = (int) ((double)((k+1)*n)/(double)h->K);
829       tmu = x[imin];
830       mean = 0.;
831       for (i = imin; i < imax; i++)
832 	mean += x[i] - tmu;
833       mean /= (double)(imax-imin);
834       h->lambda[k] = 1 / mean;
835     }
836   esl_vec_DNorm(h->q, h->K);
837   return eslOK;
838 }
839 
840 /* Function:  esl_hxp_FitComplete()
841  *
842  * Purpose:   Given a vector of <n> observed data samples <x[]>
843  *            (sorted or unsorted), and an initial guess <h> for
844  *            a hyperexponential, find maximum likelihood parameters
845  *            by conjugate gradient descent optimization, starting
846  *            from <h> and leaving the final optimized solution in
847  *            <h>.
848  *
849  * Returns:   <eslOK> on success, and <h> contains the fitted
850  *            hyperexponential parameters.
851  *
852  * Throws:    <eslEMEM> on allocation error, and <h> is left in
853  *            in its initial state.
854  */
855 int
esl_hxp_FitComplete(double * x,int n,ESL_HYPEREXP * h)856 esl_hxp_FitComplete(double *x, int n, ESL_HYPEREXP *h)
857 {
858   struct hyperexp_data data;
859   double *p   = NULL;
860   int     np;
861   double  fx;
862   int     i;
863   int     status;
864 
865   /* Determine number of free parameters and allocate */
866   np = 0;
867   if (! h->fixmix) np += h->K-1;  /* K-1 mix coefficients...     */
868   for (i = 0; i < h->K; i++)      /* ...and up to K lambdas free */
869     if (! h->fixlambda[i]) np++;
870   ESL_ALLOC(p,   sizeof(double) * np);
871 
872   /* Copy shared info into the "data" structure */
873   data.x   = x;
874   data.n   = n;
875   data.h   = h;
876 
877   /* From h, create the parameter vector. */
878   hyperexp_pack_paramvector(p, np, h);
879 
880   /* Feed it all to the mighty optimizer. */
881   status = esl_min_ConjugateGradientDescent(NULL, p, np,
882 					    &hyperexp_complete_func,
883 					    &hyperexp_complete_gradient,
884 					    (void *) (&data), &fx, NULL);
885   if (status != eslOK) goto ERROR;
886 
887   /* Convert the final parameter vector back to a hyperexponential
888    */
889   hyperexp_unpack_paramvector(p, np, h);
890 
891   free(p);
892   esl_hyperexp_SortComponents(h);
893   return eslOK;
894 
895  ERROR:
896   free(p);
897   return status;
898 }
899 
900 
901 /****************************************************************************
902  * 8. Maximum likelihood fitting, complete binned data         xref STL9/143-144
903  ****************************************************************************/
904 
905 /* minimizer API only allows us one generic void ptr to pass
906  * our data through:
907  */
908 struct hyperexp_binned_data {
909   ESL_HISTOGRAM *g;
910   ESL_HYPEREXP  *h;
911 };
912 
913 static double
hyperexp_complete_binned_func(double * p,int np,void * dptr)914 hyperexp_complete_binned_func(double *p, int np, void *dptr)
915 {
916   struct hyperexp_binned_data *data = (struct hyperexp_binned_data *) dptr;
917   ESL_HISTOGRAM               *g    = data->g;
918   ESL_HYPEREXP                *h    = data->h;
919   double logL = 0.;
920   double ai, delta;
921   int    i,k;
922 
923   hyperexp_unpack_paramvector(p, np, h);
924   delta = g->w;
925   /* counting over occupied, uncensored histogram bins */
926   for (i = g->cmin; i <= g->imax; i++)
927     {
928       if (g->obs[i] == 0) continue; /* skip unoccupied ones */
929 
930       ai    = esl_histogram_Bin2LBound(g, i);
931       if (ai < h->mu) ai = h->mu; /* careful about the left boundary: no x < h->mu */
932 
933       for (k = 0; k < h->K; k++)
934 	{
935 	  h->wrk[k] = log(h->q[k]) - h->lambda[k]*(ai-h->mu);
936 	  if (delta * h->lambda[k] < eslSMALLX1)
937 	    h->wrk[k] += log(delta * h->lambda[k]);
938 	  else
939 	    h->wrk[k] += log(1 - exp(-delta * h->lambda[k]));
940 	}
941       logL += g->obs[i] * esl_vec_DLogSum(h->wrk, h->K);
942     }
943   return -logL;
944 }
945 
946 static void
hyperexp_complete_binned_gradient(double * p,int np,void * dptr,double * dp)947 hyperexp_complete_binned_gradient(double *p, int np, void *dptr, double *dp)
948 {
949   struct hyperexp_binned_data *data = (struct hyperexp_binned_data *) dptr;
950   ESL_HISTOGRAM               *g    = data->g;
951   ESL_HYPEREXP                *h    = data->h;
952   int i,k;
953   int pidx;
954   double z;
955   double tmp;
956   double ai, delta;
957 
958   hyperexp_unpack_paramvector(p, np, h);
959   esl_vec_DSet(dp, np, 0.);
960   delta = g->w;
961 
962   /* counting over occupied, uncensored histogram bins */
963   for (i = g->cmin; i <= g->imax; i++)
964     {
965       if (g->obs[i] == 0) continue;
966       ai = esl_histogram_Bin2LBound(g, i);
967       if (ai < h->mu) ai = h->mu; /* careful about the left boundary: no x < h->mu */
968 
969       /* Calculate log (q_m alpha_m(a_i) terms
970        */
971       for (k = 0; k < h->K; k++)
972 	{
973 	  h->wrk[k] = log(h->q[k]) - h->lambda[k]*(ai-h->mu);
974 	  if (delta * h->lambda[k] < eslSMALLX1)
975 	    h->wrk[k] += log(delta * h->lambda[k]);
976 	  else
977 	    h->wrk[k] += log(1 - exp(-delta * h->lambda[k]));
978 	}
979       z = esl_vec_DLogSum(h->wrk, h->K); /* z= log \sum_k q_k alpha_k(a_i) */
980 
981       /* Bump the gradients for Q_1..Q_{K-1} */
982       pidx = 0;
983       if (! h->fixmix) {
984 	for (k = 1; k < h->K; k++)
985 	  dp[pidx++] -= g->obs[i] * (exp(h->wrk[k] - z) - h->q[k]);
986       }
987 
988       /* Bump the gradients for w_0..w_{K-1}
989        */
990       for (k = 0; k < h->K; k++)
991 	if (! h->fixlambda[k])
992 	  {
993 	    tmp  = log(h->q[k]) + log(h->lambda[k])- h->lambda[k]*(ai-h->mu);
994 	    tmp  = exp(tmp - z);
995 	    tmp *= (ai + delta - h->mu) * exp(-delta * h->lambda[k]) - (ai - h->mu);
996 	    dp[pidx++] -= g->obs[i] * tmp;
997 	  }
998     }
999 }
1000 
1001 /* Function:  esl_hxp_FitGuessBinned()
1002  *
1003  * Purpose:   Given a histogram <g> with binned observations;
1004  *            obtain a very crude guesstimate of a fit -- suitable only
1005  *            as a starting point for further optimization -- and return
1006  *            those parameters in <h>.
1007  *
1008  *            Assigns $q_k \propto \frac{1}{k}$ and  $\mu = \min_i x_i$;
1009  *            splits $x$ into $K$ roughly equal-sized bins, and
1010  *            and assigns $\lambda_k$ as the ML estimate from bin $k$.
1011  *            If the coefficients have already been set to known values,
1012  *            this step is skipped.
1013  */
1014 int
esl_hxp_FitGuessBinned(ESL_HISTOGRAM * g,ESL_HYPEREXP * h)1015 esl_hxp_FitGuessBinned(ESL_HISTOGRAM *g, ESL_HYPEREXP *h)
1016 {
1017   double sum;
1018   int    n;
1019   int    i,k;
1020   int    nb;
1021   double ai;
1022 
1023   if      (g->is_tailfit) h->mu = g->phi;  /* all x > mu in this case */
1024   else if (g->is_rounded) h->mu = esl_histogram_Bin2LBound(g, g->imin);
1025   else                    h->mu = g->xmin;
1026 
1027   nb    = g->imax - g->cmin + 1;
1028   k     = h->K-1;
1029   sum   = 0;
1030   n     = 0;
1031   for (i = g->imax; i >= g->cmin; i--)
1032     {
1033       ai = esl_histogram_Bin2LBound(g,i);
1034       if (ai < g->xmin) ai = g->xmin;
1035       n      += g->obs[i];
1036       sum    += g->obs[i] * ai;
1037 
1038       if (i == g->cmin + (k*nb)/h->K)
1039 	h->lambda[k--] = 1 / ((sum/(double) n) - ai);
1040     }
1041 
1042   if (! h->fixmix) {
1043     for (k = 0; k < h->K; k++)
1044       h->q[k] = 1 / (double) h->K;
1045   }
1046 
1047   return eslOK;
1048 }
1049 
1050 
1051 /* Function:  esl_hxp_FitCompleteBinned()
1052  *
1053  * Purpose:   Given a histogram <g> with binned observations, where each
1054  *            bin i holds some number of observed samples x with values from
1055  *            lower bound l to upper bound u (that is, $l < x \leq u$),
1056  *            and given a starting guess <h> for hyperexponential parameters;
1057  *
1058  *            Find maximum likelihood parameters <h> by conjugate gradient
1059  *            descent, starting from the initial <h> and leaving the
1060  *            optimized solution in <h>.
1061  *
1062  * Returns:   <eslOK> on success.
1063  *
1064  * Throws:    <eslEMEM> on allocation error, and <h> is left in its
1065  *            initial state.
1066  */
1067 int
esl_hxp_FitCompleteBinned(ESL_HISTOGRAM * g,ESL_HYPEREXP * h)1068 esl_hxp_FitCompleteBinned(ESL_HISTOGRAM *g, ESL_HYPEREXP *h)
1069 {
1070   struct hyperexp_binned_data data;
1071   double *p   = NULL;
1072   double  fx;
1073   int     i;
1074   int     np;
1075   int     status;
1076 
1077   np = 0;
1078   if (! h->fixmix) np = h->K-1;  /* K-1 mix coefficients...      */
1079   for (i = 0; i < h->K; i++)     /* ...and up to K lambdas free. */
1080     if (! h->fixlambda[i]) np++;
1081 
1082   ESL_ALLOC(p,   sizeof(double) * np);
1083 
1084   /* Copy shared info into the "data" structure  */
1085   data.g     = g;
1086   data.h     = h;
1087 
1088   /* From h, create the parameter vector. */
1089   hyperexp_pack_paramvector(p, np, h);
1090 
1091   /* Feed it all to the mighty optimizer. */
1092   status = esl_min_ConjugateGradientDescent(NULL, p, np,
1093 					    &hyperexp_complete_binned_func,
1094 					    &hyperexp_complete_binned_gradient,
1095 					    (void *) (&data), &fx, NULL);
1096   if (status != eslOK) goto ERROR;
1097 
1098   /* Convert the final parameter vector back to a hyperexponential
1099    */
1100   hyperexp_unpack_paramvector(p, np, h);
1101 
1102   free(p);
1103   esl_hyperexp_SortComponents(h);
1104   return eslOK;
1105 
1106  ERROR:
1107   free(p);
1108   return status;
1109 }
1110 /*--------------------------- end fitting ----------------------------------*/
1111 
1112 
1113 
1114 
1115 
1116 
1117 /****************************************************************************
1118  * 9. Test driver
1119  ****************************************************************************/
1120 #ifdef eslHYPEREXP_TESTDRIVE
1121 /* Compile:
1122    gcc -g -Wall -I. -I ~/src/easel -L ~/src/easel -o test\
1123     -DeslHYPEREXP_TESTDRIVE esl_hyperexp.c -leasel -lm
1124 */
1125 #include <stdio.h>
1126 #include <stdlib.h>
1127 #include <string.h>
1128 
1129 #include "easel.h"
1130 #include "esl_random.h"
1131 #include "esl_histogram.h"
1132 #include "esl_hyperexp.h"
1133 
1134 int
main(int argc,char ** argv)1135 main(int argc, char **argv)
1136 {
1137   ESL_HISTOGRAM  *h;
1138   ESL_RANDOMNESS *r;
1139   ESL_HYPEREXP   *hxp;
1140   ESL_HYPEREXP   *ehxp;
1141   int     n         = 20000;
1142   double  binwidth  = 0.1;
1143   int     i;
1144   double  x;
1145   double *data;
1146   int     ndata;
1147   int     k, ek, mink;
1148   double  mindiff, diff;
1149 
1150   int     opti;
1151   int     be_verbose   = FALSE;
1152   char   *paramfile    = NULL;
1153   char   *plotfile     = NULL;
1154   FILE   *pfp          = stdout;
1155   int     plot_pdf     = FALSE;
1156   int     plot_logpdf  = FALSE;
1157   int     plot_cdf     = FALSE;
1158   int     plot_logcdf  = FALSE;
1159   int     plot_surv    = FALSE;
1160   int     plot_logsurv = FALSE;
1161   int     xmin_set     = FALSE;
1162   double  xmin;
1163   int     xmax_set     = FALSE;
1164   double  xmax;
1165   int     xstep_set    = FALSE;
1166   double  xstep;
1167   int     do_fixmix    = FALSE;
1168   int     status;
1169 
1170   for (opti = 1; opti < argc && *(argv[opti]) == '-'; opti++)
1171     {
1172       if      (strcmp(argv[opti], "-f")  == 0) do_fixmix    = TRUE;
1173       else if (strcmp(argv[opti], "-i")  == 0) paramfile    = argv[++opti];
1174       else if (strcmp(argv[opti], "-n")  == 0) n            = atoi(argv[++opti]);
1175       else if (strcmp(argv[opti], "-o")  == 0) plotfile     = argv[++opti];
1176       else if (strcmp(argv[opti], "-v")  == 0) be_verbose   = TRUE;
1177       else if (strcmp(argv[opti], "-w")  == 0) binwidth     = atof(argv[++opti]);
1178       else if (strcmp(argv[opti], "-C")  == 0) plot_cdf     = TRUE;
1179       else if (strcmp(argv[opti], "-LC") == 0) plot_logcdf  = TRUE;
1180       else if (strcmp(argv[opti], "-P")  == 0) plot_pdf     = TRUE;
1181       else if (strcmp(argv[opti], "-LP") == 0) plot_logpdf  = TRUE;
1182       else if (strcmp(argv[opti], "-S")  == 0) plot_surv    = TRUE;
1183       else if (strcmp(argv[opti], "-LS") == 0) plot_logsurv = TRUE;
1184       else if (strcmp(argv[opti], "-XL") == 0) { xmin_set  = TRUE; xmin  = atof(argv[++opti]); }
1185       else if (strcmp(argv[opti], "-XH") == 0) { xmax_set  = TRUE; xmax  = atof(argv[++opti]); }
1186       else if (strcmp(argv[opti], "-XS") == 0) { xstep_set = TRUE; xstep = atof(argv[++opti]); }
1187       else esl_fatal("bad option");
1188     }
1189 
1190   if (paramfile != NULL)
1191     {
1192       status = esl_hyperexp_ReadFile(paramfile, &hxp);
1193       if      (status == eslENOTFOUND) esl_fatal("Param file %s not found", paramfile);
1194       else if (status == eslEFORMAT)   esl_fatal("Parse failed: param file %s invalid format", paramfile);
1195       else if (status != eslOK)        esl_fatal("Unusual failure opening param file %s", paramfile);
1196     }
1197   else
1198     {
1199       hxp = esl_hyperexp_Create(3);
1200       hxp->mu = -2.0;
1201       hxp->q[0]      = 0.5;    hxp->q[1]      = 0.3;   hxp->q[2]      = 0.2;
1202       hxp->lambda[0] = 1.0;    hxp->lambda[1] = 0.3;   hxp->lambda[2] = 0.1;
1203     }
1204   if (do_fixmix) esl_hyperexp_FixedUniformMixture(hxp);	/* overrides q's above */
1205 
1206   if (be_verbose) esl_hyperexp_Dump(stdout, hxp);
1207 
1208   r = esl_randomness_Create(42);
1209   h = esl_histogram_CreateFull(hxp->mu, 100., binwidth);
1210   if (plotfile != NULL) {
1211     if ((pfp = fopen(plotfile, "w")) == NULL)
1212       esl_fatal("Failed to open plotfile");
1213   }
1214   if (! xmin_set)  xmin  = hxp->mu;
1215   if (! xmax_set)  xmax  = hxp->mu+ 20*(1. / esl_vec_DMin(hxp->lambda, hxp->K));
1216   if (! xstep_set) xstep = 0.1;
1217 
1218   for (i = 0; i < n; i++)
1219     {
1220       x = esl_hxp_Sample(r, hxp);
1221       esl_histogram_Add(h, x);
1222     }
1223 
1224   esl_histogram_GetData(h, &data, &ndata); /* get sorted data vector */
1225 
1226   ehxp = esl_hyperexp_Create(hxp->K);
1227   if (do_fixmix) esl_hyperexp_FixedUniformMixture(ehxp);
1228   esl_hxp_FitGuess(data, ndata, ehxp);
1229   if ( esl_hxp_FitComplete(data, ndata, ehxp) != eslOK) esl_fatal("Failed to fit hyperexponential");
1230 
1231   if (be_verbose) esl_hyperexp_Dump(stdout, ehxp);
1232 
1233   if (fabs( (ehxp->mu-hxp->mu)/hxp->mu ) > 0.01)
1234     esl_fatal("Error in (complete) fitted mu > 1%\n");
1235   for (ek = 0; ek < ehxp->K; ek++)
1236     {  /* try to match each estimated lambda up to a parametric lambda */
1237       mindiff = 1.0;
1238       mink    = -1;
1239       for (k = 0; k < hxp->K; k++)
1240 	{
1241 	  diff =  fabs( (ehxp->lambda[ek] - hxp->lambda[k]) / hxp->lambda[k]);
1242 	  if (diff < mindiff) {
1243 	    mindiff = diff;
1244 	    mink    = k;
1245 	  }
1246 	}
1247       if (mindiff > 0.50)
1248 	esl_fatal("Error in (complete) fitted lambda > 50%\n");
1249       if (fabs( (ehxp->q[ek] - hxp->q[mink]) / hxp->q[mink]) > 1.0)
1250 	esl_fatal("Error in (complete) fitted q > 2-fold%\n");
1251     }
1252 
1253   esl_hxp_FitGuessBinned(h, ehxp);
1254   if ( esl_hxp_FitCompleteBinned(h, ehxp) != eslOK) esl_fatal("Failed to fit binned hyperexponential");
1255   if (be_verbose)  esl_hyperexp_Dump(stdout, ehxp);
1256 
1257   if (fabs( (ehxp->mu-hxp->mu)/hxp->mu ) > 0.01)
1258     esl_fatal("Error in (binned) fitted mu > 1%\n");
1259   for (ek = 0; ek < ehxp->K; ek++)
1260     {  /* try to match each estimated lambda up to a parametric lambda */
1261       mindiff = 1.0;
1262       mink    = -1;
1263       for (k = 0; k < hxp->K; k++)
1264 	{
1265 	  diff =  fabs( (ehxp->lambda[ek] - hxp->lambda[k]) / hxp->lambda[k]);
1266 	  if (diff < mindiff) {
1267 	    mindiff = diff;
1268 	    mink    = k;
1269 	  }
1270 	}
1271       if (mindiff > 0.50)
1272 	esl_fatal("Error in (binned) fitted lambda > 50%\n");
1273       if (fabs( (ehxp->q[ek] - hxp->q[mink]) / hxp->q[mink]) > 1.0)
1274 	esl_fatal("Error in (binned) fitted q > 2-fold\n");
1275     }
1276 
1277   if (plot_pdf)     esl_hxp_Plot(pfp, hxp, &esl_hxp_pdf,     xmin, xmax, xstep);
1278   if (plot_logpdf)  esl_hxp_Plot(pfp, hxp, &esl_hxp_logpdf,  xmin, xmax, xstep);
1279   if (plot_cdf)     esl_hxp_Plot(pfp, hxp, &esl_hxp_cdf,     xmin, xmax, xstep);
1280   if (plot_logcdf)  esl_hxp_Plot(pfp, hxp, &esl_hxp_logcdf,  xmin, xmax, xstep);
1281   if (plot_surv)    esl_hxp_Plot(pfp, hxp, &esl_hxp_surv,    xmin, xmax, xstep);
1282   if (plot_logsurv) esl_hxp_Plot(pfp, hxp, &esl_hxp_logsurv, xmin, xmax, xstep);
1283 
1284   if (plotfile != NULL) fclose(pfp);
1285   esl_histogram_Destroy(h);
1286   esl_hyperexp_Destroy(hxp);
1287   esl_hyperexp_Destroy(ehxp);
1288   esl_randomness_Destroy(r);
1289   return 0;
1290 }
1291 #endif /*eslHYPEREXP_TESTDRIVE*/
1292 
1293 /****************************************************************************
1294  * Example main()
1295  ****************************************************************************/
1296 #ifdef eslHYPEREXP_EXAMPLE
1297 /*::cexcerpt::hyperexp_example::begin::*/
1298 #include <stdio.h>
1299 #include "easel.h"
1300 #include "esl_random.h"
1301 #include "esl_histogram.h"
1302 #include "esl_hyperexp.h"
1303 
1304 int
main(int argc,char ** argv)1305 main(int argc, char **argv)
1306 {
1307   ESL_RANDOMNESS *r;		/* source of random numbers        */
1308   ESL_HISTOGRAM  *h;		/* histogram to store the data     */
1309   ESL_HYPEREXP   *hxp;		/* hyperexponential to sample from */
1310   ESL_HYPEREXP   *ehxp;		/* estimated hyperexponential      */
1311   double      x;		/* sampled data point              */
1312   int         n = 100000;	/* number of samples               */
1313   double     *data;
1314   int         ndata;
1315   int         i;
1316 
1317   hxp = esl_hyperexp_Create(3);
1318   hxp->mu = -2.0;
1319   hxp->q[0]      = 0.6;    hxp->q[1]      = 0.3;   hxp->q[2]      = 0.1;
1320   hxp->lambda[0] = 1.0;    hxp->lambda[1] = 0.3;   hxp->lambda[2] = 0.1;
1321 
1322   r   = esl_randomness_Create(0);
1323   h   = esl_histogram_CreateFull(hxp->mu, 100, 1.0);
1324 
1325   for (i = 0; i < n; i++)
1326     {
1327       x    = esl_hxp_Sample(r, hxp);
1328       esl_histogram_Add(h, x);
1329     }
1330   esl_histogram_GetData(h, &data, &ndata);
1331 
1332   /* Plot the empirical (sampled) and expected survivals */
1333   esl_histogram_PlotSurvival(stdout, h);
1334   esl_hxp_Plot(stdout, hxp, &esl_hxp_surv, h->xmin, h->xmax, 0.1);
1335 
1336   /* ML fit to complete data, and plot fitted survival curve */
1337   ehxp = esl_hyperexp_Create(3);
1338   esl_hxp_FitGuess(data, ndata, ehxp);
1339   esl_hxp_FitComplete(data, ndata, ehxp);
1340   esl_hxp_Plot(stdout, ehxp, &esl_hxp_surv,  h->xmin, h->xmax, 0.1);
1341 
1342   /* ML fit to binned data, plot fitted survival curve  */
1343   esl_hxp_FitGuessBinned(h, ehxp);
1344   esl_hxp_FitCompleteBinned(h, ehxp);
1345   esl_hxp_Plot(stdout, ehxp, &esl_hxp_surv,  h->xmin, h->xmax, 0.1);
1346 
1347   esl_randomness_Destroy(r);
1348   esl_histogram_Destroy(h);
1349   esl_hyperexp_Destroy(hxp);
1350   esl_hyperexp_Destroy(ehxp);
1351   return 0;
1352 }
1353 /*::cexcerpt::hyperexp_example::end::*/
1354 #endif /*eslHYPEREXP_EXAMPLE*/
1355 
1356 
1357