1 /* Statistical routines for hyperexponential distributions.
2 *
3 * Contents:
4 * 1. The ESL_HYPEREXP object
5 * 2. Evaluating densities and distributions
6 * 3. Generic API routines: for general interface w/ histogram module
7 * 4. Dumping plots for files
8 * 5. Sampling
9 * 6. File input
10 * 7. ML fitting to complete data
11 * 8. ML fitting to binned data
12 * 9. Test driver
13 * 10. Example
14 *
15 * Xrefs:
16 * STL9/140 : original implementation
17 * STL9/143-144 : ML fitting to binned data
18 *
19 * To-do:
20 * - Fit*() functions should return eslEINVAL on n=0, eslENORESULT
21 * on failure due to small n. Compare esl_gumbel. xref J12/93.
22 * SRE, Wed Nov 27 11:17:59 2013
23 */
24 #include "esl_config.h"
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <math.h>
29
30 #include "easel.h"
31 #include "esl_exponential.h"
32 #include "esl_fileparser.h"
33 #include "esl_histogram.h"
34 #include "esl_minimizer.h"
35 #include "esl_random.h"
36 #include "esl_stats.h"
37 #include "esl_vectorops.h"
38
39 #include "esl_hyperexp.h"
40
41 /****************************************************************************
42 *# 1. The ESL_HYPEREXP object
43 ****************************************************************************/
44
45 /* Function: esl_hyperexp_Create()
46 *
47 * Purpose: Creates an object to hold parameters for a <K>-component
48 * hyperexponential.
49 *
50 * Parameters in the object are initialized
51 * ($q_k = \frac{1}{K}$, $\lambda_k = 1$, $\mu = 0$), but
52 * the caller will want to set these according to its own
53 * purposes.
54 *
55 * Args: K - number of components in the mixture
56 *
57 * Returns: ptr to newly allocated/initialized <ESL_HYPEREXP> object.
58 *
59 * Throws: NULL on allocation failure.
60 */
61 ESL_HYPEREXP *
esl_hyperexp_Create(int K)62 esl_hyperexp_Create(int K)
63 {
64 int status;
65 ESL_HYPEREXP *h = NULL;
66 int k;
67
68 ESL_ALLOC(h, sizeof(ESL_HYPEREXP));
69 h->q = h->lambda = h->wrk = NULL;
70 h->fixlambda = NULL;
71 h->K = K;
72 h->fixmix = FALSE;
73
74 ESL_ALLOC(h->q, sizeof(double) * K);
75 ESL_ALLOC(h->lambda, sizeof(double) * K);
76 ESL_ALLOC(h->wrk, sizeof(double) * K);
77 ESL_ALLOC(h->fixlambda, sizeof(char) * K);
78
79 for (k = 0; k < K; k++)
80 {
81 h->q[k] = 1. / (double) K;
82 h->lambda[k] = 1.;
83 h->fixlambda[k]= 0;
84 }
85 h->mu = 0.;
86 return h;
87
88 ERROR:
89 esl_hyperexp_Destroy(h);
90 return NULL;
91 }
92
93 /* Function: esl_hyperexp_Destroy()
94 *
95 * Purpose: Deallocates the hyperexponential parameter object <h>.
96 *
97 * Args: h - ptr to the object to be deallocated.
98 *
99 * Returns: (void).
100 */
101 void
esl_hyperexp_Destroy(ESL_HYPEREXP * h)102 esl_hyperexp_Destroy(ESL_HYPEREXP *h)
103 {
104 if (h == NULL) return;
105
106 if (h->q != NULL) free(h->q);
107 if (h->lambda != NULL) free(h->lambda);
108 if (h->wrk != NULL) free(h->wrk);
109 if (h->fixlambda!= NULL) free(h->fixlambda);
110 free(h);
111 }
112
113
114 /* Function: esl_hyperexp_Copy()
115 *
116 * Purpose: Makes a copy of the hyperexponential parameter object <src>
117 * in <dest>. Caller must have already allocated <dest> to have
118 * (at least) the same number of components as <src>.
119 *
120 * Args: src - object to be copied
121 * dest - allocated object to copy <src> into
122 *
123 * Returns: <eslOK> on success.
124 *
125 * Throws: <eslEINCOMPAT> if <dest> isn't allocated with enough
126 * components to hold a copy of <src>.
127 */
128 int
esl_hyperexp_Copy(ESL_HYPEREXP * src,ESL_HYPEREXP * dest)129 esl_hyperexp_Copy(ESL_HYPEREXP *src, ESL_HYPEREXP *dest)
130 {
131 int k;
132
133 if (dest->K < src->K)
134 ESL_EXCEPTION(eslEINCOMPAT, "hyperexponential too small to copy into");
135
136 for (k = 0; k < src->K; k++)
137 {
138 dest->q[k] = src->q[k];
139 dest->lambda[k] = src->lambda[k];
140 dest->fixlambda[k]= src->fixlambda[k];
141 }
142 dest->mu = src->mu;
143 dest->K = src->K;
144 dest->fixmix = src->fixmix;
145 return eslOK;
146 }
147
148 /* Function: esl_hyperexp_FixedUniformMixture()
149 *
150 * Purpose: Set the mixture coeffients to a uniform (1/K) distribution,
151 * and fix them there so they aren't estimable parameters.
152 */
153 int
esl_hyperexp_FixedUniformMixture(ESL_HYPEREXP * h)154 esl_hyperexp_FixedUniformMixture(ESL_HYPEREXP *h)
155 {
156 int k;
157 for (k = 0; k < h->K; k++) h->q[k] = 1./(double)h->K;
158 h->fixmix = TRUE;
159 return eslOK;
160 }
161
162
163 /* Function: esl_hyperexp_SortComponents()
164 *
165 * Purpose: Rearrange the components in a hyperexponential in
166 * order of lambda values, with the highest lambda first.
167 *
168 * Stupid $O(K^2)$ selection sort algorithm here, because we
169 * expect $K$ to be small.
170 */
171 int
esl_hyperexp_SortComponents(ESL_HYPEREXP * h)172 esl_hyperexp_SortComponents(ESL_HYPEREXP *h)
173 {
174 int k, kp;
175 char ctmp;
176 double dtmp;
177
178 for (k = 0; k < h->K-1; k++)
179 {
180 kp = k + esl_vec_DArgMax(h->lambda+k, h->K-k);
181 if (k != kp)
182 {
183 dtmp = h->q[k]; h->q[k] = h->q[kp]; h->q[kp] = dtmp;
184 dtmp = h->lambda[k]; h->lambda[k] = h->lambda[kp]; h->lambda[kp] = dtmp;
185 ctmp = h->fixlambda[k]; h->fixlambda[k] = h->fixlambda[kp]; h->fixlambda[kp] = ctmp;
186 }
187 }
188 return eslOK;
189 }
190
191
192 /* Function: esl_hyperexp_Write()
193 *
194 * Purpose: Write hyperexponential parameters from <hxp> to an open <fp>.
195 *
196 * The output format is suitable for input by <esl_hyperexp_Read()>.
197 *
198 * Returns: <eslOK> on success.
199 *
200 * Throws: <eslEWRITE> on any write error.
201 */
202 int
esl_hyperexp_Write(FILE * fp,ESL_HYPEREXP * hxp)203 esl_hyperexp_Write(FILE *fp, ESL_HYPEREXP *hxp)
204 {
205 int k;
206
207 if (fprintf(fp, "%8d # number of components\n", hxp->K) < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed");
208 if (fprintf(fp, "%8.2f # mu (for all components)\n", hxp->mu) < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed");
209 for (k = 0; k < hxp->K; k++)
210 if (fprintf(fp, "%8.6f %12.6f # q[%d], lambda[%d]\n",
211 hxp->q[k], hxp->lambda[k], k, k) < 0) ESL_EXCEPTION(eslEWRITE, "hyperexp write failed");
212 return eslOK;
213 }
214
215
216 /* Function: esl_hyperexp_Dump()
217 *
218 * Purpose: Dump hyperexponential parameters from <hxp> to an open <fp>,
219 * all on one line with no comments.
220 *
221 * The output format is suitable for input by
222 * <esl_hyperexp_Read()>, like <esl_hyperexp_Write()>,
223 * though it's intended as a diagnostic dump of the
224 * contents of the object.
225 *
226 * Returns: <eslOK> on success.
227 */
228 int
esl_hyperexp_Dump(FILE * fp,ESL_HYPEREXP * hxp)229 esl_hyperexp_Dump(FILE *fp, ESL_HYPEREXP *hxp)
230 {
231 int k;
232
233 fprintf(fp, "%2d ", hxp->K);
234 fprintf(fp, "%6.2f ", hxp->mu);
235 for (k = 0; k < hxp->K; k++)
236 fprintf(fp, "%5.3f %9.6f ", hxp->q[k], hxp->lambda[k]);
237 fprintf(fp, "\n");
238 return eslOK;
239 }
240
241 /*----------------- end ESL_HYPEREXP object maintenance --------------------*/
242
243
244
245 /****************************************************************************
246 * 2. Evaluating densities and distributions
247 ****************************************************************************/
248 /* all lambda_k > 0
249 * all q_k are probabilities, \sum_k q_k = 1 [watch out for q_k=0 in log(q_k)].
250 * mu <= x < infinity [mu=x is not a problem]
251 */
252
253 /* Function: esl_hxp_pdf()
254 *
255 * Purpose: Returns the probability density function $P(X=x)$ for
256 * quantile <x>, given hyperexponential parameters <h>.
257 */
258 double
esl_hxp_pdf(double x,ESL_HYPEREXP * h)259 esl_hxp_pdf(double x, ESL_HYPEREXP *h)
260 {
261 double pdf = 0.;
262 int k;
263
264 if (x < h->mu) return 0.;
265
266 for (k = 0; k < h->K; k++)
267 pdf += h->q[k] * esl_exp_pdf(x, h->mu, h->lambda[k]);
268 return pdf;
269 }
270
271
272 /* Function: esl_hxp_logpdf()
273 *
274 * Purpose: Returns the log of the PDF ($\log P(X=x)$) for quantile <x>,
275 * given hyperexponential parameters <h>.
276 */
277 double
esl_hxp_logpdf(double x,ESL_HYPEREXP * h)278 esl_hxp_logpdf(double x, ESL_HYPEREXP *h)
279 {
280 int k;
281 double z;
282
283 if (x < h->mu) return -eslINFINITY;
284
285 for (k = 0; k < h->K; k++)
286 if (h->q[k] == 0.0)
287 h->wrk[k] = -eslINFINITY;
288 else
289 h->wrk[k] = log(h->q[k]) + esl_exp_logpdf(x, h->mu, h->lambda[k]);
290
291 z = esl_vec_DLogSum(h->wrk, h->K);
292 return z;
293 }
294
295 /* Function: esl_hxp_cdf()
296 *
297 * Purpose: Returns the cumulative distribution function $P(X \leq x)$
298 * for quantile <x>, given hyperexponential parameters <h>.
299 */
300 double
esl_hxp_cdf(double x,ESL_HYPEREXP * h)301 esl_hxp_cdf(double x, ESL_HYPEREXP *h)
302 {
303 double cdf = 0.;
304 int k;
305
306 if (x < h->mu) return 0.;
307
308 for (k = 0; k < h->K; k++)
309 cdf += h->q[k] * esl_exp_cdf(x, h->mu, h->lambda[k]);
310 return cdf;
311 }
312
313 /* Function: esl_hxp_logcdf()
314 *
315 * Purpose: Returns the log of the CDF $\log P(X \leq x)$
316 * for quantile <x>, given hyperexponential parameters <h>.
317 */
318 double
esl_hxp_logcdf(double x,ESL_HYPEREXP * h)319 esl_hxp_logcdf(double x, ESL_HYPEREXP *h)
320 {
321 int k;
322
323 if (x < h->mu) return -eslINFINITY;
324
325 for (k = 0; k < h->K; k++)
326 if (h->q[k] == 0.0)
327 h->wrk[k] = -eslINFINITY;
328 else
329 h->wrk[k] = log(h->q[k]) + esl_exp_logcdf(x, h->mu, h->lambda[k]);
330
331 return esl_vec_DLogSum(h->wrk, h->K);
332 }
333
334
335 /* Function: esl_hxp_surv()
336 *
337 * Purpose: Returns the survivor function $P(X > x)$ (1-CDF)
338 * for quantile <x>, given hyperexponential parameters <h>.
339 */
340 double
esl_hxp_surv(double x,ESL_HYPEREXP * h)341 esl_hxp_surv(double x, ESL_HYPEREXP *h)
342 {
343 double srv = 0.;
344 int k;
345
346 if (x < h->mu) return 1.0;
347
348 for (k = 0; k < h->K; k++)
349 srv += h->q[k] * esl_exp_surv(x, h->mu, h->lambda[k]);
350 return srv;
351 }
352
353
354 /* Function: esl_hxp_logsurv()
355 *
356 * Purpose: Returns the log survivor function $\log P(X > x)$ (log(1-CDF))
357 * for quantile <x>, given hyperexponential parameters <h>.
358 */
359 double
esl_hxp_logsurv(double x,ESL_HYPEREXP * h)360 esl_hxp_logsurv(double x, ESL_HYPEREXP *h)
361 {
362 int k;
363
364 if (x < h->mu) return 0.0;
365
366 for (k = 0; k < h->K; k++)
367 if (h->q[k] == 0.0)
368 h->wrk[k] = -eslINFINITY;
369 else
370 h->wrk[k] = log(h->q[k]) + esl_exp_logsurv(x, h->mu, h->lambda[k]);
371
372 return esl_vec_DLogSum(h->wrk, h->K);
373 }
374
375 /* Function: esl_hxp_invcdf()
376 *
377 * Purpose: Calculates the inverse CDF for a hyperexponential <h>
378 * returning the quantile <x> at which the CDF is <p>.
379 *
380 * The inverse CDF of a mixture model has no
381 * analytical expression as far as I'm aware. The calculation
382 * here is a computationally expensive, brute force bisection
383 * search in <x> using the CDF function. It will suffice for
384 * a small number of calls (for plotting applications, for example),
385 * but it is not sufficient for a large number of calls.
386 */
387 double
esl_hxp_invcdf(double p,ESL_HYPEREXP * h)388 esl_hxp_invcdf(double p, ESL_HYPEREXP *h)
389 {
390 double x1, x2, xm; /* low, high guesses at x */
391 double f2, fm;
392 double tol = 1e-6;
393
394 x1 = h->mu;
395 x2 = h->mu + 1.;
396 do { /* bracket */
397 x2 = x2 + 2.*(x2-x1);
398 f2 = esl_hxp_cdf(x2, h);
399 } while (f2 < p);
400
401 do { /* bisection */
402 xm = (x1+x2) / 2.;
403 fm = esl_hxp_cdf(xm, h);
404
405 if (fm > p) x2 = xm;
406 else if (fm < p) x1 = xm;
407 else return xm; /* unlikely case of fm==cdf */
408 } while ( (x2-x1)/(x1+x2-2*h->mu) > tol);
409
410 xm = (x1+x2) / 2.;
411 return xm;
412
413 }
414 /*-------------------- end densities & distributions ------------------------*/
415
416
417
418
419 /****************************************************************************
420 * 3. Generic API routines: for general interface w/ histogram module
421 ****************************************************************************/
422
423 /* Function: esl_hxp_generic_pdf()
424 *
425 * Purpose: Generic-API version of PDF call.
426 */
427 double
esl_hxp_generic_pdf(double x,void * params)428 esl_hxp_generic_pdf(double x, void *params)
429 {
430 ESL_HYPEREXP *h = (ESL_HYPEREXP *) params;
431 return esl_hxp_pdf(x, h);
432 }
433
434 /* Function: esl_hxp_generic_cdf()
435 *
436 * Purpose: Generic-API version of CDF call.
437 */
438 double
esl_hxp_generic_cdf(double x,void * params)439 esl_hxp_generic_cdf(double x, void *params)
440 {
441 ESL_HYPEREXP *h = (ESL_HYPEREXP *) params;
442 return esl_hxp_cdf(x, h);
443 }
444
445 /* Function: esl_hxp_generic_surv()
446 *
447 * Purpose: Generic-API version of survivor function.
448 */
449 double
esl_hxp_generic_surv(double x,void * params)450 esl_hxp_generic_surv(double x, void *params)
451 {
452 ESL_HYPEREXP *h = (ESL_HYPEREXP *) params;
453 return esl_hxp_surv(x, h);
454 }
455
456 /* Function: esl_hxp_generic_invcdf()
457 *
458 * Purpose: Generic-API version of inverse CDF.
459 */
460 double
esl_hxp_generic_invcdf(double p,void * params)461 esl_hxp_generic_invcdf(double p, void *params)
462 {
463 ESL_HYPEREXP *h = (ESL_HYPEREXP *) params;
464 return esl_hxp_invcdf(p, h);
465 }
466 /*------------------------ end generic API ---------------------------------*/
467
468
469
470
471
472
473 /****************************************************************************
474 * 4. Dumping plots for files
475 ****************************************************************************/
476
477 /* Function: esl_hxp_Plot()
478 *
479 * Purpose: Plot some function <func> (for instance, <esl_hxp_pdf()>)
480 * for hyperexponential parameters <h>, for a range of
481 * quantiles x from <xmin> to <xmax> in steps of <xstep>;
482 * output to an open stream <fp> in xmgrace XY input format.
483 *
484 * Returns: <eslOK> on success.
485 *
486 * Throws: <eslEWRITE> on any system write error.
487 */
488 int
esl_hxp_Plot(FILE * fp,ESL_HYPEREXP * h,double (* func)(double x,ESL_HYPEREXP * h),double xmin,double xmax,double xstep)489 esl_hxp_Plot(FILE *fp, ESL_HYPEREXP *h,
490 double (*func)(double x, ESL_HYPEREXP *h),
491 double xmin, double xmax, double xstep)
492 {
493 double x;
494 for (x = xmin; x <= xmax; x += xstep)
495 if (fprintf(fp, "%f\t%g\n", x, (*func)(x, h)) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "hyperexp plot write failed");
496 if (fprintf(fp, "&\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "hyperexp plot write failed");
497 return eslOK;
498 }
499 /*-------------------- end plot dumping routines ---------------------------*/
500
501
502
503
504
505 /****************************************************************************
506 * 5. Sampling
507 ****************************************************************************/
508
509 /* Function: esl_hxp_Sample()
510 *
511 * Purpose: Sample a random variate x from a hyperexponential <h>,
512 * given random number source <r>.
513 */
514 double
esl_hxp_Sample(ESL_RANDOMNESS * r,ESL_HYPEREXP * h)515 esl_hxp_Sample(ESL_RANDOMNESS *r, ESL_HYPEREXP *h)
516 {
517 int k;
518 k = esl_rnd_DChoose(r, h->q, h->K);
519 return esl_exp_Sample(r, h->mu, h->lambda[k]);
520 }
521
522 /*--------------------------- end sampling ---------------------------------*/
523
524
525
526 /****************************************************************************
527 * 6. File input (mixture models are a little too complex to set on commandline)
528 ****************************************************************************/
529
530 /* Function: esl_hyperexp_Read()
531 *
532 * Purpose: Reads hyperexponential parameters from an open <e>.
533 * which is an <ESL_FILEPARSER> tokenizer for an open stream.
534 *
535 * The first token is <K>, the number of mixture components.
536 * The second token is <mu>, the x offset shared by all components.
537 * Then for each mixture component <k=1..K>, it reads
538 * a mixture coefficient <q[k]> and a decay parameter
539 * <lambda[k]>.
540 *
541 * The <2K+2> data tokens must occur in this order, but
542 * they can be grouped into any number of lines, because the
543 * parser ignores line breaks.
544 *
545 * Anything after a <\#> character on a line is a comment, and
546 * is ignored.
547 *
548 * Returns: <eslOK> on success, and <ret_hxp> points to a new <ESL_HYPEREXP>
549 * object.
550 * <eslEFORMAT> on "normal" parse failure caused by a bad file
551 * format that's likely the user's fault.
552 *
553 * Throws: <eslEMEM> if allocation of the new <ESL_HYPEREXP> fails.
554 *
555 *
556 * FIXME: All our mixture models (esl_dirichlet, for example) should be
557 * reconciled w/ identical interfaces & behaviour.
558 */
559 int
esl_hyperexp_Read(ESL_FILEPARSER * e,ESL_HYPEREXP ** ret_hxp)560 esl_hyperexp_Read(ESL_FILEPARSER *e, ESL_HYPEREXP **ret_hxp)
561 {
562 ESL_HYPEREXP *hxp = NULL;
563 char *tok;
564 int status = eslOK;
565 int nc;
566 int k;
567 double sum;
568
569 esl_fileparser_SetCommentChar(e, '#');
570
571 if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
572 nc = atoi(tok);
573 if (nc < 1) {
574 sprintf(e->errbuf, "Expected # of components K >= 1 as first token");
575 goto ERROR;
576 }
577
578 if ((hxp = esl_hyperexp_Create(nc)) == NULL) return eslEMEM; /* percolation */
579
580 if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
581 hxp->mu = atof(tok);
582
583 for (k = 0; k < hxp->K; k++)
584 {
585 if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
586 hxp->q[k] = atof(tok);
587
588 if ((status = esl_fileparser_GetToken(e, &tok, NULL)) != eslOK) goto ERROR;
589 hxp->lambda[k] = atof(tok);
590
591 if (hxp->q[k] < 0. || hxp->q[k] > 1.) {
592 sprintf(e->errbuf, "Expected a mixture coefficient q[k], 0<=q[k]<=1");
593 goto ERROR;
594 }
595 if (hxp->lambda[k] <= 0.) {
596 sprintf(e->errbuf, "Expected a lambda parameter, lambda>0");
597 goto ERROR;
598 }
599 }
600 sum = esl_vec_DSum(hxp->q, hxp->K);
601 if (fabs(sum-1.0) > 0.05) {
602 sprintf(e->errbuf, "Expected mixture coefficients to sum to 1");
603 goto ERROR;
604 }
605 esl_vec_DNorm(hxp->q, hxp->K);
606 *ret_hxp = hxp;
607 return eslOK;
608
609 ERROR:
610 esl_hyperexp_Destroy(hxp);
611 return eslEFORMAT;
612 }
613
614 /* Function: esl_hyperexp_ReadFile()
615 *
616 * Purpose: Convenience wrapper around <esl_hyperexp_Read()> that takes
617 * a filename as an argument, instead of an open <ESL_FILEPARSER>.
618 *
619 * This lets you quickly read an object from a file, but it
620 * limits your ability to deal gracefully and flexibly with
621 * 'normal' errors like 'file not found' or 'bad file format'.
622 * Here, all errors are fatal.
623 *
624 * Returns: <eslOK> on success.
625 *
626 * Throws: <eslEMEM> on an allocation failure.
627 *
628 * <eslEFORMAT> on any parse error. Diagnostic information is
629 * unavailable, because the <ESL_FILEPARSER> that's holding
630 * that information is internal to this function.
631 *
632 * <eslENOTFOUND> on any failure to open the file.
633 */
634 int
esl_hyperexp_ReadFile(char * filename,ESL_HYPEREXP ** ret_hxp)635 esl_hyperexp_ReadFile(char *filename, ESL_HYPEREXP **ret_hxp)
636 {
637 FILE *fp;
638 ESL_FILEPARSER *e;
639 int status;
640
641 if ((fp = fopen(filename, "r")) == NULL)
642 ESL_EXCEPTION(eslENOTFOUND, "file not found");
643
644 if ((e = esl_fileparser_Create(fp)) == NULL) {
645 fclose(fp);
646 ESL_EXCEPTION(eslEMEM, "failed to create fileparser");
647 }
648 esl_fileparser_SetCommentChar(e, '#');
649
650 status = esl_hyperexp_Read(e, ret_hxp);
651
652 esl_fileparser_Destroy(e);
653 fclose(fp);
654 return status;
655 }
656
657
658
659
660 /****************************************************************************
661 * 7. ML fitting to complete data
662 ****************************************************************************/
663
664 /* This structure is used to sneak the data into minimizer's generic
665 * (void *) API for all aux data
666 */
667 struct hyperexp_data {
668 double *x;
669 int n;
670 ESL_HYPEREXP *h;
671 };
672
673 /* Given hyperexponential parameters in <h>;
674 * do appropriate c.o.v.'s to unconstrained real parameters
675 * and fill in the packed parameter vector <p>.
676 *
677 * <p> must be allocated for at least (2K-1) doubles: K-1 mixture
678 * coefficients and K lambda parameters. (mu is not a free param).
679 *
680 * First K-1 are $Q_1..Q_{K-1}$ mixture coefficient parameters; $Q_0$ implicitly 0;
681 * cov is $q_k = \frac{e^{Q_k}}{\sum_j e^{Q_j}}$; $Q_k = \log(q_k) - \log(q_0)$.
682 * Then K lambda params;
683 * lambda cov is $\lambda = e^w$, $w = \log(\lambda)$.
684 */
685 static void
hyperexp_pack_paramvector(double * p,int np,ESL_HYPEREXP * h)686 hyperexp_pack_paramvector(double *p, int np, ESL_HYPEREXP *h)
687 {
688 int i; /* counter in parameter vector p */
689 int k; /* counter in mixture components */
690 double z; /* tmp variable */
691
692 /* mixture coefficients */
693 i = 0;
694 if (! h->fixmix) {
695 z = log(h->q[0]);
696 for (k = 1; k < h->K; k++)
697 p[i++] = log(h->q[k]) - z;
698 }
699
700 /* exponential parameters */
701 for (k = 0; k < h->K; k++)
702 if (! h->fixlambda[k])
703 p[i++] = log(h->lambda[k]);
704 }
705
706 /* Same as above but in reverse: given parameter vector <p>,
707 * <np> = 2K-1, do appropriate c.o.v. back to desired parameter space, and
708 * update the hyperexponential <h>.
709 */
710 static void
hyperexp_unpack_paramvector(double * p,int np,ESL_HYPEREXP * h)711 hyperexp_unpack_paramvector(double *p, int np, ESL_HYPEREXP *h)
712 {
713 int i; /* counter in parameter vector p */
714 int k; /* counter in mixture components */
715 double z; /* tmp variable */
716
717 /* Fetch the params in their c.o.v. space first
718 */
719 i = 0;
720 if (! h->fixmix) {
721 h->q[0] = 0; /* implicitly */
722 for (k = 1; k < h->K; k++)
723 h->q[k] = p[i++];
724 }
725 for (k = 0; k < h->K; k++)
726 if (! h->fixlambda[k])
727 h->lambda[k] = p[i++];
728
729 /* Convert mix coefficients back to probabilities;
730 * their c.o.v. is q_k = e^{Q_k} / \sum_k e^{Q_k}
731 * which rearranges to exp(Q_k - log[\sum_k e^Q_k]),
732 * and we have the DLogSum() function to compute the log sum.
733 */
734 if (! h->fixmix) {
735 z = esl_vec_DLogSum(h->q, h->K);
736 for (k = 0; k < h->K; k++)
737 h->q[k] = exp(h->q[k] - z);
738 }
739
740 /* lambda c.o.v. is \lambda = e^w */
741 for (k = 0; k < h->K; k++)
742 if (! h->fixlambda[k])
743 h->lambda[k] = exp(h->lambda[k]);
744 }
745
746 /* The log likelihood function to be optimized by ML fitting:
747 * This needs to be careful of a case where a lambda = inf.
748 */
749 static double
hyperexp_complete_func(double * p,int np,void * dptr)750 hyperexp_complete_func(double *p, int np, void *dptr)
751 {
752 struct hyperexp_data *data = (struct hyperexp_data *) dptr;
753 ESL_HYPEREXP *h = data->h;
754 double logL = 0.;
755 int i;
756
757 hyperexp_unpack_paramvector(p, np, h);
758 for (i = 0; i < data->n; i++)
759 logL += esl_hxp_logpdf(data->x[i], h);
760 return -logL;
761 }
762
763 /* The gradient of the NLL w.r.t. each free parameter in p.
764 */
765 static void
hyperexp_complete_gradient(double * p,int np,void * dptr,double * dp)766 hyperexp_complete_gradient(double *p, int np, void *dptr, double *dp)
767 {
768 struct hyperexp_data *data = (struct hyperexp_data *) dptr;
769 ESL_HYPEREXP *h = data->h;
770 double pdf;
771 int i,k;
772 int pidx;
773
774 hyperexp_unpack_paramvector(p, np, h);
775 esl_vec_DSet(dp, np, 0.);
776 for (i = 0; i < data->n; i++)
777 {
778 /* FIXME: I think the calculation below may need to be done
779 * in log space, to avoid underflow errors; see complete_binned_gradient()
780 */
781 /* Precalculate q_k PDF_k(x) terms, and their sum */
782 for (k = 0; k < h->K; k++)
783 h->wrk[k] = h->q[k] * esl_exp_pdf(data->x[i], h->mu, h->lambda[k]);
784 pdf = esl_vec_DSum(h->wrk, h->K);
785
786 pidx = 0;
787 if (! h->fixmix) {
788 for (k = 1; k < h->K; k++) /* generic d/dQ solution for mixture models */
789 dp[pidx++] -= h->wrk[k]/pdf - h->q[k];
790 }
791
792 for (k = 0; k < h->K; k++)
793 if (! h->fixlambda[k])
794 dp[pidx++] -= (1.-h->lambda[k]*(data->x[i]-h->mu))*h->wrk[k]/pdf; /* d/dw */
795 }
796 }
797
798
799 /* Function: esl_hxp_FitGuess()
800 *
801 * Purpose: Given a sorted vector of <n> observed data samples <x[]>,
802 * from smallest <x[0]> to largest <x[n-1]>, calculate a
803 * very crude guesstimate of a fit -- suitable only as a starting
804 * point for further optimization -- and return those parameters
805 * in <h>.
806 *
807 * Assigns $q_k \propto \frac{1}{k}$ and $\mu = \min_i x_i$;
808 * splits $x$ into $K$ roughly equal-sized bins, and
809 * and assigns $\lambda_k$ as the ML estimate from bin $k$.
810 * (If $q_k$ coefficients have already been fixed to
811 * known values, this step is skipped.)
812 */
813 int
esl_hxp_FitGuess(double * x,int n,ESL_HYPEREXP * h)814 esl_hxp_FitGuess(double *x, int n, ESL_HYPEREXP *h)
815 {
816 double tmu; /* current mu */
817 double mean; /* mean (x-tmu) in a bin */
818 int i,k;
819 int imin, imax;
820
821 h->mu = x[0]; /* minimum */
822 for (k = 0; k < h->K; k++)
823 {
824 if (! h->fixmix)
825 h->q[k] = 1 / (double)(k+1); /* priors ~ 1, 1/2, 1/3... */
826
827 imin = (int) ((double)(k*n)/(double)h->K);
828 imax = (int) ((double)((k+1)*n)/(double)h->K);
829 tmu = x[imin];
830 mean = 0.;
831 for (i = imin; i < imax; i++)
832 mean += x[i] - tmu;
833 mean /= (double)(imax-imin);
834 h->lambda[k] = 1 / mean;
835 }
836 esl_vec_DNorm(h->q, h->K);
837 return eslOK;
838 }
839
840 /* Function: esl_hxp_FitComplete()
841 *
842 * Purpose: Given a vector of <n> observed data samples <x[]>
843 * (sorted or unsorted), and an initial guess <h> for
844 * a hyperexponential, find maximum likelihood parameters
845 * by conjugate gradient descent optimization, starting
846 * from <h> and leaving the final optimized solution in
847 * <h>.
848 *
849 * Returns: <eslOK> on success, and <h> contains the fitted
850 * hyperexponential parameters.
851 *
852 * Throws: <eslEMEM> on allocation error, and <h> is left in
853 * in its initial state.
854 */
855 int
esl_hxp_FitComplete(double * x,int n,ESL_HYPEREXP * h)856 esl_hxp_FitComplete(double *x, int n, ESL_HYPEREXP *h)
857 {
858 struct hyperexp_data data;
859 double *p = NULL;
860 int np;
861 double fx;
862 int i;
863 int status;
864
865 /* Determine number of free parameters and allocate */
866 np = 0;
867 if (! h->fixmix) np += h->K-1; /* K-1 mix coefficients... */
868 for (i = 0; i < h->K; i++) /* ...and up to K lambdas free */
869 if (! h->fixlambda[i]) np++;
870 ESL_ALLOC(p, sizeof(double) * np);
871
872 /* Copy shared info into the "data" structure */
873 data.x = x;
874 data.n = n;
875 data.h = h;
876
877 /* From h, create the parameter vector. */
878 hyperexp_pack_paramvector(p, np, h);
879
880 /* Feed it all to the mighty optimizer. */
881 status = esl_min_ConjugateGradientDescent(NULL, p, np,
882 &hyperexp_complete_func,
883 &hyperexp_complete_gradient,
884 (void *) (&data), &fx, NULL);
885 if (status != eslOK) goto ERROR;
886
887 /* Convert the final parameter vector back to a hyperexponential
888 */
889 hyperexp_unpack_paramvector(p, np, h);
890
891 free(p);
892 esl_hyperexp_SortComponents(h);
893 return eslOK;
894
895 ERROR:
896 free(p);
897 return status;
898 }
899
900
901 /****************************************************************************
902 * 8. Maximum likelihood fitting, complete binned data xref STL9/143-144
903 ****************************************************************************/
904
905 /* minimizer API only allows us one generic void ptr to pass
906 * our data through:
907 */
908 struct hyperexp_binned_data {
909 ESL_HISTOGRAM *g;
910 ESL_HYPEREXP *h;
911 };
912
913 static double
hyperexp_complete_binned_func(double * p,int np,void * dptr)914 hyperexp_complete_binned_func(double *p, int np, void *dptr)
915 {
916 struct hyperexp_binned_data *data = (struct hyperexp_binned_data *) dptr;
917 ESL_HISTOGRAM *g = data->g;
918 ESL_HYPEREXP *h = data->h;
919 double logL = 0.;
920 double ai, delta;
921 int i,k;
922
923 hyperexp_unpack_paramvector(p, np, h);
924 delta = g->w;
925 /* counting over occupied, uncensored histogram bins */
926 for (i = g->cmin; i <= g->imax; i++)
927 {
928 if (g->obs[i] == 0) continue; /* skip unoccupied ones */
929
930 ai = esl_histogram_Bin2LBound(g, i);
931 if (ai < h->mu) ai = h->mu; /* careful about the left boundary: no x < h->mu */
932
933 for (k = 0; k < h->K; k++)
934 {
935 h->wrk[k] = log(h->q[k]) - h->lambda[k]*(ai-h->mu);
936 if (delta * h->lambda[k] < eslSMALLX1)
937 h->wrk[k] += log(delta * h->lambda[k]);
938 else
939 h->wrk[k] += log(1 - exp(-delta * h->lambda[k]));
940 }
941 logL += g->obs[i] * esl_vec_DLogSum(h->wrk, h->K);
942 }
943 return -logL;
944 }
945
946 static void
hyperexp_complete_binned_gradient(double * p,int np,void * dptr,double * dp)947 hyperexp_complete_binned_gradient(double *p, int np, void *dptr, double *dp)
948 {
949 struct hyperexp_binned_data *data = (struct hyperexp_binned_data *) dptr;
950 ESL_HISTOGRAM *g = data->g;
951 ESL_HYPEREXP *h = data->h;
952 int i,k;
953 int pidx;
954 double z;
955 double tmp;
956 double ai, delta;
957
958 hyperexp_unpack_paramvector(p, np, h);
959 esl_vec_DSet(dp, np, 0.);
960 delta = g->w;
961
962 /* counting over occupied, uncensored histogram bins */
963 for (i = g->cmin; i <= g->imax; i++)
964 {
965 if (g->obs[i] == 0) continue;
966 ai = esl_histogram_Bin2LBound(g, i);
967 if (ai < h->mu) ai = h->mu; /* careful about the left boundary: no x < h->mu */
968
969 /* Calculate log (q_m alpha_m(a_i) terms
970 */
971 for (k = 0; k < h->K; k++)
972 {
973 h->wrk[k] = log(h->q[k]) - h->lambda[k]*(ai-h->mu);
974 if (delta * h->lambda[k] < eslSMALLX1)
975 h->wrk[k] += log(delta * h->lambda[k]);
976 else
977 h->wrk[k] += log(1 - exp(-delta * h->lambda[k]));
978 }
979 z = esl_vec_DLogSum(h->wrk, h->K); /* z= log \sum_k q_k alpha_k(a_i) */
980
981 /* Bump the gradients for Q_1..Q_{K-1} */
982 pidx = 0;
983 if (! h->fixmix) {
984 for (k = 1; k < h->K; k++)
985 dp[pidx++] -= g->obs[i] * (exp(h->wrk[k] - z) - h->q[k]);
986 }
987
988 /* Bump the gradients for w_0..w_{K-1}
989 */
990 for (k = 0; k < h->K; k++)
991 if (! h->fixlambda[k])
992 {
993 tmp = log(h->q[k]) + log(h->lambda[k])- h->lambda[k]*(ai-h->mu);
994 tmp = exp(tmp - z);
995 tmp *= (ai + delta - h->mu) * exp(-delta * h->lambda[k]) - (ai - h->mu);
996 dp[pidx++] -= g->obs[i] * tmp;
997 }
998 }
999 }
1000
1001 /* Function: esl_hxp_FitGuessBinned()
1002 *
1003 * Purpose: Given a histogram <g> with binned observations;
1004 * obtain a very crude guesstimate of a fit -- suitable only
1005 * as a starting point for further optimization -- and return
1006 * those parameters in <h>.
1007 *
1008 * Assigns $q_k \propto \frac{1}{k}$ and $\mu = \min_i x_i$;
1009 * splits $x$ into $K$ roughly equal-sized bins, and
1010 * and assigns $\lambda_k$ as the ML estimate from bin $k$.
1011 * If the coefficients have already been set to known values,
1012 * this step is skipped.
1013 */
1014 int
esl_hxp_FitGuessBinned(ESL_HISTOGRAM * g,ESL_HYPEREXP * h)1015 esl_hxp_FitGuessBinned(ESL_HISTOGRAM *g, ESL_HYPEREXP *h)
1016 {
1017 double sum;
1018 int n;
1019 int i,k;
1020 int nb;
1021 double ai;
1022
1023 if (g->is_tailfit) h->mu = g->phi; /* all x > mu in this case */
1024 else if (g->is_rounded) h->mu = esl_histogram_Bin2LBound(g, g->imin);
1025 else h->mu = g->xmin;
1026
1027 nb = g->imax - g->cmin + 1;
1028 k = h->K-1;
1029 sum = 0;
1030 n = 0;
1031 for (i = g->imax; i >= g->cmin; i--)
1032 {
1033 ai = esl_histogram_Bin2LBound(g,i);
1034 if (ai < g->xmin) ai = g->xmin;
1035 n += g->obs[i];
1036 sum += g->obs[i] * ai;
1037
1038 if (i == g->cmin + (k*nb)/h->K)
1039 h->lambda[k--] = 1 / ((sum/(double) n) - ai);
1040 }
1041
1042 if (! h->fixmix) {
1043 for (k = 0; k < h->K; k++)
1044 h->q[k] = 1 / (double) h->K;
1045 }
1046
1047 return eslOK;
1048 }
1049
1050
1051 /* Function: esl_hxp_FitCompleteBinned()
1052 *
1053 * Purpose: Given a histogram <g> with binned observations, where each
1054 * bin i holds some number of observed samples x with values from
1055 * lower bound l to upper bound u (that is, $l < x \leq u$),
1056 * and given a starting guess <h> for hyperexponential parameters;
1057 *
1058 * Find maximum likelihood parameters <h> by conjugate gradient
1059 * descent, starting from the initial <h> and leaving the
1060 * optimized solution in <h>.
1061 *
1062 * Returns: <eslOK> on success.
1063 *
1064 * Throws: <eslEMEM> on allocation error, and <h> is left in its
1065 * initial state.
1066 */
1067 int
esl_hxp_FitCompleteBinned(ESL_HISTOGRAM * g,ESL_HYPEREXP * h)1068 esl_hxp_FitCompleteBinned(ESL_HISTOGRAM *g, ESL_HYPEREXP *h)
1069 {
1070 struct hyperexp_binned_data data;
1071 double *p = NULL;
1072 double fx;
1073 int i;
1074 int np;
1075 int status;
1076
1077 np = 0;
1078 if (! h->fixmix) np = h->K-1; /* K-1 mix coefficients... */
1079 for (i = 0; i < h->K; i++) /* ...and up to K lambdas free. */
1080 if (! h->fixlambda[i]) np++;
1081
1082 ESL_ALLOC(p, sizeof(double) * np);
1083
1084 /* Copy shared info into the "data" structure */
1085 data.g = g;
1086 data.h = h;
1087
1088 /* From h, create the parameter vector. */
1089 hyperexp_pack_paramvector(p, np, h);
1090
1091 /* Feed it all to the mighty optimizer. */
1092 status = esl_min_ConjugateGradientDescent(NULL, p, np,
1093 &hyperexp_complete_binned_func,
1094 &hyperexp_complete_binned_gradient,
1095 (void *) (&data), &fx, NULL);
1096 if (status != eslOK) goto ERROR;
1097
1098 /* Convert the final parameter vector back to a hyperexponential
1099 */
1100 hyperexp_unpack_paramvector(p, np, h);
1101
1102 free(p);
1103 esl_hyperexp_SortComponents(h);
1104 return eslOK;
1105
1106 ERROR:
1107 free(p);
1108 return status;
1109 }
1110 /*--------------------------- end fitting ----------------------------------*/
1111
1112
1113
1114
1115
1116
1117 /****************************************************************************
1118 * 9. Test driver
1119 ****************************************************************************/
1120 #ifdef eslHYPEREXP_TESTDRIVE
1121 /* Compile:
1122 gcc -g -Wall -I. -I ~/src/easel -L ~/src/easel -o test\
1123 -DeslHYPEREXP_TESTDRIVE esl_hyperexp.c -leasel -lm
1124 */
1125 #include <stdio.h>
1126 #include <stdlib.h>
1127 #include <string.h>
1128
1129 #include "easel.h"
1130 #include "esl_random.h"
1131 #include "esl_histogram.h"
1132 #include "esl_hyperexp.h"
1133
1134 int
main(int argc,char ** argv)1135 main(int argc, char **argv)
1136 {
1137 ESL_HISTOGRAM *h;
1138 ESL_RANDOMNESS *r;
1139 ESL_HYPEREXP *hxp;
1140 ESL_HYPEREXP *ehxp;
1141 int n = 20000;
1142 double binwidth = 0.1;
1143 int i;
1144 double x;
1145 double *data;
1146 int ndata;
1147 int k, ek, mink;
1148 double mindiff, diff;
1149
1150 int opti;
1151 int be_verbose = FALSE;
1152 char *paramfile = NULL;
1153 char *plotfile = NULL;
1154 FILE *pfp = stdout;
1155 int plot_pdf = FALSE;
1156 int plot_logpdf = FALSE;
1157 int plot_cdf = FALSE;
1158 int plot_logcdf = FALSE;
1159 int plot_surv = FALSE;
1160 int plot_logsurv = FALSE;
1161 int xmin_set = FALSE;
1162 double xmin;
1163 int xmax_set = FALSE;
1164 double xmax;
1165 int xstep_set = FALSE;
1166 double xstep;
1167 int do_fixmix = FALSE;
1168 int status;
1169
1170 for (opti = 1; opti < argc && *(argv[opti]) == '-'; opti++)
1171 {
1172 if (strcmp(argv[opti], "-f") == 0) do_fixmix = TRUE;
1173 else if (strcmp(argv[opti], "-i") == 0) paramfile = argv[++opti];
1174 else if (strcmp(argv[opti], "-n") == 0) n = atoi(argv[++opti]);
1175 else if (strcmp(argv[opti], "-o") == 0) plotfile = argv[++opti];
1176 else if (strcmp(argv[opti], "-v") == 0) be_verbose = TRUE;
1177 else if (strcmp(argv[opti], "-w") == 0) binwidth = atof(argv[++opti]);
1178 else if (strcmp(argv[opti], "-C") == 0) plot_cdf = TRUE;
1179 else if (strcmp(argv[opti], "-LC") == 0) plot_logcdf = TRUE;
1180 else if (strcmp(argv[opti], "-P") == 0) plot_pdf = TRUE;
1181 else if (strcmp(argv[opti], "-LP") == 0) plot_logpdf = TRUE;
1182 else if (strcmp(argv[opti], "-S") == 0) plot_surv = TRUE;
1183 else if (strcmp(argv[opti], "-LS") == 0) plot_logsurv = TRUE;
1184 else if (strcmp(argv[opti], "-XL") == 0) { xmin_set = TRUE; xmin = atof(argv[++opti]); }
1185 else if (strcmp(argv[opti], "-XH") == 0) { xmax_set = TRUE; xmax = atof(argv[++opti]); }
1186 else if (strcmp(argv[opti], "-XS") == 0) { xstep_set = TRUE; xstep = atof(argv[++opti]); }
1187 else esl_fatal("bad option");
1188 }
1189
1190 if (paramfile != NULL)
1191 {
1192 status = esl_hyperexp_ReadFile(paramfile, &hxp);
1193 if (status == eslENOTFOUND) esl_fatal("Param file %s not found", paramfile);
1194 else if (status == eslEFORMAT) esl_fatal("Parse failed: param file %s invalid format", paramfile);
1195 else if (status != eslOK) esl_fatal("Unusual failure opening param file %s", paramfile);
1196 }
1197 else
1198 {
1199 hxp = esl_hyperexp_Create(3);
1200 hxp->mu = -2.0;
1201 hxp->q[0] = 0.5; hxp->q[1] = 0.3; hxp->q[2] = 0.2;
1202 hxp->lambda[0] = 1.0; hxp->lambda[1] = 0.3; hxp->lambda[2] = 0.1;
1203 }
1204 if (do_fixmix) esl_hyperexp_FixedUniformMixture(hxp); /* overrides q's above */
1205
1206 if (be_verbose) esl_hyperexp_Dump(stdout, hxp);
1207
1208 r = esl_randomness_Create(42);
1209 h = esl_histogram_CreateFull(hxp->mu, 100., binwidth);
1210 if (plotfile != NULL) {
1211 if ((pfp = fopen(plotfile, "w")) == NULL)
1212 esl_fatal("Failed to open plotfile");
1213 }
1214 if (! xmin_set) xmin = hxp->mu;
1215 if (! xmax_set) xmax = hxp->mu+ 20*(1. / esl_vec_DMin(hxp->lambda, hxp->K));
1216 if (! xstep_set) xstep = 0.1;
1217
1218 for (i = 0; i < n; i++)
1219 {
1220 x = esl_hxp_Sample(r, hxp);
1221 esl_histogram_Add(h, x);
1222 }
1223
1224 esl_histogram_GetData(h, &data, &ndata); /* get sorted data vector */
1225
1226 ehxp = esl_hyperexp_Create(hxp->K);
1227 if (do_fixmix) esl_hyperexp_FixedUniformMixture(ehxp);
1228 esl_hxp_FitGuess(data, ndata, ehxp);
1229 if ( esl_hxp_FitComplete(data, ndata, ehxp) != eslOK) esl_fatal("Failed to fit hyperexponential");
1230
1231 if (be_verbose) esl_hyperexp_Dump(stdout, ehxp);
1232
1233 if (fabs( (ehxp->mu-hxp->mu)/hxp->mu ) > 0.01)
1234 esl_fatal("Error in (complete) fitted mu > 1%\n");
1235 for (ek = 0; ek < ehxp->K; ek++)
1236 { /* try to match each estimated lambda up to a parametric lambda */
1237 mindiff = 1.0;
1238 mink = -1;
1239 for (k = 0; k < hxp->K; k++)
1240 {
1241 diff = fabs( (ehxp->lambda[ek] - hxp->lambda[k]) / hxp->lambda[k]);
1242 if (diff < mindiff) {
1243 mindiff = diff;
1244 mink = k;
1245 }
1246 }
1247 if (mindiff > 0.50)
1248 esl_fatal("Error in (complete) fitted lambda > 50%\n");
1249 if (fabs( (ehxp->q[ek] - hxp->q[mink]) / hxp->q[mink]) > 1.0)
1250 esl_fatal("Error in (complete) fitted q > 2-fold%\n");
1251 }
1252
1253 esl_hxp_FitGuessBinned(h, ehxp);
1254 if ( esl_hxp_FitCompleteBinned(h, ehxp) != eslOK) esl_fatal("Failed to fit binned hyperexponential");
1255 if (be_verbose) esl_hyperexp_Dump(stdout, ehxp);
1256
1257 if (fabs( (ehxp->mu-hxp->mu)/hxp->mu ) > 0.01)
1258 esl_fatal("Error in (binned) fitted mu > 1%\n");
1259 for (ek = 0; ek < ehxp->K; ek++)
1260 { /* try to match each estimated lambda up to a parametric lambda */
1261 mindiff = 1.0;
1262 mink = -1;
1263 for (k = 0; k < hxp->K; k++)
1264 {
1265 diff = fabs( (ehxp->lambda[ek] - hxp->lambda[k]) / hxp->lambda[k]);
1266 if (diff < mindiff) {
1267 mindiff = diff;
1268 mink = k;
1269 }
1270 }
1271 if (mindiff > 0.50)
1272 esl_fatal("Error in (binned) fitted lambda > 50%\n");
1273 if (fabs( (ehxp->q[ek] - hxp->q[mink]) / hxp->q[mink]) > 1.0)
1274 esl_fatal("Error in (binned) fitted q > 2-fold\n");
1275 }
1276
1277 if (plot_pdf) esl_hxp_Plot(pfp, hxp, &esl_hxp_pdf, xmin, xmax, xstep);
1278 if (plot_logpdf) esl_hxp_Plot(pfp, hxp, &esl_hxp_logpdf, xmin, xmax, xstep);
1279 if (plot_cdf) esl_hxp_Plot(pfp, hxp, &esl_hxp_cdf, xmin, xmax, xstep);
1280 if (plot_logcdf) esl_hxp_Plot(pfp, hxp, &esl_hxp_logcdf, xmin, xmax, xstep);
1281 if (plot_surv) esl_hxp_Plot(pfp, hxp, &esl_hxp_surv, xmin, xmax, xstep);
1282 if (plot_logsurv) esl_hxp_Plot(pfp, hxp, &esl_hxp_logsurv, xmin, xmax, xstep);
1283
1284 if (plotfile != NULL) fclose(pfp);
1285 esl_histogram_Destroy(h);
1286 esl_hyperexp_Destroy(hxp);
1287 esl_hyperexp_Destroy(ehxp);
1288 esl_randomness_Destroy(r);
1289 return 0;
1290 }
1291 #endif /*eslHYPEREXP_TESTDRIVE*/
1292
1293 /****************************************************************************
1294 * Example main()
1295 ****************************************************************************/
1296 #ifdef eslHYPEREXP_EXAMPLE
1297 /*::cexcerpt::hyperexp_example::begin::*/
1298 #include <stdio.h>
1299 #include "easel.h"
1300 #include "esl_random.h"
1301 #include "esl_histogram.h"
1302 #include "esl_hyperexp.h"
1303
1304 int
main(int argc,char ** argv)1305 main(int argc, char **argv)
1306 {
1307 ESL_RANDOMNESS *r; /* source of random numbers */
1308 ESL_HISTOGRAM *h; /* histogram to store the data */
1309 ESL_HYPEREXP *hxp; /* hyperexponential to sample from */
1310 ESL_HYPEREXP *ehxp; /* estimated hyperexponential */
1311 double x; /* sampled data point */
1312 int n = 100000; /* number of samples */
1313 double *data;
1314 int ndata;
1315 int i;
1316
1317 hxp = esl_hyperexp_Create(3);
1318 hxp->mu = -2.0;
1319 hxp->q[0] = 0.6; hxp->q[1] = 0.3; hxp->q[2] = 0.1;
1320 hxp->lambda[0] = 1.0; hxp->lambda[1] = 0.3; hxp->lambda[2] = 0.1;
1321
1322 r = esl_randomness_Create(0);
1323 h = esl_histogram_CreateFull(hxp->mu, 100, 1.0);
1324
1325 for (i = 0; i < n; i++)
1326 {
1327 x = esl_hxp_Sample(r, hxp);
1328 esl_histogram_Add(h, x);
1329 }
1330 esl_histogram_GetData(h, &data, &ndata);
1331
1332 /* Plot the empirical (sampled) and expected survivals */
1333 esl_histogram_PlotSurvival(stdout, h);
1334 esl_hxp_Plot(stdout, hxp, &esl_hxp_surv, h->xmin, h->xmax, 0.1);
1335
1336 /* ML fit to complete data, and plot fitted survival curve */
1337 ehxp = esl_hyperexp_Create(3);
1338 esl_hxp_FitGuess(data, ndata, ehxp);
1339 esl_hxp_FitComplete(data, ndata, ehxp);
1340 esl_hxp_Plot(stdout, ehxp, &esl_hxp_surv, h->xmin, h->xmax, 0.1);
1341
1342 /* ML fit to binned data, plot fitted survival curve */
1343 esl_hxp_FitGuessBinned(h, ehxp);
1344 esl_hxp_FitCompleteBinned(h, ehxp);
1345 esl_hxp_Plot(stdout, ehxp, &esl_hxp_surv, h->xmin, h->xmax, 0.1);
1346
1347 esl_randomness_Destroy(r);
1348 esl_histogram_Destroy(h);
1349 esl_hyperexp_Destroy(hxp);
1350 esl_hyperexp_Destroy(ehxp);
1351 return 0;
1352 }
1353 /*::cexcerpt::hyperexp_example::end::*/
1354 #endif /*eslHYPEREXP_EXAMPLE*/
1355
1356
1357