1 /* Statistical routines for mixtures of generalized extreme value distributions.
2 *
3 * Contents:
4 * 1. The ESL_MIXGEV object
5 * 2. Evaluating densities and distributions
6 * 3. Generic API routines: for general interface w/ histogram module
7 * 4. Dumping plots to xmgrace XY format
8 * 5. Sampling
9 * 6. ML fitting to complete data
10 * 7. Example
11 *
12 * Xrefs:
13 * SRE:STL9/139 original implementation
14 *
15 * To-do:
16 * - Fit*() functions should return eslEINVAL on n=0, eslENORESULT
17 * on failure due to small n. Compare esl_gumbel. xref J12/93.
18 * SRE, Wed Nov 27 11:02:14 2013
19 */
20 #include "esl_config.h"
21
22 #include <stdlib.h>
23 #include <math.h>
24 #include <assert.h>
25
26 #include "easel.h"
27 #include "esl_dirichlet.h" /* for uniform sampling of a probability vector */
28 #include "esl_gev.h"
29 #include "esl_minimizer.h"
30 #include "esl_random.h"
31 #include "esl_stats.h"
32 #include "esl_vectorops.h"
33
34 #include "esl_mixgev.h"
35
36
37 /****************************************************************************
38 * 1. The ESL_MIXGEV object
39 ****************************************************************************/
40
41 /* Function: esl_mixgev_Create()
42 *
43 * Purpose: Creates an object to hold parameters for a <K>-component
44 * mixture of generalized extreme value distributions.
45 *
46 * Parameters in the object are initialized ($q_k =
47 * \frac{1}{K}$, $\mu_k = 0$, $\lambda_k = 1$, $\alpha_k =
48 * 0$), but the caller will want to set these according to
49 * its own purposes.
50 *
51 * After an object is created, the caller can constrain any
52 * of the components to be a Gumbel (that is, constrain
53 * $\alpha_k = 0$ by calling <esl_mixgev_ForceGumbel(obj,
54 * k)>.
55 *
56 * Args: K - number of components in the mixture
57 *
58 * Returns: ptr to newly allocated/initialized <ESL_MIXGEV> object.
59 *
60 * Throws: <NULL> on allocation failure.
61 */
62 ESL_MIXGEV *
esl_mixgev_Create(int K)63 esl_mixgev_Create(int K)
64 {
65 ESL_MIXGEV *mg = NULL;
66 int k;
67 int status;
68
69 ESL_ALLOC(mg, sizeof(ESL_MIXGEV));
70 mg->q = mg->mu = mg->lambda = mg->alpha = mg->wrk = NULL;
71 mg->isgumbel = NULL;
72 mg->K = K;
73
74 ESL_ALLOC(mg->q, sizeof(double) * K);
75 ESL_ALLOC(mg->mu, sizeof(double) * K);
76 ESL_ALLOC(mg->lambda, sizeof(double) * K);
77 ESL_ALLOC(mg->alpha, sizeof(double) * K);
78 ESL_ALLOC(mg->wrk, sizeof(double) * K);
79 ESL_ALLOC(mg->isgumbel, sizeof(int) * K);
80
81 for (k = 0; k < K; k++)
82 {
83 mg->q[k] = 1. / (double) K;
84 mg->mu[k] = 0.;
85 mg->lambda[k] = 1.;
86 mg->alpha[k] = 0.;
87 mg->isgumbel[k] = FALSE;
88 }
89 return mg;
90
91 ERROR:
92 esl_mixgev_Destroy(mg);
93 return NULL;
94 }
95
96 /* Function: esl_mixgev_Destroy()
97 *
98 * Purpose: Deallocates the mixture GEV parameter object <mg>.
99 *
100 * Args: mg - ptr to the object to be deallocated.
101 *
102 * Returns: (void)
103 */
104 void
esl_mixgev_Destroy(ESL_MIXGEV * mg)105 esl_mixgev_Destroy(ESL_MIXGEV *mg)
106 {
107 if (mg == NULL) return;
108
109 if (mg->q != NULL) free(mg->q);
110 if (mg->mu != NULL) free(mg->mu);
111 if (mg->lambda != NULL) free(mg->lambda);
112 if (mg->alpha != NULL) free(mg->alpha);
113 if (mg->wrk != NULL) free(mg->wrk);
114 if (mg->isgumbel != NULL) free(mg->isgumbel);
115
116 free(mg);
117 }
118
119 /* Function: esl_mixgev_Copy()
120 *
121 * Purpose: Makes a copy of the mixture GEV parameter object <src>
122 * in <dest>. Caller must have already allocated <dest> to have
123 * (at least) the same number of components as <src>.
124 *
125 * Args: src - object to be copied
126 * dest - allocated object to copy <src> into
127 *
128 * Returns: <eslOK> on success.
129 *
130 * Throws: <eslEINCOMPAT> if <dest> isn't allocated with enough
131 * components to hold a copy of <src>.
132 */
133 int
esl_mixgev_Copy(ESL_MIXGEV * src,ESL_MIXGEV * dest)134 esl_mixgev_Copy(ESL_MIXGEV *src, ESL_MIXGEV *dest)
135 {
136 int k;
137
138 if (dest->K < src->K)
139 ESL_EXCEPTION(eslEINCOMPAT, "mixture GEV too small to copy into");
140
141 for (k = 0; k < src->K; k++)
142 {
143 dest->q[k] = src->q[k];
144 dest->mu[k] = src->mu[k];
145 dest->lambda[k] = src->lambda[k];
146 dest->alpha[k] = src->alpha[k];
147 dest->isgumbel[k] = src->isgumbel[k];
148 }
149 dest->K = src->K;
150 return eslOK;
151 }
152
153 /* Function: esl_mixgev_ForceGumbel()
154 *
155 * Purpose: Constrain component <which> of the mixture GEV <mg>
156 * to be a Gumbel (that is, constrain $\alpha=0$ for
157 * that component. This constraint will be obeyed by
158 * any subsequent calls to parameter fitting routines.
159 *
160 * Normally would be called just after creating the <mg>
161 * object, as part of its configuration before trying to
162 * fit some observed data to a mixture GEV.
163 *
164 * Args: mg - mixture GEV object being configured
165 * which - which component to constrain to a Gumbel
166 *
167 * Returns: <eslOK> on success.
168 */
169 int
esl_mixgev_ForceGumbel(ESL_MIXGEV * mg,int which)170 esl_mixgev_ForceGumbel(ESL_MIXGEV *mg, int which)
171 {
172 mg->isgumbel[which] = TRUE;
173 return eslOK;
174 }
175 /*----------------- end ESL_MIXGEV object maintenance ----------------------*/
176
177
178
179
180 /****************************************************************************
181 * 2. Evaluating densities and distributions
182 ****************************************************************************/
183
184 /* Function: esl_mixgev_pdf()
185 *
186 * Purpose: Returns the probability density function $P(X=x)$ for
187 * quantile <x>, given mixture GEV parameters <mg>.
188 */
189 double
esl_mixgev_pdf(double x,ESL_MIXGEV * mg)190 esl_mixgev_pdf(double x, ESL_MIXGEV *mg)
191 {
192 double pdf = 0.;
193 int k;
194
195 for (k = 0; k < mg->K; k++)
196 pdf += mg->q[k] * esl_gev_pdf(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
197 return pdf;
198 }
199
200 /* Function: esl_mixgev_logpdf()
201 *
202 * Purpose: Returns the log of the PDF ($\log P(X=x)$) for quantile <x>,
203 * given mixture GEV parameters <mg>.
204 */
205 double
esl_mixgev_logpdf(double x,ESL_MIXGEV * mg)206 esl_mixgev_logpdf(double x, ESL_MIXGEV *mg)
207 {
208 int k;
209 for (k = 0; k < mg->K; k++)
210 if (mg->q[k] == 0.0)
211 mg->wrk[k] = -eslINFINITY;
212 else
213 mg->wrk[k] = log(mg->q[k]) +
214 esl_gev_logpdf(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
215
216 return esl_vec_DLogSum(mg->wrk, mg->K);
217 }
218
219 /* Function: esl_mixgev_cdf()
220 *
221 * Purpose: Returns the cumulative distribution function $P(X \leq x)$
222 * for quantile <x>, given mixture GEV parameters <mg>.
223 */
224 double
esl_mixgev_cdf(double x,ESL_MIXGEV * mg)225 esl_mixgev_cdf(double x, ESL_MIXGEV *mg)
226 {
227 double cdf = 0.;
228 int k;
229
230 for (k = 0; k < mg->K; k++)
231 cdf += mg->q[k] * esl_gev_cdf(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
232 return cdf;
233 }
234
235 /* Function: esl_mixgev_logcdf()
236 *
237 * Purpose: Returns the log of the CDF $\log P(X \leq x)$
238 * for quantile <x>, given mixture GEV parameters <mg>.
239 */
240 double
esl_mixgev_logcdf(double x,ESL_MIXGEV * mg)241 esl_mixgev_logcdf(double x, ESL_MIXGEV *mg)
242 {
243 int k;
244
245 for (k = 0; k < mg->K; k++)
246 if (mg->q[k] == 0.0)
247 mg->wrk[k] = -eslINFINITY;
248 else
249 mg->wrk[k] = log(mg->q[k]) +
250 esl_gev_logcdf(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
251
252 return esl_vec_DLogSum(mg->wrk, mg->K);
253 }
254
255 /* Function: esl_mixgev_surv()
256 *
257 * Purpose: Returns the survivor function $P(X > x)$ (1-CDF)
258 * for quantile <x>, given mixture GEV parameters <mg>.
259 */
260 double
esl_mixgev_surv(double x,ESL_MIXGEV * mg)261 esl_mixgev_surv(double x, ESL_MIXGEV *mg)
262 {
263 double srv = 0.;
264 int k;
265
266 for (k = 0; k < mg->K; k++)
267 srv += mg->q[k] * esl_gev_surv(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
268 return srv;
269 }
270
271 /* Function: esl_mixgev_logsurv()
272 *
273 * Purpose: Returns the log survivor function $\log P(X > x)$ (log(1-CDF))
274 * for quantile <x>, given mixture GEV parameters <mg>.
275 */
276 double
esl_mixgev_logsurv(double x,ESL_MIXGEV * mg)277 esl_mixgev_logsurv(double x, ESL_MIXGEV *mg)
278 {
279 int k;
280 for (k = 0; k < mg->K; k++)
281 {
282 mg->wrk[k] = log(mg->q[k]);
283 mg->wrk[k] += esl_gev_logsurv(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
284 }
285 return esl_vec_DLogSum(mg->wrk, mg->K);
286 }
287
288 /* Function: esl_mixgev_invcdf()
289 *
290 * Purpose: Calculates the inverse CDF for a mixture GEV <mg>,
291 * returning the quantile <x> at which the CDF is <p>,
292 * where $0 < p < 1$.
293 *
294 * The inverse CDF of a mixture model has no analytical
295 * expression as far as I'm aware. The calculation here is
296 * a brute force bisection search in <x> using the CDF
297 * function. It will suffice for a small number of calls
298 * (for plotting applications, for example), but beware, it is not
299 * efficient.
300 */
301 double
esl_mixgev_invcdf(double p,ESL_MIXGEV * mg)302 esl_mixgev_invcdf(double p, ESL_MIXGEV *mg)
303 {
304 double x1, x2, xm; /* low, high guesses at x */
305 double f1, f2, fm;
306 double tol = 1e-6;
307
308 x2 = esl_vec_DMin(mg->mu, mg->K);
309 x1 = x2 - 1.;
310 do { /* bracket, left side */
311 x1 = x1 + 2.*(x2-x1);
312 f1 = esl_mixgev_cdf(x1, mg);
313 } while (f1 > p);
314 do { /* bracket, right side */
315 x2 = x2 + 2.*(x2-x1);
316 f2 = esl_mixgev_cdf(x2, mg);
317 } while (f2 < p);
318
319 do { /* bisection */
320 xm = (x1+x2) / 2.;
321 fm = esl_mixgev_cdf(xm, mg);
322
323 if (fm > p) x2 = xm;
324 else if (fm < p) x1 = xm;
325 else return xm; /* unlikely case of fm==p */
326 } while ( (x2-x1)/(x1+x2+1e-9) > tol);
327
328 xm = (x1+x2) / 2.;
329 return xm;
330 }
331 /*-------------------- end densities & distributions ------------------------*/
332
333
334
335
336 /****************************************************************************
337 * 3. Generic API routines: for general interface w/ histogram module
338 ****************************************************************************/
339
340 /* Function: esl_mixgev_generic_pdf()
341 *
342 * Purpose: Generic-API wrapper around <esl_mixgev_pdf()>, taking
343 * a void ptr to a <ESL_MIXGEV> parameter structure.
344 */
345 double
esl_mixgev_generic_pdf(double x,void * params)346 esl_mixgev_generic_pdf(double x, void *params)
347 {
348 ESL_MIXGEV *mg = (ESL_MIXGEV *) params;
349 return esl_mixgev_pdf(x, mg);
350 }
351
352 /* Function: esl_mixgev_generic_cdf()
353 *
354 * Purpose: Generic-API wrapper around <esl_mixgev_cdf()>, taking
355 * a void ptr to a <ESL_MIXGEV> parameter structure.
356 */
357 double
esl_mixgev_generic_cdf(double x,void * params)358 esl_mixgev_generic_cdf(double x, void *params)
359 {
360 ESL_MIXGEV *mg = (ESL_MIXGEV *) params;
361 return esl_mixgev_cdf(x, mg);
362 }
363
364 /* Function: esl_mixgev_generic_surv()
365 *
366 * Purpose: Generic-API wrapper around <esl_mixgev_surv()>, taking
367 * a void ptr to a <ESL_MIXGEV> parameter structure.
368 */
369 double
esl_mixgev_generic_surv(double x,void * params)370 esl_mixgev_generic_surv(double x, void *params)
371 {
372 ESL_MIXGEV *mg = (ESL_MIXGEV *) params;
373 return esl_mixgev_surv(x, mg);
374 }
375
376 /* Function: esl_mixgev_generic_invcdf()
377 *
378 * Purpose: Generic-API wrapper around <esl_mixgev_invcdf()>, taking
379 * a void ptr to a <ESL_MIXGEV> parameter structure.
380 */
381 double
esl_mixgev_generic_invcdf(double p,void * params)382 esl_mixgev_generic_invcdf(double p, void *params)
383 {
384 ESL_MIXGEV *mg = (ESL_MIXGEV *) params;
385 return esl_mixgev_invcdf(p, mg);
386 }
387 /*------------------------ end generic API ---------------------------------*/
388
389
390
391
392 /****************************************************************************
393 * 4. Dumping plots to xmgrace XY format
394 ****************************************************************************/
395
396 /* Function: esl_mixgev_Plot()
397 *
398 * Purpose: Plot some function <func> (for instance, <esl_mixgev_pdf()>)
399 * for mixture GEV parameters <mg>, for a range of
400 * quantiles x from <xmin> to <xmax> in steps of <xstep>;
401 * output to an open stream <fp> in xmgrace XY input format.
402 *
403 * Returns: <eslOK> on success.
404 *
405 * Throws: <eslEWRITE> on any system write error.
406 */
407 int
esl_mixgev_Plot(FILE * fp,ESL_MIXGEV * mg,double (* func)(double x,ESL_MIXGEV * mg),double xmin,double xmax,double xstep)408 esl_mixgev_Plot(FILE *fp, ESL_MIXGEV *mg,
409 double (*func)(double x, ESL_MIXGEV *mg),
410 double xmin, double xmax, double xstep)
411 {
412 double x;
413 for (x = xmin; x <= xmax; x += xstep)
414 if (fprintf(fp, "%f\t%g\n", x, (*func)(x, mg)) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "mixgev plot write failed");
415 if (fprintf(fp, "&\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "mixgev plot write failed");
416 return eslOK;
417 }
418 /*-------------------- end plot dumping routines ---------------------------*/
419
420
421
422
423
424 /****************************************************************************
425 * 5. Sampling
426 ****************************************************************************/
427
428 /* Function: esl_mixgev_Sample()
429 *
430 * Purpose: Sample a random variate x from a mixture GEV <mg>,
431 * given random number source <r>.
432 */
433 double
esl_mixgev_Sample(ESL_RANDOMNESS * r,ESL_MIXGEV * mg)434 esl_mixgev_Sample(ESL_RANDOMNESS *r, ESL_MIXGEV *mg)
435 {
436 int k;
437 k = esl_rnd_DChoose(r, mg->q, mg->K);
438 return esl_gev_Sample(r, mg->mu[k], mg->lambda[k], mg->alpha[k]);
439 }
440
441 /*--------------------------- end sampling ---------------------------------*/
442
443
444
445
446
447
448 /****************************************************************************
449 * 6. ML fitting to complete data
450 ****************************************************************************/
451
452 struct mixgev_data {
453 double *x;
454 int n;
455 ESL_MIXGEV *mg;
456 };
457
458 /* Given mixture GEV parameters in <mg>;
459 * do appropriate c.o.v.'s to unconstrained real parameters
460 * and fill in the packed parameter vector <p>.
461 *
462 * First K-1 are Q_1..Q_K-1 mixture coefficient parameters; Q_0 implicitly 0;
463 * cov is q_k = e^{Q_k} / \sum_j e^{Q_j},
464 * so Q_k = log(q_k) - log(q_0).
465 * Then K components: mu, lambda, optional alpha;
466 * mu, alpha are already unconstrained real;
467 * lambda cov is lambda = e^w, w = log(lambda).
468 */
469 static void
mixgev_pack_paramvector(double * p,int np,ESL_MIXGEV * mg)470 mixgev_pack_paramvector(double *p, int np, ESL_MIXGEV *mg)
471 {
472 int i; /* counter in parameter vector p */
473 int k; /* counter in mixture components */
474 double z; /* tmp variable */
475
476 /* mixture coefficients */
477 z = log(mg->q[0]);
478 i = 0;
479 for (k = 1; k < mg->K; k++)
480 p[i++] = log(mg->q[k]) - z;
481
482 /* gev parameters */
483 for (k = 0; k < mg->K; k++)
484 {
485 p[i++] = mg->mu[k];
486 p[i++] = log(mg->lambda[k]);
487 if (! mg->isgumbel[k]) p[i++] = mg->alpha[k];
488 }
489 /* assert(i==np) in debugging, if you want */
490 }
491
492 /* Same as above but in reverse: given parameter vector <p>,
493 * do appropriate c.o.v. back to desired parameter space, and
494 * fill in the mixture GEV structure <mg>.
495 */
496 static void
mixgev_unpack_paramvector(double * p,int np,ESL_MIXGEV * mg)497 mixgev_unpack_paramvector(double *p, int np, ESL_MIXGEV *mg)
498 {
499 int i; /* counter in parameter vector p */
500 int k; /* counter in mixture components */
501 double z; /* tmp variable */
502
503 /* Fetch the params in their c.o.v. space first
504 */
505 i = 0;
506 mg->q[0] = 0; /* implicitly */
507 for (k = 1; k < mg->K; k++)
508 mg->q[k] = p[i++];
509 for (k = 0; k < mg->K; k++)
510 {
511 mg->mu[k] = p[i++];
512 mg->lambda[k] = p[i++];
513 if (!mg->isgumbel[k]) mg->alpha[k] = p[i++];
514 else mg->alpha[k] = 0.;
515 }
516 assert(i==np);
517
518 /* Convert mix coefficients back to probabilities;
519 * their c.o.v. is q_k = e^{Q_k} / \sum_k e^{Q_k}
520 * which rearranges to exp(Q_k - log[\sum_k e^Q_k]),
521 * and we have the DLogSum() function to compute the log sum.
522 */
523 z = esl_vec_DLogSum(mg->q, mg->K);
524 for (k = 0; k < mg->K; k++)
525 mg->q[k] = exp(mg->q[k] - z);
526
527 /* lambda c.o.v. is \lambda = e^w
528 */
529 for (k = 0; k < mg->K; k++)
530 mg->lambda[k] = exp(mg->lambda[k]);
531 }
532
533 static double
mixgev_complete_func(double * p,int np,void * dptr)534 mixgev_complete_func(double *p, int np, void *dptr)
535 {
536 struct mixgev_data *data = (struct mixgev_data *) dptr;
537 ESL_MIXGEV *mg = data->mg;
538 int i;
539 double logL;
540
541 /* Use the current parameter vector (in its unconstrained
542 * real c.o.v. space) to deduce what the current mixture GEV
543 * parameters are:
544 */
545 mixgev_unpack_paramvector(p, np, mg);
546
547 /* Calculate the log likelihood:
548 */
549 logL = 0;
550 for (i = 0; i < data->n; i++)
551 logL += esl_mixgev_logpdf(data->x[i], mg);
552
553 /* return the NLL
554 */
555 return -logL;
556 }
557
558
559 /* Function: esl_mixgev_FitGuess()
560 *
561 * Purpose: Make initial randomized guesses at the parameters
562 * of mixture GEV <mg>, using random number generator
563 * <r> and observed data consisting of <n> values
564 * <x[0..n-1]>. This guess is a suitable starting
565 * point for a parameter optimization routine, such
566 * as <esl_mixgev_FitComplete()>.
567 *
568 * Specifically, we estimate one 'central' guess
569 * for a single Gumbel fit to the data, using the
570 * method of moments. Then we add $\pm 10\%$ to that 'central'
571 * $\mu$ and $\lambda$ to get each component
572 * $\mu_i$ and $\lambda_i$. The $\alpha_i$ parameters
573 * are generated by sampling uniformly from $-0.1..0.1$.
574 * Mixture coefficients $q_i$ are sampled uniformly.
575 *
576 * Args: r - randomness source
577 * x - vector of observed data values to fit, 0..n-1
578 * n - number of values in <x>
579 * mg - mixture GEV to put guessed params into
580 *
581 * Returns: <eslOK> on success.
582 */
583 int
esl_mixgev_FitGuess(ESL_RANDOMNESS * r,double * x,int n,ESL_MIXGEV * mg)584 esl_mixgev_FitGuess(ESL_RANDOMNESS *r, double *x, int n, ESL_MIXGEV *mg)
585 {
586 double mean, variance;
587 double mu, lambda;
588 int k;
589
590 esl_stats_DMean(x, n, &mean, &variance);
591 lambda = eslCONST_PI / sqrt(6.*variance);
592 mu = mean - 0.57722/lambda;
593
594 esl_dirichlet_DSampleUniform(r, mg->K, mg->q);
595 for (k = 0; k < mg->K; k++)
596 {
597 mg->mu[k] = mu + 0.2 * mu * (esl_random(r) - 0.5);
598 mg->lambda[k] = lambda + 0.2 * lambda * (esl_random(r) - 0.5);
599 if (mg->isgumbel[k]) mg->alpha[k] = 0.;
600 else mg->alpha[k] = 0.2 * (esl_random(r) - 0.5);
601 }
602 return eslOK;
603 }
604
605
606 /* Function: esl_mixgev_FitComplete()
607 *
608 * Purpose: Given <n> observed data values <x[0..n-1]>, and
609 * an initial guess at a mixture GEV fit to those data
610 * <mg>, use conjugate gradient descent to perform
611 * a locally optimal maximum likelihood mixture
612 * GEV parameter fit to the data.
613 *
614 * To obtain a reasonable initial guess for <mg>,
615 * see <esl_mixgev_FitGuess()>.
616 *
617 * Args: x - observed data, <x[0..n-1]>.
618 * n - number of samples in <x>
619 * mg - mixture GEV to estimate, w/ params set to
620 * an initial guess.
621 *
622 * Returns: <eslOK> on success, and <mg> contains local
623 * ML estimate for mixture GEV parameters.
624 *
625 * Throws: <eslEMEM> on allocation error, and <mg> is unchanged
626 * from its initial state.
627 */
628 int
esl_mixgev_FitComplete(double * x,int n,ESL_MIXGEV * mg)629 esl_mixgev_FitComplete(double *x, int n, ESL_MIXGEV *mg)
630 {
631 ESL_MIN_CFG *cfg = NULL;
632 struct mixgev_data data;
633 int status;
634 double *p = NULL;
635 int np;
636 double fx;
637 int k;
638 int i;
639
640 /* Determine number of free parameters and allocate */
641 np = mg->K-1; /* K-1 mix coefficients free */
642 for (k = 0; k < mg->K; k++)
643 np += (mg->isgumbel[k])? 2 : 3;
644 ESL_ALLOC(p, sizeof(double) * np);
645
646 /* Customize the minimizer */
647 cfg = esl_min_cfg_Create(np);
648 cfg->cg_rtol = 1e-6;
649 i = 0;
650 for (k = 1; k < mg->K; k++) cfg->u[i++] = 1.0;
651 for (k = 0; k < mg->K; k++)
652 {
653 cfg->u[i++] = 1.0;
654 cfg->u[i++] = 1.0;
655 if (! mg->isgumbel[k]) cfg->u[i++] = 0.02;
656 }
657 ESL_DASSERT1( (np == i) );
658
659
660 /* Copy shared info into the "data" structure
661 */
662 data.x = x;
663 data.n = n;
664 data.mg = mg;
665
666 /* From mg, create the parameter vector.
667 */
668 mixgev_pack_paramvector(p, np, mg);
669
670 /* Feed it all to the mighty optimizer. */
671 status = esl_min_ConjugateGradientDescent(cfg, p, np, &mixgev_complete_func, NULL,
672 (void *) (&data), &fx, NULL);
673 if (status != eslOK) goto ERROR;
674
675 /* Convert the final parameter vector back to a mixture GEV
676 */
677 mixgev_unpack_paramvector(p, np, mg);
678
679 free(p);
680 esl_min_cfg_Destroy(cfg);
681 return eslOK;
682
683 ERROR:
684 free(p);
685 esl_min_cfg_Destroy(cfg);
686 return status;
687 }
688 /*--------------------------- end fitting ----------------------------------*/
689
690
691
692
693 /****************************************************************************
694 * Example main()
695 ****************************************************************************/
696
697 #ifdef eslMIXGEV_EXAMPLE
698 /*::cexcerpt::mixgev_example::begin::*/
699 /* compile:
700 gcc -g -Wall -I. -L. -o example -DeslMIXGEV_EXAMPLE esl_mixgev.c -leasel -lm
701 * run: ./example
702 */
703 #include <stdio.h>
704 #include <stdlib.h>
705 #include "easel.h"
706 #include "esl_mixgev.h"
707 #include "esl_random.h"
708
709 int
main(int argc,char ** argv)710 main(int argc, char **argv)
711 {
712 FILE *fp;
713 ESL_RANDOMNESS *r; /* source of random numbers */
714 ESL_MIXGEV *mg; /* mixture GEV to sample from */
715 ESL_MIXGEV *emg; /* estimated mixture GEV */
716 double *x; /* sampled dataset */
717 int n = 100000; /* number of samples */
718 int i;
719 int k;
720 double nll;
721 double min, max;
722
723 r = esl_randomness_Create(42);
724 mg = esl_mixgev_Create(2);
725 mg->q[0] = 0.85; mg->q[1] = 0.15;
726 mg->mu[0] = -2.72; mg->mu[1] = -2.0;
727 mg->lambda[0] = 2.5; mg->lambda[1] = 1.0;
728 mg->alpha[0] = 0.; mg->alpha[1] = 0.09;
729
730 nll = 0.;
731 min = 99999;
732 max = -99999;
733
734 x = malloc(sizeof(double) * n);
735 for (i = 0; i < n; i++)
736 {
737 x[i] = esl_mixgev_Sample(r, mg);
738 nll -= esl_mixgev_logpdf(x[i], mg);
739 if (x[i] > max) max = x[i];
740 if (x[i] < min) min = x[i];
741 }
742 printf("NLL of known mixGEV: %g\n", nll);
743
744 /* Dump the raw data samples to an R file.
745 */
746 fp = fopen("data.out", "w");
747 fprintf(fp, " val\n");
748 for (i = 0; i < n; i++)
749 fprintf(fp, "%d %f\n", i+1, x[i]);
750 fclose(fp);
751
752 emg = esl_mixgev_Create(2);
753 esl_mixgev_FitGuess(r, x, n, emg);
754 /* esl_mixgev_Copy(mg, emg); */
755 esl_mixgev_ForceGumbel(emg, 0);
756 esl_mixgev_FitComplete(x, n, emg);
757
758 printf("Component q mu lambda alpha\n");
759 for (k=0; k < 2; k++)
760 printf("%d\t%7.4f\t%7.2f\t%7.4f\t%7.4f\n",
761 k, emg->q[k], emg->mu[k], emg->lambda[k], emg->alpha[k]);
762
763 nll = 0.;
764 for (i = 0; i < n; i++)
765 nll -= esl_mixgev_logpdf(x[i], emg);
766 printf("NLL of fitted mixGEV: %g\n", nll);
767
768 /* Dump some R commands for showing these distributions
769 */
770 printf("library(ismev)\n");
771 printf("library(evd)\n");
772
773 printf("d <- read.table(\"data.out\")$val\n");
774 printf("plot(density(d,bw=0.2), log=\"y\")\n");
775 printf("min <- %f\n", min);
776 printf("max <- %f\n", max);
777 printf("xax <- seq(min-2, max+5, by=0.1)\n");
778 printf("cc <- xax - xax\n");
779 printf("zc <- xax - xax\n");
780 for (k = 0; k < mg->K; k++)
781 {
782 printf("c%d <- %f * dgev(xax, %f, %f, %f)\n",
783 k, mg->q[k], mg->mu[k], 1./mg->lambda[k], mg->alpha[k]);
784 printf("cc <- cc + c%d\n", k);
785 printf("lines(xax, c%d, col=\"blue\")\n", k);
786 }
787 for (k = 0; k < emg->K; k++)
788 {
789 printf("z%d <- %f * dgev(xax, %f, %f, %f)\n",
790 k, emg->q[k], emg->mu[k], 1./emg->lambda[k], emg->alpha[k]);
791 printf("zc <- zc + z%d\n", k);
792 printf("lines(xax, z%d, col=\"blue\")\n", k);
793 }
794 printf("lines(xax, cc, col=\"green\")\n");
795 printf("lines(xax, zc, col=\"red\")\n");
796
797 esl_mixgev_Destroy(mg);
798 esl_mixgev_Destroy(emg);
799 esl_randomness_Destroy(r);
800 free(x);
801 return 0;
802 }
803 /*::cexcerpt::mixgev_example::end::*/
804 #endif /*eslMIXGEV_EXAMPLE*/
805
806