1 /* Statistical routines for mixtures of generalized extreme value distributions.
2  *
3  * Contents:
4  *   1. The ESL_MIXGEV object
5  *   2. Evaluating densities and distributions
6  *   3. Generic API routines: for general interface w/ histogram module
7  *   4. Dumping plots to xmgrace XY format
8  *   5. Sampling
9  *   6. ML fitting to complete data
10  *   7. Example
11  *
12  * Xrefs:
13  *  SRE:STL9/139  original implementation
14  *
15  * To-do:
16  *   - Fit*() functions should return eslEINVAL on n=0, eslENORESULT
17  *     on failure due to small n. Compare esl_gumbel. xref J12/93.
18  *     SRE, Wed Nov 27 11:02:14 2013
19  */
20 #include "esl_config.h"
21 
22 #include <stdlib.h>
23 #include <math.h>
24 #include <assert.h>
25 
26 #include "easel.h"
27 #include "esl_dirichlet.h"  /* for uniform sampling of a probability vector */
28 #include "esl_gev.h"
29 #include "esl_minimizer.h"
30 #include "esl_random.h"
31 #include "esl_stats.h"
32 #include "esl_vectorops.h"
33 
34 #include "esl_mixgev.h"
35 
36 
37 /****************************************************************************
38  * 1. The ESL_MIXGEV object
39  ****************************************************************************/
40 
41 /* Function:  esl_mixgev_Create()
42  *
43  * Purpose:   Creates an object to hold parameters for a <K>-component
44  *            mixture of generalized extreme value distributions.
45  *
46  *            Parameters in the object are initialized ($q_k =
47  *            \frac{1}{K}$, $\mu_k = 0$, $\lambda_k = 1$, $\alpha_k =
48  *            0$), but the caller will want to set these according to
49  *            its own purposes.
50  *
51  *            After an object is created, the caller can constrain any
52  *            of the components to be a Gumbel (that is, constrain
53  *            $\alpha_k = 0$ by calling <esl_mixgev_ForceGumbel(obj,
54  *            k)>.
55  *
56  * Args:      K  - number of components in the mixture
57  *
58  * Returns:   ptr to newly allocated/initialized <ESL_MIXGEV> object.
59  *
60  * Throws:    <NULL> on allocation failure.
61  */
62 ESL_MIXGEV *
esl_mixgev_Create(int K)63 esl_mixgev_Create(int K)
64 {
65   ESL_MIXGEV *mg = NULL;
66   int         k;
67   int         status;
68 
69   ESL_ALLOC(mg, sizeof(ESL_MIXGEV));
70   mg->q = mg->mu = mg->lambda = mg->alpha = mg->wrk = NULL;
71   mg->isgumbel = NULL;
72   mg->K = K;
73 
74   ESL_ALLOC(mg->q,        sizeof(double) * K);
75   ESL_ALLOC(mg->mu,       sizeof(double) * K);
76   ESL_ALLOC(mg->lambda,   sizeof(double) * K);
77   ESL_ALLOC(mg->alpha,    sizeof(double) * K);
78   ESL_ALLOC(mg->wrk,      sizeof(double) * K);
79   ESL_ALLOC(mg->isgumbel, sizeof(int)    * K);
80 
81   for (k = 0; k < K; k++)
82     {
83       mg->q[k]        = 1. / (double) K;
84       mg->mu[k]       = 0.;
85       mg->lambda[k]   = 1.;
86       mg->alpha[k]    = 0.;
87       mg->isgumbel[k] = FALSE;
88     }
89   return mg;
90 
91  ERROR:
92   esl_mixgev_Destroy(mg);
93   return NULL;
94 }
95 
96 /* Function:  esl_mixgev_Destroy()
97  *
98  * Purpose:   Deallocates the mixture GEV parameter object <mg>.
99  *
100  * Args:      mg  - ptr to the object to be deallocated.
101  *
102  * Returns:   (void)
103  */
104 void
esl_mixgev_Destroy(ESL_MIXGEV * mg)105 esl_mixgev_Destroy(ESL_MIXGEV *mg)
106 {
107   if (mg == NULL) return;
108 
109   if (mg->q        != NULL) free(mg->q);
110   if (mg->mu       != NULL) free(mg->mu);
111   if (mg->lambda   != NULL) free(mg->lambda);
112   if (mg->alpha    != NULL) free(mg->alpha);
113   if (mg->wrk      != NULL) free(mg->wrk);
114   if (mg->isgumbel != NULL) free(mg->isgumbel);
115 
116   free(mg);
117 }
118 
119 /* Function:  esl_mixgev_Copy()
120  *
121  * Purpose:   Makes a copy of the mixture GEV parameter object <src>
122  *            in <dest>. Caller must have already allocated <dest> to have
123  *            (at least) the same number of components as <src>.
124  *
125  * Args:      src   - object to be copied
126  *            dest  - allocated object to copy <src> into
127  *
128  * Returns:   <eslOK> on success.
129  *
130  * Throws:    <eslEINCOMPAT> if <dest> isn't allocated with enough
131  *            components to hold a copy of <src>.
132  */
133 int
esl_mixgev_Copy(ESL_MIXGEV * src,ESL_MIXGEV * dest)134 esl_mixgev_Copy(ESL_MIXGEV *src, ESL_MIXGEV *dest)
135 {
136   int k;
137 
138   if (dest->K < src->K)
139     ESL_EXCEPTION(eslEINCOMPAT, "mixture GEV too small to copy into");
140 
141   for (k = 0; k < src->K; k++)
142     {
143       dest->q[k]        = src->q[k];
144       dest->mu[k]       = src->mu[k];
145       dest->lambda[k]   = src->lambda[k];
146       dest->alpha[k]    = src->alpha[k];
147       dest->isgumbel[k] = src->isgumbel[k];
148     }
149   dest->K = src->K;
150   return eslOK;
151 }
152 
153 /* Function:  esl_mixgev_ForceGumbel()
154  *
155  * Purpose:   Constrain component <which> of the mixture GEV <mg>
156  *            to be a Gumbel (that is, constrain $\alpha=0$ for
157  *            that component. This constraint will be obeyed by
158  *            any subsequent calls to parameter fitting routines.
159  *
160  *            Normally would be called just after creating the <mg>
161  *            object, as part of its configuration before trying to
162  *            fit some observed data to a mixture GEV.
163  *
164  * Args:      mg    - mixture GEV object being configured
165  *            which - which component to constrain to a Gumbel
166  *
167  * Returns:   <eslOK> on success.
168  */
169 int
esl_mixgev_ForceGumbel(ESL_MIXGEV * mg,int which)170 esl_mixgev_ForceGumbel(ESL_MIXGEV *mg, int which)
171 {
172   mg->isgumbel[which] = TRUE;
173   return eslOK;
174 }
175 /*----------------- end ESL_MIXGEV object maintenance ----------------------*/
176 
177 
178 
179 
180 /****************************************************************************
181  * 2. Evaluating densities and distributions
182  ****************************************************************************/
183 
184 /* Function:  esl_mixgev_pdf()
185  *
186  * Purpose:   Returns the probability density function $P(X=x)$ for
187  *            quantile <x>, given mixture GEV parameters <mg>.
188  */
189 double
esl_mixgev_pdf(double x,ESL_MIXGEV * mg)190 esl_mixgev_pdf(double x, ESL_MIXGEV *mg)
191 {
192   double pdf = 0.;
193   int    k;
194 
195   for (k = 0; k < mg->K; k++)
196     pdf += mg->q[k] * esl_gev_pdf(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
197   return pdf;
198 }
199 
200 /* Function:  esl_mixgev_logpdf()
201  *
202  * Purpose:   Returns the log of the PDF ($\log P(X=x)$) for quantile <x>,
203  *            given mixture GEV parameters <mg>.
204  */
205 double
esl_mixgev_logpdf(double x,ESL_MIXGEV * mg)206 esl_mixgev_logpdf(double x, ESL_MIXGEV *mg)
207 {
208   int k;
209   for (k = 0; k < mg->K; k++)
210     if (mg->q[k] == 0.0)
211       mg->wrk[k] = -eslINFINITY;
212     else
213       mg->wrk[k] =  log(mg->q[k]) +
214 	esl_gev_logpdf(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
215 
216   return esl_vec_DLogSum(mg->wrk, mg->K);
217 }
218 
219 /* Function:  esl_mixgev_cdf()
220  *
221  * Purpose:   Returns the cumulative distribution function $P(X \leq x)$
222  *            for quantile <x>, given mixture GEV parameters <mg>.
223  */
224 double
esl_mixgev_cdf(double x,ESL_MIXGEV * mg)225 esl_mixgev_cdf(double x, ESL_MIXGEV *mg)
226 {
227   double cdf = 0.;
228   int    k;
229 
230   for (k = 0; k < mg->K; k++)
231     cdf += mg->q[k] * esl_gev_cdf(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
232   return cdf;
233 }
234 
235 /* Function:  esl_mixgev_logcdf()
236  *
237  * Purpose:   Returns the log of the CDF $\log P(X \leq x)$
238  *            for quantile <x>, given mixture GEV parameters <mg>.
239  */
240 double
esl_mixgev_logcdf(double x,ESL_MIXGEV * mg)241 esl_mixgev_logcdf(double x, ESL_MIXGEV *mg)
242 {
243   int k;
244 
245   for (k = 0; k < mg->K; k++)
246     if (mg->q[k] == 0.0)
247       mg->wrk[k] = -eslINFINITY;
248     else
249       mg->wrk[k] = log(mg->q[k]) +
250 	esl_gev_logcdf(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
251 
252   return esl_vec_DLogSum(mg->wrk, mg->K);
253 }
254 
255 /* Function:  esl_mixgev_surv()
256  *
257  * Purpose:   Returns the survivor function $P(X > x)$ (1-CDF)
258  *            for quantile <x>, given mixture GEV parameters <mg>.
259  */
260 double
esl_mixgev_surv(double x,ESL_MIXGEV * mg)261 esl_mixgev_surv(double x, ESL_MIXGEV *mg)
262 {
263   double srv = 0.;
264   int    k;
265 
266   for (k = 0; k < mg->K; k++)
267     srv += mg->q[k] * esl_gev_surv(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
268   return srv;
269 }
270 
271 /* Function:  esl_mixgev_logsurv()
272  *
273  * Purpose:   Returns the log survivor function $\log P(X > x)$ (log(1-CDF))
274  *            for quantile <x>, given mixture GEV parameters <mg>.
275  */
276 double
esl_mixgev_logsurv(double x,ESL_MIXGEV * mg)277 esl_mixgev_logsurv(double x, ESL_MIXGEV *mg)
278 {
279   int k;
280   for (k = 0; k < mg->K; k++)
281     {
282       mg->wrk[k] =  log(mg->q[k]);
283       mg->wrk[k] += esl_gev_logsurv(x, mg->mu[k], mg->lambda[k], mg->alpha[k]);
284     }
285   return esl_vec_DLogSum(mg->wrk, mg->K);
286 }
287 
288 /* Function:  esl_mixgev_invcdf()
289  *
290  * Purpose:   Calculates the inverse CDF for a mixture GEV <mg>,
291  *            returning the quantile <x> at which the CDF is <p>,
292  *            where $0 < p < 1$.
293  *
294  *            The inverse CDF of a mixture model has no analytical
295  *            expression as far as I'm aware. The calculation here is
296  *            a brute force bisection search in <x> using the CDF
297  *            function. It will suffice for a small number of calls
298  *            (for plotting applications, for example), but beware, it is not
299  *            efficient.
300  */
301 double
esl_mixgev_invcdf(double p,ESL_MIXGEV * mg)302 esl_mixgev_invcdf(double p, ESL_MIXGEV *mg)
303 {
304   double x1, x2, xm;		/* low, high guesses at x */
305   double f1, f2, fm;
306   double tol = 1e-6;
307 
308   x2 = esl_vec_DMin(mg->mu, mg->K);
309   x1 = x2 - 1.;
310   do {				/* bracket, left side */
311     x1 = x1 + 2.*(x2-x1);
312     f1 = esl_mixgev_cdf(x1, mg);
313   } while (f1 > p);
314   do {				/* bracket, right side */
315     x2 = x2 + 2.*(x2-x1);
316     f2 = esl_mixgev_cdf(x2, mg);
317   } while (f2 < p);
318 
319   do {				/* bisection */
320     xm = (x1+x2) / 2.;
321     fm = esl_mixgev_cdf(xm, mg);
322 
323     if      (fm > p) x2 = xm;
324     else if (fm < p) x1 = xm;
325     else return xm;		/* unlikely case of fm==p */
326   } while ( (x2-x1)/(x1+x2+1e-9) > tol);
327 
328   xm = (x1+x2) / 2.;
329   return xm;
330 }
331 /*-------------------- end densities & distributions ------------------------*/
332 
333 
334 
335 
336 /****************************************************************************
337  * 3. Generic API routines: for general interface w/ histogram module
338  ****************************************************************************/
339 
340 /* Function:  esl_mixgev_generic_pdf()
341  *
342  * Purpose:   Generic-API wrapper around <esl_mixgev_pdf()>, taking
343  *            a void ptr to a <ESL_MIXGEV> parameter structure.
344  */
345 double
esl_mixgev_generic_pdf(double x,void * params)346 esl_mixgev_generic_pdf(double x, void *params)
347 {
348   ESL_MIXGEV *mg = (ESL_MIXGEV *) params;
349   return esl_mixgev_pdf(x, mg);
350 }
351 
352 /* Function:  esl_mixgev_generic_cdf()
353  *
354  * Purpose:   Generic-API wrapper around <esl_mixgev_cdf()>, taking
355  *            a void ptr to a <ESL_MIXGEV> parameter structure.
356  */
357 double
esl_mixgev_generic_cdf(double x,void * params)358 esl_mixgev_generic_cdf(double x, void *params)
359 {
360   ESL_MIXGEV *mg = (ESL_MIXGEV *) params;
361   return esl_mixgev_cdf(x, mg);
362 }
363 
364 /* Function:  esl_mixgev_generic_surv()
365  *
366  * Purpose:   Generic-API wrapper around <esl_mixgev_surv()>, taking
367  *            a void ptr to a <ESL_MIXGEV> parameter structure.
368  */
369 double
esl_mixgev_generic_surv(double x,void * params)370 esl_mixgev_generic_surv(double x, void *params)
371 {
372   ESL_MIXGEV *mg = (ESL_MIXGEV *) params;
373   return esl_mixgev_surv(x, mg);
374 }
375 
376 /* Function:  esl_mixgev_generic_invcdf()
377  *
378  * Purpose:   Generic-API wrapper around <esl_mixgev_invcdf()>, taking
379  *            a void ptr to a <ESL_MIXGEV> parameter structure.
380  */
381 double
esl_mixgev_generic_invcdf(double p,void * params)382 esl_mixgev_generic_invcdf(double p, void *params)
383 {
384   ESL_MIXGEV *mg = (ESL_MIXGEV *) params;
385   return esl_mixgev_invcdf(p, mg);
386 }
387 /*------------------------ end generic API ---------------------------------*/
388 
389 
390 
391 
392 /****************************************************************************
393  * 4. Dumping plots to xmgrace XY format
394  ****************************************************************************/
395 
396 /* Function:  esl_mixgev_Plot()
397  *
398  * Purpose:   Plot some function <func> (for instance, <esl_mixgev_pdf()>)
399  *            for mixture GEV parameters <mg>, for a range of
400  *            quantiles x from <xmin> to <xmax> in steps of <xstep>;
401  *            output to an open stream <fp> in xmgrace XY input format.
402  *
403  * Returns:   <eslOK> on success.
404  *
405  * Throws:    <eslEWRITE> on any system write error.
406  */
407 int
esl_mixgev_Plot(FILE * fp,ESL_MIXGEV * mg,double (* func)(double x,ESL_MIXGEV * mg),double xmin,double xmax,double xstep)408 esl_mixgev_Plot(FILE *fp, ESL_MIXGEV *mg,
409 		double (*func)(double x, ESL_MIXGEV *mg),
410 		double xmin, double xmax, double xstep)
411 {
412   double x;
413   for (x = xmin; x <= xmax; x += xstep)
414     if (fprintf(fp, "%f\t%g\n", x, (*func)(x, mg)) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "mixgev plot write failed");
415   if (fprintf(fp, "&\n")                           < 0) ESL_EXCEPTION_SYS(eslEWRITE, "mixgev plot write failed");
416   return eslOK;
417 }
418 /*-------------------- end plot dumping routines ---------------------------*/
419 
420 
421 
422 
423 
424 /****************************************************************************
425  * 5. Sampling
426  ****************************************************************************/
427 
428 /* Function:  esl_mixgev_Sample()
429  *
430  * Purpose:   Sample a random variate x from a mixture GEV <mg>,
431  *            given random number source <r>.
432  */
433 double
esl_mixgev_Sample(ESL_RANDOMNESS * r,ESL_MIXGEV * mg)434 esl_mixgev_Sample(ESL_RANDOMNESS *r, ESL_MIXGEV *mg)
435 {
436   int k;
437   k = esl_rnd_DChoose(r, mg->q, mg->K);
438   return esl_gev_Sample(r, mg->mu[k], mg->lambda[k], mg->alpha[k]);
439 }
440 
441 /*--------------------------- end sampling ---------------------------------*/
442 
443 
444 
445 
446 
447 
448 /****************************************************************************
449  * 6. ML fitting to complete data
450  ****************************************************************************/
451 
452 struct mixgev_data {
453   double *x;
454   int     n;
455   ESL_MIXGEV *mg;
456 };
457 
458 /* Given mixture GEV parameters in <mg>;
459  * do appropriate c.o.v.'s to unconstrained real parameters
460  * and fill in the packed parameter vector <p>.
461  *
462  * First K-1 are Q_1..Q_K-1 mixture coefficient parameters; Q_0 implicitly 0;
463  *  cov is q_k = e^{Q_k} / \sum_j e^{Q_j},
464  *  so     Q_k = log(q_k) - log(q_0).
465  * Then K components: mu, lambda, optional alpha;
466  * mu, alpha are already unconstrained real;
467  * lambda cov is lambda = e^w, w = log(lambda).
468  */
469 static void
mixgev_pack_paramvector(double * p,int np,ESL_MIXGEV * mg)470 mixgev_pack_paramvector(double *p, int np, ESL_MIXGEV *mg)
471 {
472   int    i;			/* counter in parameter vector p */
473   int    k;			/* counter in mixture components */
474   double z;			/* tmp variable */
475 
476   /* mixture coefficients */
477   z = log(mg->q[0]);
478   i = 0;
479   for (k = 1; k < mg->K; k++)
480     p[i++] = log(mg->q[k]) - z;
481 
482   /* gev parameters */
483   for (k = 0; k < mg->K; k++)
484     {
485       p[i++] = mg->mu[k];
486       p[i++] = log(mg->lambda[k]);
487       if (! mg->isgumbel[k]) p[i++] = mg->alpha[k];
488     }
489   /* assert(i==np) in debugging, if you want */
490 }
491 
492 /* Same as above but in reverse: given parameter vector <p>,
493  * do appropriate c.o.v. back to desired parameter space, and
494  * fill in the mixture GEV structure <mg>.
495  */
496 static void
mixgev_unpack_paramvector(double * p,int np,ESL_MIXGEV * mg)497 mixgev_unpack_paramvector(double *p, int np, ESL_MIXGEV *mg)
498 {
499   int    i;			/* counter in parameter vector p */
500   int    k;			/* counter in mixture components */
501   double z;			/* tmp variable  */
502 
503   /* Fetch the params in their c.o.v. space first
504    */
505   i = 0;
506   mg->q[0] = 0;	/* implicitly */
507   for (k = 1; k < mg->K; k++)
508     mg->q[k] = p[i++];
509   for (k = 0; k < mg->K; k++)
510     {
511       mg->mu[k]     = p[i++];
512       mg->lambda[k] = p[i++];
513       if (!mg->isgumbel[k]) mg->alpha[k]  = p[i++];
514       else                  mg->alpha[k]  = 0.;
515     }
516   assert(i==np);
517 
518   /* Convert mix coefficients back to probabilities;
519    * their  c.o.v. is q_k = e^{Q_k} / \sum_k e^{Q_k}
520    * which rearranges to exp(Q_k - log[\sum_k e^Q_k]),
521    * and we have the DLogSum() function to compute the log sum.
522    */
523   z = esl_vec_DLogSum(mg->q, mg->K);
524   for (k = 0; k < mg->K; k++)
525     mg->q[k] = exp(mg->q[k] - z);
526 
527   /* lambda c.o.v. is \lambda = e^w
528    */
529   for (k = 0; k < mg->K; k++)
530     mg->lambda[k] = exp(mg->lambda[k]);
531 }
532 
533 static double
mixgev_complete_func(double * p,int np,void * dptr)534 mixgev_complete_func(double *p, int np, void *dptr)
535 {
536   struct mixgev_data *data = (struct mixgev_data *) dptr;
537   ESL_MIXGEV         *mg   = data->mg;
538   int    i;
539   double logL;
540 
541   /* Use the current parameter vector (in its unconstrained
542    * real c.o.v. space) to deduce what the current mixture GEV
543    * parameters are:
544    */
545   mixgev_unpack_paramvector(p, np, mg);
546 
547   /* Calculate the log likelihood:
548    */
549   logL = 0;
550   for (i = 0; i < data->n; i++)
551     logL += esl_mixgev_logpdf(data->x[i], mg);
552 
553   /* return the NLL
554    */
555   return -logL;
556 }
557 
558 
559 /* Function:  esl_mixgev_FitGuess()
560  *
561  * Purpose:   Make initial randomized guesses at the parameters
562  *            of mixture GEV <mg>, using random number generator
563  *            <r> and observed data consisting of <n> values
564  *            <x[0..n-1]>. This guess is a suitable starting
565  *            point for a parameter optimization routine, such
566  *            as <esl_mixgev_FitComplete()>.
567  *
568  *            Specifically, we estimate one 'central' guess
569  *            for a single Gumbel fit to the data, using the
570  *            method of moments. Then we add $\pm 10\%$ to that 'central'
571  *            $\mu$ and $\lambda$ to get each component
572  *            $\mu_i$ and $\lambda_i$. The $\alpha_i$ parameters
573  *            are generated by sampling uniformly from $-0.1..0.1$.
574  *            Mixture coefficients $q_i$ are sampled uniformly.
575  *
576  * Args:      r   - randomness source
577  *            x   - vector of observed data values to fit, 0..n-1
578  *            n   - number of values in <x>
579  *            mg  - mixture GEV to put guessed params into
580  *
581  * Returns:   <eslOK> on success.
582  */
583 int
esl_mixgev_FitGuess(ESL_RANDOMNESS * r,double * x,int n,ESL_MIXGEV * mg)584 esl_mixgev_FitGuess(ESL_RANDOMNESS *r, double *x, int n, ESL_MIXGEV *mg)
585 {
586   double mean, variance;
587   double mu, lambda;
588   int    k;
589 
590   esl_stats_DMean(x, n, &mean, &variance);
591   lambda = eslCONST_PI / sqrt(6.*variance);
592   mu     = mean - 0.57722/lambda;
593 
594   esl_dirichlet_DSampleUniform(r, mg->K, mg->q);
595   for (k = 0; k < mg->K; k++)
596     {
597       mg->mu[k]     = mu     + 0.2 * mu     * (esl_random(r) - 0.5);
598       mg->lambda[k] = lambda + 0.2 * lambda * (esl_random(r) - 0.5);
599       if (mg->isgumbel[k]) mg->alpha[k] = 0.;
600       else mg->alpha[k] = 0.2 * (esl_random(r) - 0.5);
601     }
602   return eslOK;
603 }
604 
605 
606 /* Function:  esl_mixgev_FitComplete()
607  *
608  * Purpose:   Given <n> observed data values <x[0..n-1]>, and
609  *            an initial guess at a mixture GEV fit to those data
610  *            <mg>, use conjugate gradient descent to perform
611  *            a locally optimal maximum likelihood mixture
612  *            GEV parameter fit to the data.
613  *
614  *            To obtain a reasonable initial guess for <mg>,
615  *            see <esl_mixgev_FitGuess()>.
616  *
617  * Args:      x   - observed data, <x[0..n-1]>.
618  *            n   - number of samples in <x>
619  *            mg  - mixture GEV to estimate, w/ params set to
620  *                  an initial guess.
621  *
622  * Returns:   <eslOK> on success, and <mg> contains local
623  *            ML estimate for mixture GEV parameters.
624  *
625  * Throws:    <eslEMEM> on allocation error, and <mg> is unchanged
626  *            from its initial state.
627  */
628 int
esl_mixgev_FitComplete(double * x,int n,ESL_MIXGEV * mg)629 esl_mixgev_FitComplete(double *x, int n, ESL_MIXGEV *mg)
630 {
631   ESL_MIN_CFG *cfg = NULL;
632   struct mixgev_data data;
633   int     status;
634   double *p = NULL;
635   int     np;
636   double  fx;
637   int     k;
638   int     i;
639 
640   /* Determine number of free parameters and allocate  */
641   np = mg->K-1;			/* K-1 mix coefficients free */
642   for (k = 0; k < mg->K; k++)
643     np += (mg->isgumbel[k])? 2 : 3;
644   ESL_ALLOC(p,   sizeof(double) * np);
645 
646   /* Customize the minimizer */
647   cfg = esl_min_cfg_Create(np);
648   cfg->cg_rtol = 1e-6;
649   i = 0;
650   for (k = 1; k < mg->K; k++) cfg->u[i++] = 1.0;
651   for (k = 0; k < mg->K; k++)
652     {
653       cfg->u[i++] = 1.0;
654       cfg->u[i++] = 1.0;
655       if (! mg->isgumbel[k]) cfg->u[i++] = 0.02;
656     }
657   ESL_DASSERT1( (np == i) );
658 
659 
660   /* Copy shared info into the "data" structure
661    */
662   data.x   = x;
663   data.n   = n;
664   data.mg  = mg;
665 
666   /* From mg, create the parameter vector.
667    */
668   mixgev_pack_paramvector(p, np, mg);
669 
670   /* Feed it all to the mighty optimizer. */
671   status = esl_min_ConjugateGradientDescent(cfg, p, np, &mixgev_complete_func, NULL,
672 					    (void *) (&data), &fx, NULL);
673   if (status != eslOK) goto ERROR;
674 
675   /* Convert the final parameter vector back to a mixture GEV
676    */
677   mixgev_unpack_paramvector(p, np, mg);
678 
679   free(p);
680   esl_min_cfg_Destroy(cfg);
681   return eslOK;
682 
683  ERROR:
684   free(p);
685   esl_min_cfg_Destroy(cfg);
686   return status;
687 }
688 /*--------------------------- end fitting ----------------------------------*/
689 
690 
691 
692 
693 /****************************************************************************
694  * Example main()
695  ****************************************************************************/
696 
697 #ifdef eslMIXGEV_EXAMPLE
698 /*::cexcerpt::mixgev_example::begin::*/
699 /* compile:
700    gcc -g -Wall -I. -L. -o example -DeslMIXGEV_EXAMPLE esl_mixgev.c -leasel -lm
701  * run:     ./example
702  */
703 #include <stdio.h>
704 #include <stdlib.h>
705 #include "easel.h"
706 #include "esl_mixgev.h"
707 #include "esl_random.h"
708 
709 int
main(int argc,char ** argv)710 main(int argc, char **argv)
711 {
712   FILE *fp;
713   ESL_RANDOMNESS *r;		/* source of random numbers   */
714   ESL_MIXGEV *mg;		/* mixture GEV to sample from */
715   ESL_MIXGEV *emg;		/* estimated mixture GEV      */
716   double     *x;		/* sampled dataset            */
717   int         n = 100000;	/* number of samples          */
718   int         i;
719   int         k;
720   double      nll;
721   double      min, max;
722 
723   r  = esl_randomness_Create(42);
724   mg = esl_mixgev_Create(2);
725   mg->q[0]      = 0.85;   mg->q[1]      = 0.15;
726   mg->mu[0]     = -2.72;  mg->mu[1]     = -2.0;
727   mg->lambda[0] = 2.5;    mg->lambda[1] = 1.0;
728   mg->alpha[0]  = 0.;     mg->alpha[1]  = 0.09;
729 
730   nll = 0.;
731   min = 99999;
732   max = -99999;
733 
734   x = malloc(sizeof(double) * n);
735   for (i = 0; i < n; i++)
736     {
737       x[i] = esl_mixgev_Sample(r, mg);
738       nll -= esl_mixgev_logpdf(x[i], mg);
739       if (x[i] > max) max = x[i];
740       if (x[i] < min) min = x[i];
741     }
742   printf("NLL of known mixGEV: %g\n", nll);
743 
744   /* Dump the raw data samples to an R file.
745    */
746   fp = fopen("data.out", "w");
747   fprintf(fp, "     val\n");
748   for (i = 0; i < n; i++)
749     fprintf(fp, "%d   %f\n", i+1, x[i]);
750   fclose(fp);
751 
752   emg = esl_mixgev_Create(2);
753   esl_mixgev_FitGuess(r, x, n, emg);
754   /*  esl_mixgev_Copy(mg, emg); */
755   esl_mixgev_ForceGumbel(emg, 0);
756   esl_mixgev_FitComplete(x, n, emg);
757 
758   printf("Component   q      mu   lambda  alpha\n");
759   for (k=0; k < 2; k++)
760     printf("%d\t%7.4f\t%7.2f\t%7.4f\t%7.4f\n",
761 	   k, emg->q[k], emg->mu[k], emg->lambda[k], emg->alpha[k]);
762 
763   nll = 0.;
764   for (i = 0; i < n; i++)
765     nll -= esl_mixgev_logpdf(x[i], emg);
766   printf("NLL of fitted mixGEV: %g\n", nll);
767 
768   /* Dump some R commands for showing these distributions
769    */
770   printf("library(ismev)\n");
771   printf("library(evd)\n");
772 
773   printf("d <- read.table(\"data.out\")$val\n");
774   printf("plot(density(d,bw=0.2), log=\"y\")\n");
775   printf("min <- %f\n", min);
776   printf("max <- %f\n", max);
777   printf("xax <- seq(min-2, max+5, by=0.1)\n");
778   printf("cc <- xax - xax\n");
779   printf("zc <- xax - xax\n");
780   for (k = 0; k < mg->K; k++)
781     {
782       printf("c%d  <- %f * dgev(xax, %f, %f, %f)\n",
783 	     k, mg->q[k], mg->mu[k], 1./mg->lambda[k], mg->alpha[k]);
784       printf("cc   <- cc + c%d\n", k);
785       printf("lines(xax, c%d, col=\"blue\")\n", k);
786     }
787   for (k = 0; k < emg->K; k++)
788     {
789       printf("z%d  <- %f * dgev(xax, %f, %f, %f)\n",
790 	     k, emg->q[k], emg->mu[k], 1./emg->lambda[k], emg->alpha[k]);
791       printf("zc   <- zc + z%d\n", k);
792       printf("lines(xax, z%d, col=\"blue\")\n", k);
793     }
794   printf("lines(xax, cc, col=\"green\")\n");
795   printf("lines(xax, zc, col=\"red\")\n");
796 
797   esl_mixgev_Destroy(mg);
798   esl_mixgev_Destroy(emg);
799   esl_randomness_Destroy(r);
800   free(x);
801   return 0;
802 }
803 /*::cexcerpt::mixgev_example::end::*/
804 #endif /*eslMIXGEV_EXAMPLE*/
805 
806