1 /* Dirichlet and Beta densities.
2  *
3  * Contents:
4  *   1. Dirichlet likelihood functions
5  *   2. Sampling from Dirichlets
6  *   3. Unit tests
7  *   4. Test driver
8  *   5. Example
9  *
10  * See also:
11  *   esl_mixdchlet : mixture Dirichlets
12  *   esl_gamma:      Gamma densities
13  */
14 #include "esl_config.h"
15 
16 #include <assert.h>
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <math.h>
20 
21 #include "easel.h"
22 #include "esl_fileparser.h"
23 #include "esl_minimizer.h"
24 #include "esl_random.h"
25 #include "esl_stats.h"
26 #include "esl_vectorops.h"
27 
28 #include "esl_dirichlet.h"
29 
30 
31 
32 /*****************************************************************
33  *# 1. Dirichlet likelihood functions
34  *****************************************************************/
35 
36 /* Function:  esl_dirichlet_logpdf()
37  *
38  * Purpose:   Given Dirichlet parameter vector <alpha> and a probability
39  *            vector <p>, both of cardinality <K>; return
40  *            $\log P(p \mid alpha)$.
41  *
42  * Returns:   $\log P(p \mid alpha)$.
43  *
44  * Xref:      Sjolander (1996) appendix, lemma 2.
45  */
46 double
esl_dirichlet_logpdf(double * p,double * alpha,int K)47 esl_dirichlet_logpdf(double *p, double *alpha, int K)
48 {
49   double sum;		        /* for Gammln(|alpha|) in Z     */
50   double logp;			/* RETURN: log P(p|alpha)       */
51   double val;
52   int    a;
53 
54   sum = logp = 0.0;
55   for (a = 0; a < K; a++)
56     if (p[a] > 0.0)		/* any param that is == 0.0 doesn't exist */
57       {
58 	esl_stats_LogGamma(alpha[a], &val);
59 	logp -= val;
60 	logp += (alpha[a]-1.0) * log(p[a]);
61 	sum  += alpha[a];
62       }
63   esl_stats_LogGamma(sum, &val); // esl_stats_LogGamma() can only fail for x < 0; here sum > 0
64   logp += val;
65   return logp;
66 }
67 
68 
69 /* Function:  esl_dirichlet_logpdf_c()
70  *
71  * Purpose:   Given an observed count vector $c[0..K-1]$,
72  *            and a simple Dirichlet density parameterized by
73  *            $\alpha[0..K-1]$;
74  *            return $\log P(c \mid \alpha)$.
75  *
76  *            This is $\int P(c \mid p) P(p \mid \alpha) dp$,
77  *            an integral that can be solved analytically.
78  *
79  * Args:      c          - count vector, [0..K-1]
80  *            alpha      - Dirichlet parameters, [0..K-1]
81  *            K          - size of c, alpha vectors
82  *
83  * Returns:   <eslOK> on success, and puts result $\log P(c \mid \alpha)$
84  *            in <ret_answer>.
85  */
86 double
esl_dirichlet_logpdf_c(double * c,double * alpha,int K)87 esl_dirichlet_logpdf_c(double *c, double *alpha, int K)
88 {
89   double logp;
90   double sum1, sum2, sum3;
91   double a1, a2, a3;
92   int    a;
93 
94   sum1 = sum2 = sum3 = logp = 0.0;
95   for (a = 0; a < K; a++)
96     {
97       sum1 += c[a] + alpha[a];
98       sum2 += alpha[a];
99       sum3 += c[a];
100       esl_stats_LogGamma(alpha[a] + c[a], &a1);
101       esl_stats_LogGamma(c[a] + 1.,       &a2);
102       esl_stats_LogGamma(alpha[a],        &a3);
103       logp  += a1 - a2 - a3;
104     }
105   esl_stats_LogGamma(sum1,      &a1);
106   esl_stats_LogGamma(sum2,      &a2);
107   esl_stats_LogGamma(sum3 + 1., &a3);
108   logp += a2 + a3 - a1;
109 
110   return logp;
111 }
112 /*----------- end, Dirichlet likelihood functions ---------------*/
113 
114 
115 
116 /*****************************************************************
117  *# 2. Sampling from Dirichlets
118  *****************************************************************/
119 
120 /* Function:  esl_dirichlet_DSample()
121  *
122  * Purpose:   Given a Dirichlet density parameterized by $\alpha[0..K-1]$,
123  *            sample a probability vector $p[0..K-1]$ from
124  *            $P(p \mid \alpha)$.
125  *
126  * Args:      r      - random number generation object
127  *            alpha  - parameters of Dirichlet density [0..K-1]
128  *            K      - vector size
129  *            p      - RETURN: sampled probability vector
130  *                     (caller allocates 0..K-1).
131  *
132  * Returns:   <eslOK>, and <p> will contain the sampled vector.
133  */
134 int
esl_dirichlet_DSample(ESL_RANDOMNESS * r,double * alpha,int K,double * p)135 esl_dirichlet_DSample(ESL_RANDOMNESS *r, double *alpha, int K, double *p)
136 {
137   int x;
138 
139   for (x = 0; x < K; x++)
140     p[x] = esl_rnd_Gamma(r, alpha[x]);
141   esl_vec_DNorm(p, K);
142   return eslOK;
143 }
144 
145 /* Function:  esl_dirichlet_FSample()
146  *
147  * Purpose:   Same as <esl_dirichlet_DSample()>, except it
148  *            works in single-precision floats, not doubles.
149  */
150 int
esl_dirichlet_FSample(ESL_RANDOMNESS * r,float * alpha,int K,float * p)151 esl_dirichlet_FSample(ESL_RANDOMNESS *r, float *alpha, int K, float *p)
152 {
153   int x;
154 
155   for (x = 0; x < K; x++)
156     p[x] = (float) esl_rnd_Gamma(r, (double) alpha[x]);
157   esl_vec_FNorm(p, K);
158   return eslOK;
159 }
160 
161 /* Function:  esl_dirichlet_DSampleUniform()
162  *
163  * Purpose:   Sample a probability vector $p[0..K-1]$ uniformly, by
164  *            sampling from a Dirichlet of $\alpha_i = 1.0 \forall i$.
165  *
166  * Args:      r  - source of random numbers
167  *            K  - vector size
168  *            p  - RETURN: sampled prob vector, caller alloc'ed 0..K-1
169  *
170  * Returns:   <eslOK>, and <p> will contain the sampled vector.
171  *
172  * Throws:    (no abnormal error conditions)
173  */
174 int
esl_dirichlet_DSampleUniform(ESL_RANDOMNESS * r,int K,double * p)175 esl_dirichlet_DSampleUniform(ESL_RANDOMNESS *r, int K, double *p)
176 {
177   int x;
178   for (x = 0; x < K; x++)
179     p[x] = esl_rnd_Gamma(r, 1.0);
180   esl_vec_DNorm(p, K);
181   return eslOK;
182 }
183 
184 /* Function:  esl_dirichlet_FSampleUniform()
185  *
186  * Purpose:   Same as <esl_dirichlet_DSampleUniform()>, except it
187  *            works in single-precision floats, not doubles.
188  */
189 int
esl_dirichlet_FSampleUniform(ESL_RANDOMNESS * r,int K,float * p)190 esl_dirichlet_FSampleUniform(ESL_RANDOMNESS *r, int K, float *p)
191 {
192   int x;
193   for (x = 0; x < K; x++)
194     p[x] = (float) esl_rnd_Gamma(r, 1.0);
195   esl_vec_FNorm(p, K);
196   return eslOK;
197 }
198 
199 
200 /* Function:  esl_dirichlet_SampleBeta()
201  *
202  * Purpose:   Samples from a Beta(theta1, theta2) density, leaves answer
203  *            in <ret_answer>. (Special case of sampling Dirichlet.)
204  *
205  * Returns:   <eslOK>.
206  */
207 int
esl_dirichlet_SampleBeta(ESL_RANDOMNESS * r,double theta1,double theta2,double * ret_answer)208 esl_dirichlet_SampleBeta(ESL_RANDOMNESS *r, double theta1, double theta2, double *ret_answer)
209 {
210   double p, q;
211 
212   p = esl_rnd_Gamma(r, theta1);
213   q = esl_rnd_Gamma(r, theta2);
214   *ret_answer = p / (p+q);
215   return eslOK;
216 }
217 /*-------------- end, sampling from Dirichlets -------------------*/
218 
219 
220 /*****************************************************************
221  * 3. Unit tests
222  *****************************************************************/
223 #ifdef eslDIRICHLET_TESTDRIVE
224 
225 /* utest_uniformity()
226  * Tests that probability vectors sampled from a uniform Dirichlet
227  * density indeed have uniform probabilities.
228  */
229 static void
utest_uniformity(ESL_RANDOMNESS * rng)230 utest_uniformity(ESL_RANDOMNESS *rng)
231 {
232   char   msg[]    = "esl_dirichlet unifomity test failed";
233   int    K        = 4;
234   int    N        = 100;    // number of counts to collect
235   double tol      = 1e-6;
236   double alpha[K], p[K], c[K];
237   double logp,   prv_logp;
238   double logpc,  prv_logpc;
239   int    i,j;
240 
241   esl_vec_DSet(alpha, K, 1.0);  // uniform density P(p | alpha)
242 
243   for (i = 0; i < 20; i++)
244     {
245       esl_dirichlet_DSample(rng, alpha, 4, p);
246 
247       esl_vec_DSet(c, K, 0.);
248       for (j = 0; j < N; j++)
249 	c[ esl_rnd_DChoose(rng, p, K) ] += 1.;
250 
251       logp = esl_dirichlet_logpdf(p, alpha, 4);
252       if (! isfinite(logp))                                    esl_fatal(msg);  // PDF is a density; can't test for <= 1.0
253       if (i > 0 && esl_DCompare(logp, prv_logp, tol) != eslOK) esl_fatal(msg);
254 
255       logpc = esl_dirichlet_logpdf_c(c, alpha, K);
256       if (! isfinite(logpc))                                     esl_fatal(msg);
257       if (i > 0 && esl_DCompare(logpc, prv_logpc, tol) != eslOK) esl_fatal(msg);
258 
259       prv_logp  = logp;
260       prv_logpc = logpc;
261     }
262 }
263 #endif // eslDIRICHLET_TESTDRIVE
264 
265 /*****************************************************************
266  * 4. Test driver
267  *****************************************************************/
268 #ifdef eslDIRICHLET_TESTDRIVE
269 
270 #include "easel.h"
271 #include "esl_getopts.h"
272 #include "esl_random.h"
273 
274 static ESL_OPTIONS options[] = {
275   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
276   { "-h",        eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
277   { "-s",        eslARG_INT,      "0",  NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                    0 },
278   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
279 };
280 static char usage[]  = "[-options]";
281 static char banner[] = "test driver for dirichlet module";
282 
283 int
main(int argc,char ** argv)284 main(int argc, char **argv)
285 {
286   ESL_GETOPTS    *go  = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
287   ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
288 
289   fprintf(stderr, "## %s\n", argv[0]);
290   fprintf(stderr, "#  rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng));
291 
292   utest_uniformity(rng);
293 
294   fprintf(stderr, "#  status = ok\n");
295   esl_randomness_Destroy(rng);
296   esl_getopts_Destroy(go);
297   return 0;
298 }
299 
300 #endif // eslDIRICHLET_TESTDRIVE
301 
302 
303 
304 /*****************************************************************
305  * 5. Example
306  *****************************************************************/
307 #ifdef eslDIRICHLET_EXAMPLE
308 
309 #include "easel.h"
310 #include "esl_getopts.h"
311 #include "esl_random.h"
312 #include "esl_vectorops.h"
313 #include "esl_dirichlet.h"
314 
315 static ESL_OPTIONS options[] = {
316   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
317   { "-h",        eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",             0 },
318   { "-s",        eslARG_INT,      "0",  NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                    0 },
319   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
320 };
321 static char usage[]  = "[-options]";
322 static char banner[] = "examples of using dirichlet module";
323 
324 int
main(int argc,char ** argv)325 main(int argc, char **argv)
326 {
327   ESL_GETOPTS    *go    = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
328   ESL_RANDOMNESS *rng   = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
329   int             K     = 4;
330   double        *alpha1 = malloc(sizeof(double) * K);
331   double        *alpha2 = malloc(sizeof(double) * K);
332   double        *p      = malloc(sizeof(double) * K);
333   double        *c      = malloc(sizeof(double) * K);
334   int            sumc   = 1000;
335   int            N      = 1000;
336   double         logp1, logp2;
337   int            j,a;
338 
339   esl_vec_DSet(alpha1, K, 1.0);   // alpha1 is a flat prior
340   esl_vec_DSet(alpha2, K, 10.0);  // alpha2 is a peaked prior, with modes p_i = 1/K
341 
342   /* Sample probability & count vector from Dirichlet 1.
343    * Calculate its log probability under both.
344    * Output p vector, c vector, and the two log P's.
345    * Since Dirichlet 1 is a uniform distribution, first logp is always the same.
346    */
347   while (N--)
348     {
349       esl_dirichlet_DSample(rng, alpha1, K, p);
350       esl_vec_DSet(c, K, 0.);
351       for (j = 0; j < sumc; j++) c[esl_rnd_DChoose(rng, p, K)] += 1.;
352 
353       logp1 = esl_dirichlet_logpdf_c(c, alpha1, K);
354       logp2 = esl_dirichlet_logpdf_c(c, alpha2, K);
355 
356       printf("[ ");
357       for (a = 0; a < K; a++) printf("%6.4f ", p[a]);
358       printf("]   [ ");
359       for (a = 0; a < K; a++) printf("%6.0f ", c[a]);
360       printf("]   %10.4g %10.4g\n", logp1, logp2);
361     }
362 
363 
364   free(alpha1);
365   free(alpha2);
366   free(p);
367   free(c);
368   esl_randomness_Destroy(rng);
369   esl_getopts_Destroy(go);
370   return 0;
371 }
372 #endif //eslDIRICHLET_EXAMPLE
373