1 /* Dirichlet and Beta densities.
2 *
3 * Contents:
4 * 1. Dirichlet likelihood functions
5 * 2. Sampling from Dirichlets
6 * 3. Unit tests
7 * 4. Test driver
8 * 5. Example
9 *
10 * See also:
11 * esl_mixdchlet : mixture Dirichlets
12 * esl_gamma: Gamma densities
13 */
14 #include "esl_config.h"
15
16 #include <assert.h>
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <math.h>
20
21 #include "easel.h"
22 #include "esl_fileparser.h"
23 #include "esl_minimizer.h"
24 #include "esl_random.h"
25 #include "esl_stats.h"
26 #include "esl_vectorops.h"
27
28 #include "esl_dirichlet.h"
29
30
31
32 /*****************************************************************
33 *# 1. Dirichlet likelihood functions
34 *****************************************************************/
35
36 /* Function: esl_dirichlet_logpdf()
37 *
38 * Purpose: Given Dirichlet parameter vector <alpha> and a probability
39 * vector <p>, both of cardinality <K>; return
40 * $\log P(p \mid alpha)$.
41 *
42 * Returns: $\log P(p \mid alpha)$.
43 *
44 * Xref: Sjolander (1996) appendix, lemma 2.
45 */
46 double
esl_dirichlet_logpdf(double * p,double * alpha,int K)47 esl_dirichlet_logpdf(double *p, double *alpha, int K)
48 {
49 double sum; /* for Gammln(|alpha|) in Z */
50 double logp; /* RETURN: log P(p|alpha) */
51 double val;
52 int a;
53
54 sum = logp = 0.0;
55 for (a = 0; a < K; a++)
56 if (p[a] > 0.0) /* any param that is == 0.0 doesn't exist */
57 {
58 esl_stats_LogGamma(alpha[a], &val);
59 logp -= val;
60 logp += (alpha[a]-1.0) * log(p[a]);
61 sum += alpha[a];
62 }
63 esl_stats_LogGamma(sum, &val); // esl_stats_LogGamma() can only fail for x < 0; here sum > 0
64 logp += val;
65 return logp;
66 }
67
68
69 /* Function: esl_dirichlet_logpdf_c()
70 *
71 * Purpose: Given an observed count vector $c[0..K-1]$,
72 * and a simple Dirichlet density parameterized by
73 * $\alpha[0..K-1]$;
74 * return $\log P(c \mid \alpha)$.
75 *
76 * This is $\int P(c \mid p) P(p \mid \alpha) dp$,
77 * an integral that can be solved analytically.
78 *
79 * Args: c - count vector, [0..K-1]
80 * alpha - Dirichlet parameters, [0..K-1]
81 * K - size of c, alpha vectors
82 *
83 * Returns: <eslOK> on success, and puts result $\log P(c \mid \alpha)$
84 * in <ret_answer>.
85 */
86 double
esl_dirichlet_logpdf_c(double * c,double * alpha,int K)87 esl_dirichlet_logpdf_c(double *c, double *alpha, int K)
88 {
89 double logp;
90 double sum1, sum2, sum3;
91 double a1, a2, a3;
92 int a;
93
94 sum1 = sum2 = sum3 = logp = 0.0;
95 for (a = 0; a < K; a++)
96 {
97 sum1 += c[a] + alpha[a];
98 sum2 += alpha[a];
99 sum3 += c[a];
100 esl_stats_LogGamma(alpha[a] + c[a], &a1);
101 esl_stats_LogGamma(c[a] + 1., &a2);
102 esl_stats_LogGamma(alpha[a], &a3);
103 logp += a1 - a2 - a3;
104 }
105 esl_stats_LogGamma(sum1, &a1);
106 esl_stats_LogGamma(sum2, &a2);
107 esl_stats_LogGamma(sum3 + 1., &a3);
108 logp += a2 + a3 - a1;
109
110 return logp;
111 }
112 /*----------- end, Dirichlet likelihood functions ---------------*/
113
114
115
116 /*****************************************************************
117 *# 2. Sampling from Dirichlets
118 *****************************************************************/
119
120 /* Function: esl_dirichlet_DSample()
121 *
122 * Purpose: Given a Dirichlet density parameterized by $\alpha[0..K-1]$,
123 * sample a probability vector $p[0..K-1]$ from
124 * $P(p \mid \alpha)$.
125 *
126 * Args: r - random number generation object
127 * alpha - parameters of Dirichlet density [0..K-1]
128 * K - vector size
129 * p - RETURN: sampled probability vector
130 * (caller allocates 0..K-1).
131 *
132 * Returns: <eslOK>, and <p> will contain the sampled vector.
133 */
134 int
esl_dirichlet_DSample(ESL_RANDOMNESS * r,double * alpha,int K,double * p)135 esl_dirichlet_DSample(ESL_RANDOMNESS *r, double *alpha, int K, double *p)
136 {
137 int x;
138
139 for (x = 0; x < K; x++)
140 p[x] = esl_rnd_Gamma(r, alpha[x]);
141 esl_vec_DNorm(p, K);
142 return eslOK;
143 }
144
145 /* Function: esl_dirichlet_FSample()
146 *
147 * Purpose: Same as <esl_dirichlet_DSample()>, except it
148 * works in single-precision floats, not doubles.
149 */
150 int
esl_dirichlet_FSample(ESL_RANDOMNESS * r,float * alpha,int K,float * p)151 esl_dirichlet_FSample(ESL_RANDOMNESS *r, float *alpha, int K, float *p)
152 {
153 int x;
154
155 for (x = 0; x < K; x++)
156 p[x] = (float) esl_rnd_Gamma(r, (double) alpha[x]);
157 esl_vec_FNorm(p, K);
158 return eslOK;
159 }
160
161 /* Function: esl_dirichlet_DSampleUniform()
162 *
163 * Purpose: Sample a probability vector $p[0..K-1]$ uniformly, by
164 * sampling from a Dirichlet of $\alpha_i = 1.0 \forall i$.
165 *
166 * Args: r - source of random numbers
167 * K - vector size
168 * p - RETURN: sampled prob vector, caller alloc'ed 0..K-1
169 *
170 * Returns: <eslOK>, and <p> will contain the sampled vector.
171 *
172 * Throws: (no abnormal error conditions)
173 */
174 int
esl_dirichlet_DSampleUniform(ESL_RANDOMNESS * r,int K,double * p)175 esl_dirichlet_DSampleUniform(ESL_RANDOMNESS *r, int K, double *p)
176 {
177 int x;
178 for (x = 0; x < K; x++)
179 p[x] = esl_rnd_Gamma(r, 1.0);
180 esl_vec_DNorm(p, K);
181 return eslOK;
182 }
183
184 /* Function: esl_dirichlet_FSampleUniform()
185 *
186 * Purpose: Same as <esl_dirichlet_DSampleUniform()>, except it
187 * works in single-precision floats, not doubles.
188 */
189 int
esl_dirichlet_FSampleUniform(ESL_RANDOMNESS * r,int K,float * p)190 esl_dirichlet_FSampleUniform(ESL_RANDOMNESS *r, int K, float *p)
191 {
192 int x;
193 for (x = 0; x < K; x++)
194 p[x] = (float) esl_rnd_Gamma(r, 1.0);
195 esl_vec_FNorm(p, K);
196 return eslOK;
197 }
198
199
200 /* Function: esl_dirichlet_SampleBeta()
201 *
202 * Purpose: Samples from a Beta(theta1, theta2) density, leaves answer
203 * in <ret_answer>. (Special case of sampling Dirichlet.)
204 *
205 * Returns: <eslOK>.
206 */
207 int
esl_dirichlet_SampleBeta(ESL_RANDOMNESS * r,double theta1,double theta2,double * ret_answer)208 esl_dirichlet_SampleBeta(ESL_RANDOMNESS *r, double theta1, double theta2, double *ret_answer)
209 {
210 double p, q;
211
212 p = esl_rnd_Gamma(r, theta1);
213 q = esl_rnd_Gamma(r, theta2);
214 *ret_answer = p / (p+q);
215 return eslOK;
216 }
217 /*-------------- end, sampling from Dirichlets -------------------*/
218
219
220 /*****************************************************************
221 * 3. Unit tests
222 *****************************************************************/
223 #ifdef eslDIRICHLET_TESTDRIVE
224
225 /* utest_uniformity()
226 * Tests that probability vectors sampled from a uniform Dirichlet
227 * density indeed have uniform probabilities.
228 */
229 static void
utest_uniformity(ESL_RANDOMNESS * rng)230 utest_uniformity(ESL_RANDOMNESS *rng)
231 {
232 char msg[] = "esl_dirichlet unifomity test failed";
233 int K = 4;
234 int N = 100; // number of counts to collect
235 double tol = 1e-6;
236 double alpha[K], p[K], c[K];
237 double logp, prv_logp;
238 double logpc, prv_logpc;
239 int i,j;
240
241 esl_vec_DSet(alpha, K, 1.0); // uniform density P(p | alpha)
242
243 for (i = 0; i < 20; i++)
244 {
245 esl_dirichlet_DSample(rng, alpha, 4, p);
246
247 esl_vec_DSet(c, K, 0.);
248 for (j = 0; j < N; j++)
249 c[ esl_rnd_DChoose(rng, p, K) ] += 1.;
250
251 logp = esl_dirichlet_logpdf(p, alpha, 4);
252 if (! isfinite(logp)) esl_fatal(msg); // PDF is a density; can't test for <= 1.0
253 if (i > 0 && esl_DCompare(logp, prv_logp, tol) != eslOK) esl_fatal(msg);
254
255 logpc = esl_dirichlet_logpdf_c(c, alpha, K);
256 if (! isfinite(logpc)) esl_fatal(msg);
257 if (i > 0 && esl_DCompare(logpc, prv_logpc, tol) != eslOK) esl_fatal(msg);
258
259 prv_logp = logp;
260 prv_logpc = logpc;
261 }
262 }
263 #endif // eslDIRICHLET_TESTDRIVE
264
265 /*****************************************************************
266 * 4. Test driver
267 *****************************************************************/
268 #ifdef eslDIRICHLET_TESTDRIVE
269
270 #include "easel.h"
271 #include "esl_getopts.h"
272 #include "esl_random.h"
273
274 static ESL_OPTIONS options[] = {
275 /* name type default env range toggles reqs incomp help docgroup*/
276 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
277 { "-s", eslARG_INT, "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0 },
278 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
279 };
280 static char usage[] = "[-options]";
281 static char banner[] = "test driver for dirichlet module";
282
283 int
main(int argc,char ** argv)284 main(int argc, char **argv)
285 {
286 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
287 ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
288
289 fprintf(stderr, "## %s\n", argv[0]);
290 fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng));
291
292 utest_uniformity(rng);
293
294 fprintf(stderr, "# status = ok\n");
295 esl_randomness_Destroy(rng);
296 esl_getopts_Destroy(go);
297 return 0;
298 }
299
300 #endif // eslDIRICHLET_TESTDRIVE
301
302
303
304 /*****************************************************************
305 * 5. Example
306 *****************************************************************/
307 #ifdef eslDIRICHLET_EXAMPLE
308
309 #include "easel.h"
310 #include "esl_getopts.h"
311 #include "esl_random.h"
312 #include "esl_vectorops.h"
313 #include "esl_dirichlet.h"
314
315 static ESL_OPTIONS options[] = {
316 /* name type default env range toggles reqs incomp help docgroup*/
317 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
318 { "-s", eslARG_INT, "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to <n>", 0 },
319 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
320 };
321 static char usage[] = "[-options]";
322 static char banner[] = "examples of using dirichlet module";
323
324 int
main(int argc,char ** argv)325 main(int argc, char **argv)
326 {
327 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
328 ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s"));
329 int K = 4;
330 double *alpha1 = malloc(sizeof(double) * K);
331 double *alpha2 = malloc(sizeof(double) * K);
332 double *p = malloc(sizeof(double) * K);
333 double *c = malloc(sizeof(double) * K);
334 int sumc = 1000;
335 int N = 1000;
336 double logp1, logp2;
337 int j,a;
338
339 esl_vec_DSet(alpha1, K, 1.0); // alpha1 is a flat prior
340 esl_vec_DSet(alpha2, K, 10.0); // alpha2 is a peaked prior, with modes p_i = 1/K
341
342 /* Sample probability & count vector from Dirichlet 1.
343 * Calculate its log probability under both.
344 * Output p vector, c vector, and the two log P's.
345 * Since Dirichlet 1 is a uniform distribution, first logp is always the same.
346 */
347 while (N--)
348 {
349 esl_dirichlet_DSample(rng, alpha1, K, p);
350 esl_vec_DSet(c, K, 0.);
351 for (j = 0; j < sumc; j++) c[esl_rnd_DChoose(rng, p, K)] += 1.;
352
353 logp1 = esl_dirichlet_logpdf_c(c, alpha1, K);
354 logp2 = esl_dirichlet_logpdf_c(c, alpha2, K);
355
356 printf("[ ");
357 for (a = 0; a < K; a++) printf("%6.4f ", p[a]);
358 printf("] [ ");
359 for (a = 0; a < K; a++) printf("%6.0f ", c[a]);
360 printf("] %10.4g %10.4g\n", logp1, logp2);
361 }
362
363
364 free(alpha1);
365 free(alpha2);
366 free(p);
367 free(c);
368 esl_randomness_Destroy(rng);
369 esl_getopts_Destroy(go);
370 return 0;
371 }
372 #endif //eslDIRICHLET_EXAMPLE
373