1 /* Statistical routines for normal (Gaussian) distributions.
2 *
3 * Contents:
4 * 1. Densities and distributions
5 * 2. Generic API, interface to histogram module
6 * 3. Unit tests
7 * 4. Test driver
8 * 5. Example
9 *
10 * To-do:
11 * - incomplete API, by the standards of other Easel stats modules.
12 * Compare esl_gumbel, for example.
13 *
14 *****************************************************************
15 * Crib notes.
16 *
17 * The error function is defined as: erf(x) = 2/sqrt(pi) \int_0^x e^{-t^2} dt
18 * The complementary error function is: erfc(x) = 1 - erf(x)
19 * The normal CDF in terms of erf: CDF(z) = 1/2 + 1/2 erf(z/sqrt(2))
20 * erf(x) is an "odd function": erf(x) = -erf(-x)
21 *
22 * lim_{x -> -inf} erf(x) = -1; erf(0) = 0; lim_{x -> +inf} erf(x) = 1
23 * lim_{x -> -inf} erfc(x) = 2 erfc(0) = 1; lim_{x -> +inf} erfc(x) = 0;
24 *
25 * erf(), erfc() in double precision are in the C99 standard. Some
26 * systems (cough, Microsoft, cough) are not necessarily C99 compliant
27 * and may not provide erf(), erfc(). But Easel will compile in an
28 * alternative, esl_stats_erfc(), if needed.
29 */
30 #include "esl_config.h"
31
32 #include <math.h>
33
34 #include "easel.h"
35 #include "esl_normal.h"
36 #include "esl_stats.h"
37
38
39 /*****************************************************************
40 * 1. Densities and distributions.
41 *****************************************************************/
42
43 /* Function: esl_normal_pdf()
44 * Incept: SRE, Tue Nov 21 14:15:43 2006 [Janelia]
45 *
46 * Purpose: Calculates the normal (Gaussian) probability density
47 * function $P(X=x)$ for a normal distribution, given
48 * value <x>, mean <mu>, and standard deviation <sigma>.
49 *
50 * Xref: STL11/94.
51 */
52 double
esl_normal_pdf(double x,double mu,double sigma)53 esl_normal_pdf(double x, double mu, double sigma)
54 {
55 double z;
56
57 z = (x - mu) / sigma;
58 return exp(-z*z*0.5) / (sigma * sqrt(2. * eslCONST_PI));
59 }
60
61 /* Function: esl_normal_logpdf()
62 * Incept: SRE, Tue Jan 9 20:43:52 2007 [Casa de Gatos]
63 *
64 * Purpose: Calculates the log of the probabiility density function
65 * for the normal (Gaussian), $\log P(X=x)$, given value
66 * <x>, mean <mu>, and standard deviation <sigma>.
67 *
68 * Xref: STL11/94.
69 */
70 double
esl_normal_logpdf(double x,double mu,double sigma)71 esl_normal_logpdf(double x, double mu, double sigma)
72 {
73 double z;
74
75 z = (x - mu) / sigma;
76 return (-z*z*0.5) - log(sigma) - log(sqrt(2.*eslCONST_PI));
77 }
78
79 /* Function: esl_normal_cdf()
80 * Incept: SRE, Tue Jan 9 20:59:04 2007 [Casa de Gatos]
81 *
82 * Purpose: Calculates the cumulative distribution function for the
83 * normal, $P(X \leq x)$, given value <x>, mean <mu>,
84 * and standard deviation <sigma>.
85 *
86 * Xref: STL11/94.
87 */
88 double
esl_normal_cdf(double x,double mu,double sigma)89 esl_normal_cdf(double x, double mu, double sigma)
90 {
91 double z;
92
93 /* for z -> -inf, CDF->0, so we rearrange in order to avoid 1 - 1
94 * cancellation error that arises in 0.5 * (1 + erf(z)) version.
95 * This way, esl_normal_cdf() returns full double-precision dynamic
96 * range.
97 */
98 z = (x - mu) / sigma;
99 return 0.5 * erfc(-1. * z / sqrt(2.));
100 }
101
102 /* Function: esl_normal_surv()
103 * Incept: SRE, Thu Jan 11 20:16:23 2007 [Casa de Gatos]
104 *
105 * Purpose: Calculates the survivor function, $P(X>x)$ (that is,
106 * 1-CDF, the right tail probability mass) for a normal
107 * distribution, given value <x>, mean <mu>, and standard
108 * deviation <sigma>.
109 *
110 * Xref: STL11/94
111 */
112 double
esl_normal_surv(double x,double mu,double sigma)113 esl_normal_surv(double x, double mu, double sigma)
114 {
115 double z = (x - mu) / sigma;
116
117 /* As above, we avoid the use of 1-CDF or the more
118 * common 1/2 (1 - erf(z)) version because we need to
119 * avoid 1-1 cancellation error.
120 */
121 return 0.5 * erfc( z / sqrt(2.));
122 }
123
124
125 /*****************************************************************
126 * 2. Generic API, interface to histogram module
127 *****************************************************************/
128
129 double
esl_normal_generic_pdf(double x,void * params)130 esl_normal_generic_pdf(double x, void *params)
131 {
132 double *v = (double *) params;
133 return esl_normal_pdf(x, v[0], v[1]);
134 }
135
136 double
esl_normal_generic_cdf(double x,void * params)137 esl_normal_generic_cdf(double x, void *params)
138 {
139 double *v = (double *) params;
140 return esl_normal_cdf(x, v[0], v[1]);
141 }
142
143 double
esl_normal_generic_surv(double x,void * params)144 esl_normal_generic_surv(double x, void *params)
145 {
146 double *v = (double *) params;
147 return esl_normal_surv(x, v[0], v[1]);
148 }
149
150
151 /*****************************************************************
152 * 3. Unit tests.
153 *****************************************************************/
154 #ifdef eslNORMAL_TESTDRIVE
155 static int
utest_pdf(void)156 utest_pdf(void)
157 {
158 char msg[] = "gaussian PDF unit test failed";
159 double mu = 0.;
160 double sigma = 1.;
161 double delta = 0.01;
162 double x;
163 double newpdf, lastpdf;
164 double cdf;
165
166 /* One way to test the PDF is to integrate the CDF by quadrature, which should give us ~ 1. */
167 for (cdf = 0., x = -40.; x < 40.; x += delta)
168 cdf += esl_normal_pdf(x, mu, sigma) * delta;
169 if (esl_DCompare(cdf, 1.0, 1e-9) != eslOK) esl_fatal(msg);
170
171 /* We also verify that we're using double-precision range */
172 x = 0.;
173 newpdf = esl_normal_pdf(x, mu, sigma);
174 do {
175 x += 1.;
176 lastpdf = newpdf;
177 newpdf = esl_normal_pdf(x, mu, sigma);
178 } while (newpdf > 0.);
179 /* If denormals flush to zero, we reach x=38; lastpdf = 2.12001e-298.
180 * With denormals, we reach one more step, x=39; lastpdf = 1.09722e-314.
181 * icc enables flush-to-zero at all -O levels, and gcc does not.
182 */
183 if (lastpdf > 1e-297 || x < 38.) esl_fatal(msg);
184 return eslOK;
185 }
186
187 static int
utest_logpdf(void)188 utest_logpdf(void)
189 {
190 char msg[] = "gaussian log PDF unit test failed";
191 double mu = 0.;
192 double sigma = 1.;
193 double delta = 0.01;
194 double x;
195 double old, new;
196 double cdf;
197
198 /* One way to test the log PDF is to integrate the CDF by quadrature, which should give us ~ 1. */
199 for (cdf = 0., x = -40.; x < 40.; x += delta)
200 cdf += exp(esl_normal_logpdf(x, mu, sigma)) * delta;
201 if (esl_DCompare(cdf, 1.0, 1e-9) != eslOK) esl_fatal(msg);
202
203 /* Another way is to compare exp(logpdf) to the PDF */
204 for (x = -20.; x < 20.; x += delta)
205 {
206 old = esl_normal_pdf (x, mu, sigma);
207 new = exp(esl_normal_logpdf(x, mu, sigma));
208 if (esl_DCompare(old, new, 1e-9) != eslOK) esl_fatal(msg);
209 }
210
211 return eslOK;
212 }
213
214 static int
utest_cdf(void)215 utest_cdf(void)
216 {
217 char msg[] = "gaussian CDF unit test failed";
218 double mu = 0.;
219 double sigma = 1.;
220 double x;
221
222 x = esl_normal_cdf(mu, mu, sigma);
223 if (esl_DCompare(x, 0.5, 1e-9) != eslOK) esl_fatal(msg);
224
225 x = esl_normal_cdf(99., mu, sigma);
226 if (esl_DCompare(x, 1.0, 1e-9) != eslOK) esl_fatal(msg);
227
228 x = esl_normal_cdf(-99., mu, sigma);
229 if (esl_DCompare(x, 0.0, 1e-9) != eslOK) esl_fatal(msg);
230
231 x = esl_normal_cdf(-30., mu, sigma);
232 if (x > 1e-100 || x == 0.) esl_fatal(msg);
233
234 return eslOK;
235 }
236
237
238 static int
utest_surv(void)239 utest_surv(void)
240 {
241 char msg[] = "gaussian survival unit test failed";
242 double mu = 0.;
243 double sigma = 1.;
244 double x;
245
246 x = esl_normal_surv(mu, mu, sigma);
247 if (esl_DCompare(x, 0.5, 1e-9) != eslOK) esl_fatal(msg);
248
249 x = esl_normal_surv(-99., mu, sigma);
250 if (esl_DCompare(x, 1.0, 1e-9) != eslOK) esl_fatal(msg);
251
252 x = esl_normal_surv(99., mu, sigma);
253 if (esl_DCompare(x, 0.0, 1e-9) != eslOK) esl_fatal(msg);
254
255 x = esl_normal_surv(30., mu, sigma);
256 if (x > 1e-100 || x == 0.) esl_fatal(msg);
257
258 return eslOK;
259 }
260 #endif /*eslNORMAL_TESTDRIVE*/
261
262
263
264
265 /*****************************************************************
266 * 4. Test driver.
267 *****************************************************************/
268 #ifdef eslNORMAL_TESTDRIVE
269 /* Compile:
270 gcc -g -Wall -I. -L. -o esl_normal_utest -DeslNORMAL_TESTDRIVE esl_normal.c -leasel -lm
271 */
272 #include <stdio.h>
273 #include <math.h>
274 #include "easel.h"
275 #include "esl_normal.h"
276
277 int
main(int argc,char ** argv)278 main(int argc, char **argv)
279 {
280 utest_pdf();
281 utest_logpdf();
282 utest_cdf();
283 utest_surv();
284
285 return eslOK;
286 }
287 #endif /*eslNORMAL_TESTDRIVE*/
288
289 /*****************************************************************
290 * 5. Example.
291 *****************************************************************/
292
293 #ifdef eslNORMAL_EXAMPLE
294 /* Print Gaussian distribution(s) in xmgrace XY set format
295 gcc -g -Wall -I. -L. -o esl_normal_example -DeslNORMAL_EXAMPLE esl_normal.c -leasel -lm
296 */
297 #include <stdio.h>
298 #include <math.h>
299
300 #include "easel.h"
301 #include "esl_getopts.h"
302 #include "esl_normal.h"
303
304 static ESL_OPTIONS options[] = {
305 /* name type default env range toggles reqs incomp help docgroup*/
306 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 },
307 { "--mean", eslARG_REAL, "0.0", NULL, NULL, NULL, NULL, NULL, "mean of normal distribution", 0 },
308 { "--sd", eslARG_REAL, "1.0", NULL, NULL, NULL, NULL, NULL, "s.d. of normal distribution", 0 },
309 { "--min", eslARG_REAL, "-10.0", NULL, NULL, NULL, NULL, NULL, "minimum for xaxis", 0 },
310 { "--max", eslARG_REAL, "10.0", NULL, NULL, NULL, NULL, NULL, "maximum for xaxis", 0 },
311 { "--step", eslARG_REAL, "1.0", NULL, NULL, NULL, NULL, NULL, "step size for xaxis", 0 },
312 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
313 };
314 static char usage[] = "[-options]";
315 static char banner[] = "output a Gaussian histogram";
316
317 int
main(int argc,char ** argv)318 main(int argc, char **argv)
319 {
320 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
321 double minx = esl_opt_GetReal(go, "--min");
322 double maxx = esl_opt_GetReal(go, "--max");
323 double xstep = esl_opt_GetReal(go, "--step");
324 double mean = esl_opt_GetReal(go, "--mean");
325 double sd = esl_opt_GetReal(go, "--sd");
326 double x;
327 double val;
328
329 for (x = minx; x < maxx; x += xstep)
330 {
331 val = esl_normal_logpdf(x, mean, sd) * xstep; /* replace w/ whatever you want to test drive */
332 printf("%f %g\n", x, val);
333 }
334 printf("&\n");
335
336 esl_getopts_Destroy(go);
337 return 0;
338 }
339 #endif /*eslNORMAL_EXAMPLE*/
340
341