1 /* Statistical routines for normal (Gaussian) distributions.
2  *
3  * Contents:
4  *   1. Densities and distributions
5  *   2. Generic API, interface to histogram module
6  *   3. Unit tests
7  *   4. Test driver
8  *   5. Example
9  *
10  * To-do:
11  *   - incomplete API, by the standards of other Easel stats modules.
12  *     Compare esl_gumbel, for example.
13  *
14  *****************************************************************
15  * Crib notes.
16  *
17  * The error function is defined as:    erf(x)  = 2/sqrt(pi) \int_0^x e^{-t^2} dt
18  * The complementary error function is: erfc(x) = 1 - erf(x)
19  * The normal CDF in terms of erf:      CDF(z)  = 1/2 + 1/2 erf(z/sqrt(2))
20  * erf(x) is an "odd function":         erf(x)  = -erf(-x)
21  *
22  * lim_{x -> -inf} erf(x) = -1;   erf(0)  = 0;     lim_{x -> +inf} erf(x) =  1
23  * lim_{x -> -inf} erfc(x) = 2    erfc(0) = 1;     lim_{x -> +inf} erfc(x) = 0;
24  *
25  * erf(), erfc() in double precision are in the C99 standard.  Some
26  * systems (cough, Microsoft, cough) are not necessarily C99 compliant
27  * and may not provide erf(), erfc(). But Easel will compile in an
28  * alternative, esl_stats_erfc(), if needed.
29  */
30 #include "esl_config.h"
31 
32 #include <math.h>
33 
34 #include "easel.h"
35 #include "esl_normal.h"
36 #include "esl_stats.h"
37 
38 
39 /*****************************************************************
40  * 1. Densities and distributions.
41  *****************************************************************/
42 
43 /* Function:  esl_normal_pdf()
44  * Incept:    SRE, Tue Nov 21 14:15:43 2006 [Janelia]
45  *
46  * Purpose:   Calculates the normal (Gaussian) probability density
47  *            function $P(X=x)$ for a normal distribution, given
48  *            value <x>, mean <mu>, and standard deviation <sigma>.
49  *
50  * Xref:      STL11/94.
51  */
52 double
esl_normal_pdf(double x,double mu,double sigma)53 esl_normal_pdf(double x, double mu, double sigma)
54 {
55   double z;
56 
57   z = (x - mu) / sigma;
58   return  exp(-z*z*0.5) / (sigma * sqrt(2. * eslCONST_PI));
59 }
60 
61 /* Function:  esl_normal_logpdf()
62  * Incept:    SRE, Tue Jan  9 20:43:52 2007 [Casa de Gatos]
63  *
64  * Purpose:   Calculates the log of the probabiility density function
65  *            for the normal (Gaussian), $\log P(X=x)$, given value
66  *            <x>, mean <mu>, and standard deviation <sigma>.
67  *
68  * Xref:      STL11/94.
69  */
70 double
esl_normal_logpdf(double x,double mu,double sigma)71 esl_normal_logpdf(double x, double mu, double sigma)
72 {
73   double z;
74 
75   z = (x - mu) / sigma;
76   return  (-z*z*0.5) - log(sigma) - log(sqrt(2.*eslCONST_PI));
77 }
78 
79 /* Function:  esl_normal_cdf()
80  * Incept:    SRE, Tue Jan  9 20:59:04 2007 [Casa de Gatos]
81  *
82  * Purpose:   Calculates the cumulative distribution function for the
83  *            normal, $P(X \leq x)$, given value <x>, mean <mu>,
84  *            and standard deviation <sigma>.
85  *
86  * Xref:      STL11/94.
87  */
88 double
esl_normal_cdf(double x,double mu,double sigma)89 esl_normal_cdf(double x, double mu, double sigma)
90 {
91   double z;
92 
93   /* for z -> -inf, CDF->0, so we rearrange in order to avoid 1 - 1
94    * cancellation error that arises in 0.5 * (1 + erf(z)) version.
95    * This way, esl_normal_cdf() returns full double-precision dynamic
96    * range.
97    */
98   z = (x - mu) / sigma;
99   return 0.5 * erfc(-1. * z / sqrt(2.));
100 }
101 
102 /* Function:  esl_normal_surv()
103  * Incept:    SRE, Thu Jan 11 20:16:23 2007 [Casa de Gatos]
104  *
105  * Purpose:   Calculates the survivor function, $P(X>x)$ (that is,
106  *            1-CDF, the right tail probability mass) for a normal
107  *            distribution, given value <x>, mean <mu>, and standard
108  *            deviation <sigma>.
109  *
110  * Xref:      STL11/94
111  */
112 double
esl_normal_surv(double x,double mu,double sigma)113 esl_normal_surv(double x, double mu, double sigma)
114 {
115   double z = (x - mu) / sigma;
116 
117   /* As above, we avoid the use of 1-CDF or the more
118    * common 1/2 (1 - erf(z)) version because we need to
119    * avoid 1-1 cancellation error.
120    */
121   return 0.5 * erfc( z / sqrt(2.));
122 }
123 
124 
125 /*****************************************************************
126  * 2. Generic API, interface to histogram module
127  *****************************************************************/
128 
129 double
esl_normal_generic_pdf(double x,void * params)130 esl_normal_generic_pdf(double x, void *params)
131 {
132   double *v = (double *) params;
133   return esl_normal_pdf(x, v[0], v[1]);
134 }
135 
136 double
esl_normal_generic_cdf(double x,void * params)137 esl_normal_generic_cdf(double x, void *params)
138 {
139   double *v = (double *) params;
140   return esl_normal_cdf(x, v[0], v[1]);
141 }
142 
143 double
esl_normal_generic_surv(double x,void * params)144 esl_normal_generic_surv(double x, void *params)
145 {
146   double *v = (double *) params;
147   return esl_normal_surv(x, v[0], v[1]);
148 }
149 
150 
151 /*****************************************************************
152  * 3. Unit tests.
153  *****************************************************************/
154 #ifdef eslNORMAL_TESTDRIVE
155 static int
utest_pdf(void)156 utest_pdf(void)
157 {
158   char   msg[] = "gaussian PDF unit test failed";
159   double mu    = 0.;
160   double sigma = 1.;
161   double delta = 0.01;
162   double x;
163   double newpdf, lastpdf;
164   double cdf;
165 
166   /* One way to test the PDF is to integrate the CDF by quadrature, which should give us ~ 1. */
167   for (cdf = 0., x = -40.; x < 40.; x += delta)
168     cdf += esl_normal_pdf(x, mu, sigma) * delta;
169   if (esl_DCompare(cdf, 1.0, 1e-9) != eslOK)  esl_fatal(msg);
170 
171   /* We also verify that we're using double-precision range */
172   x = 0.;
173   newpdf = esl_normal_pdf(x, mu, sigma);
174   do {
175     x += 1.;
176     lastpdf = newpdf;
177     newpdf  = esl_normal_pdf(x, mu, sigma);
178   } while (newpdf > 0.);
179   /* If denormals flush to zero, we reach x=38; lastpdf = 2.12001e-298.
180    * With denormals, we reach one more step, x=39; lastpdf = 1.09722e-314.
181    * icc enables flush-to-zero at all -O levels, and gcc does not.
182    */
183   if (lastpdf > 1e-297 || x < 38.) esl_fatal(msg);
184   return eslOK;
185 }
186 
187 static int
utest_logpdf(void)188 utest_logpdf(void)
189 {
190   char   msg[] = "gaussian log PDF unit test failed";
191   double mu    = 0.;
192   double sigma = 1.;
193   double delta = 0.01;
194   double x;
195   double old, new;
196   double cdf;
197 
198   /* One way to test the log PDF is to integrate the CDF by quadrature, which should give us ~ 1. */
199   for (cdf = 0., x = -40.; x < 40.; x += delta)
200     cdf += exp(esl_normal_logpdf(x, mu, sigma)) * delta;
201   if (esl_DCompare(cdf, 1.0, 1e-9) != eslOK) esl_fatal(msg);
202 
203   /* Another way is to compare exp(logpdf) to the PDF */
204   for (x = -20.; x < 20.; x += delta)
205     {
206       old = esl_normal_pdf       (x, mu, sigma);
207       new = exp(esl_normal_logpdf(x, mu, sigma));
208       if (esl_DCompare(old, new, 1e-9) != eslOK) esl_fatal(msg);
209     }
210 
211   return eslOK;
212 }
213 
214 static int
utest_cdf(void)215 utest_cdf(void)
216 {
217   char   msg[] = "gaussian CDF unit test failed";
218   double mu    = 0.;
219   double sigma = 1.;
220   double x;
221 
222   x = esl_normal_cdf(mu, mu, sigma);
223   if (esl_DCompare(x, 0.5, 1e-9) != eslOK) esl_fatal(msg);
224 
225   x = esl_normal_cdf(99., mu, sigma);
226   if (esl_DCompare(x, 1.0, 1e-9) != eslOK) esl_fatal(msg);
227 
228   x = esl_normal_cdf(-99., mu, sigma);
229   if (esl_DCompare(x, 0.0, 1e-9) != eslOK) esl_fatal(msg);
230 
231   x = esl_normal_cdf(-30., mu, sigma);
232   if (x > 1e-100 || x == 0.) esl_fatal(msg);
233 
234   return eslOK;
235 }
236 
237 
238 static int
utest_surv(void)239 utest_surv(void)
240 {
241   char   msg[] = "gaussian survival unit test failed";
242   double mu    = 0.;
243   double sigma = 1.;
244   double x;
245 
246   x = esl_normal_surv(mu, mu, sigma);
247   if (esl_DCompare(x, 0.5, 1e-9) != eslOK) esl_fatal(msg);
248 
249   x = esl_normal_surv(-99., mu, sigma);
250   if (esl_DCompare(x, 1.0, 1e-9) != eslOK) esl_fatal(msg);
251 
252   x = esl_normal_surv(99., mu, sigma);
253   if (esl_DCompare(x, 0.0, 1e-9) != eslOK) esl_fatal(msg);
254 
255   x = esl_normal_surv(30., mu, sigma);
256   if (x > 1e-100 || x == 0.) esl_fatal(msg);
257 
258   return eslOK;
259 }
260 #endif /*eslNORMAL_TESTDRIVE*/
261 
262 
263 
264 
265 /*****************************************************************
266  * 4. Test driver.
267  *****************************************************************/
268 #ifdef eslNORMAL_TESTDRIVE
269 /* Compile:
270    gcc -g -Wall -I. -L. -o esl_normal_utest -DeslNORMAL_TESTDRIVE esl_normal.c -leasel -lm
271 */
272 #include <stdio.h>
273 #include <math.h>
274 #include "easel.h"
275 #include "esl_normal.h"
276 
277 int
main(int argc,char ** argv)278 main(int argc, char **argv)
279 {
280   utest_pdf();
281   utest_logpdf();
282   utest_cdf();
283   utest_surv();
284 
285   return eslOK;
286 }
287 #endif /*eslNORMAL_TESTDRIVE*/
288 
289 /*****************************************************************
290  * 5. Example.
291  *****************************************************************/
292 
293 #ifdef eslNORMAL_EXAMPLE
294 /* Print Gaussian distribution(s) in xmgrace XY set format
295    gcc -g -Wall -I. -L. -o esl_normal_example -DeslNORMAL_EXAMPLE esl_normal.c -leasel -lm
296  */
297 #include <stdio.h>
298 #include <math.h>
299 
300 #include "easel.h"
301 #include "esl_getopts.h"
302 #include "esl_normal.h"
303 
304 static ESL_OPTIONS options[] = {
305   /* name           type      default  env  range toggles reqs incomp  help                                       docgroup*/
306   { "-h",        eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",    0 },
307   { "--mean",    eslARG_REAL,   "0.0",  NULL, NULL,  NULL,  NULL, NULL, "mean of normal distribution",             0 },
308   { "--sd",      eslARG_REAL,   "1.0",  NULL, NULL,  NULL,  NULL, NULL, "s.d. of normal distribution",             0 },
309   { "--min",     eslARG_REAL,  "-10.0", NULL, NULL,  NULL,  NULL, NULL, "minimum for xaxis",                       0 },
310   { "--max",     eslARG_REAL,   "10.0", NULL, NULL,  NULL,  NULL, NULL, "maximum for xaxis",                       0 },
311   { "--step",    eslARG_REAL,    "1.0", NULL, NULL,  NULL,  NULL, NULL, "step size for xaxis",                     0 },
312   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
313 };
314 static char usage[]  = "[-options]";
315 static char banner[] = "output a Gaussian histogram";
316 
317 int
main(int argc,char ** argv)318 main(int argc, char **argv)
319 {
320   ESL_GETOPTS  *go        = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage);
321   double        minx      = esl_opt_GetReal(go, "--min");
322   double        maxx      = esl_opt_GetReal(go, "--max");
323   double        xstep     = esl_opt_GetReal(go, "--step");
324   double        mean      = esl_opt_GetReal(go, "--mean");
325   double        sd        = esl_opt_GetReal(go, "--sd");
326   double        x;
327   double        val;
328 
329   for (x = minx; x < maxx; x += xstep)
330     {
331       val = esl_normal_logpdf(x, mean, sd) * xstep; /* replace w/ whatever you want to test drive */
332       printf("%f %g\n", x, val);
333     }
334   printf("&\n");
335 
336   esl_getopts_Destroy(go);
337   return 0;
338 }
339 #endif /*eslNORMAL_EXAMPLE*/
340 
341