1 /* sampling.h
2  *
3  * Copyright (C) 2012 Tamas Nepusz
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or (at
8  * your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  */
19 
20 #ifndef __SAMPLING_H__
21 #define __SAMPLING_H__
22 
23 #include <stdlib.h>
24 #include "mt.h"
25 
26 #undef __BEGIN_DECLS
27 #undef __END_DECLS
28 #ifdef __cplusplus
29 # define __BEGIN_DECLS extern "C" {
30 # define __END_DECLS }
31 #else
32 # define __BEGIN_DECLS /* empty */
33 # define __END_DECLS /* empty */
34 #endif
35 
36 __BEGIN_DECLS
37 
38 /**
39  * Draws a sample from a binomial distribution with the given count and
40  * probability values.
41  *
42  * This function is borrowed from R; see the corresponding license in
43  * \c rbinom.c. The return value is always an integer.
44  *
45  * The function is \em not thread-safe.
46  *
47  * \param  n    the number of trials
48  * \param  p    the success probability of each trial
49  * \param  rng  the Mersenne Twister random number generator to use
50  * \return the value drawn from the given binomial distribution.
51  */
52 double plfit_rbinom(double n, double p, mt_rng_t* rng);
53 
54 /**
55  * Draws a sample from a Pareto distribution with the given minimum value and
56  * power-law exponent.
57  *
58  * \param  xmin    the minimum value of the distribution. Must be positive.
59  * \param  alpha   the exponent. Must be positive
60  * \param  rng     the Mersenne Twister random number generator to use
61  *
62  * \return the sample or NaN if one of the parameters is invalid
63  */
64 extern double plfit_rpareto(double xmin, double alpha, mt_rng_t* rng);
65 
66 /**
67  * Draws a given number of samples from a Pareto distribution with the given
68  * minimum value and power-law exponent.
69  *
70  * \param  xmin    the minimum value of the distribution. Must be positive.
71  * \param  alpha   the exponent. Must be positive
72  * \param  n       the number of samples to draw
73  * \param  rng     the Mersenne Twister random number generator to use
74  * \param  result  the array where the result should be written. It must
75  *                 have enough space to store n items
76  *
77  * \return \c PLFIT_EINVAL if one of the parameters is invalid, zero otherwise
78  */
79 int plfit_rpareto_array(double xmin, double alpha, size_t n, mt_rng_t* rng,
80         double* result);
81 
82 /**
83  * Draws a sample from a zeta distribution with the given minimum value and
84  * power-law exponent.
85  *
86  * \param  xmin    the minimum value of the distribution. Must be positive.
87  * \param  alpha   the exponent. Must be positive
88  * \param  rng     the Mersenne Twister random number generator to use
89  *
90  * \return the sample or NaN if one of the parameters is invalid
91  */
92 extern double plfit_rzeta(long int xmin, double alpha, mt_rng_t* rng);
93 
94 /**
95  * Draws a given number of samples from a zeta distribution with the given
96  * minimum value and power-law exponent.
97  *
98  * \param  xmin    the minimum value of the distribution. Must be positive.
99  * \param  alpha   the exponent. Must be positive
100  * \param  n       the number of samples to draw
101  * \param  rng     the Mersenne Twister random number generator to use
102  * \param  result  the array where the result should be written. It must
103  *                 have enough space to store n items
104  *
105  * \return \c PLFIT_EINVAL if one of the parameters is invalid, zero otherwise
106  */
107 int plfit_rzeta_array(long int xmin, double alpha, size_t n, mt_rng_t* rng,
108         double* result);
109 
110 /**
111  * Draws a sample from a uniform distribution with the given lower and
112  * upper bounds.
113  *
114  * The lower bound is inclusive, the uppoer bound is not.
115  *
116  * \param  lo   the lower bound
117  * \param  hi   the upper bound
118  * \param  rng  the Mersenne Twister random number generator to use
119  * \return the value drawn from the given uniform distribution.
120  */
121 extern double plfit_runif(double lo, double hi, mt_rng_t* rng);
122 
123 /**
124  * Draws a sample from a uniform distribution over the [0; 1) interval.
125  *
126  * The interval is closed from the left and open from the right.
127  *
128  * \param  rng  the Mersenne Twister random number generator to use
129  * \return the value drawn from the given uniform distribution.
130  */
131 extern double plfit_runif_01(mt_rng_t* rng);
132 
133 /**
134  * Random sampler using Walker's alias method.
135  */
136 typedef struct {
137     long int num_bins;            /**< Number of bins */
138     long int* indexes;            /**< Index of the "other" element in each bin */
139     double* probs;                /**< Probability of drawing the "own" element from a bin */
140 } plfit_walker_alias_sampler_t;
141 
142 /**
143  * \brief Initializes the sampler with item probabilities.
144  *
145  * \param  sampler  the sampler to initialize
146  * \param  ps   pointer to an array containing a value proportional to the
147  *              sampling probability of each item in the set being sampled.
148  * \param  n    the number of items in the array
149  * \return error code
150  */
151 int plfit_walker_alias_sampler_init(plfit_walker_alias_sampler_t* sampler,
152         double* ps, size_t n);
153 
154 /**
155  * \brief Destroys an initialized sampler and frees the allocated memory.
156  *
157  * \param  sampler  the sampler to destroy
158  */
159 void plfit_walker_alias_sampler_destroy(plfit_walker_alias_sampler_t* sampler);
160 
161 /**
162  * \brief Draws a given number of samples from the sampler and writes them
163  *        to a given array.
164  *
165  * \param  sampler  the sampler to use
166  * \param  xs       pointer to an array where the sampled items should be
167  *                  written
168  * \param  n        the number of samples to draw
169  * \param  rng      the Mersenne Twister random number generator to use
170  * \return error code
171  */
172 int plfit_walker_alias_sampler_sample(const plfit_walker_alias_sampler_t* sampler,
173         long int* xs, size_t n, mt_rng_t* rng);
174 
175 __END_DECLS
176 
177 #endif
178