1 /* boost random/discrete_distribution.hpp header file
2  *
3  * Copyright Steven Watanabe 2009-2011
4  * Distributed under the Boost Software License, Version 1.0. (See
5  * accompanying file LICENSE_1_0.txt or copy at
6  * http://www.boost.org/LICENSE_1_0.txt)
7  *
8  * See http://www.boost.org for most recent version including documentation.
9  *
10  * $Id$
11  */
12 
13 #ifndef BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED
14 #define BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED
15 
16 #include <vector>
17 #include <limits>
18 #include <numeric>
19 #include <utility>
20 #include <iterator>
21 #include <boost/assert.hpp>
22 #include <boost/random/uniform_01.hpp>
23 #include <boost/random/uniform_int_distribution.hpp>
24 #include <boost/random/detail/config.hpp>
25 #include <boost/random/detail/operators.hpp>
26 #include <boost/random/detail/vector_io.hpp>
27 
28 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
29 #include <initializer_list>
30 #endif
31 
32 #include <boost/range/begin.hpp>
33 #include <boost/range/end.hpp>
34 
35 #include <boost/random/detail/disable_warnings.hpp>
36 
37 namespace boost {
38 namespace random {
39 namespace detail {
40 
41 template<class IntType, class WeightType>
42 struct integer_alias_table {
get_weightboost::random::detail::integer_alias_table43     WeightType get_weight(IntType bin) const {
44         WeightType result = _average;
45         if(bin < _excess) ++result;
46         return result;
47     }
48     template<class Iter>
init_averageboost::random::detail::integer_alias_table49     WeightType init_average(Iter begin, Iter end) {
50         WeightType weight_average = 0;
51         IntType excess = 0;
52         IntType n = 0;
53         // weight_average * n + excess == current partial sum
54         // This is a bit messy, but it's guaranteed not to overflow
55         for(Iter iter = begin; iter != end; ++iter) {
56             ++n;
57             if(*iter < weight_average) {
58                 WeightType diff = weight_average - *iter;
59                 weight_average -= diff / n;
60                 if(diff % n > excess) {
61                     --weight_average;
62                     excess += n - diff % n;
63                 } else {
64                     excess -= diff % n;
65                 }
66             } else {
67                 WeightType diff = *iter - weight_average;
68                 weight_average += diff / n;
69                 if(diff % n < n - excess) {
70                     excess += diff % n;
71                 } else {
72                     ++weight_average;
73                     excess -= n - diff % n;
74                 }
75             }
76         }
77         _alias_table.resize(static_cast<std::size_t>(n));
78         _average = weight_average;
79         _excess = excess;
80         return weight_average;
81     }
init_emptyboost::random::detail::integer_alias_table82     void init_empty()
83     {
84         _alias_table.clear();
85         _alias_table.push_back(std::make_pair(static_cast<WeightType>(1),
86                                               static_cast<IntType>(0)));
87         _average = static_cast<WeightType>(1);
88         _excess = static_cast<IntType>(0);
89     }
operator ==boost::random::detail::integer_alias_table90     bool operator==(const integer_alias_table& other) const
91     {
92         return _alias_table == other._alias_table &&
93             _average == other._average && _excess == other._excess;
94     }
normalizeboost::random::detail::integer_alias_table95     static WeightType normalize(WeightType val, WeightType average)
96     {
97         return val;
98     }
normalizeboost::random::detail::integer_alias_table99     static void normalize(std::vector<WeightType>&) {}
100     template<class URNG>
testboost::random::detail::integer_alias_table101     WeightType test(URNG &urng) const
102     {
103         return uniform_int_distribution<WeightType>(0, _average)(urng);
104     }
acceptboost::random::detail::integer_alias_table105     bool accept(IntType result, WeightType val) const
106     {
107         return result < _excess || val < _average;
108     }
try_get_sumboost::random::detail::integer_alias_table109     static WeightType try_get_sum(const std::vector<WeightType>& weights)
110     {
111         WeightType result = static_cast<WeightType>(0);
112         for(typename std::vector<WeightType>::const_iterator
113                 iter = weights.begin(), end = weights.end();
114             iter != end; ++iter)
115         {
116             if((std::numeric_limits<WeightType>::max)() - result > *iter) {
117                 return static_cast<WeightType>(0);
118             }
119             result += *iter;
120         }
121         return result;
122     }
123     template<class URNG>
generate_in_rangeboost::random::detail::integer_alias_table124     static WeightType generate_in_range(URNG &urng, WeightType max)
125     {
126         return uniform_int_distribution<WeightType>(
127             static_cast<WeightType>(0), max-1)(urng);
128     }
129     typedef std::vector<std::pair<WeightType, IntType> > alias_table_t;
130     alias_table_t _alias_table;
131     WeightType _average;
132     IntType _excess;
133 };
134 
135 template<class IntType, class WeightType>
136 struct real_alias_table {
get_weightboost::random::detail::real_alias_table137     WeightType get_weight(IntType) const
138     {
139         return WeightType(1.0);
140     }
141     template<class Iter>
init_averageboost::random::detail::real_alias_table142     WeightType init_average(Iter first, Iter last)
143     {
144         std::size_t size = std::distance(first, last);
145         WeightType weight_sum =
146             std::accumulate(first, last, static_cast<WeightType>(0));
147         _alias_table.resize(size);
148         return weight_sum / size;
149     }
init_emptyboost::random::detail::real_alias_table150     void init_empty()
151     {
152         _alias_table.clear();
153         _alias_table.push_back(std::make_pair(static_cast<WeightType>(1),
154                                               static_cast<IntType>(0)));
155     }
operator ==boost::random::detail::real_alias_table156     bool operator==(const real_alias_table& other) const
157     {
158         return _alias_table == other._alias_table;
159     }
normalizeboost::random::detail::real_alias_table160     static WeightType normalize(WeightType val, WeightType average)
161     {
162         return val / average;
163     }
normalizeboost::random::detail::real_alias_table164     static void normalize(std::vector<WeightType>& weights)
165     {
166         WeightType sum =
167             std::accumulate(weights.begin(), weights.end(),
168                             static_cast<WeightType>(0));
169         for(typename std::vector<WeightType>::iterator
170                 iter = weights.begin(),
171                 end = weights.end();
172             iter != end; ++iter)
173         {
174             *iter /= sum;
175         }
176     }
177     template<class URNG>
testboost::random::detail::real_alias_table178     WeightType test(URNG &urng) const
179     {
180         return uniform_01<WeightType>()(urng);
181     }
acceptboost::random::detail::real_alias_table182     bool accept(IntType, WeightType) const
183     {
184         return true;
185     }
try_get_sumboost::random::detail::real_alias_table186     static WeightType try_get_sum(const std::vector<WeightType>& weights)
187     {
188         return static_cast<WeightType>(1);
189     }
190     template<class URNG>
generate_in_rangeboost::random::detail::real_alias_table191     static WeightType generate_in_range(URNG &urng, WeightType)
192     {
193         return uniform_01<WeightType>()(urng);
194     }
195     typedef std::vector<std::pair<WeightType, IntType> > alias_table_t;
196     alias_table_t _alias_table;
197 };
198 
199 template<bool IsIntegral>
200 struct select_alias_table;
201 
202 template<>
203 struct select_alias_table<true> {
204     template<class IntType, class WeightType>
205     struct apply {
206         typedef integer_alias_table<IntType, WeightType> type;
207     };
208 };
209 
210 template<>
211 struct select_alias_table<false> {
212     template<class IntType, class WeightType>
213     struct apply {
214         typedef real_alias_table<IntType, WeightType> type;
215     };
216 };
217 
218 }
219 
220 /**
221  * The class @c discrete_distribution models a \random_distribution.
222  * It produces integers in the range [0, n) with the probability
223  * of producing each value is specified by the parameters of the
224  * distribution.
225  */
226 template<class IntType = int, class WeightType = double>
227 class discrete_distribution {
228 public:
229     typedef WeightType input_type;
230     typedef IntType result_type;
231 
232     class param_type {
233     public:
234 
235         typedef discrete_distribution distribution_type;
236 
237         /**
238          * Constructs a @c param_type object, representing a distribution
239          * with \f$p(0) = 1\f$ and \f$p(k|k>0) = 0\f$.
240          */
param_type()241         param_type() : _probabilities(1, static_cast<WeightType>(1)) {}
242         /**
243          * If @c first == @c last, equivalent to the default constructor.
244          * Otherwise, the values of the range represent weights for the
245          * possible values of the distribution.
246          */
247         template<class Iter>
param_type(Iter first,Iter last)248         param_type(Iter first, Iter last) : _probabilities(first, last)
249         {
250             normalize();
251         }
252 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
253         /**
254          * If wl.size() == 0, equivalent to the default constructor.
255          * Otherwise, the values of the @c initializer_list represent
256          * weights for the possible values of the distribution.
257          */
param_type(const std::initializer_list<WeightType> & wl)258         param_type(const std::initializer_list<WeightType>& wl)
259           : _probabilities(wl)
260         {
261             normalize();
262         }
263 #endif
264         /**
265          * If the range is empty, equivalent to the default constructor.
266          * Otherwise, the elements of the range represent
267          * weights for the possible values of the distribution.
268          */
269         template<class Range>
param_type(const Range & range)270         explicit param_type(const Range& range)
271           : _probabilities(boost::begin(range), boost::end(range))
272         {
273             normalize();
274         }
275 
276         /**
277          * If nw is zero, equivalent to the default constructor.
278          * Otherwise, the range of the distribution is [0, nw),
279          * and the weights are found by  calling fw with values
280          * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and
281          * \f$\mbox{xmax} - \delta/2\f$, where
282          * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$.
283          */
284         template<class Func>
param_type(std::size_t nw,double xmin,double xmax,Func fw)285         param_type(std::size_t nw, double xmin, double xmax, Func fw)
286         {
287             std::size_t n = (nw == 0) ? 1 : nw;
288             double delta = (xmax - xmin) / n;
289             BOOST_ASSERT(delta > 0);
290             for(std::size_t k = 0; k < n; ++k) {
291                 _probabilities.push_back(fw(xmin + k*delta + delta/2));
292             }
293             normalize();
294         }
295 
296         /**
297          * Returns a vector containing the probabilities of each possible
298          * value of the distribution.
299          */
probabilities() const300         std::vector<WeightType> probabilities() const
301         {
302             return _probabilities;
303         }
304 
305         /** Writes the parameters to a @c std::ostream. */
BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os,param_type,parm)306         BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, param_type, parm)
307         {
308             detail::print_vector(os, parm._probabilities);
309             return os;
310         }
311 
312         /** Reads the parameters from a @c std::istream. */
BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is,param_type,parm)313         BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, param_type, parm)
314         {
315             std::vector<WeightType> temp;
316             detail::read_vector(is, temp);
317             if(is) {
318                 parm._probabilities.swap(temp);
319             }
320             return is;
321         }
322 
323         /** Returns true if the two sets of parameters are the same. */
BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(param_type,lhs,rhs)324         BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(param_type, lhs, rhs)
325         {
326             return lhs._probabilities == rhs._probabilities;
327         }
328         /** Returns true if the two sets of parameters are different. */
329         BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(param_type)
330     private:
331         /// @cond show_private
332         friend class discrete_distribution;
param_type(const discrete_distribution & dist)333         explicit param_type(const discrete_distribution& dist)
334           : _probabilities(dist.probabilities())
335         {}
normalize()336         void normalize()
337         {
338             impl_type::normalize(_probabilities);
339         }
340         std::vector<WeightType> _probabilities;
341         /// @endcond
342     };
343 
344     /**
345      * Creates a new @c discrete_distribution object that has
346      * \f$p(0) = 1\f$ and \f$p(i|i>0) = 0\f$.
347      */
discrete_distribution()348     discrete_distribution()
349     {
350         _impl.init_empty();
351     }
352     /**
353      * Constructs a discrete_distribution from an iterator range.
354      * If @c first == @c last, equivalent to the default constructor.
355      * Otherwise, the values of the range represent weights for the
356      * possible values of the distribution.
357      */
358     template<class Iter>
discrete_distribution(Iter first,Iter last)359     discrete_distribution(Iter first, Iter last)
360     {
361         init(first, last);
362     }
363 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
364     /**
365      * Constructs a @c discrete_distribution from a @c std::initializer_list.
366      * If the @c initializer_list is empty, equivalent to the default
367      * constructor.  Otherwise, the values of the @c initializer_list
368      * represent weights for the possible values of the distribution.
369      * For example, given the distribution
370      *
371      * @code
372      * discrete_distribution<> dist{1, 4, 5};
373      * @endcode
374      *
375      * The probability of a 0 is 1/10, the probability of a 1 is 2/5,
376      * the probability of a 2 is 1/2, and no other values are possible.
377      */
discrete_distribution(std::initializer_list<WeightType> wl)378     discrete_distribution(std::initializer_list<WeightType> wl)
379     {
380         init(wl.begin(), wl.end());
381     }
382 #endif
383     /**
384      * Constructs a discrete_distribution from a Boost.Range range.
385      * If the range is empty, equivalent to the default constructor.
386      * Otherwise, the values of the range represent weights for the
387      * possible values of the distribution.
388      */
389     template<class Range>
discrete_distribution(const Range & range)390     explicit discrete_distribution(const Range& range)
391     {
392         init(boost::begin(range), boost::end(range));
393     }
394     /**
395      * Constructs a discrete_distribution that approximates a function.
396      * If nw is zero, equivalent to the default constructor.
397      * Otherwise, the range of the distribution is [0, nw),
398      * and the weights are found by  calling fw with values
399      * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and
400      * \f$\mbox{xmax} - \delta/2\f$, where
401      * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$.
402      */
403     template<class Func>
discrete_distribution(std::size_t nw,double xmin,double xmax,Func fw)404     discrete_distribution(std::size_t nw, double xmin, double xmax, Func fw)
405     {
406         std::size_t n = (nw == 0) ? 1 : nw;
407         double delta = (xmax - xmin) / n;
408         BOOST_ASSERT(delta > 0);
409         std::vector<WeightType> weights;
410         for(std::size_t k = 0; k < n; ++k) {
411             weights.push_back(fw(xmin + k*delta + delta/2));
412         }
413         init(weights.begin(), weights.end());
414     }
415     /**
416      * Constructs a discrete_distribution from its parameters.
417      */
discrete_distribution(const param_type & parm)418     explicit discrete_distribution(const param_type& parm)
419     {
420         param(parm);
421     }
422 
423     /**
424      * Returns a value distributed according to the parameters of the
425      * discrete_distribution.
426      */
427     template<class URNG>
operator ()(URNG & urng) const428     IntType operator()(URNG& urng) const
429     {
430         BOOST_ASSERT(!_impl._alias_table.empty());
431         IntType result;
432         WeightType test;
433         do {
434             result = uniform_int_distribution<IntType>((min)(), (max)())(urng);
435             test = _impl.test(urng);
436         } while(!_impl.accept(result, test));
437         if(test < _impl._alias_table[static_cast<std::size_t>(result)].first) {
438             return result;
439         } else {
440             return(_impl._alias_table[static_cast<std::size_t>(result)].second);
441         }
442     }
443 
444     /**
445      * Returns a value distributed according to the parameters
446      * specified by param.
447      */
448     template<class URNG>
operator ()(URNG & urng,const param_type & parm) const449     IntType operator()(URNG& urng, const param_type& parm) const
450     {
451         if(WeightType limit = impl_type::try_get_sum(parm._probabilities)) {
452             WeightType val = impl_type::generate_in_range(urng, limit);
453             WeightType sum = 0;
454             std::size_t result = 0;
455             for(typename std::vector<WeightType>::const_iterator
456                     iter = parm._probabilities.begin(),
457                     end = parm._probabilities.end();
458                 iter != end; ++iter, ++result)
459             {
460                 sum += *iter;
461                 if(sum > val) {
462                     return result;
463                 }
464             }
465             // This shouldn't be reachable, but round-off error
466             // can prevent any match from being found when val is
467             // very close to 1.
468             return static_cast<IntType>(parm._probabilities.size() - 1);
469         } else {
470             // WeightType is integral and sum(parm._probabilities)
471             // would overflow.  Just use the easy solution.
472             return discrete_distribution(parm)(urng);
473         }
474     }
475 
476     /** Returns the smallest value that the distribution can produce. */
BOOST_PREVENT_MACRO_SUBSTITUTION() const477     result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const { return 0; }
478     /** Returns the largest value that the distribution can produce. */
BOOST_PREVENT_MACRO_SUBSTITUTION() const479     result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const
480     { return static_cast<result_type>(_impl._alias_table.size() - 1); }
481 
482     /**
483      * Returns a vector containing the probabilities of each
484      * value of the distribution.  For example, given
485      *
486      * @code
487      * discrete_distribution<> dist = { 1, 4, 5 };
488      * std::vector<double> p = dist.param();
489      * @endcode
490      *
491      * the vector, p will contain {0.1, 0.4, 0.5}.
492      *
493      * If @c WeightType is integral, then the weights
494      * will be returned unchanged.
495      */
probabilities() const496     std::vector<WeightType> probabilities() const
497     {
498         std::vector<WeightType> result(_impl._alias_table.size(), static_cast<WeightType>(0));
499         std::size_t i = 0;
500         for(typename impl_type::alias_table_t::const_iterator
501                 iter = _impl._alias_table.begin(),
502                 end = _impl._alias_table.end();
503                 iter != end; ++iter, ++i)
504         {
505             WeightType val = iter->first;
506             result[i] += val;
507             result[static_cast<std::size_t>(iter->second)] += _impl.get_weight(i) - val;
508         }
509         impl_type::normalize(result);
510         return(result);
511     }
512 
513     /** Returns the parameters of the distribution. */
param() const514     param_type param() const
515     {
516         return param_type(*this);
517     }
518     /** Sets the parameters of the distribution. */
param(const param_type & parm)519     void param(const param_type& parm)
520     {
521         init(parm._probabilities.begin(), parm._probabilities.end());
522     }
523 
524     /**
525      * Effects: Subsequent uses of the distribution do not depend
526      * on values produced by any engine prior to invoking reset.
527      */
reset()528     void reset() {}
529 
530     /** Writes a distribution to a @c std::ostream. */
BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os,discrete_distribution,dd)531     BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, discrete_distribution, dd)
532     {
533         os << dd.param();
534         return os;
535     }
536 
537     /** Reads a distribution from a @c std::istream */
BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is,discrete_distribution,dd)538     BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, discrete_distribution, dd)
539     {
540         param_type parm;
541         if(is >> parm) {
542             dd.param(parm);
543         }
544         return is;
545     }
546 
547     /**
548      * Returns true if the two distributions will return the
549      * same sequence of values, when passed equal generators.
550      */
BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(discrete_distribution,lhs,rhs)551     BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(discrete_distribution, lhs, rhs)
552     {
553         return lhs._impl == rhs._impl;
554     }
555     /**
556      * Returns true if the two distributions may return different
557      * sequences of values, when passed equal generators.
558      */
559     BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(discrete_distribution)
560 
561 private:
562 
563     /// @cond show_private
564 
565     template<class Iter>
init(Iter first,Iter last,std::input_iterator_tag)566     void init(Iter first, Iter last, std::input_iterator_tag)
567     {
568         std::vector<WeightType> temp(first, last);
569         init(temp.begin(), temp.end());
570     }
571     template<class Iter>
init(Iter first,Iter last,std::forward_iterator_tag)572     void init(Iter first, Iter last, std::forward_iterator_tag)
573     {
574         std::vector<std::pair<WeightType, IntType> > below_average;
575         std::vector<std::pair<WeightType, IntType> > above_average;
576         WeightType weight_average = _impl.init_average(first, last);
577         WeightType normalized_average = _impl.get_weight(0);
578         std::size_t i = 0;
579         for(; first != last; ++first, ++i) {
580             WeightType val = impl_type::normalize(*first, weight_average);
581             std::pair<WeightType, IntType> elem(val, static_cast<IntType>(i));
582             if(val < normalized_average) {
583                 below_average.push_back(elem);
584             } else {
585                 above_average.push_back(elem);
586             }
587         }
588 
589         typename impl_type::alias_table_t::iterator
590             b_iter = below_average.begin(),
591             b_end = below_average.end(),
592             a_iter = above_average.begin(),
593             a_end = above_average.end()
594             ;
595         while(b_iter != b_end && a_iter != a_end) {
596             _impl._alias_table[static_cast<std::size_t>(b_iter->second)] =
597                 std::make_pair(b_iter->first, a_iter->second);
598             a_iter->first -= (_impl.get_weight(b_iter->second) - b_iter->first);
599             if(a_iter->first < normalized_average) {
600                 *b_iter = *a_iter++;
601             } else {
602                 ++b_iter;
603             }
604         }
605         for(; b_iter != b_end; ++b_iter) {
606             _impl._alias_table[static_cast<std::size_t>(b_iter->second)].first =
607                 _impl.get_weight(b_iter->second);
608         }
609         for(; a_iter != a_end; ++a_iter) {
610             _impl._alias_table[static_cast<std::size_t>(a_iter->second)].first =
611                 _impl.get_weight(a_iter->second);
612         }
613     }
614     template<class Iter>
init(Iter first,Iter last)615     void init(Iter first, Iter last)
616     {
617         if(first == last) {
618             _impl.init_empty();
619         } else {
620             typename std::iterator_traits<Iter>::iterator_category category;
621             init(first, last, category);
622         }
623     }
624     typedef typename detail::select_alias_table<
625         (::boost::is_integral<WeightType>::value)
626     >::template apply<IntType, WeightType>::type impl_type;
627     impl_type _impl;
628     /// @endcond
629 };
630 
631 }
632 }
633 
634 #include <boost/random/detail/enable_warnings.hpp>
635 
636 #endif
637