1 /* boost random/discrete_distribution.hpp header file 2 * 3 * Copyright Steven Watanabe 2009-2011 4 * Distributed under the Boost Software License, Version 1.0. (See 5 * accompanying file LICENSE_1_0.txt or copy at 6 * http://www.boost.org/LICENSE_1_0.txt) 7 * 8 * See http://www.boost.org for most recent version including documentation. 9 * 10 * $Id$ 11 */ 12 13 #ifndef BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED 14 #define BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED 15 16 #include <vector> 17 #include <limits> 18 #include <numeric> 19 #include <utility> 20 #include <iterator> 21 #include <boost/assert.hpp> 22 #include <boost/random/uniform_01.hpp> 23 #include <boost/random/uniform_int_distribution.hpp> 24 #include <boost/random/detail/config.hpp> 25 #include <boost/random/detail/operators.hpp> 26 #include <boost/random/detail/vector_io.hpp> 27 28 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST 29 #include <initializer_list> 30 #endif 31 32 #include <boost/range/begin.hpp> 33 #include <boost/range/end.hpp> 34 35 #include <boost/random/detail/disable_warnings.hpp> 36 37 namespace boost { 38 namespace random { 39 namespace detail { 40 41 template<class IntType, class WeightType> 42 struct integer_alias_table { get_weightboost::random::detail::integer_alias_table43 WeightType get_weight(IntType bin) const { 44 WeightType result = _average; 45 if(bin < _excess) ++result; 46 return result; 47 } 48 template<class Iter> init_averageboost::random::detail::integer_alias_table49 WeightType init_average(Iter begin, Iter end) { 50 WeightType weight_average = 0; 51 IntType excess = 0; 52 IntType n = 0; 53 // weight_average * n + excess == current partial sum 54 // This is a bit messy, but it's guaranteed not to overflow 55 for(Iter iter = begin; iter != end; ++iter) { 56 ++n; 57 if(*iter < weight_average) { 58 WeightType diff = weight_average - *iter; 59 weight_average -= diff / n; 60 if(diff % n > excess) { 61 --weight_average; 62 excess += n - diff % n; 63 } else { 64 excess -= diff % n; 65 } 66 } else { 67 WeightType diff = *iter - weight_average; 68 weight_average += diff / n; 69 if(diff % n < n - excess) { 70 excess += diff % n; 71 } else { 72 ++weight_average; 73 excess -= n - diff % n; 74 } 75 } 76 } 77 _alias_table.resize(static_cast<std::size_t>(n)); 78 _average = weight_average; 79 _excess = excess; 80 return weight_average; 81 } init_emptyboost::random::detail::integer_alias_table82 void init_empty() 83 { 84 _alias_table.clear(); 85 _alias_table.push_back(std::make_pair(static_cast<WeightType>(1), 86 static_cast<IntType>(0))); 87 _average = static_cast<WeightType>(1); 88 _excess = static_cast<IntType>(0); 89 } operator ==boost::random::detail::integer_alias_table90 bool operator==(const integer_alias_table& other) const 91 { 92 return _alias_table == other._alias_table && 93 _average == other._average && _excess == other._excess; 94 } normalizeboost::random::detail::integer_alias_table95 static WeightType normalize(WeightType val, WeightType average) 96 { 97 return val; 98 } normalizeboost::random::detail::integer_alias_table99 static void normalize(std::vector<WeightType>&) {} 100 template<class URNG> testboost::random::detail::integer_alias_table101 WeightType test(URNG &urng) const 102 { 103 return uniform_int_distribution<WeightType>(0, _average)(urng); 104 } acceptboost::random::detail::integer_alias_table105 bool accept(IntType result, WeightType val) const 106 { 107 return result < _excess || val < _average; 108 } try_get_sumboost::random::detail::integer_alias_table109 static WeightType try_get_sum(const std::vector<WeightType>& weights) 110 { 111 WeightType result = static_cast<WeightType>(0); 112 for(typename std::vector<WeightType>::const_iterator 113 iter = weights.begin(), end = weights.end(); 114 iter != end; ++iter) 115 { 116 if((std::numeric_limits<WeightType>::max)() - result > *iter) { 117 return static_cast<WeightType>(0); 118 } 119 result += *iter; 120 } 121 return result; 122 } 123 template<class URNG> generate_in_rangeboost::random::detail::integer_alias_table124 static WeightType generate_in_range(URNG &urng, WeightType max) 125 { 126 return uniform_int_distribution<WeightType>( 127 static_cast<WeightType>(0), max-1)(urng); 128 } 129 typedef std::vector<std::pair<WeightType, IntType> > alias_table_t; 130 alias_table_t _alias_table; 131 WeightType _average; 132 IntType _excess; 133 }; 134 135 template<class IntType, class WeightType> 136 struct real_alias_table { get_weightboost::random::detail::real_alias_table137 WeightType get_weight(IntType) const 138 { 139 return WeightType(1.0); 140 } 141 template<class Iter> init_averageboost::random::detail::real_alias_table142 WeightType init_average(Iter first, Iter last) 143 { 144 std::size_t size = std::distance(first, last); 145 WeightType weight_sum = 146 std::accumulate(first, last, static_cast<WeightType>(0)); 147 _alias_table.resize(size); 148 return weight_sum / size; 149 } init_emptyboost::random::detail::real_alias_table150 void init_empty() 151 { 152 _alias_table.clear(); 153 _alias_table.push_back(std::make_pair(static_cast<WeightType>(1), 154 static_cast<IntType>(0))); 155 } operator ==boost::random::detail::real_alias_table156 bool operator==(const real_alias_table& other) const 157 { 158 return _alias_table == other._alias_table; 159 } normalizeboost::random::detail::real_alias_table160 static WeightType normalize(WeightType val, WeightType average) 161 { 162 return val / average; 163 } normalizeboost::random::detail::real_alias_table164 static void normalize(std::vector<WeightType>& weights) 165 { 166 WeightType sum = 167 std::accumulate(weights.begin(), weights.end(), 168 static_cast<WeightType>(0)); 169 for(typename std::vector<WeightType>::iterator 170 iter = weights.begin(), 171 end = weights.end(); 172 iter != end; ++iter) 173 { 174 *iter /= sum; 175 } 176 } 177 template<class URNG> testboost::random::detail::real_alias_table178 WeightType test(URNG &urng) const 179 { 180 return uniform_01<WeightType>()(urng); 181 } acceptboost::random::detail::real_alias_table182 bool accept(IntType, WeightType) const 183 { 184 return true; 185 } try_get_sumboost::random::detail::real_alias_table186 static WeightType try_get_sum(const std::vector<WeightType>& weights) 187 { 188 return static_cast<WeightType>(1); 189 } 190 template<class URNG> generate_in_rangeboost::random::detail::real_alias_table191 static WeightType generate_in_range(URNG &urng, WeightType) 192 { 193 return uniform_01<WeightType>()(urng); 194 } 195 typedef std::vector<std::pair<WeightType, IntType> > alias_table_t; 196 alias_table_t _alias_table; 197 }; 198 199 template<bool IsIntegral> 200 struct select_alias_table; 201 202 template<> 203 struct select_alias_table<true> { 204 template<class IntType, class WeightType> 205 struct apply { 206 typedef integer_alias_table<IntType, WeightType> type; 207 }; 208 }; 209 210 template<> 211 struct select_alias_table<false> { 212 template<class IntType, class WeightType> 213 struct apply { 214 typedef real_alias_table<IntType, WeightType> type; 215 }; 216 }; 217 218 } 219 220 /** 221 * The class @c discrete_distribution models a \random_distribution. 222 * It produces integers in the range [0, n) with the probability 223 * of producing each value is specified by the parameters of the 224 * distribution. 225 */ 226 template<class IntType = int, class WeightType = double> 227 class discrete_distribution { 228 public: 229 typedef WeightType input_type; 230 typedef IntType result_type; 231 232 class param_type { 233 public: 234 235 typedef discrete_distribution distribution_type; 236 237 /** 238 * Constructs a @c param_type object, representing a distribution 239 * with \f$p(0) = 1\f$ and \f$p(k|k>0) = 0\f$. 240 */ param_type()241 param_type() : _probabilities(1, static_cast<WeightType>(1)) {} 242 /** 243 * If @c first == @c last, equivalent to the default constructor. 244 * Otherwise, the values of the range represent weights for the 245 * possible values of the distribution. 246 */ 247 template<class Iter> param_type(Iter first,Iter last)248 param_type(Iter first, Iter last) : _probabilities(first, last) 249 { 250 normalize(); 251 } 252 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST 253 /** 254 * If wl.size() == 0, equivalent to the default constructor. 255 * Otherwise, the values of the @c initializer_list represent 256 * weights for the possible values of the distribution. 257 */ param_type(const std::initializer_list<WeightType> & wl)258 param_type(const std::initializer_list<WeightType>& wl) 259 : _probabilities(wl) 260 { 261 normalize(); 262 } 263 #endif 264 /** 265 * If the range is empty, equivalent to the default constructor. 266 * Otherwise, the elements of the range represent 267 * weights for the possible values of the distribution. 268 */ 269 template<class Range> param_type(const Range & range)270 explicit param_type(const Range& range) 271 : _probabilities(boost::begin(range), boost::end(range)) 272 { 273 normalize(); 274 } 275 276 /** 277 * If nw is zero, equivalent to the default constructor. 278 * Otherwise, the range of the distribution is [0, nw), 279 * and the weights are found by calling fw with values 280 * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and 281 * \f$\mbox{xmax} - \delta/2\f$, where 282 * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$. 283 */ 284 template<class Func> param_type(std::size_t nw,double xmin,double xmax,Func fw)285 param_type(std::size_t nw, double xmin, double xmax, Func fw) 286 { 287 std::size_t n = (nw == 0) ? 1 : nw; 288 double delta = (xmax - xmin) / n; 289 BOOST_ASSERT(delta > 0); 290 for(std::size_t k = 0; k < n; ++k) { 291 _probabilities.push_back(fw(xmin + k*delta + delta/2)); 292 } 293 normalize(); 294 } 295 296 /** 297 * Returns a vector containing the probabilities of each possible 298 * value of the distribution. 299 */ probabilities() const300 std::vector<WeightType> probabilities() const 301 { 302 return _probabilities; 303 } 304 305 /** Writes the parameters to a @c std::ostream. */ BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os,param_type,parm)306 BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, param_type, parm) 307 { 308 detail::print_vector(os, parm._probabilities); 309 return os; 310 } 311 312 /** Reads the parameters from a @c std::istream. */ BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is,param_type,parm)313 BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, param_type, parm) 314 { 315 std::vector<WeightType> temp; 316 detail::read_vector(is, temp); 317 if(is) { 318 parm._probabilities.swap(temp); 319 } 320 return is; 321 } 322 323 /** Returns true if the two sets of parameters are the same. */ BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(param_type,lhs,rhs)324 BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(param_type, lhs, rhs) 325 { 326 return lhs._probabilities == rhs._probabilities; 327 } 328 /** Returns true if the two sets of parameters are different. */ 329 BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(param_type) 330 private: 331 /// @cond show_private 332 friend class discrete_distribution; param_type(const discrete_distribution & dist)333 explicit param_type(const discrete_distribution& dist) 334 : _probabilities(dist.probabilities()) 335 {} normalize()336 void normalize() 337 { 338 impl_type::normalize(_probabilities); 339 } 340 std::vector<WeightType> _probabilities; 341 /// @endcond 342 }; 343 344 /** 345 * Creates a new @c discrete_distribution object that has 346 * \f$p(0) = 1\f$ and \f$p(i|i>0) = 0\f$. 347 */ discrete_distribution()348 discrete_distribution() 349 { 350 _impl.init_empty(); 351 } 352 /** 353 * Constructs a discrete_distribution from an iterator range. 354 * If @c first == @c last, equivalent to the default constructor. 355 * Otherwise, the values of the range represent weights for the 356 * possible values of the distribution. 357 */ 358 template<class Iter> discrete_distribution(Iter first,Iter last)359 discrete_distribution(Iter first, Iter last) 360 { 361 init(first, last); 362 } 363 #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST 364 /** 365 * Constructs a @c discrete_distribution from a @c std::initializer_list. 366 * If the @c initializer_list is empty, equivalent to the default 367 * constructor. Otherwise, the values of the @c initializer_list 368 * represent weights for the possible values of the distribution. 369 * For example, given the distribution 370 * 371 * @code 372 * discrete_distribution<> dist{1, 4, 5}; 373 * @endcode 374 * 375 * The probability of a 0 is 1/10, the probability of a 1 is 2/5, 376 * the probability of a 2 is 1/2, and no other values are possible. 377 */ discrete_distribution(std::initializer_list<WeightType> wl)378 discrete_distribution(std::initializer_list<WeightType> wl) 379 { 380 init(wl.begin(), wl.end()); 381 } 382 #endif 383 /** 384 * Constructs a discrete_distribution from a Boost.Range range. 385 * If the range is empty, equivalent to the default constructor. 386 * Otherwise, the values of the range represent weights for the 387 * possible values of the distribution. 388 */ 389 template<class Range> discrete_distribution(const Range & range)390 explicit discrete_distribution(const Range& range) 391 { 392 init(boost::begin(range), boost::end(range)); 393 } 394 /** 395 * Constructs a discrete_distribution that approximates a function. 396 * If nw is zero, equivalent to the default constructor. 397 * Otherwise, the range of the distribution is [0, nw), 398 * and the weights are found by calling fw with values 399 * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and 400 * \f$\mbox{xmax} - \delta/2\f$, where 401 * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$. 402 */ 403 template<class Func> discrete_distribution(std::size_t nw,double xmin,double xmax,Func fw)404 discrete_distribution(std::size_t nw, double xmin, double xmax, Func fw) 405 { 406 std::size_t n = (nw == 0) ? 1 : nw; 407 double delta = (xmax - xmin) / n; 408 BOOST_ASSERT(delta > 0); 409 std::vector<WeightType> weights; 410 for(std::size_t k = 0; k < n; ++k) { 411 weights.push_back(fw(xmin + k*delta + delta/2)); 412 } 413 init(weights.begin(), weights.end()); 414 } 415 /** 416 * Constructs a discrete_distribution from its parameters. 417 */ discrete_distribution(const param_type & parm)418 explicit discrete_distribution(const param_type& parm) 419 { 420 param(parm); 421 } 422 423 /** 424 * Returns a value distributed according to the parameters of the 425 * discrete_distribution. 426 */ 427 template<class URNG> operator ()(URNG & urng) const428 IntType operator()(URNG& urng) const 429 { 430 BOOST_ASSERT(!_impl._alias_table.empty()); 431 IntType result; 432 WeightType test; 433 do { 434 result = uniform_int_distribution<IntType>((min)(), (max)())(urng); 435 test = _impl.test(urng); 436 } while(!_impl.accept(result, test)); 437 if(test < _impl._alias_table[static_cast<std::size_t>(result)].first) { 438 return result; 439 } else { 440 return(_impl._alias_table[static_cast<std::size_t>(result)].second); 441 } 442 } 443 444 /** 445 * Returns a value distributed according to the parameters 446 * specified by param. 447 */ 448 template<class URNG> operator ()(URNG & urng,const param_type & parm) const449 IntType operator()(URNG& urng, const param_type& parm) const 450 { 451 if(WeightType limit = impl_type::try_get_sum(parm._probabilities)) { 452 WeightType val = impl_type::generate_in_range(urng, limit); 453 WeightType sum = 0; 454 std::size_t result = 0; 455 for(typename std::vector<WeightType>::const_iterator 456 iter = parm._probabilities.begin(), 457 end = parm._probabilities.end(); 458 iter != end; ++iter, ++result) 459 { 460 sum += *iter; 461 if(sum > val) { 462 return result; 463 } 464 } 465 // This shouldn't be reachable, but round-off error 466 // can prevent any match from being found when val is 467 // very close to 1. 468 return static_cast<IntType>(parm._probabilities.size() - 1); 469 } else { 470 // WeightType is integral and sum(parm._probabilities) 471 // would overflow. Just use the easy solution. 472 return discrete_distribution(parm)(urng); 473 } 474 } 475 476 /** Returns the smallest value that the distribution can produce. */ BOOST_PREVENT_MACRO_SUBSTITUTION() const477 result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const { return 0; } 478 /** Returns the largest value that the distribution can produce. */ BOOST_PREVENT_MACRO_SUBSTITUTION() const479 result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const 480 { return static_cast<result_type>(_impl._alias_table.size() - 1); } 481 482 /** 483 * Returns a vector containing the probabilities of each 484 * value of the distribution. For example, given 485 * 486 * @code 487 * discrete_distribution<> dist = { 1, 4, 5 }; 488 * std::vector<double> p = dist.param(); 489 * @endcode 490 * 491 * the vector, p will contain {0.1, 0.4, 0.5}. 492 * 493 * If @c WeightType is integral, then the weights 494 * will be returned unchanged. 495 */ probabilities() const496 std::vector<WeightType> probabilities() const 497 { 498 std::vector<WeightType> result(_impl._alias_table.size(), static_cast<WeightType>(0)); 499 std::size_t i = 0; 500 for(typename impl_type::alias_table_t::const_iterator 501 iter = _impl._alias_table.begin(), 502 end = _impl._alias_table.end(); 503 iter != end; ++iter, ++i) 504 { 505 WeightType val = iter->first; 506 result[i] += val; 507 result[static_cast<std::size_t>(iter->second)] += _impl.get_weight(i) - val; 508 } 509 impl_type::normalize(result); 510 return(result); 511 } 512 513 /** Returns the parameters of the distribution. */ param() const514 param_type param() const 515 { 516 return param_type(*this); 517 } 518 /** Sets the parameters of the distribution. */ param(const param_type & parm)519 void param(const param_type& parm) 520 { 521 init(parm._probabilities.begin(), parm._probabilities.end()); 522 } 523 524 /** 525 * Effects: Subsequent uses of the distribution do not depend 526 * on values produced by any engine prior to invoking reset. 527 */ reset()528 void reset() {} 529 530 /** Writes a distribution to a @c std::ostream. */ BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os,discrete_distribution,dd)531 BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, discrete_distribution, dd) 532 { 533 os << dd.param(); 534 return os; 535 } 536 537 /** Reads a distribution from a @c std::istream */ BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is,discrete_distribution,dd)538 BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, discrete_distribution, dd) 539 { 540 param_type parm; 541 if(is >> parm) { 542 dd.param(parm); 543 } 544 return is; 545 } 546 547 /** 548 * Returns true if the two distributions will return the 549 * same sequence of values, when passed equal generators. 550 */ BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(discrete_distribution,lhs,rhs)551 BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(discrete_distribution, lhs, rhs) 552 { 553 return lhs._impl == rhs._impl; 554 } 555 /** 556 * Returns true if the two distributions may return different 557 * sequences of values, when passed equal generators. 558 */ 559 BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(discrete_distribution) 560 561 private: 562 563 /// @cond show_private 564 565 template<class Iter> init(Iter first,Iter last,std::input_iterator_tag)566 void init(Iter first, Iter last, std::input_iterator_tag) 567 { 568 std::vector<WeightType> temp(first, last); 569 init(temp.begin(), temp.end()); 570 } 571 template<class Iter> init(Iter first,Iter last,std::forward_iterator_tag)572 void init(Iter first, Iter last, std::forward_iterator_tag) 573 { 574 std::vector<std::pair<WeightType, IntType> > below_average; 575 std::vector<std::pair<WeightType, IntType> > above_average; 576 WeightType weight_average = _impl.init_average(first, last); 577 WeightType normalized_average = _impl.get_weight(0); 578 std::size_t i = 0; 579 for(; first != last; ++first, ++i) { 580 WeightType val = impl_type::normalize(*first, weight_average); 581 std::pair<WeightType, IntType> elem(val, static_cast<IntType>(i)); 582 if(val < normalized_average) { 583 below_average.push_back(elem); 584 } else { 585 above_average.push_back(elem); 586 } 587 } 588 589 typename impl_type::alias_table_t::iterator 590 b_iter = below_average.begin(), 591 b_end = below_average.end(), 592 a_iter = above_average.begin(), 593 a_end = above_average.end() 594 ; 595 while(b_iter != b_end && a_iter != a_end) { 596 _impl._alias_table[static_cast<std::size_t>(b_iter->second)] = 597 std::make_pair(b_iter->first, a_iter->second); 598 a_iter->first -= (_impl.get_weight(b_iter->second) - b_iter->first); 599 if(a_iter->first < normalized_average) { 600 *b_iter = *a_iter++; 601 } else { 602 ++b_iter; 603 } 604 } 605 for(; b_iter != b_end; ++b_iter) { 606 _impl._alias_table[static_cast<std::size_t>(b_iter->second)].first = 607 _impl.get_weight(b_iter->second); 608 } 609 for(; a_iter != a_end; ++a_iter) { 610 _impl._alias_table[static_cast<std::size_t>(a_iter->second)].first = 611 _impl.get_weight(a_iter->second); 612 } 613 } 614 template<class Iter> init(Iter first,Iter last)615 void init(Iter first, Iter last) 616 { 617 if(first == last) { 618 _impl.init_empty(); 619 } else { 620 typename std::iterator_traits<Iter>::iterator_category category; 621 init(first, last, category); 622 } 623 } 624 typedef typename detail::select_alias_table< 625 (::boost::is_integral<WeightType>::value) 626 >::template apply<IntType, WeightType>::type impl_type; 627 impl_type _impl; 628 /// @endcond 629 }; 630 631 } 632 } 633 634 #include <boost/random/detail/enable_warnings.hpp> 635 636 #endif 637