1 /////////////////////////////////////////////////////////////////////////////// 2 // weighted_density.hpp 3 // 4 // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost 5 // Software License, Version 1.0. (See accompanying file 6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 8 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006 9 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006 10 11 #include <vector> 12 #include <limits> 13 #include <functional> 14 #include <boost/range.hpp> 15 #include <boost/parameter/keyword.hpp> 16 #include <boost/mpl/placeholders.hpp> 17 #include <boost/accumulators/framework/accumulator_base.hpp> 18 #include <boost/accumulators/framework/extractor.hpp> 19 #include <boost/accumulators/numeric/functional.hpp> 20 #include <boost/accumulators/framework/parameters/sample.hpp> 21 #include <boost/accumulators/statistics_fwd.hpp> 22 #include <boost/accumulators/statistics/sum.hpp> 23 #include <boost/accumulators/statistics/max.hpp> 24 #include <boost/accumulators/statistics/min.hpp> 25 #include <boost/accumulators/statistics/density.hpp> // for named parameters density_cache_size and density_num_bins 26 27 namespace boost { namespace accumulators 28 { 29 30 namespace impl 31 { 32 /////////////////////////////////////////////////////////////////////////////// 33 // weighted_density_impl 34 // density histogram for weighted samples 35 /** 36 @brief Histogram density estimator for weighted samples 37 38 The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins 39 are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the 40 maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally, 41 an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined, 42 the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is 43 returned, where each pair contains the position of the bin (lower bound) and the sum of the weights (normalized with the 44 sum of all weights). 45 46 @param density_cache_size Number of first samples used to determine min and max. 47 @param density_num_bins Number of bins (two additional bins collect under- and overflow samples). 48 */ 49 template<typename Sample, typename Weight> 50 struct weighted_density_impl 51 : accumulator_base 52 { 53 typedef typename numeric::functional::fdiv<Weight, std::size_t>::result_type float_type; 54 typedef std::vector<std::pair<float_type, float_type> > histogram_type; 55 typedef std::vector<float_type> array_type; 56 // for boost::result_of 57 typedef iterator_range<typename histogram_type::iterator> result_type; 58 59 template<typename Args> weighted_density_implboost::accumulators::impl::weighted_density_impl60 weighted_density_impl(Args const &args) 61 : cache_size(args[density_cache_size]) 62 , cache(cache_size) 63 , num_bins(args[density_num_bins]) 64 , samples_in_bin(num_bins + 2, 0.) 65 , bin_positions(num_bins + 2) 66 , histogram( 67 num_bins + 2 68 , std::make_pair( 69 numeric::fdiv(args[sample | Sample()],(std::size_t)1) 70 , numeric::fdiv(args[sample | Sample()],(std::size_t)1) 71 ) 72 ) 73 , is_dirty(true) 74 { 75 } 76 77 template<typename Args> operator ()boost::accumulators::impl::weighted_density_impl78 void operator ()(Args const &args) 79 { 80 this->is_dirty = true; 81 82 std::size_t cnt = count(args); 83 84 // Fill up cache with cache_size first samples 85 if (cnt <= this->cache_size) 86 { 87 this->cache[cnt - 1] = std::make_pair(args[sample], args[weight]); 88 } 89 90 // Once cache_size samples have been accumulated, create num_bins bins of same size between 91 // the minimum and maximum of the cached samples as well as an under- and an overflow bin. 92 // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin). 93 if (cnt == this->cache_size) 94 { 95 float_type minimum = numeric::fdiv((min)(args),(std::size_t)1); 96 float_type maximum = numeric::fdiv((max)(args),(std::size_t)1); 97 float_type bin_size = numeric::fdiv(maximum - minimum, this->num_bins); 98 99 // determine bin positions (their lower bounds) 100 for (std::size_t i = 0; i < this->num_bins + 2; ++i) 101 { 102 this->bin_positions[i] = minimum + (i - 1.) * bin_size; 103 } 104 105 for (typename histogram_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter) 106 { 107 if (iter->first < this->bin_positions[1]) 108 { 109 this->samples_in_bin[0] += iter->second; 110 } 111 else if (iter->first >= this->bin_positions[this->num_bins + 1]) 112 { 113 this->samples_in_bin[this->num_bins + 1] += iter->second; 114 } 115 else 116 { 117 typename array_type::iterator it = std::upper_bound( 118 this->bin_positions.begin() 119 , this->bin_positions.end() 120 , iter->first 121 ); 122 123 std::size_t d = std::distance(this->bin_positions.begin(), it); 124 this->samples_in_bin[d - 1] += iter->second; 125 } 126 } 127 } 128 // Add each subsequent sample to the correct bin 129 else if (cnt > this->cache_size) 130 { 131 if (args[sample] < this->bin_positions[1]) 132 { 133 this->samples_in_bin[0] += args[weight]; 134 } 135 else if (args[sample] >= this->bin_positions[this->num_bins + 1]) 136 { 137 this->samples_in_bin[this->num_bins + 1] += args[weight]; 138 } 139 else 140 { 141 typename array_type::iterator it = std::upper_bound( 142 this->bin_positions.begin() 143 , this->bin_positions.end() 144 , args[sample] 145 ); 146 147 std::size_t d = std::distance(this->bin_positions.begin(), it); 148 this->samples_in_bin[d - 1] += args[weight]; 149 } 150 } 151 } 152 153 template<typename Args> resultboost::accumulators::impl::weighted_density_impl154 result_type result(Args const &args) const 155 { 156 if (this->is_dirty) 157 { 158 this->is_dirty = false; 159 160 // creates a vector of std::pair where each pair i holds 161 // the values bin_positions[i] (x-axis of histogram) and 162 // samples_in_bin[i] / cnt (y-axis of histogram). 163 164 for (std::size_t i = 0; i < this->num_bins + 2; ++i) 165 { 166 this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::fdiv(this->samples_in_bin[i], sum_of_weights(args))); 167 } 168 } 169 170 // returns a range of pairs 171 return make_iterator_range(this->histogram); 172 } 173 174 private: 175 std::size_t cache_size; // number of cached samples 176 histogram_type cache; // cache to store the first cache_size samples with their weights as std::pair 177 std::size_t num_bins; // number of bins 178 array_type samples_in_bin; // number of samples in each bin 179 array_type bin_positions; // lower bounds of bins 180 mutable histogram_type histogram; // histogram 181 mutable bool is_dirty; 182 }; 183 184 } // namespace impl 185 186 /////////////////////////////////////////////////////////////////////////////// 187 // tag::weighted_density 188 // 189 namespace tag 190 { 191 struct weighted_density 192 : depends_on<count, sum_of_weights, min, max> 193 , density_cache_size 194 , density_num_bins 195 { 196 /// INTERNAL ONLY 197 /// 198 typedef accumulators::impl::weighted_density_impl<mpl::_1, mpl::_2> impl; 199 200 #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED 201 static boost::parameter::keyword<density_cache_size> const cache_size; 202 static boost::parameter::keyword<density_num_bins> const num_bins; 203 #endif 204 }; 205 } 206 207 /////////////////////////////////////////////////////////////////////////////// 208 // extract::weighted_density 209 // 210 namespace extract 211 { 212 extractor<tag::density> const weighted_density = {}; 213 214 BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_density) 215 } 216 217 using extract::weighted_density; 218 219 }} // namespace boost::accumulators 220 221 #endif 222