1 /////////////////////////////////////////////////////////////////////////////// 2 // weighted_density.hpp 3 // 4 // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost 5 // Software License, Version 1.0. (See accompanying file 6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 8 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006 9 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006 10 11 #include <vector> 12 #include <limits> 13 #include <functional> 14 #include <boost/range.hpp> 15 #include <boost/parameter/keyword.hpp> 16 #include <boost/mpl/placeholders.hpp> 17 #include <boost/accumulators/framework/accumulator_base.hpp> 18 #include <boost/accumulators/framework/extractor.hpp> 19 #include <boost/accumulators/numeric/functional.hpp> 20 #include <boost/accumulators/framework/parameters/sample.hpp> 21 #include <boost/accumulators/statistics_fwd.hpp> 22 #include <boost/accumulators/statistics/sum.hpp> 23 #include <boost/accumulators/statistics/max.hpp> 24 #include <boost/accumulators/statistics/min.hpp> 25 #include <boost/accumulators/statistics/density.hpp> // for named parameters density_cache_size and density_num_bins 26 #include <boost/serialization/vector.hpp> 27 #include <boost/serialization/utility.hpp> 28 29 namespace boost { namespace accumulators 30 { 31 32 namespace impl 33 { 34 /////////////////////////////////////////////////////////////////////////////// 35 // weighted_density_impl 36 // density histogram for weighted samples 37 /** 38 @brief Histogram density estimator for weighted samples 39 40 The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins 41 are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the 42 maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally, 43 an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined, 44 the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is 45 returned, where each pair contains the position of the bin (lower bound) and the sum of the weights (normalized with the 46 sum of all weights). 47 48 @param density_cache_size Number of first samples used to determine min and max. 49 @param density_num_bins Number of bins (two additional bins collect under- and overflow samples). 50 */ 51 template<typename Sample, typename Weight> 52 struct weighted_density_impl 53 : accumulator_base 54 { 55 typedef typename numeric::functional::fdiv<Weight, std::size_t>::result_type float_type; 56 typedef std::vector<std::pair<float_type, float_type> > histogram_type; 57 typedef std::vector<float_type> array_type; 58 // for boost::result_of 59 typedef iterator_range<typename histogram_type::iterator> result_type; 60 61 template<typename Args> weighted_density_implboost::accumulators::impl::weighted_density_impl62 weighted_density_impl(Args const &args) 63 : cache_size(args[density_cache_size]) 64 , cache(cache_size) 65 , num_bins(args[density_num_bins]) 66 , samples_in_bin(num_bins + 2, 0.) 67 , bin_positions(num_bins + 2) 68 , histogram( 69 num_bins + 2 70 , std::make_pair( 71 numeric::fdiv(args[sample | Sample()],(std::size_t)1) 72 , numeric::fdiv(args[sample | Sample()],(std::size_t)1) 73 ) 74 ) 75 , is_dirty(true) 76 { 77 } 78 79 template<typename Args> operator ()boost::accumulators::impl::weighted_density_impl80 void operator ()(Args const &args) 81 { 82 this->is_dirty = true; 83 84 std::size_t cnt = count(args); 85 86 // Fill up cache with cache_size first samples 87 if (cnt <= this->cache_size) 88 { 89 this->cache[cnt - 1] = std::make_pair(args[sample], args[weight]); 90 } 91 92 // Once cache_size samples have been accumulated, create num_bins bins of same size between 93 // the minimum and maximum of the cached samples as well as an under- and an overflow bin. 94 // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin). 95 if (cnt == this->cache_size) 96 { 97 float_type minimum = numeric::fdiv((min)(args),(std::size_t)1); 98 float_type maximum = numeric::fdiv((max)(args),(std::size_t)1); 99 float_type bin_size = numeric::fdiv(maximum - minimum, this->num_bins); 100 101 // determine bin positions (their lower bounds) 102 for (std::size_t i = 0; i < this->num_bins + 2; ++i) 103 { 104 this->bin_positions[i] = minimum + (i - 1.) * bin_size; 105 } 106 107 for (typename histogram_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter) 108 { 109 if (iter->first < this->bin_positions[1]) 110 { 111 this->samples_in_bin[0] += iter->second; 112 } 113 else if (iter->first >= this->bin_positions[this->num_bins + 1]) 114 { 115 this->samples_in_bin[this->num_bins + 1] += iter->second; 116 } 117 else 118 { 119 typename array_type::iterator it = std::upper_bound( 120 this->bin_positions.begin() 121 , this->bin_positions.end() 122 , iter->first 123 ); 124 125 std::size_t d = std::distance(this->bin_positions.begin(), it); 126 this->samples_in_bin[d - 1] += iter->second; 127 } 128 } 129 } 130 // Add each subsequent sample to the correct bin 131 else if (cnt > this->cache_size) 132 { 133 if (args[sample] < this->bin_positions[1]) 134 { 135 this->samples_in_bin[0] += args[weight]; 136 } 137 else if (args[sample] >= this->bin_positions[this->num_bins + 1]) 138 { 139 this->samples_in_bin[this->num_bins + 1] += args[weight]; 140 } 141 else 142 { 143 typename array_type::iterator it = std::upper_bound( 144 this->bin_positions.begin() 145 , this->bin_positions.end() 146 , args[sample] 147 ); 148 149 std::size_t d = std::distance(this->bin_positions.begin(), it); 150 this->samples_in_bin[d - 1] += args[weight]; 151 } 152 } 153 } 154 155 template<typename Args> resultboost::accumulators::impl::weighted_density_impl156 result_type result(Args const &args) const 157 { 158 if (this->is_dirty) 159 { 160 this->is_dirty = false; 161 162 // creates a vector of std::pair where each pair i holds 163 // the values bin_positions[i] (x-axis of histogram) and 164 // samples_in_bin[i] / cnt (y-axis of histogram). 165 166 for (std::size_t i = 0; i < this->num_bins + 2; ++i) 167 { 168 this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::fdiv(this->samples_in_bin[i], sum_of_weights(args))); 169 } 170 } 171 172 // returns a range of pairs 173 return make_iterator_range(this->histogram); 174 } 175 176 // make this accumulator serializeable 177 // TODO split to save/load and check on parameters provided in ctor 178 template<class Archive> serializeboost::accumulators::impl::weighted_density_impl179 void serialize(Archive & ar, const unsigned int file_version) 180 { 181 ar & cache_size; 182 ar & cache; 183 ar & num_bins; 184 ar & samples_in_bin; 185 ar & bin_positions; 186 ar & histogram; 187 ar & is_dirty; 188 } 189 190 private: 191 std::size_t cache_size; // number of cached samples 192 histogram_type cache; // cache to store the first cache_size samples with their weights as std::pair 193 std::size_t num_bins; // number of bins 194 array_type samples_in_bin; // number of samples in each bin 195 array_type bin_positions; // lower bounds of bins 196 mutable histogram_type histogram; // histogram 197 mutable bool is_dirty; 198 }; 199 200 } // namespace impl 201 202 /////////////////////////////////////////////////////////////////////////////// 203 // tag::weighted_density 204 // 205 namespace tag 206 { 207 struct weighted_density 208 : depends_on<count, sum_of_weights, min, max> 209 , density_cache_size 210 , density_num_bins 211 { 212 /// INTERNAL ONLY 213 /// 214 typedef accumulators::impl::weighted_density_impl<mpl::_1, mpl::_2> impl; 215 216 #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED 217 static boost::parameter::keyword<density_cache_size> const cache_size; 218 static boost::parameter::keyword<density_num_bins> const num_bins; 219 #endif 220 }; 221 } 222 223 /////////////////////////////////////////////////////////////////////////////// 224 // extract::weighted_density 225 // 226 namespace extract 227 { 228 extractor<tag::density> const weighted_density = {}; 229 230 BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_density) 231 } 232 233 using extract::weighted_density; 234 235 }} // namespace boost::accumulators 236 237 #endif 238