1 2 /////////////////////////////////////////////////////////////////////////////// 3 // density.hpp 4 // 5 // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost 6 // Software License, Version 1.0. (See accompanying file 7 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 8 9 #ifndef BOOST_ACCUMULATORS_STATISTICS_DENSITY_HPP_DE_01_01_2006 10 #define BOOST_ACCUMULATORS_STATISTICS_DENSITY_HPP_DE_01_01_2006 11 12 #include <vector> 13 #include <limits> 14 #include <functional> 15 #include <boost/range.hpp> 16 #include <boost/parameter/keyword.hpp> 17 #include <boost/mpl/placeholders.hpp> 18 #include <boost/accumulators/accumulators_fwd.hpp> 19 #include <boost/accumulators/framework/accumulator_base.hpp> 20 #include <boost/accumulators/framework/extractor.hpp> 21 #include <boost/accumulators/numeric/functional.hpp> 22 #include <boost/accumulators/framework/parameters/sample.hpp> 23 #include <boost/accumulators/framework/depends_on.hpp> 24 #include <boost/accumulators/statistics_fwd.hpp> 25 #include <boost/accumulators/statistics/count.hpp> 26 #include <boost/accumulators/statistics/max.hpp> 27 #include <boost/accumulators/statistics/min.hpp> 28 #include <boost/serialization/vector.hpp> 29 #include <boost/serialization/utility.hpp> 30 31 namespace boost { namespace accumulators 32 { 33 34 /////////////////////////////////////////////////////////////////////////////// 35 // cache_size and num_bins named parameters 36 // 37 BOOST_PARAMETER_NESTED_KEYWORD(tag, density_cache_size, cache_size) 38 BOOST_PARAMETER_NESTED_KEYWORD(tag, density_num_bins, num_bins) 39 40 BOOST_ACCUMULATORS_IGNORE_GLOBAL(density_cache_size) 41 BOOST_ACCUMULATORS_IGNORE_GLOBAL(density_num_bins) 42 43 namespace impl 44 { 45 /////////////////////////////////////////////////////////////////////////////// 46 // density_impl 47 // density histogram 48 /** 49 @brief Histogram density estimator 50 51 The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins 52 are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the 53 maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally, 54 an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined, 55 the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is 56 return, where each pair contains the position of the bin (lower bound) and the samples count (normalized with the 57 total number of samples). 58 59 @param density_cache_size Number of first samples used to determine min and max. 60 @param density_num_bins Number of bins (two additional bins collect under- and overflow samples). 61 */ 62 template<typename Sample> 63 struct density_impl 64 : accumulator_base 65 { 66 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; 67 typedef std::vector<std::pair<float_type, float_type> > histogram_type; 68 typedef std::vector<float_type> array_type; 69 // for boost::result_of 70 typedef iterator_range<typename histogram_type::iterator> result_type; 71 72 template<typename Args> density_implboost::accumulators::impl::density_impl73 density_impl(Args const &args) 74 : cache_size(args[density_cache_size]) 75 , cache(cache_size) 76 , num_bins(args[density_num_bins]) 77 , samples_in_bin(num_bins + 2, 0.) 78 , bin_positions(num_bins + 2) 79 , histogram( 80 num_bins + 2 81 , std::make_pair( 82 numeric::fdiv(args[sample | Sample()],(std::size_t)1) 83 , numeric::fdiv(args[sample | Sample()],(std::size_t)1) 84 ) 85 ) 86 , is_dirty(true) 87 { 88 } 89 90 template<typename Args> operator ()boost::accumulators::impl::density_impl91 void operator ()(Args const &args) 92 { 93 this->is_dirty = true; 94 95 std::size_t cnt = count(args); 96 97 // Fill up cache with cache_size first samples 98 if (cnt <= this->cache_size) 99 { 100 this->cache[cnt - 1] = args[sample]; 101 } 102 103 // Once cache_size samples have been accumulated, create num_bins bins of same size between 104 // the minimum and maximum of the cached samples as well as under and overflow bins. 105 // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin). 106 if (cnt == this->cache_size) 107 { 108 float_type minimum = numeric::fdiv((min)(args), (std::size_t)1); 109 float_type maximum = numeric::fdiv((max)(args), (std::size_t)1); 110 float_type bin_size = numeric::fdiv(maximum - minimum, this->num_bins ); 111 112 // determine bin positions (their lower bounds) 113 for (std::size_t i = 0; i < this->num_bins + 2; ++i) 114 { 115 this->bin_positions[i] = minimum + (i - 1.) * bin_size; 116 } 117 118 for (typename array_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter) 119 { 120 if (*iter < this->bin_positions[1]) 121 { 122 ++(this->samples_in_bin[0]); 123 } 124 else if (*iter >= this->bin_positions[this->num_bins + 1]) 125 { 126 ++(this->samples_in_bin[this->num_bins + 1]); 127 } 128 else 129 { 130 typename array_type::iterator it = std::upper_bound( 131 this->bin_positions.begin() 132 , this->bin_positions.end() 133 , *iter 134 ); 135 136 std::size_t d = std::distance(this->bin_positions.begin(), it); 137 ++(this->samples_in_bin[d - 1]); 138 } 139 } 140 } 141 // Add each subsequent sample to the correct bin 142 else if (cnt > this->cache_size) 143 { 144 if (args[sample] < this->bin_positions[1]) 145 { 146 ++(this->samples_in_bin[0]); 147 } 148 else if (args[sample] >= this->bin_positions[this->num_bins + 1]) 149 { 150 ++(this->samples_in_bin[this->num_bins + 1]); 151 } 152 else 153 { 154 typename array_type::iterator it = std::upper_bound( 155 this->bin_positions.begin() 156 , this->bin_positions.end() 157 , args[sample] 158 ); 159 160 std::size_t d = std::distance(this->bin_positions.begin(), it); 161 ++(this->samples_in_bin[d - 1]); 162 } 163 } 164 } 165 166 /** 167 @pre The number of samples must meet or exceed the cache size 168 */ 169 template<typename Args> resultboost::accumulators::impl::density_impl170 result_type result(Args const &args) const 171 { 172 if (this->is_dirty) 173 { 174 this->is_dirty = false; 175 176 // creates a vector of std::pair where each pair i holds 177 // the values bin_positions[i] (x-axis of histogram) and 178 // samples_in_bin[i] / cnt (y-axis of histogram). 179 180 for (std::size_t i = 0; i < this->num_bins + 2; ++i) 181 { 182 this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::fdiv(this->samples_in_bin[i], count(args))); 183 } 184 } 185 // returns a range of pairs 186 return make_iterator_range(this->histogram); 187 } 188 189 // make this accumulator serializeable 190 // TODO split to save/load and check on parameters provided in ctor 191 template<class Archive> serializeboost::accumulators::impl::density_impl192 void serialize(Archive & ar, const unsigned int file_version) 193 { 194 ar & cache_size; 195 ar & cache; 196 ar & num_bins; 197 ar & samples_in_bin; 198 ar & bin_positions; 199 ar & histogram; 200 ar & is_dirty; 201 } 202 203 private: 204 std::size_t cache_size; // number of cached samples 205 array_type cache; // cache to store the first cache_size samples 206 std::size_t num_bins; // number of bins 207 array_type samples_in_bin; // number of samples in each bin 208 array_type bin_positions; // lower bounds of bins 209 mutable histogram_type histogram; // histogram 210 mutable bool is_dirty; 211 }; 212 213 } // namespace impl 214 215 /////////////////////////////////////////////////////////////////////////////// 216 // tag::density 217 // 218 namespace tag 219 { 220 struct density 221 : depends_on<count, min, max> 222 , density_cache_size 223 , density_num_bins 224 { 225 /// INTERNAL ONLY 226 /// 227 typedef accumulators::impl::density_impl<mpl::_1> impl; 228 229 #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED 230 /// tag::density::cache_size named parameter 231 /// tag::density::num_bins named parameter 232 static boost::parameter::keyword<density_cache_size> const cache_size; 233 static boost::parameter::keyword<density_num_bins> const num_bins; 234 #endif 235 }; 236 } 237 238 /////////////////////////////////////////////////////////////////////////////// 239 // extract::density 240 // 241 namespace extract 242 { 243 extractor<tag::density> const density = {}; 244 245 BOOST_ACCUMULATORS_IGNORE_GLOBAL(density) 246 } 247 248 using extract::density; 249 250 // So that density can be automatically substituted 251 // with weighted_density when the weight parameter is non-void. 252 template<> 253 struct as_weighted_feature<tag::density> 254 { 255 typedef tag::weighted_density type; 256 }; 257 258 template<> 259 struct feature_of<tag::weighted_density> 260 : feature_of<tag::density> 261 { 262 }; 263 264 }} // namespace boost::accumulators 265 266 #endif 267