1 2 /////////////////////////////////////////////////////////////////////////////// 3 // density.hpp 4 // 5 // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost 6 // Software License, Version 1.0. (See accompanying file 7 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 8 9 #ifndef BOOST_ACCUMULATORS_STATISTICS_DENSITY_HPP_DE_01_01_2006 10 #define BOOST_ACCUMULATORS_STATISTICS_DENSITY_HPP_DE_01_01_2006 11 12 #include <vector> 13 #include <limits> 14 #include <functional> 15 #include <boost/range.hpp> 16 #include <boost/parameter/keyword.hpp> 17 #include <boost/mpl/placeholders.hpp> 18 #include <boost/accumulators/accumulators_fwd.hpp> 19 #include <boost/accumulators/framework/accumulator_base.hpp> 20 #include <boost/accumulators/framework/extractor.hpp> 21 #include <boost/accumulators/numeric/functional.hpp> 22 #include <boost/accumulators/framework/parameters/sample.hpp> 23 #include <boost/accumulators/framework/depends_on.hpp> 24 #include <boost/accumulators/statistics_fwd.hpp> 25 #include <boost/accumulators/statistics/count.hpp> 26 #include <boost/accumulators/statistics/max.hpp> 27 #include <boost/accumulators/statistics/min.hpp> 28 29 namespace boost { namespace accumulators 30 { 31 32 /////////////////////////////////////////////////////////////////////////////// 33 // cache_size and num_bins named parameters 34 // 35 BOOST_PARAMETER_NESTED_KEYWORD(tag, density_cache_size, cache_size) 36 BOOST_PARAMETER_NESTED_KEYWORD(tag, density_num_bins, num_bins) 37 38 BOOST_ACCUMULATORS_IGNORE_GLOBAL(density_cache_size) 39 BOOST_ACCUMULATORS_IGNORE_GLOBAL(density_num_bins) 40 41 namespace impl 42 { 43 /////////////////////////////////////////////////////////////////////////////// 44 // density_impl 45 // density histogram 46 /** 47 @brief Histogram density estimator 48 49 The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins 50 are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the 51 maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally, 52 an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined, 53 the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is 54 return, where each pair contains the position of the bin (lower bound) and the samples count (normalized with the 55 total number of samples). 56 57 @param density_cache_size Number of first samples used to determine min and max. 58 @param density_num_bins Number of bins (two additional bins collect under- and overflow samples). 59 */ 60 template<typename Sample> 61 struct density_impl 62 : accumulator_base 63 { 64 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; 65 typedef std::vector<std::pair<float_type, float_type> > histogram_type; 66 typedef std::vector<float_type> array_type; 67 // for boost::result_of 68 typedef iterator_range<typename histogram_type::iterator> result_type; 69 70 template<typename Args> density_implboost::accumulators::impl::density_impl71 density_impl(Args const &args) 72 : cache_size(args[density_cache_size]) 73 , cache(cache_size) 74 , num_bins(args[density_num_bins]) 75 , samples_in_bin(num_bins + 2, 0.) 76 , bin_positions(num_bins + 2) 77 , histogram( 78 num_bins + 2 79 , std::make_pair( 80 numeric::fdiv(args[sample | Sample()],(std::size_t)1) 81 , numeric::fdiv(args[sample | Sample()],(std::size_t)1) 82 ) 83 ) 84 , is_dirty(true) 85 { 86 } 87 88 template<typename Args> operator ()boost::accumulators::impl::density_impl89 void operator ()(Args const &args) 90 { 91 this->is_dirty = true; 92 93 std::size_t cnt = count(args); 94 95 // Fill up cache with cache_size first samples 96 if (cnt <= this->cache_size) 97 { 98 this->cache[cnt - 1] = args[sample]; 99 } 100 101 // Once cache_size samples have been accumulated, create num_bins bins of same size between 102 // the minimum and maximum of the cached samples as well as under and overflow bins. 103 // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin). 104 if (cnt == this->cache_size) 105 { 106 float_type minimum = numeric::fdiv((min)(args), (std::size_t)1); 107 float_type maximum = numeric::fdiv((max)(args), (std::size_t)1); 108 float_type bin_size = numeric::fdiv(maximum - minimum, this->num_bins ); 109 110 // determine bin positions (their lower bounds) 111 for (std::size_t i = 0; i < this->num_bins + 2; ++i) 112 { 113 this->bin_positions[i] = minimum + (i - 1.) * bin_size; 114 } 115 116 for (typename array_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter) 117 { 118 if (*iter < this->bin_positions[1]) 119 { 120 ++(this->samples_in_bin[0]); 121 } 122 else if (*iter >= this->bin_positions[this->num_bins + 1]) 123 { 124 ++(this->samples_in_bin[this->num_bins + 1]); 125 } 126 else 127 { 128 typename array_type::iterator it = std::upper_bound( 129 this->bin_positions.begin() 130 , this->bin_positions.end() 131 , *iter 132 ); 133 134 std::size_t d = std::distance(this->bin_positions.begin(), it); 135 ++(this->samples_in_bin[d - 1]); 136 } 137 } 138 } 139 // Add each subsequent sample to the correct bin 140 else if (cnt > this->cache_size) 141 { 142 if (args[sample] < this->bin_positions[1]) 143 { 144 ++(this->samples_in_bin[0]); 145 } 146 else if (args[sample] >= this->bin_positions[this->num_bins + 1]) 147 { 148 ++(this->samples_in_bin[this->num_bins + 1]); 149 } 150 else 151 { 152 typename array_type::iterator it = std::upper_bound( 153 this->bin_positions.begin() 154 , this->bin_positions.end() 155 , args[sample] 156 ); 157 158 std::size_t d = std::distance(this->bin_positions.begin(), it); 159 ++(this->samples_in_bin[d - 1]); 160 } 161 } 162 } 163 164 /** 165 @pre The number of samples must meet or exceed the cache size 166 */ 167 template<typename Args> resultboost::accumulators::impl::density_impl168 result_type result(Args const &args) const 169 { 170 if (this->is_dirty) 171 { 172 this->is_dirty = false; 173 174 // creates a vector of std::pair where each pair i holds 175 // the values bin_positions[i] (x-axis of histogram) and 176 // samples_in_bin[i] / cnt (y-axis of histogram). 177 178 for (std::size_t i = 0; i < this->num_bins + 2; ++i) 179 { 180 this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::fdiv(this->samples_in_bin[i], count(args))); 181 } 182 } 183 // returns a range of pairs 184 return make_iterator_range(this->histogram); 185 } 186 187 private: 188 std::size_t cache_size; // number of cached samples 189 array_type cache; // cache to store the first cache_size samples 190 std::size_t num_bins; // number of bins 191 array_type samples_in_bin; // number of samples in each bin 192 array_type bin_positions; // lower bounds of bins 193 mutable histogram_type histogram; // histogram 194 mutable bool is_dirty; 195 }; 196 197 } // namespace impl 198 199 /////////////////////////////////////////////////////////////////////////////// 200 // tag::density 201 // 202 namespace tag 203 { 204 struct density 205 : depends_on<count, min, max> 206 , density_cache_size 207 , density_num_bins 208 { 209 /// INTERNAL ONLY 210 /// 211 typedef accumulators::impl::density_impl<mpl::_1> impl; 212 213 #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED 214 /// tag::density::cache_size named parameter 215 /// tag::density::num_bins named parameter 216 static boost::parameter::keyword<density_cache_size> const cache_size; 217 static boost::parameter::keyword<density_num_bins> const num_bins; 218 #endif 219 }; 220 } 221 222 /////////////////////////////////////////////////////////////////////////////// 223 // extract::density 224 // 225 namespace extract 226 { 227 extractor<tag::density> const density = {}; 228 229 BOOST_ACCUMULATORS_IGNORE_GLOBAL(density) 230 } 231 232 using extract::density; 233 234 // So that density can be automatically substituted 235 // with weighted_density when the weight parameter is non-void. 236 template<> 237 struct as_weighted_feature<tag::density> 238 { 239 typedef tag::weighted_density type; 240 }; 241 242 template<> 243 struct feature_of<tag::weighted_density> 244 : feature_of<tag::density> 245 { 246 }; 247 248 }} // namespace boost::accumulators 249 250 #endif 251