1 ///////////////////////////////////////////////////////////////////////////////
2 // weighted_density.hpp
3 //
4 //  Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
5 //  Software License, Version 1.0. (See accompanying file
6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006
9 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006
10 
11 #include <vector>
12 #include <limits>
13 #include <functional>
14 #include <boost/range.hpp>
15 #include <boost/parameter/keyword.hpp>
16 #include <boost/mpl/placeholders.hpp>
17 #include <boost/accumulators/framework/accumulator_base.hpp>
18 #include <boost/accumulators/framework/extractor.hpp>
19 #include <boost/accumulators/numeric/functional.hpp>
20 #include <boost/accumulators/framework/parameters/sample.hpp>
21 #include <boost/accumulators/statistics_fwd.hpp>
22 #include <boost/accumulators/statistics/sum.hpp>
23 #include <boost/accumulators/statistics/max.hpp>
24 #include <boost/accumulators/statistics/min.hpp>
25 #include <boost/accumulators/statistics/density.hpp> // for named parameters density_cache_size and density_num_bins
26 
27 namespace boost { namespace accumulators
28 {
29 
30 namespace impl
31 {
32     ///////////////////////////////////////////////////////////////////////////////
33     // weighted_density_impl
34     //  density histogram for weighted samples
35     /**
36         @brief Histogram density estimator for weighted samples
37 
38         The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins
39         are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the
40         maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally,
41         an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined,
42         the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is
43         returned, where each pair contains the position of the bin (lower bound) and the sum of the weights (normalized with the
44         sum of all weights).
45 
46         @param density_cache_size Number of first samples used to determine min and max.
47         @param density_num_bins Number of bins (two additional bins collect under- and overflow samples).
48     */
49     template<typename Sample, typename Weight>
50     struct weighted_density_impl
51       : accumulator_base
52     {
53         typedef typename numeric::functional::fdiv<Weight, std::size_t>::result_type float_type;
54         typedef std::vector<std::pair<float_type, float_type> > histogram_type;
55         typedef std::vector<float_type> array_type;
56         // for boost::result_of
57         typedef iterator_range<typename histogram_type::iterator> result_type;
58 
59         template<typename Args>
weighted_density_implboost::accumulators::impl::weighted_density_impl60         weighted_density_impl(Args const &args)
61             : cache_size(args[density_cache_size])
62             , cache(cache_size)
63             , num_bins(args[density_num_bins])
64             , samples_in_bin(num_bins + 2, 0.)
65             , bin_positions(num_bins + 2)
66             , histogram(
67                 num_bins + 2
68               , std::make_pair(
69                     numeric::fdiv(args[sample | Sample()],(std::size_t)1)
70                   , numeric::fdiv(args[sample | Sample()],(std::size_t)1)
71                 )
72               )
73             , is_dirty(true)
74         {
75         }
76 
77         template<typename Args>
operator ()boost::accumulators::impl::weighted_density_impl78         void operator ()(Args const &args)
79         {
80             this->is_dirty = true;
81 
82             std::size_t cnt = count(args);
83 
84             // Fill up cache with cache_size first samples
85             if (cnt <= this->cache_size)
86             {
87                 this->cache[cnt - 1] = std::make_pair(args[sample], args[weight]);
88             }
89 
90             // Once cache_size samples have been accumulated, create num_bins bins of same size between
91             // the minimum and maximum of the cached samples as well as an under- and an overflow bin.
92             // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin).
93             if (cnt == this->cache_size)
94             {
95                 float_type minimum = numeric::fdiv((min)(args),(std::size_t)1);
96                 float_type maximum = numeric::fdiv((max)(args),(std::size_t)1);
97                 float_type bin_size = numeric::fdiv(maximum - minimum, this->num_bins);
98 
99                 // determine bin positions (their lower bounds)
100                 for (std::size_t i = 0; i < this->num_bins + 2; ++i)
101                 {
102                     this->bin_positions[i] = minimum + (i - 1.) * bin_size;
103                 }
104 
105                 for (typename histogram_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter)
106                 {
107                     if (iter->first < this->bin_positions[1])
108                     {
109                         this->samples_in_bin[0] += iter->second;
110                     }
111                     else if (iter->first >= this->bin_positions[this->num_bins + 1])
112                     {
113                         this->samples_in_bin[this->num_bins + 1] += iter->second;
114                     }
115                     else
116                     {
117                         typename array_type::iterator it = std::upper_bound(
118                             this->bin_positions.begin()
119                           , this->bin_positions.end()
120                           , iter->first
121                         );
122 
123                         std::size_t d = std::distance(this->bin_positions.begin(), it);
124                         this->samples_in_bin[d - 1] += iter->second;
125                     }
126                 }
127             }
128             // Add each subsequent sample to the correct bin
129             else if (cnt > this->cache_size)
130             {
131                 if (args[sample] < this->bin_positions[1])
132                 {
133                     this->samples_in_bin[0] += args[weight];
134                 }
135                 else if (args[sample] >= this->bin_positions[this->num_bins + 1])
136                 {
137                     this->samples_in_bin[this->num_bins + 1] += args[weight];
138                 }
139                 else
140                 {
141                     typename array_type::iterator it = std::upper_bound(
142                         this->bin_positions.begin()
143                       , this->bin_positions.end()
144                       , args[sample]
145                     );
146 
147                     std::size_t d = std::distance(this->bin_positions.begin(), it);
148                     this->samples_in_bin[d - 1] += args[weight];
149                 }
150             }
151         }
152 
153         template<typename Args>
resultboost::accumulators::impl::weighted_density_impl154         result_type result(Args const &args) const
155         {
156             if (this->is_dirty)
157             {
158                 this->is_dirty = false;
159 
160                 // creates a vector of std::pair where each pair i holds
161                 // the values bin_positions[i] (x-axis of histogram) and
162                 // samples_in_bin[i] / cnt (y-axis of histogram).
163 
164                 for (std::size_t i = 0; i < this->num_bins + 2; ++i)
165                 {
166                     this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::fdiv(this->samples_in_bin[i], sum_of_weights(args)));
167                 }
168             }
169 
170             // returns a range of pairs
171             return make_iterator_range(this->histogram);
172         }
173 
174     private:
175         std::size_t            cache_size;      // number of cached samples
176         histogram_type         cache;           // cache to store the first cache_size samples with their weights as std::pair
177         std::size_t            num_bins;        // number of bins
178         array_type             samples_in_bin;  // number of samples in each bin
179         array_type             bin_positions;   // lower bounds of bins
180         mutable histogram_type histogram;       // histogram
181         mutable bool is_dirty;
182     };
183 
184 } // namespace impl
185 
186 ///////////////////////////////////////////////////////////////////////////////
187 // tag::weighted_density
188 //
189 namespace tag
190 {
191     struct weighted_density
192       : depends_on<count, sum_of_weights, min, max>
193       , density_cache_size
194       , density_num_bins
195     {
196         /// INTERNAL ONLY
197         ///
198         typedef accumulators::impl::weighted_density_impl<mpl::_1, mpl::_2> impl;
199 
200         #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED
201         static boost::parameter::keyword<density_cache_size> const cache_size;
202         static boost::parameter::keyword<density_num_bins> const num_bins;
203         #endif
204     };
205 }
206 
207 ///////////////////////////////////////////////////////////////////////////////
208 // extract::weighted_density
209 //
210 namespace extract
211 {
212     extractor<tag::density> const weighted_density = {};
213 
214     BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_density)
215 }
216 
217 using extract::weighted_density;
218 
219 }} // namespace boost::accumulators
220 
221 #endif
222