1 ///////////////////////////////////////////////////////////////////////////////
2 // weighted_density.hpp
3 //
4 //  Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
5 //  Software License, Version 1.0. (See accompanying file
6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006
9 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_DENSITY_HPP_DE_01_01_2006
10 
11 #include <vector>
12 #include <limits>
13 #include <functional>
14 #include <boost/range.hpp>
15 #include <boost/parameter/keyword.hpp>
16 #include <boost/mpl/placeholders.hpp>
17 #include <boost/accumulators/framework/accumulator_base.hpp>
18 #include <boost/accumulators/framework/extractor.hpp>
19 #include <boost/accumulators/numeric/functional.hpp>
20 #include <boost/accumulators/framework/parameters/sample.hpp>
21 #include <boost/accumulators/statistics_fwd.hpp>
22 #include <boost/accumulators/statistics/sum.hpp>
23 #include <boost/accumulators/statistics/max.hpp>
24 #include <boost/accumulators/statistics/min.hpp>
25 #include <boost/accumulators/statistics/density.hpp> // for named parameters density_cache_size and density_num_bins
26 #include <boost/serialization/vector.hpp>
27 #include <boost/serialization/utility.hpp>
28 
29 namespace boost { namespace accumulators
30 {
31 
32 namespace impl
33 {
34     ///////////////////////////////////////////////////////////////////////////////
35     // weighted_density_impl
36     //  density histogram for weighted samples
37     /**
38         @brief Histogram density estimator for weighted samples
39 
40         The histogram density estimator returns a histogram of the sample distribution. The positions and sizes of the bins
41         are determined using a specifiable number of cached samples (cache_size). The range between the minimum and the
42         maximum of the cached samples is subdivided into a specifiable number of bins (num_bins) of same size. Additionally,
43         an under- and an overflow bin is added to capture future under- and overflow samples. Once the bins are determined,
44         the cached samples and all subsequent samples are added to the correct bins. At the end, a range of std::pair is
45         returned, where each pair contains the position of the bin (lower bound) and the sum of the weights (normalized with the
46         sum of all weights).
47 
48         @param density_cache_size Number of first samples used to determine min and max.
49         @param density_num_bins Number of bins (two additional bins collect under- and overflow samples).
50     */
51     template<typename Sample, typename Weight>
52     struct weighted_density_impl
53       : accumulator_base
54     {
55         typedef typename numeric::functional::fdiv<Weight, std::size_t>::result_type float_type;
56         typedef std::vector<std::pair<float_type, float_type> > histogram_type;
57         typedef std::vector<float_type> array_type;
58         // for boost::result_of
59         typedef iterator_range<typename histogram_type::iterator> result_type;
60 
61         template<typename Args>
weighted_density_implboost::accumulators::impl::weighted_density_impl62         weighted_density_impl(Args const &args)
63             : cache_size(args[density_cache_size])
64             , cache(cache_size)
65             , num_bins(args[density_num_bins])
66             , samples_in_bin(num_bins + 2, 0.)
67             , bin_positions(num_bins + 2)
68             , histogram(
69                 num_bins + 2
70               , std::make_pair(
71                     numeric::fdiv(args[sample | Sample()],(std::size_t)1)
72                   , numeric::fdiv(args[sample | Sample()],(std::size_t)1)
73                 )
74               )
75             , is_dirty(true)
76         {
77         }
78 
79         template<typename Args>
operator ()boost::accumulators::impl::weighted_density_impl80         void operator ()(Args const &args)
81         {
82             this->is_dirty = true;
83 
84             std::size_t cnt = count(args);
85 
86             // Fill up cache with cache_size first samples
87             if (cnt <= this->cache_size)
88             {
89                 this->cache[cnt - 1] = std::make_pair(args[sample], args[weight]);
90             }
91 
92             // Once cache_size samples have been accumulated, create num_bins bins of same size between
93             // the minimum and maximum of the cached samples as well as an under- and an overflow bin.
94             // Store their lower bounds (bin_positions) and fill the bins with the cached samples (samples_in_bin).
95             if (cnt == this->cache_size)
96             {
97                 float_type minimum = numeric::fdiv((min)(args),(std::size_t)1);
98                 float_type maximum = numeric::fdiv((max)(args),(std::size_t)1);
99                 float_type bin_size = numeric::fdiv(maximum - minimum, this->num_bins);
100 
101                 // determine bin positions (their lower bounds)
102                 for (std::size_t i = 0; i < this->num_bins + 2; ++i)
103                 {
104                     this->bin_positions[i] = minimum + (i - 1.) * bin_size;
105                 }
106 
107                 for (typename histogram_type::const_iterator iter = this->cache.begin(); iter != this->cache.end(); ++iter)
108                 {
109                     if (iter->first < this->bin_positions[1])
110                     {
111                         this->samples_in_bin[0] += iter->second;
112                     }
113                     else if (iter->first >= this->bin_positions[this->num_bins + 1])
114                     {
115                         this->samples_in_bin[this->num_bins + 1] += iter->second;
116                     }
117                     else
118                     {
119                         typename array_type::iterator it = std::upper_bound(
120                             this->bin_positions.begin()
121                           , this->bin_positions.end()
122                           , iter->first
123                         );
124 
125                         std::size_t d = std::distance(this->bin_positions.begin(), it);
126                         this->samples_in_bin[d - 1] += iter->second;
127                     }
128                 }
129             }
130             // Add each subsequent sample to the correct bin
131             else if (cnt > this->cache_size)
132             {
133                 if (args[sample] < this->bin_positions[1])
134                 {
135                     this->samples_in_bin[0] += args[weight];
136                 }
137                 else if (args[sample] >= this->bin_positions[this->num_bins + 1])
138                 {
139                     this->samples_in_bin[this->num_bins + 1] += args[weight];
140                 }
141                 else
142                 {
143                     typename array_type::iterator it = std::upper_bound(
144                         this->bin_positions.begin()
145                       , this->bin_positions.end()
146                       , args[sample]
147                     );
148 
149                     std::size_t d = std::distance(this->bin_positions.begin(), it);
150                     this->samples_in_bin[d - 1] += args[weight];
151                 }
152             }
153         }
154 
155         template<typename Args>
resultboost::accumulators::impl::weighted_density_impl156         result_type result(Args const &args) const
157         {
158             if (this->is_dirty)
159             {
160                 this->is_dirty = false;
161 
162                 // creates a vector of std::pair where each pair i holds
163                 // the values bin_positions[i] (x-axis of histogram) and
164                 // samples_in_bin[i] / cnt (y-axis of histogram).
165 
166                 for (std::size_t i = 0; i < this->num_bins + 2; ++i)
167                 {
168                     this->histogram[i] = std::make_pair(this->bin_positions[i], numeric::fdiv(this->samples_in_bin[i], sum_of_weights(args)));
169                 }
170             }
171 
172             // returns a range of pairs
173             return make_iterator_range(this->histogram);
174         }
175 
176         // make this accumulator serializeable
177         // TODO split to save/load and check on parameters provided in ctor
178         template<class Archive>
serializeboost::accumulators::impl::weighted_density_impl179         void serialize(Archive & ar, const unsigned int file_version)
180         {
181             ar & cache_size;
182             ar & cache;
183             ar & num_bins;
184             ar & samples_in_bin;
185             ar & bin_positions;
186             ar & histogram;
187             ar & is_dirty;
188         }
189 
190     private:
191         std::size_t            cache_size;      // number of cached samples
192         histogram_type         cache;           // cache to store the first cache_size samples with their weights as std::pair
193         std::size_t            num_bins;        // number of bins
194         array_type             samples_in_bin;  // number of samples in each bin
195         array_type             bin_positions;   // lower bounds of bins
196         mutable histogram_type histogram;       // histogram
197         mutable bool is_dirty;
198     };
199 
200 } // namespace impl
201 
202 ///////////////////////////////////////////////////////////////////////////////
203 // tag::weighted_density
204 //
205 namespace tag
206 {
207     struct weighted_density
208       : depends_on<count, sum_of_weights, min, max>
209       , density_cache_size
210       , density_num_bins
211     {
212         /// INTERNAL ONLY
213         ///
214         typedef accumulators::impl::weighted_density_impl<mpl::_1, mpl::_2> impl;
215 
216         #ifdef BOOST_ACCUMULATORS_DOXYGEN_INVOKED
217         static boost::parameter::keyword<density_cache_size> const cache_size;
218         static boost::parameter::keyword<density_num_bins> const num_bins;
219         #endif
220     };
221 }
222 
223 ///////////////////////////////////////////////////////////////////////////////
224 // extract::weighted_density
225 //
226 namespace extract
227 {
228     extractor<tag::density> const weighted_density = {};
229 
230     BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_density)
231 }
232 
233 using extract::weighted_density;
234 
235 }} // namespace boost::accumulators
236 
237 #endif
238