1 ///////////////////////////////////////////////////////////////////////////////
2 // weighted_median.hpp
3 //
4 //  Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost
5 //  Software License, Version 1.0. (See accompanying file
6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005
9 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005
10 
11 #include <boost/mpl/placeholders.hpp>
12 #include <boost/range/iterator_range.hpp>
13 #include <boost/accumulators/framework/accumulator_base.hpp>
14 #include <boost/accumulators/framework/extractor.hpp>
15 #include <boost/accumulators/numeric/functional.hpp>
16 #include <boost/accumulators/framework/parameters/sample.hpp>
17 #include <boost/accumulators/framework/depends_on.hpp>
18 #include <boost/accumulators/statistics_fwd.hpp>
19 #include <boost/accumulators/statistics/count.hpp>
20 #include <boost/accumulators/statistics/median.hpp>
21 #include <boost/accumulators/statistics/weighted_p_square_quantile.hpp>
22 #include <boost/accumulators/statistics/weighted_density.hpp>
23 #include <boost/accumulators/statistics/weighted_p_square_cumul_dist.hpp>
24 
25 namespace boost { namespace accumulators
26 {
27 
28 namespace impl
29 {
30     ///////////////////////////////////////////////////////////////////////////////
31     // weighted_median_impl
32     //
33     /**
34         @brief Median estimation for weighted samples based on the \f$P^2\f$ quantile estimator
35 
36         The \f$P^2\f$ algorithm for weighted samples is invoked with a quantile probability of 0.5.
37     */
38     template<typename Sample>
39     struct weighted_median_impl
40       : accumulator_base
41     {
42         // for boost::result_of
43         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type result_type;
44 
weighted_median_implboost::accumulators::impl::weighted_median_impl45         weighted_median_impl(dont_care) {}
46 
47         template<typename Args>
resultboost::accumulators::impl::weighted_median_impl48         result_type result(Args const &args) const
49         {
50             return weighted_p_square_quantile_for_median(args);
51         }
52     };
53 
54     ///////////////////////////////////////////////////////////////////////////////
55     // with_density_weighted_median_impl
56     //
57     /**
58         @brief Median estimation for weighted samples based on the density estimator
59 
60         The algorithm determines the bin in which the \f$0.5*cnt\f$-th sample lies, \f$cnt\f$ being
61         the total number of samples. It returns the approximate horizontal position of this sample,
62         based on a linear interpolation inside the bin.
63     */
64     template<typename Sample>
65     struct with_density_weighted_median_impl
66       : accumulator_base
67     {
68         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
69         typedef std::vector<std::pair<float_type, float_type> > histogram_type;
70         typedef iterator_range<typename histogram_type::iterator> range_type;
71         // for boost::result_of
72         typedef float_type result_type;
73 
74         template<typename Args>
with_density_weighted_median_implboost::accumulators::impl::with_density_weighted_median_impl75         with_density_weighted_median_impl(Args const &args)
76           : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
77           , is_dirty(true)
78         {
79         }
80 
operator ()boost::accumulators::impl::with_density_weighted_median_impl81         void operator ()(dont_care)
82         {
83             this->is_dirty = true;
84         }
85 
86         template<typename Args>
resultboost::accumulators::impl::with_density_weighted_median_impl87         result_type result(Args const &args) const
88         {
89             if (this->is_dirty)
90             {
91                 this->is_dirty = false;
92 
93                 std::size_t cnt = count(args);
94                 range_type histogram = weighted_density(args);
95                 typename range_type::iterator it = histogram.begin();
96                 while (this->sum < 0.5 * cnt)
97                 {
98                     this->sum += it->second * cnt;
99                     ++it;
100                 }
101                 --it;
102                 float_type over = numeric::fdiv(this->sum - 0.5 * cnt, it->second * cnt);
103                 this->median = it->first * over + (it + 1)->first * ( 1. - over );
104             }
105 
106             return this->median;
107         }
108 
109         // make this accumulator serializeable
110         template<class Archive>
serializeboost::accumulators::impl::with_density_weighted_median_impl111         void serialize(Archive & ar, const unsigned int file_version)
112         {
113             ar & sum;
114             ar & is_dirty;
115             ar & median;
116         }
117 
118     private:
119         mutable float_type sum;
120         mutable bool is_dirty;
121         mutable float_type median;
122     };
123 
124     ///////////////////////////////////////////////////////////////////////////////
125     // with_p_square_cumulative_distribution_weighted_median_impl
126     //
127     /**
128         @brief Median estimation for weighted samples based on the \f$P^2\f$ cumulative distribution estimator
129 
130         The algorithm determines the first (leftmost) bin with a height exceeding 0.5. It
131         returns the approximate horizontal position of where the cumulative distribution
132         equals 0.5, based on a linear interpolation inside the bin.
133     */
134     template<typename Sample, typename Weight>
135     struct with_p_square_cumulative_distribution_weighted_median_impl
136       : accumulator_base
137     {
138         typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample;
139         typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
140         typedef std::vector<std::pair<float_type, float_type> > histogram_type;
141         typedef iterator_range<typename histogram_type::iterator> range_type;
142         // for boost::result_of
143         typedef float_type result_type;
144 
with_p_square_cumulative_distribution_weighted_median_implboost::accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl145         with_p_square_cumulative_distribution_weighted_median_impl(dont_care)
146           : is_dirty(true)
147         {
148         }
149 
operator ()boost::accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl150         void operator ()(dont_care)
151         {
152             this->is_dirty = true;
153         }
154 
155         template<typename Args>
resultboost::accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl156         result_type result(Args const &args) const
157         {
158             if (this->is_dirty)
159             {
160                 this->is_dirty = false;
161 
162                 range_type histogram = weighted_p_square_cumulative_distribution(args);
163                 typename range_type::iterator it = histogram.begin();
164                 while (it->second < 0.5)
165                 {
166                     ++it;
167                 }
168                 float_type over = numeric::fdiv(it->second - 0.5, it->second - (it - 1)->second);
169                 this->median = it->first * over + (it + 1)->first * ( 1. - over );
170             }
171 
172             return this->median;
173         }
174 
175         // make this accumulator serializeable
176         template<class Archive>
serializeboost::accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl177         void serialize(Archive & ar, const unsigned int file_version)
178         {
179             ar & is_dirty;
180             ar & median;
181         }
182 
183     private:
184         mutable bool is_dirty;
185         mutable float_type median;
186     };
187 
188 } // namespace impl
189 
190 ///////////////////////////////////////////////////////////////////////////////
191 // tag::weighted_median
192 // tag::with_density_weighted_median
193 // tag::with_p_square_cumulative_distribution_weighted_median
194 //
195 namespace tag
196 {
197     struct weighted_median
198       : depends_on<weighted_p_square_quantile_for_median>
199     {
200         /// INTERNAL ONLY
201         ///
202         typedef accumulators::impl::weighted_median_impl<mpl::_1> impl;
203     };
204     struct with_density_weighted_median
205       : depends_on<count, weighted_density>
206     {
207         /// INTERNAL ONLY
208         ///
209         typedef accumulators::impl::with_density_weighted_median_impl<mpl::_1> impl;
210     };
211     struct with_p_square_cumulative_distribution_weighted_median
212       : depends_on<weighted_p_square_cumulative_distribution>
213     {
214         /// INTERNAL ONLY
215         ///
216         typedef accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl<mpl::_1, mpl::_2> impl;
217     };
218 
219 }
220 
221 ///////////////////////////////////////////////////////////////////////////////
222 // extract::weighted_median
223 //
224 namespace extract
225 {
226     extractor<tag::median> const weighted_median = {};
227 
228     BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_median)
229 }
230 
231 using extract::weighted_median;
232 // weighted_median(with_p_square_quantile) -> weighted_median
233 template<>
234 struct as_feature<tag::weighted_median(with_p_square_quantile)>
235 {
236     typedef tag::weighted_median type;
237 };
238 
239 // weighted_median(with_density) -> with_density_weighted_median
240 template<>
241 struct as_feature<tag::weighted_median(with_density)>
242 {
243     typedef tag::with_density_weighted_median type;
244 };
245 
246 // weighted_median(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_weighted_median
247 template<>
248 struct as_feature<tag::weighted_median(with_p_square_cumulative_distribution)>
249 {
250     typedef tag::with_p_square_cumulative_distribution_weighted_median type;
251 };
252 
253 }} // namespace boost::accumulators
254 
255 #endif
256