1 ///////////////////////////////////////////////////////////////////////////////
2 // median.hpp
3 //
4 //  Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost
5 //  Software License, Version 1.0. (See accompanying file
6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #ifndef BOOST_ACCUMULATORS_STATISTICS_MEDIAN_HPP_EAN_28_10_2005
9 #define BOOST_ACCUMULATORS_STATISTICS_MEDIAN_HPP_EAN_28_10_2005
10 
11 #include <boost/mpl/placeholders.hpp>
12 #include <boost/range/iterator_range.hpp>
13 #include <boost/accumulators/framework/accumulator_base.hpp>
14 #include <boost/accumulators/framework/extractor.hpp>
15 #include <boost/accumulators/numeric/functional.hpp>
16 #include <boost/accumulators/framework/parameters/sample.hpp>
17 #include <boost/accumulators/framework/depends_on.hpp>
18 #include <boost/accumulators/statistics_fwd.hpp>
19 #include <boost/accumulators/statistics/count.hpp>
20 #include <boost/accumulators/statistics/p_square_quantile.hpp>
21 #include <boost/accumulators/statistics/density.hpp>
22 #include <boost/accumulators/statistics/p_square_cumul_dist.hpp>
23 
24 namespace boost { namespace accumulators
25 {
26 
27 namespace impl
28 {
29     ///////////////////////////////////////////////////////////////////////////////
30     // median_impl
31     //
32     /**
33         @brief Median estimation based on the \f$P^2\f$ quantile estimator
34 
35         The \f$P^2\f$ algorithm is invoked with a quantile probability of 0.5.
36     */
37     template<typename Sample>
38     struct median_impl
39       : accumulator_base
40     {
41         // for boost::result_of
42         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type result_type;
43 
median_implboost::accumulators::impl::median_impl44         median_impl(dont_care) {}
45 
46         template<typename Args>
resultboost::accumulators::impl::median_impl47         result_type result(Args const &args) const
48         {
49             return p_square_quantile_for_median(args);
50         }
51 
52         // serialization is done by accumulators it depends on
53         template<class Archive>
serializeboost::accumulators::impl::median_impl54         void serialize(Archive & ar, const unsigned int file_version) {}
55     };
56     ///////////////////////////////////////////////////////////////////////////////
57     // with_density_median_impl
58     //
59     /**
60         @brief Median estimation based on the density estimator
61 
62         The algorithm determines the bin in which the \f$0.5*cnt\f$-th sample lies, \f$cnt\f$ being
63         the total number of samples. It returns the approximate horizontal position of this sample,
64         based on a linear interpolation inside the bin.
65     */
66     template<typename Sample>
67     struct with_density_median_impl
68       : accumulator_base
69     {
70         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
71         typedef std::vector<std::pair<float_type, float_type> > histogram_type;
72         typedef iterator_range<typename histogram_type::iterator> range_type;
73         // for boost::result_of
74         typedef float_type result_type;
75 
76         template<typename Args>
with_density_median_implboost::accumulators::impl::with_density_median_impl77         with_density_median_impl(Args const &args)
78           : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
79           , is_dirty(true)
80         {
81         }
82 
operator ()boost::accumulators::impl::with_density_median_impl83         void operator ()(dont_care)
84         {
85             this->is_dirty = true;
86         }
87 
88 
89         template<typename Args>
resultboost::accumulators::impl::with_density_median_impl90         result_type result(Args const &args) const
91         {
92             if (this->is_dirty)
93             {
94                 this->is_dirty = false;
95 
96                 std::size_t cnt = count(args);
97                 range_type histogram = density(args);
98                 typename range_type::iterator it = histogram.begin();
99                 while (this->sum < 0.5 * cnt)
100                 {
101                     this->sum += it->second * cnt;
102                     ++it;
103                 }
104                 --it;
105                 float_type over = numeric::fdiv(this->sum - 0.5 * cnt, it->second * cnt);
106                 this->median = it->first * over + (it + 1)->first * (1. - over);
107             }
108 
109             return this->median;
110         }
111 
112         // make this accumulator serializeable
113         template<class Archive>
serializeboost::accumulators::impl::with_density_median_impl114         void serialize(Archive & ar, const unsigned int file_version)
115         {
116             ar & sum;
117             ar & is_dirty;
118             ar & median;
119         }
120 
121 
122     private:
123         mutable float_type sum;
124         mutable bool is_dirty;
125         mutable float_type median;
126     };
127 
128     ///////////////////////////////////////////////////////////////////////////////
129     // with_p_square_cumulative_distribution_median_impl
130     //
131     /**
132         @brief Median estimation based on the \f$P^2\f$ cumulative distribution estimator
133 
134         The algorithm determines the first (leftmost) bin with a height exceeding 0.5. It
135         returns the approximate horizontal position of where the cumulative distribution
136         equals 0.5, based on a linear interpolation inside the bin.
137     */
138     template<typename Sample>
139     struct with_p_square_cumulative_distribution_median_impl
140       : accumulator_base
141     {
142         typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
143         typedef std::vector<std::pair<float_type, float_type> > histogram_type;
144         typedef iterator_range<typename histogram_type::iterator> range_type;
145         // for boost::result_of
146         typedef float_type result_type;
147 
with_p_square_cumulative_distribution_median_implboost::accumulators::impl::with_p_square_cumulative_distribution_median_impl148         with_p_square_cumulative_distribution_median_impl(dont_care)
149           : is_dirty(true)
150         {
151         }
152 
operator ()boost::accumulators::impl::with_p_square_cumulative_distribution_median_impl153         void operator ()(dont_care)
154         {
155             this->is_dirty = true;
156         }
157 
158         template<typename Args>
resultboost::accumulators::impl::with_p_square_cumulative_distribution_median_impl159         result_type result(Args const &args) const
160         {
161             if (this->is_dirty)
162             {
163                 this->is_dirty = false;
164 
165                 range_type histogram = p_square_cumulative_distribution(args);
166                 typename range_type::iterator it = histogram.begin();
167                 while (it->second < 0.5)
168                 {
169                     ++it;
170                 }
171                 float_type over = numeric::fdiv(it->second - 0.5, it->second - (it - 1)->second);
172                 this->median = it->first * over + (it + 1)->first * ( 1. - over );
173             }
174 
175             return this->median;
176         }
177 
178         // make this accumulator serializeable
179         template<class Archive>
serializeboost::accumulators::impl::with_p_square_cumulative_distribution_median_impl180         void serialize(Archive & ar, const unsigned int file_version)
181         {
182             ar & is_dirty;
183             ar & median;
184         }
185 
186     private:
187 
188         mutable bool is_dirty;
189         mutable float_type median;
190     };
191 
192 } // namespace impl
193 
194 ///////////////////////////////////////////////////////////////////////////////
195 // tag::median
196 // tag::with_densisty_median
197 // tag::with_p_square_cumulative_distribution_median
198 //
199 namespace tag
200 {
201     struct median
202       : depends_on<p_square_quantile_for_median>
203     {
204         /// INTERNAL ONLY
205         ///
206         typedef accumulators::impl::median_impl<mpl::_1> impl;
207     };
208     struct with_density_median
209       : depends_on<count, density>
210     {
211         /// INTERNAL ONLY
212         ///
213         typedef accumulators::impl::with_density_median_impl<mpl::_1> impl;
214     };
215     struct with_p_square_cumulative_distribution_median
216       : depends_on<p_square_cumulative_distribution>
217     {
218         /// INTERNAL ONLY
219         ///
220         typedef accumulators::impl::with_p_square_cumulative_distribution_median_impl<mpl::_1> impl;
221     };
222 }
223 
224 ///////////////////////////////////////////////////////////////////////////////
225 // extract::median
226 // extract::with_density_median
227 // extract::with_p_square_cumulative_distribution_median
228 //
229 namespace extract
230 {
231     extractor<tag::median> const median = {};
232     extractor<tag::with_density_median> const with_density_median = {};
233     extractor<tag::with_p_square_cumulative_distribution_median> const with_p_square_cumulative_distribution_median = {};
234 
235     BOOST_ACCUMULATORS_IGNORE_GLOBAL(median)
236     BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_density_median)
237     BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_p_square_cumulative_distribution_median)
238 }
239 
240 using extract::median;
241 using extract::with_density_median;
242 using extract::with_p_square_cumulative_distribution_median;
243 
244 // median(with_p_square_quantile) -> median
245 template<>
246 struct as_feature<tag::median(with_p_square_quantile)>
247 {
248     typedef tag::median type;
249 };
250 
251 // median(with_density) -> with_density_median
252 template<>
253 struct as_feature<tag::median(with_density)>
254 {
255     typedef tag::with_density_median type;
256 };
257 
258 // median(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_median
259 template<>
260 struct as_feature<tag::median(with_p_square_cumulative_distribution)>
261 {
262     typedef tag::with_p_square_cumulative_distribution_median type;
263 };
264 
265 // for the purposes of feature-based dependency resolution,
266 // with_density_median and with_p_square_cumulative_distribution_median
267 // provide the same feature as median
268 template<>
269 struct feature_of<tag::with_density_median>
270   : feature_of<tag::median>
271 {
272 };
273 
274 template<>
275 struct feature_of<tag::with_p_square_cumulative_distribution_median>
276   : feature_of<tag::median>
277 {
278 };
279 
280 // So that median can be automatically substituted with
281 // weighted_median when the weight parameter is non-void.
282 template<>
283 struct as_weighted_feature<tag::median>
284 {
285     typedef tag::weighted_median type;
286 };
287 
288 template<>
289 struct feature_of<tag::weighted_median>
290   : feature_of<tag::median>
291 {
292 };
293 
294 // So that with_density_median can be automatically substituted with
295 // with_density_weighted_median when the weight parameter is non-void.
296 template<>
297 struct as_weighted_feature<tag::with_density_median>
298 {
299     typedef tag::with_density_weighted_median type;
300 };
301 
302 template<>
303 struct feature_of<tag::with_density_weighted_median>
304   : feature_of<tag::with_density_median>
305 {
306 };
307 
308 // So that with_p_square_cumulative_distribution_median can be automatically substituted with
309 // with_p_square_cumulative_distribution_weighted_median when the weight parameter is non-void.
310 template<>
311 struct as_weighted_feature<tag::with_p_square_cumulative_distribution_median>
312 {
313     typedef tag::with_p_square_cumulative_distribution_weighted_median type;
314 };
315 
316 template<>
317 struct feature_of<tag::with_p_square_cumulative_distribution_weighted_median>
318   : feature_of<tag::with_p_square_cumulative_distribution_median>
319 {
320 };
321 
322 }} // namespace boost::accumulators
323 
324 #endif
325