1 /////////////////////////////////////////////////////////////////////////////// 2 // weighted_median.hpp 3 // 4 // Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost 5 // Software License, Version 1.0. (See accompanying file 6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 8 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005 9 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_MEDIAN_HPP_EAN_28_10_2005 10 11 #include <boost/mpl/placeholders.hpp> 12 #include <boost/range/iterator_range.hpp> 13 #include <boost/accumulators/framework/accumulator_base.hpp> 14 #include <boost/accumulators/framework/extractor.hpp> 15 #include <boost/accumulators/numeric/functional.hpp> 16 #include <boost/accumulators/framework/parameters/sample.hpp> 17 #include <boost/accumulators/framework/depends_on.hpp> 18 #include <boost/accumulators/statistics_fwd.hpp> 19 #include <boost/accumulators/statistics/count.hpp> 20 #include <boost/accumulators/statistics/median.hpp> 21 #include <boost/accumulators/statistics/weighted_p_square_quantile.hpp> 22 #include <boost/accumulators/statistics/weighted_density.hpp> 23 #include <boost/accumulators/statistics/weighted_p_square_cumul_dist.hpp> 24 25 namespace boost { namespace accumulators 26 { 27 28 namespace impl 29 { 30 /////////////////////////////////////////////////////////////////////////////// 31 // weighted_median_impl 32 // 33 /** 34 @brief Median estimation for weighted samples based on the \f$P^2\f$ quantile estimator 35 36 The \f$P^2\f$ algorithm for weighted samples is invoked with a quantile probability of 0.5. 37 */ 38 template<typename Sample> 39 struct weighted_median_impl 40 : accumulator_base 41 { 42 // for boost::result_of 43 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type result_type; 44 weighted_median_implboost::accumulators::impl::weighted_median_impl45 weighted_median_impl(dont_care) {} 46 47 template<typename Args> resultboost::accumulators::impl::weighted_median_impl48 result_type result(Args const &args) const 49 { 50 return weighted_p_square_quantile_for_median(args); 51 } 52 }; 53 54 /////////////////////////////////////////////////////////////////////////////// 55 // with_density_weighted_median_impl 56 // 57 /** 58 @brief Median estimation for weighted samples based on the density estimator 59 60 The algorithm determines the bin in which the \f$0.5*cnt\f$-th sample lies, \f$cnt\f$ being 61 the total number of samples. It returns the approximate horizontal position of this sample, 62 based on a linear interpolation inside the bin. 63 */ 64 template<typename Sample> 65 struct with_density_weighted_median_impl 66 : accumulator_base 67 { 68 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; 69 typedef std::vector<std::pair<float_type, float_type> > histogram_type; 70 typedef iterator_range<typename histogram_type::iterator> range_type; 71 // for boost::result_of 72 typedef float_type result_type; 73 74 template<typename Args> with_density_weighted_median_implboost::accumulators::impl::with_density_weighted_median_impl75 with_density_weighted_median_impl(Args const &args) 76 : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) 77 , is_dirty(true) 78 { 79 } 80 operator ()boost::accumulators::impl::with_density_weighted_median_impl81 void operator ()(dont_care) 82 { 83 this->is_dirty = true; 84 } 85 86 template<typename Args> resultboost::accumulators::impl::with_density_weighted_median_impl87 result_type result(Args const &args) const 88 { 89 if (this->is_dirty) 90 { 91 this->is_dirty = false; 92 93 std::size_t cnt = count(args); 94 range_type histogram = weighted_density(args); 95 typename range_type::iterator it = histogram.begin(); 96 while (this->sum < 0.5 * cnt) 97 { 98 this->sum += it->second * cnt; 99 ++it; 100 } 101 --it; 102 float_type over = numeric::fdiv(this->sum - 0.5 * cnt, it->second * cnt); 103 this->median = it->first * over + (it + 1)->first * ( 1. - over ); 104 } 105 106 return this->median; 107 } 108 109 // make this accumulator serializeable 110 template<class Archive> serializeboost::accumulators::impl::with_density_weighted_median_impl111 void serialize(Archive & ar, const unsigned int file_version) 112 { 113 ar & sum; 114 ar & is_dirty; 115 ar & median; 116 } 117 118 private: 119 mutable float_type sum; 120 mutable bool is_dirty; 121 mutable float_type median; 122 }; 123 124 /////////////////////////////////////////////////////////////////////////////// 125 // with_p_square_cumulative_distribution_weighted_median_impl 126 // 127 /** 128 @brief Median estimation for weighted samples based on the \f$P^2\f$ cumulative distribution estimator 129 130 The algorithm determines the first (leftmost) bin with a height exceeding 0.5. It 131 returns the approximate horizontal position of where the cumulative distribution 132 equals 0.5, based on a linear interpolation inside the bin. 133 */ 134 template<typename Sample, typename Weight> 135 struct with_p_square_cumulative_distribution_weighted_median_impl 136 : accumulator_base 137 { 138 typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample; 139 typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type; 140 typedef std::vector<std::pair<float_type, float_type> > histogram_type; 141 typedef iterator_range<typename histogram_type::iterator> range_type; 142 // for boost::result_of 143 typedef float_type result_type; 144 with_p_square_cumulative_distribution_weighted_median_implboost::accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl145 with_p_square_cumulative_distribution_weighted_median_impl(dont_care) 146 : is_dirty(true) 147 { 148 } 149 operator ()boost::accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl150 void operator ()(dont_care) 151 { 152 this->is_dirty = true; 153 } 154 155 template<typename Args> resultboost::accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl156 result_type result(Args const &args) const 157 { 158 if (this->is_dirty) 159 { 160 this->is_dirty = false; 161 162 range_type histogram = weighted_p_square_cumulative_distribution(args); 163 typename range_type::iterator it = histogram.begin(); 164 while (it->second < 0.5) 165 { 166 ++it; 167 } 168 float_type over = numeric::fdiv(it->second - 0.5, it->second - (it - 1)->second); 169 this->median = it->first * over + (it + 1)->first * ( 1. - over ); 170 } 171 172 return this->median; 173 } 174 175 // make this accumulator serializeable 176 template<class Archive> serializeboost::accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl177 void serialize(Archive & ar, const unsigned int file_version) 178 { 179 ar & is_dirty; 180 ar & median; 181 } 182 183 private: 184 mutable bool is_dirty; 185 mutable float_type median; 186 }; 187 188 } // namespace impl 189 190 /////////////////////////////////////////////////////////////////////////////// 191 // tag::weighted_median 192 // tag::with_density_weighted_median 193 // tag::with_p_square_cumulative_distribution_weighted_median 194 // 195 namespace tag 196 { 197 struct weighted_median 198 : depends_on<weighted_p_square_quantile_for_median> 199 { 200 /// INTERNAL ONLY 201 /// 202 typedef accumulators::impl::weighted_median_impl<mpl::_1> impl; 203 }; 204 struct with_density_weighted_median 205 : depends_on<count, weighted_density> 206 { 207 /// INTERNAL ONLY 208 /// 209 typedef accumulators::impl::with_density_weighted_median_impl<mpl::_1> impl; 210 }; 211 struct with_p_square_cumulative_distribution_weighted_median 212 : depends_on<weighted_p_square_cumulative_distribution> 213 { 214 /// INTERNAL ONLY 215 /// 216 typedef accumulators::impl::with_p_square_cumulative_distribution_weighted_median_impl<mpl::_1, mpl::_2> impl; 217 }; 218 219 } 220 221 /////////////////////////////////////////////////////////////////////////////// 222 // extract::weighted_median 223 // 224 namespace extract 225 { 226 extractor<tag::median> const weighted_median = {}; 227 228 BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_median) 229 } 230 231 using extract::weighted_median; 232 // weighted_median(with_p_square_quantile) -> weighted_median 233 template<> 234 struct as_feature<tag::weighted_median(with_p_square_quantile)> 235 { 236 typedef tag::weighted_median type; 237 }; 238 239 // weighted_median(with_density) -> with_density_weighted_median 240 template<> 241 struct as_feature<tag::weighted_median(with_density)> 242 { 243 typedef tag::with_density_weighted_median type; 244 }; 245 246 // weighted_median(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_weighted_median 247 template<> 248 struct as_feature<tag::weighted_median(with_p_square_cumulative_distribution)> 249 { 250 typedef tag::with_p_square_cumulative_distribution_weighted_median type; 251 }; 252 253 }} // namespace boost::accumulators 254 255 #endif 256