1 ///////////////////////////////////////////////////////////////////////////////
2 // weighted_extended_p_square.hpp
3 //
4 //  Copyright 2005 Daniel Egloff. Distributed under the Boost
5 //  Software License, Version 1.0. (See accompanying file
6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_EXTENDED_P_SQUARE_HPP_DE_01_01_2006
9 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_EXTENDED_P_SQUARE_HPP_DE_01_01_2006
10 
11 #include <vector>
12 #include <functional>
13 #include <boost/range/begin.hpp>
14 #include <boost/range/end.hpp>
15 #include <boost/range/iterator_range.hpp>
16 #include <boost/iterator/transform_iterator.hpp>
17 #include <boost/iterator/counting_iterator.hpp>
18 #include <boost/iterator/permutation_iterator.hpp>
19 #include <boost/parameter/keyword.hpp>
20 #include <boost/mpl/placeholders.hpp>
21 #include <boost/accumulators/framework/accumulator_base.hpp>
22 #include <boost/accumulators/framework/extractor.hpp>
23 #include <boost/accumulators/numeric/functional.hpp>
24 #include <boost/accumulators/framework/parameters/sample.hpp>
25 #include <boost/accumulators/framework/depends_on.hpp>
26 #include <boost/accumulators/statistics_fwd.hpp>
27 #include <boost/accumulators/statistics/count.hpp>
28 #include <boost/accumulators/statistics/sum.hpp>
29 #include <boost/accumulators/statistics/times2_iterator.hpp>
30 #include <boost/accumulators/statistics/extended_p_square.hpp>
31 #include <boost/serialization/vector.hpp>
32 
33 namespace boost { namespace accumulators
34 {
35 
36 namespace impl
37 {
38     ///////////////////////////////////////////////////////////////////////////////
39     // weighted_extended_p_square_impl
40     //  multiple quantile estimation with weighted samples
41     /**
42         @brief Multiple quantile estimation with the extended \f$P^2\f$ algorithm for weighted samples
43 
44         This version of the extended \f$P^2\f$ algorithm extends the extended \f$P^2\f$ algorithm to
45         support weighted samples. The extended \f$P^2\f$ algorithm dynamically estimates several
46         quantiles without storing samples. Assume that \f$m\f$ quantiles
47         \f$\xi_{p_1}, \ldots, \xi_{p_m}\f$ are to be estimated. Instead of storing the whole sample
48         cumulative distribution, the algorithm maintains only \f$m+2\f$ principal markers and
49         \f$m+1\f$ middle markers, whose positions are updated with each sample and whose heights
50         are adjusted (if necessary) using a piecewise-parablic formula. The heights of the principal
51         markers are the current estimates of the quantiles and are returned as an iterator range.
52 
53         For further details, see
54 
55         K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49,
56         Number 4 (October), 1986, p. 159-164.
57 
58         The extended \f$ P^2 \f$ algorithm generalizes the \f$ P^2 \f$ algorithm of
59 
60         R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and
61         histograms without storing observations, Communications of the ACM,
62         Volume 28 (October), Number 10, 1985, p. 1076-1085.
63 
64         @param extended_p_square_probabilities A vector of quantile probabilities.
65     */
66     template<typename Sample, typename Weight>
67     struct weighted_extended_p_square_impl
68       : accumulator_base
69     {
70         typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample;
71         typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
72         typedef std::vector<float_type> array_type;
73         // for boost::result_of
74         typedef iterator_range<
75             detail::lvalue_index_iterator<
76                 permutation_iterator<
77                     typename array_type::const_iterator
78                   , detail::times2_iterator
79                 >
80             >
81         > result_type;
82 
83         template<typename Args>
weighted_extended_p_square_implboost::accumulators::impl::weighted_extended_p_square_impl84         weighted_extended_p_square_impl(Args const &args)
85           : probabilities(
86                 boost::begin(args[extended_p_square_probabilities])
87               , boost::end(args[extended_p_square_probabilities])
88             )
89           , heights(2 * probabilities.size() + 3)
90           , actual_positions(heights.size())
91           , desired_positions(heights.size())
92         {
93         }
94 
95         template<typename Args>
operator ()boost::accumulators::impl::weighted_extended_p_square_impl96         void operator ()(Args const &args)
97         {
98             std::size_t cnt = count(args);
99             std::size_t sample_cell = 1; // k
100             std::size_t num_quantiles = this->probabilities.size();
101 
102             // m+2 principal markers and m+1 middle markers
103             std::size_t num_markers = 2 * num_quantiles + 3;
104 
105             // first accumulate num_markers samples
106             if(cnt <= num_markers)
107             {
108                 this->heights[cnt - 1] = args[sample];
109                 this->actual_positions[cnt - 1] = args[weight];
110 
111                 // complete the initialization of heights (and actual_positions) by sorting
112                 if(cnt == num_markers)
113                 {
114                     // TODO: we need to sort the initial samples (in heights) in ascending order and
115                     // sort their weights (in actual_positions) the same way. The following lines do
116                     // it, but there must be a better and more efficient way of doing this.
117                     typename array_type::iterator it_begin, it_end, it_min;
118 
119                     it_begin = this->heights.begin();
120                     it_end   = this->heights.end();
121 
122                     std::size_t pos = 0;
123 
124                     while (it_begin != it_end)
125                     {
126                         it_min = std::min_element(it_begin, it_end);
127                         std::size_t d = std::distance(it_begin, it_min);
128                         std::swap(*it_begin, *it_min);
129                         std::swap(this->actual_positions[pos], this->actual_positions[pos + d]);
130                         ++it_begin;
131                         ++pos;
132                     }
133 
134                     // calculate correct initial actual positions
135                     for (std::size_t i = 1; i < num_markers; ++i)
136                     {
137                         actual_positions[i] += actual_positions[i - 1];
138                     }
139                 }
140             }
141             else
142             {
143                 if(args[sample] < this->heights[0])
144                 {
145                     this->heights[0] = args[sample];
146                     this->actual_positions[0] = args[weight];
147                     sample_cell = 1;
148                 }
149                 else if(args[sample] >= this->heights[num_markers - 1])
150                 {
151                     this->heights[num_markers - 1] = args[sample];
152                     sample_cell = num_markers - 1;
153                 }
154                 else
155                 {
156                     // find cell k = sample_cell such that heights[k-1] <= sample < heights[k]
157 
158                     typedef typename array_type::iterator iterator;
159                     iterator it = std::upper_bound(
160                         this->heights.begin()
161                       , this->heights.end()
162                       , args[sample]
163                     );
164 
165                     sample_cell = std::distance(this->heights.begin(), it);
166                 }
167 
168                 // update actual position of all markers above sample_cell
169                 for(std::size_t i = sample_cell; i < num_markers; ++i)
170                 {
171                     this->actual_positions[i] += args[weight];
172                 }
173 
174                 // compute desired positions
175                 {
176                     this->desired_positions[0] = this->actual_positions[0];
177                     this->desired_positions[num_markers - 1] = sum_of_weights(args);
178                     this->desired_positions[1] = (sum_of_weights(args) - this->actual_positions[0]) * probabilities[0]
179                                               / 2. + this->actual_positions[0];
180                     this->desired_positions[num_markers - 2] = (sum_of_weights(args) - this->actual_positions[0])
181                                                             * (probabilities[num_quantiles - 1] + 1.)
182                                                             / 2. + this->actual_positions[0];
183 
184                     for (std::size_t i = 0; i < num_quantiles; ++i)
185                     {
186                         this->desired_positions[2 * i + 2] = (sum_of_weights(args) - this->actual_positions[0])
187                                                           * probabilities[i] + this->actual_positions[0];
188                     }
189 
190                     for (std::size_t i = 1; i < num_quantiles; ++i)
191                     {
192                         this->desired_positions[2 * i + 1] = (sum_of_weights(args) - this->actual_positions[0])
193                                                       * (probabilities[i - 1] + probabilities[i])
194                                                       / 2. + this->actual_positions[0];
195                     }
196                 }
197 
198                 // adjust heights and actual_positions of markers 1 to num_markers - 2 if necessary
199                 for (std::size_t i = 1; i <= num_markers - 2; ++i)
200                 {
201                     // offset to desired position
202                     float_type d = this->desired_positions[i] - this->actual_positions[i];
203 
204                     // offset to next position
205                     float_type dp = this->actual_positions[i + 1] - this->actual_positions[i];
206 
207                     // offset to previous position
208                     float_type dm = this->actual_positions[i - 1] - this->actual_positions[i];
209 
210                     // height ds
211                     float_type hp = (this->heights[i + 1] - this->heights[i]) / dp;
212                     float_type hm = (this->heights[i - 1] - this->heights[i]) / dm;
213 
214                     if((d >= 1 && dp > 1) || (d <= -1 && dm < -1))
215                     {
216                         short sign_d = static_cast<short>(d / std::abs(d));
217 
218                         float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm)*hp + (dp - sign_d) * hm);
219 
220                         // try adjusting heights[i] using p-squared formula
221                         if(this->heights[i - 1] < h && h < this->heights[i + 1])
222                         {
223                             this->heights[i] = h;
224                         }
225                         else
226                         {
227                             // use linear formula
228                             if(d > 0)
229                             {
230                                 this->heights[i] += hp;
231                             }
232                             if(d < 0)
233                             {
234                                 this->heights[i] -= hm;
235                             }
236                         }
237                         this->actual_positions[i] += sign_d;
238                     }
239                 }
240             }
241         }
242 
resultboost::accumulators::impl::weighted_extended_p_square_impl243         result_type result(dont_care) const
244         {
245             // for i in [1,probabilities.size()], return heights[i * 2]
246             detail::times2_iterator idx_begin = detail::make_times2_iterator(1);
247             detail::times2_iterator idx_end = detail::make_times2_iterator(this->probabilities.size() + 1);
248 
249             return result_type(
250                 make_permutation_iterator(this->heights.begin(), idx_begin)
251               , make_permutation_iterator(this->heights.begin(), idx_end)
252             );
253         }
254 
255         // make this accumulator serializeable
256         // TODO: do we need to split to load/save and verify that the parameters did not change?
257         template<class Archive>
serializeboost::accumulators::impl::weighted_extended_p_square_impl258         void serialize(Archive & ar, const unsigned int file_version)
259         {
260             ar & probabilities;
261             ar & heights;
262             ar & actual_positions;
263             ar & desired_positions;
264         }
265 
266     private:
267         array_type probabilities;         // the quantile probabilities
268         array_type heights;               // q_i
269         array_type actual_positions;      // n_i
270         array_type desired_positions;     // d_i
271     };
272 
273 } // namespace impl
274 
275 ///////////////////////////////////////////////////////////////////////////////
276 // tag::weighted_extended_p_square
277 //
278 namespace tag
279 {
280     struct weighted_extended_p_square
281       : depends_on<count, sum_of_weights>
282       , extended_p_square_probabilities
283     {
284         typedef accumulators::impl::weighted_extended_p_square_impl<mpl::_1, mpl::_2> impl;
285     };
286 }
287 
288 ///////////////////////////////////////////////////////////////////////////////
289 // extract::weighted_extended_p_square
290 //
291 namespace extract
292 {
293     extractor<tag::weighted_extended_p_square> const weighted_extended_p_square = {};
294 
295     BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_extended_p_square)
296 }
297 
298 using extract::weighted_extended_p_square;
299 
300 }} // namespace boost::accumulators
301 
302 #endif
303