1 ///////////////////////////////////////////////////////////////////////////////
2 // weighted_peaks_over_threshold.hpp
3 //
4 //  Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
5 //  Software License, Version 1.0. (See accompanying file
6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7 
8 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
9 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
10 
11 #include <vector>
12 #include <limits>
13 #include <numeric>
14 #include <functional>
15 #include <boost/throw_exception.hpp>
16 #include <boost/range.hpp>
17 #include <boost/mpl/if.hpp>
18 #include <boost/mpl/placeholders.hpp>
19 #include <boost/parameter/keyword.hpp>
20 #include <boost/tuple/tuple.hpp>
21 #include <boost/accumulators/numeric/functional.hpp>
22 #include <boost/accumulators/framework/accumulator_base.hpp>
23 #include <boost/accumulators/framework/extractor.hpp>
24 #include <boost/accumulators/framework/parameters/sample.hpp>
25 #include <boost/accumulators/framework/depends_on.hpp>
26 #include <boost/accumulators/statistics_fwd.hpp>
27 #include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
28 #include <boost/accumulators/statistics/peaks_over_threshold.hpp> // for named parameters pot_threshold_value and pot_threshold_probability
29 #include <boost/accumulators/statistics/sum.hpp>
30 #include <boost/accumulators/statistics/tail_variate.hpp>
31 
32 #ifdef _MSC_VER
33 # pragma warning(push)
34 # pragma warning(disable: 4127) // conditional expression is constant
35 #endif
36 
37 namespace boost { namespace accumulators
38 {
39 
40 namespace impl
41 {
42 
43     ///////////////////////////////////////////////////////////////////////////////
44     // weighted_peaks_over_threshold_impl
45     //  works with an explicit threshold value and does not depend on order statistics of weighted samples
46     /**
47         @brief Weighted Peaks over Threshold Method for Weighted Quantile and Weighted Tail Mean Estimation
48 
49         @sa peaks_over_threshold_impl
50 
51         @param quantile_probability
52         @param pot_threshold_value
53     */
54     template<typename Sample, typename Weight, typename LeftRight>
55     struct weighted_peaks_over_threshold_impl
56       : accumulator_base
57     {
58         typedef typename numeric::functional::multiplies<Weight, Sample>::result_type weighted_sample;
59         typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
60         // for boost::result_of
61         typedef boost::tuple<float_type, float_type, float_type> result_type;
62 
63         template<typename Args>
weighted_peaks_over_threshold_implboost::accumulators::impl::weighted_peaks_over_threshold_impl64         weighted_peaks_over_threshold_impl(Args const &args)
65           : sign_((is_same<LeftRight, left>::value) ? -1 : 1)
66           , mu_(sign_ * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
67           , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
68           , w_sum_(numeric::fdiv(args[weight | Weight()], (std::size_t)1))
69           , threshold_(sign_ * args[pot_threshold_value])
70           , fit_parameters_(boost::make_tuple(0., 0., 0.))
71           , is_dirty_(true)
72         {
73         }
74 
75         template<typename Args>
operator ()boost::accumulators::impl::weighted_peaks_over_threshold_impl76         void operator ()(Args const &args)
77         {
78             this->is_dirty_ = true;
79 
80             if (this->sign_ * args[sample] > this->threshold_)
81             {
82                 this->mu_ += args[weight] * args[sample];
83                 this->sigma2_ += args[weight] * args[sample] * args[sample];
84                 this->w_sum_ += args[weight];
85             }
86         }
87 
88         template<typename Args>
resultboost::accumulators::impl::weighted_peaks_over_threshold_impl89         result_type result(Args const &args) const
90         {
91             if (this->is_dirty_)
92             {
93                 this->is_dirty_ = false;
94 
95                 this->mu_ = this->sign_ * numeric::fdiv(this->mu_, this->w_sum_);
96                 this->sigma2_ = numeric::fdiv(this->sigma2_, this->w_sum_);
97                 this->sigma2_ -= this->mu_ * this->mu_;
98 
99                 float_type threshold_probability = numeric::fdiv(sum_of_weights(args) - this->w_sum_, sum_of_weights(args));
100 
101                 float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_);
102                 float_type xi_hat = 0.5 * ( 1. - tmp );
103                 float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp );
104                 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat);
105                 float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat;
106                 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
107             }
108 
109             return this->fit_parameters_;
110         }
111 
112         // make this accumulator serializeable
113         // TODO: do we need to split to load/save and verify that threshold did not change?
114         template<class Archive>
serializeboost::accumulators::impl::weighted_peaks_over_threshold_impl115         void serialize(Archive & ar, const unsigned int file_version)
116         {
117             ar & sign_;
118             ar & mu_;
119             ar & sigma2_;
120             ar & threshold_;
121             ar & fit_parameters_;
122             ar & is_dirty_;
123         }
124 
125     private:
126         short sign_;                         // for left tail fitting, mirror the extreme values
127         mutable float_type mu_;              // mean of samples above threshold
128         mutable float_type sigma2_;          // variance of samples above threshold
129         mutable float_type w_sum_;           // sum of weights of samples above threshold
130         float_type threshold_;
131         mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
132         mutable bool is_dirty_;
133     };
134 
135     ///////////////////////////////////////////////////////////////////////////////
136     // weighted_peaks_over_threshold_prob_impl
137     //  determines threshold from a given threshold probability using order statistics
138     /**
139         @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
140 
141         @sa weighted_peaks_over_threshold_impl
142 
143         @param quantile_probability
144         @param pot_threshold_probability
145     */
146     template<typename Sample, typename Weight, typename LeftRight>
147     struct weighted_peaks_over_threshold_prob_impl
148       : accumulator_base
149     {
150         typedef typename numeric::functional::multiplies<Weight, Sample>::result_type weighted_sample;
151         typedef typename numeric::functional::fdiv<weighted_sample, std::size_t>::result_type float_type;
152         // for boost::result_of
153         typedef boost::tuple<float_type, float_type, float_type> result_type;
154 
155         template<typename Args>
weighted_peaks_over_threshold_prob_implboost::accumulators::impl::weighted_peaks_over_threshold_prob_impl156         weighted_peaks_over_threshold_prob_impl(Args const &args)
157           : sign_((is_same<LeftRight, left>::value) ? -1 : 1)
158           , mu_(sign_ * numeric::fdiv(args[sample | Sample()], (std::size_t)1))
159           , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1))
160           , threshold_probability_(args[pot_threshold_probability])
161           , fit_parameters_(boost::make_tuple(0., 0., 0.))
162           , is_dirty_(true)
163         {
164         }
165 
operator ()boost::accumulators::impl::weighted_peaks_over_threshold_prob_impl166         void operator ()(dont_care)
167         {
168             this->is_dirty_ = true;
169         }
170 
171         template<typename Args>
resultboost::accumulators::impl::weighted_peaks_over_threshold_prob_impl172         result_type result(Args const &args) const
173         {
174             if (this->is_dirty_)
175             {
176                 this->is_dirty_ = false;
177 
178                 float_type threshold = sum_of_weights(args)
179                              * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ );
180 
181                 std::size_t n = 0;
182                 Weight sum = Weight(0);
183 
184                 while (sum < threshold)
185                 {
186                     if (n < static_cast<std::size_t>(tail_weights(args).size()))
187                     {
188                         mu_ += *(tail_weights(args).begin() + n) * *(tail(args).begin() + n);
189                         sigma2_ += *(tail_weights(args).begin() + n) * *(tail(args).begin() + n) * (*(tail(args).begin() + n));
190                         sum += *(tail_weights(args).begin() + n);
191                         n++;
192                     }
193                     else
194                     {
195                         if (std::numeric_limits<float_type>::has_quiet_NaN)
196                         {
197                             return boost::make_tuple(
198                                 std::numeric_limits<float_type>::quiet_NaN()
199                               , std::numeric_limits<float_type>::quiet_NaN()
200                               , std::numeric_limits<float_type>::quiet_NaN()
201                             );
202                         }
203                         else
204                         {
205                             std::ostringstream msg;
206                             msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")";
207                             boost::throw_exception(std::runtime_error(msg.str()));
208                             return boost::make_tuple(Sample(0), Sample(0), Sample(0));
209                         }
210                     }
211                 }
212 
213                 float_type u = *(tail(args).begin() + n - 1) * this->sign_;
214 
215 
216                 this->mu_ = this->sign_ * numeric::fdiv(this->mu_, sum);
217                 this->sigma2_ = numeric::fdiv(this->sigma2_, sum);
218                 this->sigma2_ -= this->mu_ * this->mu_;
219 
220                 if (is_same<LeftRight, left>::value)
221                     this->threshold_probability_ = 1. - this->threshold_probability_;
222 
223                 float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_);
224                 float_type xi_hat = 0.5 * ( 1. - tmp );
225                 float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp );
226                 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat);
227                 float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat;
228                 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
229 
230             }
231 
232             return this->fit_parameters_;
233         }
234 
235     private:
236         short sign_;                                // for left tail fitting, mirror the extreme values
237         mutable float_type mu_;                     // mean of samples above threshold u
238         mutable float_type sigma2_;                 // variance of samples above threshold u
239         mutable float_type threshold_probability_;
240         mutable result_type fit_parameters_;        // boost::tuple that stores fit parameters
241         mutable bool is_dirty_;
242     };
243 
244 } // namespace impl
245 
246 ///////////////////////////////////////////////////////////////////////////////
247 // tag::weighted_peaks_over_threshold
248 //
249 namespace tag
250 {
251     template<typename LeftRight>
252     struct weighted_peaks_over_threshold
253       : depends_on<sum_of_weights>
254       , pot_threshold_value
255     {
256         /// INTERNAL ONLY
257         typedef accumulators::impl::weighted_peaks_over_threshold_impl<mpl::_1, mpl::_2, LeftRight> impl;
258     };
259 
260     template<typename LeftRight>
261     struct weighted_peaks_over_threshold_prob
262       : depends_on<sum_of_weights, tail_weights<LeftRight> >
263       , pot_threshold_probability
264     {
265         /// INTERNAL ONLY
266         typedef accumulators::impl::weighted_peaks_over_threshold_prob_impl<mpl::_1, mpl::_2, LeftRight> impl;
267     };
268 }
269 
270 ///////////////////////////////////////////////////////////////////////////////
271 // extract::weighted_peaks_over_threshold
272 //
273 namespace extract
274 {
275     extractor<tag::abstract_peaks_over_threshold> const weighted_peaks_over_threshold = {};
276 
277     BOOST_ACCUMULATORS_IGNORE_GLOBAL(weighted_peaks_over_threshold)
278 }
279 
280 using extract::weighted_peaks_over_threshold;
281 
282 // weighted_peaks_over_threshold<LeftRight>(with_threshold_value) -> weighted_peaks_over_threshold<LeftRight>
283 template<typename LeftRight>
284 struct as_feature<tag::weighted_peaks_over_threshold<LeftRight>(with_threshold_value)>
285 {
286     typedef tag::weighted_peaks_over_threshold<LeftRight> type;
287 };
288 
289 // weighted_peaks_over_threshold<LeftRight>(with_threshold_probability) -> weighted_peaks_over_threshold_prob<LeftRight>
290 template<typename LeftRight>
291 struct as_feature<tag::weighted_peaks_over_threshold<LeftRight>(with_threshold_probability)>
292 {
293     typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type;
294 };
295 
296 }} // namespace boost::accumulators
297 
298 #ifdef _MSC_VER
299 # pragma warning(pop)
300 #endif
301 
302 #endif
303