1 /////////////////////////////////////////////////////////////////////////////// 2 // peaks_over_threshold.hpp 3 // 4 // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost 5 // Software License, Version 1.0. (See accompanying file 6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 7 8 #ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 9 #define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 10 11 #include <vector> 12 #include <limits> 13 #include <numeric> 14 #include <functional> 15 #include <boost/config/no_tr1/cmath.hpp> // pow 16 #include <sstream> // stringstream 17 #include <stdexcept> // runtime_error 18 #include <boost/throw_exception.hpp> 19 #include <boost/range.hpp> 20 #include <boost/mpl/if.hpp> 21 #include <boost/mpl/int.hpp> 22 #include <boost/mpl/placeholders.hpp> 23 #include <boost/parameter/keyword.hpp> 24 #include <boost/tuple/tuple.hpp> 25 #include <boost/accumulators/accumulators_fwd.hpp> 26 #include <boost/accumulators/framework/accumulator_base.hpp> 27 #include <boost/accumulators/framework/extractor.hpp> 28 #include <boost/accumulators/numeric/functional.hpp> 29 #include <boost/accumulators/framework/parameters/sample.hpp> 30 #include <boost/accumulators/framework/depends_on.hpp> 31 #include <boost/accumulators/statistics_fwd.hpp> 32 #include <boost/accumulators/statistics/parameters/quantile_probability.hpp> 33 #include <boost/accumulators/statistics/count.hpp> 34 #include <boost/accumulators/statistics/tail.hpp> 35 36 #ifdef _MSC_VER 37 # pragma warning(push) 38 # pragma warning(disable: 4127) // conditional expression is constant 39 #endif 40 41 namespace boost { namespace accumulators 42 { 43 44 /////////////////////////////////////////////////////////////////////////////// 45 // threshold_probability and threshold named parameters 46 // 47 BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value) 48 BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability) 49 50 BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value) 51 BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability) 52 53 namespace impl 54 { 55 /////////////////////////////////////////////////////////////////////////////// 56 // peaks_over_threshold_impl 57 // works with an explicit threshold value and does not depend on order statistics 58 /** 59 @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation 60 61 According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of 62 the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$ 63 may be approximated by a generalized Pareto distribution 64 \f[ 65 G_{\xi,\beta}(x) = 66 \left\{ 67 \begin{array}{ll} 68 \beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\ 69 \beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0, 70 \end{array} 71 \right. 72 \f] 73 with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf. 74 Hosking and Wallis (1987), 75 \f[ 76 \begin{array}{lll} 77 \hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\ 78 \hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right], 79 \end{array} 80 \f] 81 \f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over 82 the threshold \f$u\f$. Equivalently, the distribution function 83 \f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by 84 \f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$ 85 can be written as 86 \f[ 87 F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u) 88 \f] 89 and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function 90 \f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by 91 \f[ 92 \widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u). 93 \f] 94 It can be shown that \f$\widehat{F}(x)\f$ is a generalized 95 Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$ 96 and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$, 97 one obtains an estimator for the \f$\alpha\f$-quantile, 98 \f[ 99 \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right], 100 \f] 101 and similarly an estimator for the (coherent) tail mean, 102 \f[ 103 \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi}, 104 \f] 105 cf. McNeil and Frey (2000). 106 107 Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the 108 \f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define 109 the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are 110 computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the 111 correct result. 112 113 For further details, see 114 115 J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution, 116 Technometrics, Volume 29, 1987, p. 339-349 117 118 A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series: 119 an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300 120 121 @param quantile_probability 122 @param pot_threshold_value 123 */ 124 template<typename Sample, typename LeftRight> 125 struct peaks_over_threshold_impl 126 : accumulator_base 127 { 128 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; 129 // for boost::result_of 130 typedef boost::tuple<float_type, float_type, float_type> result_type; 131 // for left tail fitting, mirror the extreme values 132 typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign; 133 134 template<typename Args> peaks_over_threshold_implboost::accumulators::impl::peaks_over_threshold_impl135 peaks_over_threshold_impl(Args const &args) 136 : Nu_(0) 137 , mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1)) 138 , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) 139 , threshold_(sign::value * args[pot_threshold_value]) 140 , fit_parameters_(boost::make_tuple(0., 0., 0.)) 141 , is_dirty_(true) 142 { 143 } 144 145 template<typename Args> operator ()boost::accumulators::impl::peaks_over_threshold_impl146 void operator ()(Args const &args) 147 { 148 this->is_dirty_ = true; 149 150 if (sign::value * args[sample] > this->threshold_) 151 { 152 this->mu_ += args[sample]; 153 this->sigma2_ += args[sample] * args[sample]; 154 ++this->Nu_; 155 } 156 } 157 158 template<typename Args> resultboost::accumulators::impl::peaks_over_threshold_impl159 result_type result(Args const &args) const 160 { 161 if (this->is_dirty_) 162 { 163 this->is_dirty_ = false; 164 165 std::size_t cnt = count(args); 166 167 this->mu_ = sign::value * numeric::fdiv(this->mu_, this->Nu_); 168 this->sigma2_ = numeric::fdiv(this->sigma2_, this->Nu_); 169 this->sigma2_ -= this->mu_ * this->mu_; 170 171 float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt); 172 173 float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_); 174 float_type xi_hat = 0.5 * ( 1. - tmp ); 175 float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp ); 176 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat); 177 float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat; 178 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); 179 } 180 181 return this->fit_parameters_; 182 } 183 184 // make this accumulator serializeable 185 // TODO: do we need to split to load/save and verify that threshold did not change? 186 template<class Archive> serializeboost::accumulators::impl::peaks_over_threshold_impl187 void serialize(Archive & ar, const unsigned int file_version) 188 { 189 ar & Nu_; 190 ar & mu_; 191 ar & sigma2_; 192 ar & threshold_; 193 ar & get<0>(fit_parameters_); 194 ar & get<1>(fit_parameters_); 195 ar & get<2>(fit_parameters_); 196 ar & is_dirty_; 197 } 198 199 private: 200 std::size_t Nu_; // number of samples larger than threshold 201 mutable float_type mu_; // mean of Nu_ largest samples 202 mutable float_type sigma2_; // variance of Nu_ largest samples 203 float_type threshold_; 204 mutable result_type fit_parameters_; // boost::tuple that stores fit parameters 205 mutable bool is_dirty_; 206 }; 207 208 /////////////////////////////////////////////////////////////////////////////// 209 // peaks_over_threshold_prob_impl 210 // determines threshold from a given threshold probability using order statistics 211 /** 212 @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation 213 214 @sa peaks_over_threshold_impl 215 216 @param quantile_probability 217 @param pot_threshold_probability 218 */ 219 template<typename Sample, typename LeftRight> 220 struct peaks_over_threshold_prob_impl 221 : accumulator_base 222 { 223 typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; 224 // for boost::result_of 225 typedef boost::tuple<float_type, float_type, float_type> result_type; 226 // for left tail fitting, mirror the extreme values 227 typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign; 228 229 template<typename Args> peaks_over_threshold_prob_implboost::accumulators::impl::peaks_over_threshold_prob_impl230 peaks_over_threshold_prob_impl(Args const &args) 231 : mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1)) 232 , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) 233 , threshold_probability_(args[pot_threshold_probability]) 234 , fit_parameters_(boost::make_tuple(0., 0., 0.)) 235 , is_dirty_(true) 236 { 237 } 238 operator ()boost::accumulators::impl::peaks_over_threshold_prob_impl239 void operator ()(dont_care) 240 { 241 this->is_dirty_ = true; 242 } 243 244 template<typename Args> resultboost::accumulators::impl::peaks_over_threshold_prob_impl245 result_type result(Args const &args) const 246 { 247 if (this->is_dirty_) 248 { 249 this->is_dirty_ = false; 250 251 std::size_t cnt = count(args); 252 253 // the n'th cached sample provides an approximate threshold value u 254 std::size_t n = static_cast<std::size_t>( 255 std::ceil( 256 cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ ) 257 ) 258 ); 259 260 // If n is in a valid range, return result, otherwise return NaN or throw exception 261 if ( n >= static_cast<std::size_t>(tail(args).size())) 262 { 263 if (std::numeric_limits<float_type>::has_quiet_NaN) 264 { 265 return boost::make_tuple( 266 std::numeric_limits<float_type>::quiet_NaN() 267 , std::numeric_limits<float_type>::quiet_NaN() 268 , std::numeric_limits<float_type>::quiet_NaN() 269 ); 270 } 271 else 272 { 273 std::ostringstream msg; 274 msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")"; 275 boost::throw_exception(std::runtime_error(msg.str())); 276 return boost::make_tuple(Sample(0), Sample(0), Sample(0)); 277 } 278 } 279 else 280 { 281 float_type u = *(tail(args).begin() + n - 1) * sign::value; 282 283 // compute mean and variance of samples above/under threshold value u 284 for (std::size_t i = 0; i < n; ++i) 285 { 286 mu_ += *(tail(args).begin() + i); 287 sigma2_ += *(tail(args).begin() + i) * (*(tail(args).begin() + i)); 288 } 289 290 this->mu_ = sign::value * numeric::fdiv(this->mu_, n); 291 this->sigma2_ = numeric::fdiv(this->sigma2_, n); 292 this->sigma2_ -= this->mu_ * this->mu_; 293 294 if (is_same<LeftRight, left>::value) 295 this->threshold_probability_ = 1. - this->threshold_probability_; 296 297 float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_); 298 float_type xi_hat = 0.5 * ( 1. - tmp ); 299 float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp ); 300 float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat); 301 float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat; 302 this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); 303 } 304 } 305 306 return this->fit_parameters_; 307 } 308 309 // make this accumulator serializeable 310 // TODO: do we need to split to load/save and verify that threshold did not change? 311 template<class Archive> serializeboost::accumulators::impl::peaks_over_threshold_prob_impl312 void serialize(Archive & ar, const unsigned int file_version) 313 { 314 ar & mu_; 315 ar & sigma2_; 316 ar & threshold_probability_; 317 ar & get<0>(fit_parameters_); 318 ar & get<1>(fit_parameters_); 319 ar & get<2>(fit_parameters_); 320 ar & is_dirty_; 321 } 322 323 private: 324 mutable float_type mu_; // mean of samples above threshold u 325 mutable float_type sigma2_; // variance of samples above threshold u 326 mutable float_type threshold_probability_; 327 mutable result_type fit_parameters_; // boost::tuple that stores fit parameters 328 mutable bool is_dirty_; 329 }; 330 331 } // namespace impl 332 333 /////////////////////////////////////////////////////////////////////////////// 334 // tag::peaks_over_threshold 335 // 336 namespace tag 337 { 338 template<typename LeftRight> 339 struct peaks_over_threshold 340 : depends_on<count> 341 , pot_threshold_value 342 { 343 /// INTERNAL ONLY 344 /// 345 typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl; 346 }; 347 348 template<typename LeftRight> 349 struct peaks_over_threshold_prob 350 : depends_on<count, tail<LeftRight> > 351 , pot_threshold_probability 352 { 353 /// INTERNAL ONLY 354 /// 355 typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl; 356 }; 357 358 struct abstract_peaks_over_threshold 359 : depends_on<> 360 { 361 }; 362 } 363 364 /////////////////////////////////////////////////////////////////////////////// 365 // extract::peaks_over_threshold 366 // 367 namespace extract 368 { 369 extractor<tag::abstract_peaks_over_threshold> const peaks_over_threshold = {}; 370 371 BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold) 372 } 373 374 using extract::peaks_over_threshold; 375 376 // peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight> 377 template<typename LeftRight> 378 struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)> 379 { 380 typedef tag::peaks_over_threshold<LeftRight> type; 381 }; 382 383 // peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight> 384 template<typename LeftRight> 385 struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)> 386 { 387 typedef tag::peaks_over_threshold_prob<LeftRight> type; 388 }; 389 390 template<typename LeftRight> 391 struct feature_of<tag::peaks_over_threshold<LeftRight> > 392 : feature_of<tag::abstract_peaks_over_threshold> 393 { 394 }; 395 396 template<typename LeftRight> 397 struct feature_of<tag::peaks_over_threshold_prob<LeftRight> > 398 : feature_of<tag::abstract_peaks_over_threshold> 399 { 400 }; 401 402 // So that peaks_over_threshold can be automatically substituted 403 // with weighted_peaks_over_threshold when the weight parameter is non-void. 404 template<typename LeftRight> 405 struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> > 406 { 407 typedef tag::weighted_peaks_over_threshold<LeftRight> type; 408 }; 409 410 template<typename LeftRight> 411 struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> > 412 : feature_of<tag::peaks_over_threshold<LeftRight> > 413 {}; 414 415 // So that peaks_over_threshold_prob can be automatically substituted 416 // with weighted_peaks_over_threshold_prob when the weight parameter is non-void. 417 template<typename LeftRight> 418 struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> > 419 { 420 typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type; 421 }; 422 423 template<typename LeftRight> 424 struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> > 425 : feature_of<tag::peaks_over_threshold_prob<LeftRight> > 426 {}; 427 428 }} // namespace boost::accumulators 429 430 #ifdef _MSC_VER 431 # pragma warning(pop) 432 #endif 433 434 #endif 435