1 /*
2  *  Created by Joachim on 16/04/2019.
3  *  Adapted from donated nonius code.
4  *
5  *  Distributed under the Boost Software License, Version 1.0. (See accompanying
6  *  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7  */
8 
9 // Statistical analysis tools
10 
11 #ifndef TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED
12 #define TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED
13 
14 #include "../catch_clock.hpp"
15 #include "../catch_estimate.hpp"
16 #include "../catch_outlier_classification.hpp"
17 
18 #include <algorithm>
19 #include <functional>
20 #include <vector>
21 #include <iterator>
22 #include <numeric>
23 #include <tuple>
24 #include <cmath>
25 #include <utility>
26 #include <cstddef>
27 #include <random>
28 
29 namespace Catch {
30     namespace Benchmark {
31         namespace Detail {
32             using sample = std::vector<double>;
33 
34             double weighted_average_quantile(int k, int q, std::vector<double>::iterator first, std::vector<double>::iterator last);
35 
36             template <typename Iterator>
classify_outliers(Iterator first,Iterator last)37             OutlierClassification classify_outliers(Iterator first, Iterator last) {
38                 std::vector<double> copy(first, last);
39 
40                 auto q1 = weighted_average_quantile(1, 4, copy.begin(), copy.end());
41                 auto q3 = weighted_average_quantile(3, 4, copy.begin(), copy.end());
42                 auto iqr = q3 - q1;
43                 auto los = q1 - (iqr * 3.);
44                 auto lom = q1 - (iqr * 1.5);
45                 auto him = q3 + (iqr * 1.5);
46                 auto his = q3 + (iqr * 3.);
47 
48                 OutlierClassification o;
49                 for (; first != last; ++first) {
50                     auto&& t = *first;
51                     if (t < los) ++o.low_severe;
52                     else if (t < lom) ++o.low_mild;
53                     else if (t > his) ++o.high_severe;
54                     else if (t > him) ++o.high_mild;
55                     ++o.samples_seen;
56                 }
57                 return o;
58             }
59 
60             template <typename Iterator>
mean(Iterator first,Iterator last)61             double mean(Iterator first, Iterator last) {
62                 auto count = last - first;
63                 double sum = std::accumulate(first, last, 0.);
64                 return sum / count;
65             }
66 
67             template <typename URng, typename Iterator, typename Estimator>
resample(URng & rng,int resamples,Iterator first,Iterator last,Estimator & estimator)68             sample resample(URng& rng, int resamples, Iterator first, Iterator last, Estimator& estimator) {
69                 auto n = last - first;
70                 std::uniform_int_distribution<decltype(n)> dist(0, n - 1);
71 
72                 sample out;
73                 out.reserve(resamples);
74                 std::generate_n(std::back_inserter(out), resamples, [n, first, &estimator, &dist, &rng] {
75                     std::vector<double> resampled;
76                     resampled.reserve(n);
77                     std::generate_n(std::back_inserter(resampled), n, [first, &dist, &rng] { return first[dist(rng)]; });
78                     return estimator(resampled.begin(), resampled.end());
79                 });
80                 std::sort(out.begin(), out.end());
81                 return out;
82             }
83 
84             template <typename Estimator, typename Iterator>
jackknife(Estimator && estimator,Iterator first,Iterator last)85             sample jackknife(Estimator&& estimator, Iterator first, Iterator last) {
86                 auto n = last - first;
87                 auto second = std::next(first);
88                 sample results;
89                 results.reserve(n);
90 
91                 for (auto it = first; it != last; ++it) {
92                     std::iter_swap(it, first);
93                     results.push_back(estimator(second, last));
94                 }
95 
96                 return results;
97             }
98 
normal_cdf(double x)99             inline double normal_cdf(double x) {
100                 return std::erfc(-x / std::sqrt(2.0)) / 2.0;
101             }
102 
103             double erfc_inv(double x);
104 
105             double normal_quantile(double p);
106 
107             template <typename Iterator, typename Estimator>
bootstrap(double confidence_level,Iterator first,Iterator last,sample const & resample,Estimator && estimator)108             Estimate<double> bootstrap(double confidence_level, Iterator first, Iterator last, sample const& resample, Estimator&& estimator) {
109                 auto n_samples = last - first;
110 
111                 double point = estimator(first, last);
112                 // Degenerate case with a single sample
113                 if (n_samples == 1) return { point, point, point, confidence_level };
114 
115                 sample jack = jackknife(estimator, first, last);
116                 double jack_mean = mean(jack.begin(), jack.end());
117                 double sum_squares, sum_cubes;
118                 std::tie(sum_squares, sum_cubes) = std::accumulate(jack.begin(), jack.end(), std::make_pair(0., 0.), [jack_mean](std::pair<double, double> sqcb, double x) -> std::pair<double, double> {
119                     auto d = jack_mean - x;
120                     auto d2 = d * d;
121                     auto d3 = d2 * d;
122                     return { sqcb.first + d2, sqcb.second + d3 };
123                 });
124 
125                 double accel = sum_cubes / (6 * std::pow(sum_squares, 1.5));
126                 int n = static_cast<int>(resample.size());
127                 double prob_n = std::count_if(resample.begin(), resample.end(), [point](double x) { return x < point; }) / (double)n;
128                 // degenerate case with uniform samples
129                 if (prob_n == 0) return { point, point, point, confidence_level };
130 
131                 double bias = normal_quantile(prob_n);
132                 double z1 = normal_quantile((1. - confidence_level) / 2.);
133 
134                 auto cumn = [n](double x) -> int {
135                     return std::lround(normal_cdf(x) * n); };
136                 auto a = [bias, accel](double b) { return bias + b / (1. - accel * b); };
137                 double b1 = bias + z1;
138                 double b2 = bias - z1;
139                 double a1 = a(b1);
140                 double a2 = a(b2);
141                 auto lo = std::max(cumn(a1), 0);
142                 auto hi = std::min(cumn(a2), n - 1);
143 
144                 return { point, resample[lo], resample[hi], confidence_level };
145             }
146 
147             double outlier_variance(Estimate<double> mean, Estimate<double> stddev, int n);
148 
149             struct bootstrap_analysis {
150                 Estimate<double> mean;
151                 Estimate<double> standard_deviation;
152                 double outlier_variance;
153             };
154 
155             bootstrap_analysis analyse_samples(double confidence_level, int n_resamples, std::vector<double>::iterator first, std::vector<double>::iterator last);
156         } // namespace Detail
157     } // namespace Benchmark
158 } // namespace Catch
159 
160 #endif // TWOBLUECUBES_CATCH_DETAIL_ANALYSIS_HPP_INCLUDED
161