1 #pragma once
2 
3 #include <unordered_map>
4 #include <vector>
5 
6 #include <pdal/Dimension.hpp>
7 
8 #include "../untwine/Common.hpp"
9 
10 namespace untwine
11 {
12 
13 class Stats
14 {
15 public:
16     enum EnumType
17     {
18         NoEnum,
19         Enumerate,
20         Count,
21         Global
22     };
23 
24 using EnumMap = std::unordered_map<double, PointCount>;
25 using DataVector = std::vector<double>;
26 
27 public:
Stats(std::string name,EnumType enumerate,bool advanced=true)28     Stats(std::string name, EnumType enumerate, bool advanced = true) :
29         m_name(name), m_enumerate(enumerate), m_advanced(advanced)
30     { reset(); }
31 
32     // Merge another summary with this one. 'name', 'enumerate' and 'advanced' must match
33     // or false is returns and no merge occurs.
34     bool merge(const Stats& s);
minimum() const35     double minimum() const
36         { return m_min; }
maximum() const37     double maximum() const
38         { return m_max; }
average() const39     double average() const
40         { return M1; }
populationVariance() const41     double populationVariance() const
42         { return M2 / m_cnt; }
sampleVariance() const43     double sampleVariance() const
44         { return M2 / (m_cnt - 1.0); }
variance() const45     double variance() const
46         { return sampleVariance(); }
populationStddev() const47     double populationStddev() const
48         { return std::sqrt(populationVariance()); }
sampleStddev() const49     double sampleStddev() const
50         { return std::sqrt(sampleVariance()); }
stddev() const51     double stddev() const
52         { return sampleStddev(); }
populationSkewness() const53     double populationSkewness() const
54     {
55         if (!M2 || ! m_advanced)
56             return 0;
57         return std::sqrt(double(m_cnt)) * M3 / std::pow(M2, 1.5);
58     }
sampleSkewness() const59     double sampleSkewness() const
60     {
61         if (M2 == 0 || m_cnt <= 2 || !m_advanced)
62             return 0.0;
63         double c((double)m_cnt);
64         return populationSkewness() * std::sqrt(c) * std::sqrt(c - 1) / (c - 2);
65     }
skewness() const66     double skewness() const
67     {
68         return sampleSkewness();
69     }
populationKurtosis() const70     double populationKurtosis() const
71     {
72         if (M2 == 0 || !m_advanced)
73             return 0;
74         return double(m_cnt) * M4 / (M2 * M2);
75     }
populationExcessKurtosis() const76     double populationExcessKurtosis() const
77     {
78         if (M2 == 0 || !m_advanced)
79             return 0;
80         return populationKurtosis() - 3;
81     }
sampleKurtosis() const82     double sampleKurtosis() const
83     {
84         if (M2 == 0 || m_cnt <= 3 || !m_advanced)
85             return 0;
86         double c((double)m_cnt);
87         return populationKurtosis() * (c + 1) * (c - 1) / ((c - 2) * (c - 3));
88     }
sampleExcessKurtosis() const89     double sampleExcessKurtosis() const
90     {
91         if (M2 == 0 || m_cnt <= 3 || !m_advanced)
92             return 0;
93         double c((double)m_cnt);
94         return sampleKurtosis() - 3 * (c - 1) * (c - 1) / ((c - 2) * (c - 3));
95     }
kurtosis() const96     double kurtosis() const
97     {
98         return sampleExcessKurtosis();
99     }
median() const100     double median() const
101         { return m_median; }
mad() const102     double mad() const
103         { return m_mad; }
count() const104     PointCount count() const
105         { return m_cnt; }
name() const106     std::string name() const
107         { return m_name; }
values() const108     const EnumMap& values() const
109         { return m_values; }
110 
111     void computeGlobalStats();
112 
reset()113     void reset()
114     {
115         m_max = (std::numeric_limits<double>::lowest)();
116         m_min = (std::numeric_limits<double>::max)();
117         m_cnt = 0;
118         m_median = 0.0;
119         m_mad = 0.0;
120         M1 = M2 = M3 = M4 = 0.0;
121     }
122 
insert(double value)123     void insert(double value)
124     {
125         m_cnt++;
126         m_min = (std::min)(m_min, value);
127         m_max = (std::max)(m_max, value);
128 
129         if (m_enumerate != NoEnum)
130             m_values[value]++;
131         if (m_enumerate == Global)
132         {
133             if (m_data.capacity() - m_data.size() < 10000)
134                 m_data.reserve(m_data.capacity() + m_cnt);
135             m_data.push_back(value);
136         }
137 
138         // stolen from http://www.johndcook.com/blog/skewness_kurtosis/
139 
140         PointCount n(m_cnt);
141 
142         // Difference from the mean
143         double delta = value - M1;
144         // Portion that this point's difference from the mean contributes
145         // to the mean.
146         double delta_n = delta / n;
147         double term1 = delta * delta_n * (n - 1);
148 
149         // First moment - average.
150         M1 += delta_n;
151 
152         if (m_advanced)
153         {
154             double delta_n2 = pow(delta_n, 2.0);
155             // Fourth moment - kurtosis (sum part)
156             M4 += term1 * delta_n2 * (n*n - 3*n + 3) +
157                 (6 * delta_n2 * M2) - (4 * delta_n * M3);
158             // Third moment - skewness (sum part)
159             M3 += term1 * delta_n * (n - 2) - 3 * delta_n * M2;
160         }
161         // Second moment - variance (sum part)
162         M2 += term1;
163     }
164 
165 private:
166     std::string m_name;
167     EnumType m_enumerate;
168     bool m_advanced;
169     double m_max;
170     double m_min;
171     double m_mad;
172     double m_median;
173     EnumMap m_values;
174     DataVector m_data;
175     PointCount m_cnt;
176     double M1, M2, M3, M4;
177 };
178 
179 using IndexedStats = std::vector<std::pair<pdal::Dimension::Id, Stats>>;
180 } // namespace untwine
181 
182