1 #pragma once 2 3 #include <unordered_map> 4 #include <vector> 5 6 #include <pdal/Dimension.hpp> 7 8 #include "../untwine/Common.hpp" 9 10 namespace untwine 11 { 12 13 class Stats 14 { 15 public: 16 enum EnumType 17 { 18 NoEnum, 19 Enumerate, 20 Count, 21 Global 22 }; 23 24 using EnumMap = std::unordered_map<double, PointCount>; 25 using DataVector = std::vector<double>; 26 27 public: Stats(std::string name,EnumType enumerate,bool advanced=true)28 Stats(std::string name, EnumType enumerate, bool advanced = true) : 29 m_name(name), m_enumerate(enumerate), m_advanced(advanced) 30 { reset(); } 31 32 // Merge another summary with this one. 'name', 'enumerate' and 'advanced' must match 33 // or false is returns and no merge occurs. 34 bool merge(const Stats& s); minimum() const35 double minimum() const 36 { return m_min; } maximum() const37 double maximum() const 38 { return m_max; } average() const39 double average() const 40 { return M1; } populationVariance() const41 double populationVariance() const 42 { return M2 / m_cnt; } sampleVariance() const43 double sampleVariance() const 44 { return M2 / (m_cnt - 1.0); } variance() const45 double variance() const 46 { return sampleVariance(); } populationStddev() const47 double populationStddev() const 48 { return std::sqrt(populationVariance()); } sampleStddev() const49 double sampleStddev() const 50 { return std::sqrt(sampleVariance()); } stddev() const51 double stddev() const 52 { return sampleStddev(); } populationSkewness() const53 double populationSkewness() const 54 { 55 if (!M2 || ! m_advanced) 56 return 0; 57 return std::sqrt(double(m_cnt)) * M3 / std::pow(M2, 1.5); 58 } sampleSkewness() const59 double sampleSkewness() const 60 { 61 if (M2 == 0 || m_cnt <= 2 || !m_advanced) 62 return 0.0; 63 double c((double)m_cnt); 64 return populationSkewness() * std::sqrt(c) * std::sqrt(c - 1) / (c - 2); 65 } skewness() const66 double skewness() const 67 { 68 return sampleSkewness(); 69 } populationKurtosis() const70 double populationKurtosis() const 71 { 72 if (M2 == 0 || !m_advanced) 73 return 0; 74 return double(m_cnt) * M4 / (M2 * M2); 75 } populationExcessKurtosis() const76 double populationExcessKurtosis() const 77 { 78 if (M2 == 0 || !m_advanced) 79 return 0; 80 return populationKurtosis() - 3; 81 } sampleKurtosis() const82 double sampleKurtosis() const 83 { 84 if (M2 == 0 || m_cnt <= 3 || !m_advanced) 85 return 0; 86 double c((double)m_cnt); 87 return populationKurtosis() * (c + 1) * (c - 1) / ((c - 2) * (c - 3)); 88 } sampleExcessKurtosis() const89 double sampleExcessKurtosis() const 90 { 91 if (M2 == 0 || m_cnt <= 3 || !m_advanced) 92 return 0; 93 double c((double)m_cnt); 94 return sampleKurtosis() - 3 * (c - 1) * (c - 1) / ((c - 2) * (c - 3)); 95 } kurtosis() const96 double kurtosis() const 97 { 98 return sampleExcessKurtosis(); 99 } median() const100 double median() const 101 { return m_median; } mad() const102 double mad() const 103 { return m_mad; } count() const104 PointCount count() const 105 { return m_cnt; } name() const106 std::string name() const 107 { return m_name; } values() const108 const EnumMap& values() const 109 { return m_values; } 110 111 void computeGlobalStats(); 112 reset()113 void reset() 114 { 115 m_max = (std::numeric_limits<double>::lowest)(); 116 m_min = (std::numeric_limits<double>::max)(); 117 m_cnt = 0; 118 m_median = 0.0; 119 m_mad = 0.0; 120 M1 = M2 = M3 = M4 = 0.0; 121 } 122 insert(double value)123 void insert(double value) 124 { 125 m_cnt++; 126 m_min = (std::min)(m_min, value); 127 m_max = (std::max)(m_max, value); 128 129 if (m_enumerate != NoEnum) 130 m_values[value]++; 131 if (m_enumerate == Global) 132 { 133 if (m_data.capacity() - m_data.size() < 10000) 134 m_data.reserve(m_data.capacity() + m_cnt); 135 m_data.push_back(value); 136 } 137 138 // stolen from http://www.johndcook.com/blog/skewness_kurtosis/ 139 140 PointCount n(m_cnt); 141 142 // Difference from the mean 143 double delta = value - M1; 144 // Portion that this point's difference from the mean contributes 145 // to the mean. 146 double delta_n = delta / n; 147 double term1 = delta * delta_n * (n - 1); 148 149 // First moment - average. 150 M1 += delta_n; 151 152 if (m_advanced) 153 { 154 double delta_n2 = pow(delta_n, 2.0); 155 // Fourth moment - kurtosis (sum part) 156 M4 += term1 * delta_n2 * (n*n - 3*n + 3) + 157 (6 * delta_n2 * M2) - (4 * delta_n * M3); 158 // Third moment - skewness (sum part) 159 M3 += term1 * delta_n * (n - 2) - 3 * delta_n * M2; 160 } 161 // Second moment - variance (sum part) 162 M2 += term1; 163 } 164 165 private: 166 std::string m_name; 167 EnumType m_enumerate; 168 bool m_advanced; 169 double m_max; 170 double m_min; 171 double m_mad; 172 double m_median; 173 EnumMap m_values; 174 DataVector m_data; 175 PointCount m_cnt; 176 double M1, M2, M3, M4; 177 }; 178 179 using IndexedStats = std::vector<std::pair<pdal::Dimension::Id, Stats>>; 180 } // namespace untwine 181 182