1 // This is brl/bbas/bsta/bsta_histogram.h
2 #ifndef bsta_histogram_h_
3 #define bsta_histogram_h_
4 //:
5 // \file
6 // \brief A simple histogram class
7 // \author Joseph Mundy
8 // \date May 19, 2004
9 //
10 // A templated histogram class.  Supports entropy calculations
11 //
12 // \verbatim
13 //  J.L. Mundy added min,max, percentile methods
14 //  B.A. Mayer add clear() method so a single instance can revert
15 //             to the default constructor to be reused.
16 // \endverbatim
17 
18 #include <vector>
19 #include <iostream>
20 #include <cassert>
21 #ifdef _MSC_VER
22 #  include <vcl_msvc_warnings.h>
23 #endif
24 #include <bsta/bsta_histogram_base.h>
25 
26 template <class T> class bsta_histogram : public bsta_histogram_base
27 {
28  public:
29   //: Default constructor
30   bsta_histogram();
31 
32   //:Simple constructor that assumes all data values are positive
33   bsta_histogram(const T range, const unsigned int nbins,
34                  const T min_prob = 0.0);
35 
36   //:More general constructor defining a signed value range
37   bsta_histogram(const T min, const T max, const unsigned int nbins,
38                  const T min_prob = 0.0);
39 
40   //:More general constructor specifying bin interval, delta
41   bsta_histogram(const unsigned int nbins, const T min, const T delta,
42                  const T min_prob = 0.0);
43 
44   //:construct from other histogram data
45   bsta_histogram(const T min, const T max, std::vector<T> const& data,
46                  const T min_prob = 0.0);
47 
48   ~bsta_histogram() override = default;
49 
50   // The number of bins in the histogram
nbins()51   unsigned int nbins() const { return nbins_; }
52 
53   //: min,max of total range
min()54   T min() const {return min_;}
max()55   T max() const {return max_;}
56 
57   //: bin interval
delta()58   T delta() const {return delta_;}
59 
60   //: minimum probability
min_prob()61   T min_prob() const {return min_prob_;}
62 
63   //: The value range for a bin
value_range(const unsigned int bin,T & vmin,T & vmax)64   void value_range(const unsigned int bin, T& vmin, T& vmax) const
65   { assert(bin<nbins_); vmin = bin*delta_+min_; vmax = (bin+1)*delta_+min_; }
66 
67   //: The average value for a bin
avg_bin_value(const unsigned int bin)68   T avg_bin_value(const unsigned int bin) const
69   { assert(bin<nbins_); return min_ + bin*delta_ + delta_/2; }
70 
71   //: The counts in a given bin
counts(const unsigned int bin)72   T counts(const unsigned int bin) const
73   { assert(bin<nbins_); return counts_[bin]; }
74 
75   //: probability of a given bin
76   T p(const unsigned int bin) const;
77 
78   //: probability of a value in the range
79   T p(const T value) const;
80 
81   //: Total area under the histogram
82   T area() const;
83 
84   //: The area under the histogram up to (excluding) the given bin
85   T cumulative_area(unsigned bin) const;
86 
87   //: Mean of distribution
88   T mean() const;
89 
90   //: Mean of distribution between bin indices
91   T mean(const unsigned int lowbin, const unsigned int highbin) const;
92 
93   //: Mean of distribution between values
94   T mean_vals(const T low, const T high) const;
95 
96   //: Variance of distribution
97   T variance() const;
98 
99   //: Variance of distribution between bin indices
100   T variance(const unsigned int lowbin, const unsigned int highbin) const;
101 
102   //: Variance of distribution between values
103   T variance_vals(const T low, const T high) const;
104 
105   //: First non-zero bin from below
106   unsigned low_bin();
107 
108   //: First non-zero bin from above
109   unsigned high_bin();
110 
111   //: Fraction of area less than val
112   T fraction_below(const T value) const;
113 
114   //: Fraction of area greater than val
115   T fraction_above(const T value) const;
116 
117   //: Value for area fraction below value
118   T value_with_area_below(const T area_fraction) const;
119 
120   //: Value for area fraction above value
121   T value_with_area_above(const T area_fraction) const;
122 
123   //: Entropy of p(x)
124   T entropy() const;
125 
126   //: Renyi alpha = 2 entropy of p(x)
127   T renyi_entropy() const;
128 
129  //: Increase the count of the bin corresponding to val by mag
130   void upcount(T val, T mag);
131 
132   //: Return the bin this element would fall on - it doesn't modify the current count
133   int bin_at_val(T val) const;
134 
135   //: set the count for a given bin
set_count(const unsigned bin,const T count)136   void set_count(const unsigned bin, const T count)
137   { if (bin<nbins_){ counts_[bin]=count; area_valid_ = false;}}
138 
139   //: array of bin values
value_array()140   std::vector<T> value_array() const {
141     std::vector<T> v(nbins_);
142     for (unsigned b = 0; b<nbins_; ++b) v[b]=avg_bin_value(b); return v; }
143 
144   //: array of bin counts
count_array()145   std::vector<T> count_array() const {
146     std::vector<T> v(nbins_);
147     for (unsigned b = 0; b<nbins_; ++b) v[b]=counts(b); return v; }
148 
149  //: Smooth the histogram with a Parzen window of sigma
150   void parzen(const T sigma);
151 
152   //: Write to stream
153   std::ostream& write(std::ostream&) const;
154 
155   //: Read from stream
156   std::istream& read(std::istream&);
157 
158   void pretty_print(std::ostream& os = std::cout) const;
159 
160   void print(std::ostream& os = std::cout) const;
161 
162   //: print as a matlab plot command
163   void print_to_m(std::ostream& os = std::cout) const;
164 
165   //: print x and y arrays
166   void print_to_arrays(std::ostream& os = std::cout) const;
167 
168   //: print values and bin probability in full (even if counts ==0)
169   void print_vals_prob(std::ostream& os = std::cout) const;
170 
171   //:restore default constructor state.
172   void clear();
173 
174  private:
175   void compute_area() const; // mutable const
176   mutable bool area_valid_;
177   mutable T area_;
178   unsigned int nbins_;
179   T range_;
180   T delta_;
181   T min_prob_;
182   T min_;
183   T max_;
184   std::vector<T> counts_;
185 };
186 // Histogram intersection (sum of min probabilies)
187 template <class T>
188 T hist_intersect(bsta_histogram<T> const& ha, bsta_histogram<T> const& hb);
189 
190 // Bhattacharyya distance  -log(sum(sqrt(pa*pb)))
191 template <class T>
192 T bhatt_distance(bsta_histogram<T> const& ha, bsta_histogram<T> const& hb);
193 
194 // Jensen-Shannon divergence
195 // 1/2 sum_i ( pa_i*log(2*pa_i/(pa_i+pb_i)+ pb_i*log(2*pb_i/(pa_i+pb_i)) )
196 template <class T>
197 T js_divergence(bsta_histogram<T> const& ha, bsta_histogram<T> const& hb);
198 
199 // Bhattacharyya distance  -log(sum(sqrt(pa*pb)))
200 template <class T>
201 T bhatt_distance(bsta_histogram<T> const& ha, bsta_histogram<T> const& hb);
202 
203 // Scale the range of the histogram
204 template <class T>
205 bsta_histogram<T> scale(bsta_histogram<T> const& h, T s);
206 
207 // Find best scale to minimize Jensen-Shannon divergence scaling "from" to "to"
208 template <class T>
209 T minimum_js_divergence_scale(bsta_histogram<T> const& h_from, bsta_histogram<T> const& h_to,
210                               T min_scale = T(1)/T(4));
211 // Histogram intersection (sum of min probabilies)
212 template <class T>
213 bool merge_hists(bsta_histogram<T> const& ha, bsta_histogram<T> const& hb, bsta_histogram<T>& h_merged);
214 
215 //: Write histogram to stream
216 // \relatesalso bsta_histogram
217 template <class T>
218 std::ostream&  operator<<(std::ostream& s, bsta_histogram<T> const& h);
219 
220 //: Read histogram from stream
221 // \relatesalso bsta_histogram
222 template <class T>
223 std::istream&  operator>>(std::istream& is,  bsta_histogram<T>& h);
224 
225 //: Forward declaration of specialization
226 template <>
227 void bsta_histogram<char>::pretty_print(std::ostream& os) const;
228 #include <bsta/bsta_histogram_sptr.h>
229 #define BSTA_HISTOGRAM_INSTANTIATE(T) extern "Please #include <bsta/bsta_histogram.hxx>"
230 
231 #endif // bsta_histogram_h_
232