1 #ifndef bsta_histogram_hxx_
2 #define bsta_histogram_hxx_
3 //:
4 // \file
5 #include <iostream>
6 #include <cmath>
7 #include <limits>
8 #include "bsta_histogram.h"
9
10 #ifdef _MSC_VER
11 # include <vcl_msvc_warnings.h>
12 #endif
13 #include <cassert>
14 #include "bsta_gauss.h"
15 #include <vnl/vnl_math.h> // for log2e == 1/std::log(2.0)
16
17 template <class T>
bsta_histogram()18 bsta_histogram<T>::bsta_histogram()
19 : area_valid_(false), area_(0), nbins_(0), range_(0),
20 delta_(0),min_prob_(0), min_(0), max_(0)
21 {
22 bsta_histogram_base::type_ = bsta_histogram_traits<T>::type();
23 }
24
25 template <class T>
bsta_histogram(const T range,const unsigned int nbins,const T min_prob)26 bsta_histogram<T>::bsta_histogram(const T range, const unsigned int nbins,
27 const T min_prob)
28 : area_valid_(false), area_(0), nbins_(nbins), range_(range),
29 delta_(0),min_prob_(min_prob), min_(0), max_(range)
30 {
31 bsta_histogram_base::type_ = bsta_histogram_traits<T>::type();
32 if (nbins>0)
33 {
34 delta_ = range_/nbins;
35 counts_.resize(nbins, T(0));
36 }
37 }
38
39 template <class T>
bsta_histogram(const T min,const T max,const unsigned int nbins,const T min_prob)40 bsta_histogram<T>::bsta_histogram(const T min, const T max,
41 const unsigned int nbins,
42 const T min_prob)
43 : area_valid_(false), area_(0), nbins_(nbins), delta_(0),
44 min_prob_(min_prob), min_(min), max_(max)
45 {
46 bsta_histogram_base::type_ = bsta_histogram_traits<T>::type();
47 if (nbins>0)
48 {
49 range_ = max-min;
50 delta_ = range_/nbins;
51 counts_.resize(nbins, T(0));
52 }
53 else
54 {
55 range_ = 0;
56 delta_ = 0;
57 }
58 }
59
60 template <class T>
bsta_histogram(const unsigned int nbins,const T min,const T delta,const T min_prob)61 bsta_histogram<T>::bsta_histogram(const unsigned int nbins, const T min, const T delta,
62 const T min_prob)
63 : area_valid_(false), area_(0), nbins_(nbins), delta_(delta),
64 min_prob_(min_prob), min_(min), max_(min+nbins*delta)
65 {
66 bsta_histogram_base::type_ = bsta_histogram_traits<T>::type();
67 if (nbins>0)
68 {
69 range_ = max_-min_;
70 counts_.resize(nbins, T(0));
71 }
72 else
73 range_ = 0;
74 }
75
76 template <class T>
bsta_histogram(const T min,const T max,std::vector<T> const & data,const T min_prob)77 bsta_histogram<T>::bsta_histogram(const T min, const T max,
78 std::vector<T> const& data, const T min_prob)
79 : area_valid_(false), area_(0), delta_(0), min_prob_(min_prob),
80 min_(min), max_(max), counts_(data)
81 {
82 bsta_histogram_base::type_ = bsta_histogram_traits<T>::type();
83 nbins_ = data.size();
84 range_ = max-min;
85 if (nbins_>0)
86 delta_ = range_/nbins_;
87 else
88 delta_ = 0;
89 }
90
91 template <class T>
upcount(T x,T mag)92 void bsta_histogram<T>::upcount(T x, T mag)
93 {
94 if (x<min_||x>max_)
95 return;
96 for (unsigned int i = 0; i<nbins_; ++i)
97 if (T((i+1)*delta_) + min_ >= x)
98 {
99 counts_[i] += mag;
100 break;
101 }
102 area_valid_ = false;
103 }
104
105 template <class T>
106
bin_at_val(T x) const107 int bsta_histogram<T>::bin_at_val(T x) const
108 {
109 if (x<min_||x>max_)
110 return -1;
111 for (unsigned int i = 0; i<nbins_; ++i)
112 if (T((i+1)*delta_) + min_ >= x)
113 {
114 return i;
115 }
116 return -1;
117 }
118
119 template <class T>
compute_area() const120 void bsta_histogram<T>::compute_area() const
121 {
122 area_ = 0;
123 for (unsigned int i = 0; i < nbins_; ++i)
124 area_ += counts_[i];
125 area_valid_ = true;
126 }
127
128 template <class T>
cumulative_area(unsigned bin) const129 T bsta_histogram<T>::cumulative_area(unsigned bin) const
130 {
131 T area =0;
132 for (unsigned int i = 0; i<bin; ++i)
133 area += counts_[i];
134 return area;
135 }
136
137 template <class T>
p(unsigned int bin) const138 T bsta_histogram<T>::p(unsigned int bin) const
139 {
140 if (bin>=nbins_)
141 return 0;
142 if (!area_valid_)
143 compute_area();
144 if (area_ == T(0))
145 return 0;
146 else
147 return counts_[bin]/area_;
148 }
149
150 template <class T>
p(const T val) const151 T bsta_histogram<T>::p(const T val) const
152 {
153 if (val<min_||val>max_)
154 return 0;
155 if (!area_valid_)
156 compute_area();
157 if (area_ == T(0))
158 return 0;
159 else
160 for (unsigned int i = 0; i<nbins_; ++i)
161 if (T((i+1)*delta_) + min_ >= val)
162 return counts_[i]/area_;
163 return 0;
164 }
165
166 template <class T>
area() const167 T bsta_histogram<T>::area() const
168 {
169 if (!area_valid_)
170 compute_area();
171 return area_;
172 }
173
174 //: Mean of distribution
175 template <class T>
mean() const176 T bsta_histogram<T>::mean() const
177 {
178 return mean(0, nbins_-1);
179 }
180
181 //: Mean of distribution between bin indices
182 template <class T>
mean(const unsigned int lowbin,const unsigned int highbin) const183 T bsta_histogram<T>::mean(const unsigned int lowbin, const unsigned int highbin) const
184 {
185 assert(lowbin<=highbin);
186 assert(highbin<nbins_);
187 T sum = 0;
188 T sumx = 0;
189 for (unsigned i = lowbin; i<=highbin; ++i)
190 {
191 sum += counts_[i];
192 sumx += (i*delta_ + min_)*counts_[i];
193 }
194 if (sum==0)
195 return 0;
196 T result = sumx/sum;
197 return result;
198 }
199
200 template <class T>
mean_vals(const T low,const T high) const201 T bsta_histogram<T>::mean_vals(const T low, const T high) const
202 {
203 //find bin indices
204 T tlow=low, thigh=high;
205 if (tlow<min_) tlow = min_;
206 if (thigh>max_) thigh = max_;
207 unsigned low_bin=0, high_bin = 0;
208 for (unsigned int i = 0; i<nbins_; ++i)
209 if (T((i+1)*delta_) + min_ >= tlow) {
210 low_bin = i;
211 break;
212 }
213 for (unsigned int i = 0; i<nbins_; ++i)
214 if (T((i+1)*delta_) + min_ >= thigh) {
215 high_bin = i;
216 break;
217 }
218 return this->mean(low_bin, high_bin);
219 }
220
221 //: Variance of distribution
222 template <class T>
variance() const223 T bsta_histogram<T>::variance() const
224 {
225 return variance(0, nbins_-1);
226 }
227
228 //: Variance of distribution between bin indices
229 template <class T>
230 T bsta_histogram<T>::
variance(const unsigned int lowbin,const unsigned int highbin) const231 variance(const unsigned int lowbin, const unsigned int highbin) const
232 {
233 assert(lowbin<=highbin);
234 assert(highbin<nbins_);
235 T mean = this->mean(lowbin, highbin);
236 mean -= min_;
237 T sum = 0;
238 T sumx2 = 0;
239 for (unsigned i = lowbin; i<=highbin; ++i)
240 {
241 sum += counts_[i];
242 sumx2 += (i*delta_-mean)*(i*delta_-mean)*counts_[i];
243 }
244 if (sum==0)
245 return 0;
246 else
247 return sumx2/sum;
248 }
249
250 template <class T>
251 T bsta_histogram<T>::
variance_vals(const T low,const T high) const252 variance_vals(const T low, const T high) const
253 {
254 //find bin indices
255 T tlow=low, thigh=high;
256 if (tlow<min_) tlow = min_;
257 if (thigh>max_) thigh = max_;
258 unsigned low_bin=0, high_bin = 0;
259 for (unsigned int i = 0; i<nbins_; ++i)
260 if (T((i+1)*delta_) + min_ >= tlow) {
261 low_bin = i;
262 break;
263 }
264 for (unsigned int i = 0; i<nbins_; ++i)
265 if (T((i+1)*delta_) + min_ >= thigh) {
266 high_bin = i;
267 break;
268 }
269 return this->variance(low_bin, high_bin);
270 }
271
272 template <class T>
entropy() const273 T bsta_histogram<T>::entropy() const
274 {
275 double ent = 0;
276 for (unsigned int i = 0; i<nbins_; ++i)
277 {
278 double pi = this->p(i);
279 if (pi>min_prob_)
280 ent -= pi*std::log(pi);
281 }
282 ent *= vnl_math::log2e;
283 return T(ent);
284 }
285
286 template <class T>
renyi_entropy() const287 T bsta_histogram<T>::renyi_entropy() const
288 {
289 double sum = 0, ent = 0;
290 for (unsigned int i = 0; i<nbins_; ++i)
291 {
292 double pi = this->p(i);
293 sum += pi*pi;
294 }
295 if (sum>min_prob_)
296 ent = - std::log(sum)*vnl_math::log2e;
297 return T(ent);
298 }
299
300 template <class T>
parzen(const T sigma)301 void bsta_histogram<T>::parzen(const T sigma)
302 {
303 if (sigma<=0)
304 return;
305 double sd = (double)sigma;
306 std::vector<double> in(nbins_), out(nbins_);
307 for (unsigned int i=0; i<nbins_; ++i)
308 in[i]=counts_[i];
309 bsta_gauss::bsta_1d_gaussian(sd, in, out);
310 for (unsigned int i=0; i<nbins_; ++i)
311 counts_[i]=(T)out[i];
312 }
313
314 //The first non-zero bin starting at index = 0
315 template <class T>
low_bin()316 unsigned bsta_histogram<T>::low_bin()
317 {
318 unsigned lowbin=0;
319 for (; lowbin<nbins_&&counts_[lowbin]==0; ++lowbin) /*nothing*/;
320 return lowbin;
321 }
322
323 //The first non-zero bin starting at index = nbins-1
324 template <class T>
high_bin()325 unsigned bsta_histogram<T>::high_bin()
326 {
327 unsigned highbin=nbins_-1;
328 for (; highbin>0&&counts_[highbin]==0; --highbin) /*nothing*/;
329 return highbin;
330 }
331
332 // Fraction of area less than value
333 template <class T>
fraction_below(const T value) const334 T bsta_histogram<T>::fraction_below(const T value) const
335 {
336 if (value<min_||value>max_)
337 return 0;
338 if (!area_valid_)
339 compute_area();
340 if (area_ == T(0))
341 return 0;
342 T sum = 0, limit=(value-min_);
343 for (unsigned int i = 0; i<nbins_; ++i)
344 if (T((i+1)*delta_)<limit)
345 sum+=counts_[i];
346 else
347 return sum/area_;
348 return 0;
349 }
350
351 // Fraction of area greater than value
352 template <class T>
fraction_above(const T value) const353 T bsta_histogram<T>::fraction_above(const T value) const
354 {
355 if (value<min_||value>max_)
356 return 0;
357 if (!area_valid_)
358 compute_area();
359 if (area_ == T(0))
360 return 0;
361 T sum = 0, limit=(value-min_);
362 for (unsigned int i = 0; i<nbins_; ++i)
363 if (T((i+1)*delta_)>limit)
364 sum+=counts_[i];
365 return sum/area_;
366 }
367
368 // Value for area fraction below value
369 template <class T>
value_with_area_below(const T area_fraction) const370 T bsta_histogram<T>::value_with_area_below(const T area_fraction) const
371 {
372 if (area_fraction>T(1))
373 return 0;
374 if (!area_valid_)
375 compute_area();
376 if (area_ == T(0))
377 return 0;
378 T sum = 0;
379 for (unsigned int i=0; i<nbins_; ++i)
380 {
381 sum += counts_[i];
382 if (sum>=area_fraction*area_)
383 return (i+1)*delta_+min_;
384 }
385 return 0;
386 }
387
388 // Value for area fraction above value
389 template <class T>
value_with_area_above(const T area_fraction) const390 T bsta_histogram<T>::value_with_area_above(const T area_fraction) const
391 {
392 if (area_fraction>T(1))
393 return 0;
394 if (!area_valid_)
395 compute_area();
396 if (area_ == T(0))
397 return 0;
398 T sum = 0;
399 for (unsigned int i=nbins_-1; i!=0; i--)
400 {
401 sum += counts_[i];
402 if (sum>area_fraction*area_)
403 return (i+1)*delta_+min_;
404 }
405 return 0;
406 }
407
408 //function to erase all bin counts
409 template <class T>
clear()410 void bsta_histogram<T>::clear()
411 {
412 area_valid_ = false;
413 area_ = T(0);
414 counts_.assign(nbins_,T(0));
415 }
416
417 template <class T>
print(std::ostream & os) const418 void bsta_histogram<T>::print(std::ostream& os) const
419 {
420 for (unsigned int i=0; i<nbins_; ++i)
421 if (p(i) > 0)
422 os << "p[" << i << "]=" << p(i) << '\n';
423 }
424
425 template <class T>
pretty_print(std::ostream & os) const426 void bsta_histogram<T>::pretty_print(std::ostream& os) const
427 {
428 os << "area valid: " << area_valid_ << '\n'
429 << "area: " << area_ << '\n'
430 << "number of bins: " << nbins_ << '\n'
431 << "range: " << range_ << '\n'
432 << "delta: " << delta_ << '\n'
433 << "min_prob: " << min_prob_ << '\n'
434 << "min: " << min_ << '\n'
435 << "max: " << max_ << '\n'
436 << "counts: ";
437 for (unsigned i = 0; i < counts_.size() ; ++i)
438 os << counts_[i] << ' ';
439 os << '\n';
440 }
441
442 //: print as a matlab plot command
443 template <class T>
print_to_m(std::ostream & os) const444 void bsta_histogram<T>::print_to_m(std::ostream& os) const
445 {
446 os << "x = [" << min_;
447 for (unsigned int i=1; i<nbins_; ++i)
448 os << ", " << min_ + i*delta_;
449 os << "];\n"
450 << "y = [" << p((unsigned int)0);
451 for (unsigned int i=1; i<nbins_; ++i)
452 os << ", " << p(i);
453 os << "];\n"
454 << "bar(x,y,'r')\n";
455 }
456
457 //: print x and y arrays
458 template <class T>
print_to_arrays(std::ostream & os) const459 void bsta_histogram<T>::print_to_arrays(std::ostream& os) const
460 {
461 os << min_;
462 for (unsigned int i=1; i<nbins_; ++i)
463 os << ", " << min_ + i*delta_;
464 os << '\n'
465 << p((unsigned int)0);
466 for (unsigned int i=1; i<nbins_; ++i)
467 os << ", " << p(i);
468 os << '\n';
469 }
470
471 template <class T>
print_vals_prob(std::ostream & os) const472 void bsta_histogram<T>::print_vals_prob(std::ostream& os) const
473 {
474 for (unsigned i = 0; i<nbins_; ++i)
475 os << avg_bin_value(i) << ' ' << p(i) << '\n';
476 }
477
478 template <class T>
write(std::ostream & s) const479 std::ostream& bsta_histogram<T>::write(std::ostream& s) const
480 {
481 s << area_valid_ << ' '
482 << area_ << ' '
483 << nbins_ << ' '
484 << range_ << ' '
485 << delta_ << ' '
486 << min_prob_ << ' '
487 << min_ << ' '
488 << max_ << ' ';
489 for (unsigned i = 0; i < counts_.size() ; ++i)
490 s << counts_[i] << ' ';
491
492 return s << '\n';
493 }
494
495 template <class T>
read(std::istream & s)496 std::istream& bsta_histogram<T>::read(std::istream& s)
497 {
498 s >> area_valid_
499 >> area_
500 >> nbins_
501 >> range_
502 >> delta_
503 >> min_prob_
504 >> min_
505 >> max_;
506 counts_.resize(nbins_);
507 for (unsigned i = 0; i < counts_.size() ; ++i)
508 s >> counts_[i] ;
509 return s;
510 }
511 template <class T>
hist_intersect(bsta_histogram<T> const & ha,bsta_histogram<T> const & hb)512 T hist_intersect(bsta_histogram<T> const& ha, bsta_histogram<T> const& hb){
513 unsigned na = ha.nbins(), nb = hb.nbins();
514 if(na != nb){
515 std::cout << "histograms do not have the same number of bins\n";
516 return std::numeric_limits<T>::max();
517 }
518 T sum = T(0);
519 for(unsigned i = 0; i<na; ++i){
520 T pa = ha.p(i), pb = hb.p(i);
521 if(pa<pb)
522 sum += pa;
523 else
524 sum+= pb;
525 }
526 return sum;
527 }
528 template <class T>
js_divergence(bsta_histogram<T> const & ha,bsta_histogram<T> const & hb)529 T js_divergence(bsta_histogram<T> const& ha, bsta_histogram<T> const& hb){
530 unsigned na = ha.nbins(), nb = hb.nbins();
531 if(na != nb){
532 std::cout << "histograms do not have the same number of bins\n";
533 return std::numeric_limits<T>::max();
534 }
535 T sum = T(0);
536 for(unsigned i = 0; i<na; ++i){
537 T pa = ha.p(i), pb = hb.p(i);
538 T pavg = (pa + pb)/T(2);
539 T jsa = T(0), jsb = T(0);
540 if(pa != T(0))
541 jsa = pa*std::log(pa/pavg);
542 if(pb != T(0))
543 jsb = pb*std::log(pb/pavg);
544 sum += jsa + jsb;
545 }
546 return sum/T(2);
547 }
548 template <class T>
bhatt_distance(bsta_histogram<T> const & ha,bsta_histogram<T> const & hb)549 T bhatt_distance(bsta_histogram<T> const& ha, bsta_histogram<T> const& hb) {
550 unsigned na = ha.nbins(), nb = hb.nbins();
551 if (na != nb) {
552 std::cout << "histograms do not have the same number of bins\n";
553 return std::numeric_limits<T>::max();
554 }
555 T sum = T(0);
556 for (unsigned i = 0; i<na; ++i) {
557 T pa = ha.p(i), pb = hb.p(i);
558 if (pa<T(0) || pb<T(0)) {
559 std::cout << "Fatal! - negative probablity\n";
560 return std::numeric_limits<T>::max();
561 }
562 sum += sqrt(pa*pb);
563 }
564 if (sum<T(0)) {
565 std::cout << "Fatal! - negative sum\n";
566 return std::numeric_limits<T>::max();
567 }
568 return -log(sum);
569
570 }
571 template <class T>
scale(bsta_histogram<T> const & h,T s)572 bsta_histogram<T> scale(bsta_histogram<T> const& h, T s){
573 T min = h.min(), max = h.max(), delta = h.delta();
574 unsigned nbins = h.nbins();
575 // create return hist with same number of bins and range
576 bsta_histogram<T> hret(min, max, nbins);
577 if(s>T(1)) s = T(1);
578 for(T x = max-(delta/2.0); x>=(min+(delta/2.0)); x -= delta)
579 {
580 unsigned x_bin = static_cast<unsigned>(h.bin_at_val(x));
581 T trans_x = (x-min)*s + min;
582 unsigned trans_bin = static_cast<unsigned>(hret.bin_at_val(trans_x));
583 T tbin_val = hret.avg_bin_value(trans_bin);
584 //amount to distribute to adjacent bins
585 T offset = T(2)*(trans_x - tbin_val)/delta;
586 T x_counts = h.counts(x_bin);
587 // cases
588 if( offset <= T(0)){
589 if(offset<-T(1)) offset = -T(1);
590 if(trans_bin == 0){// 100% of the counts go in trans_bin
591 T ret_counts = hret.counts(trans_bin);
592 hret.set_count(trans_bin, ret_counts+x_counts);
593 }else{// distribute to bin below
594 T ret_counts = hret.counts(trans_bin);
595 T ret_counts_minus = hret.counts(trans_bin-1);
596 T counts_for_bin = (T(1)+offset)*x_counts;
597 T counts_for_minus_bin = -offset*x_counts;
598 hret.set_count(trans_bin, counts_for_bin+ret_counts);
599 hret.set_count(trans_bin-1, counts_for_minus_bin+ret_counts_minus);
600 }
601 }else{ // offset > 0
602 if(offset>T(1)) offset = T(1);
603 if(trans_bin == nbins-1){ // 100% of counts go in trans_bin
604 T ret_counts = hret.counts(trans_bin);
605 hret.set_count(trans_bin, ret_counts+x_counts);
606 }else{ // distribute to bin above
607 T ret_counts = hret.counts(trans_bin);
608 T ret_counts_plus = hret.counts(trans_bin+1);
609 T counts_for_bin = (T(1)-offset)*x_counts;
610 T counts_for_plus_bin = offset*x_counts;
611 hret.set_count(trans_bin, counts_for_bin+ret_counts);
612 hret.set_count(trans_bin+1, counts_for_plus_bin+ret_counts_plus);
613 }
614 }
615 }
616 return hret;
617 }
618 // find scale to minimize js_dirvergence
619 template <class T>
minimum_js_divergence_scale(bsta_histogram<T> const & h_from,bsta_histogram<T> const & h_to,T min_scale)620 T minimum_js_divergence_scale(bsta_histogram<T> const& h_from, bsta_histogram<T> const& h_to,
621 T min_scale){
622 T min_js_d = std::numeric_limits<T>::max(), scale_min = T(1);
623 T ds = min_scale/T(10);
624 for(T s = (T(1)-ds); s>=min_scale; s-=ds){
625 bsta_histogram<T> hs = scale(h_from, s);
626 T jsd = js_divergence(h_to, hs);
627 if(jsd<min_js_d){
628 scale_min = s;
629 min_js_d = jsd;
630 }
631 }
632 return scale_min;
633 }
634
635 template <class T>
merge_hists(bsta_histogram<T> const & ha,bsta_histogram<T> const & hb,bsta_histogram<T> & h_merged)636 bool merge_hists(bsta_histogram<T> const& ha, bsta_histogram<T> const& hb, bsta_histogram<T>& h_merged){
637 // ha and hb must have the same number of bins and min max values
638 if(ha.nbins() != hb.nbins()){
639 std::cout << "Can't merge histogams with different numbers of bins" << std::endl;
640 return false;
641 }
642 if(ha.min() != hb.min() || ha.max() != hb.max()){
643 std::cout << "Can't merge histogams with different value ranges" << std::endl;
644 return false;
645 }
646 bsta_histogram<T> temp(ha.min(), ha.max(), ha.nbins());
647 for(unsigned i = 0; i<ha.nbins(); ++i){
648 T counta = ha.counts(i), countb = hb.counts(i);
649 temp.set_count(i, counta + countb);
650 }
651 h_merged = temp;
652 return true;
653 }
654 //: Write to stream
655 template <class T>
operator <<(std::ostream & s,bsta_histogram<T> const & h)656 std::ostream& operator<<(std::ostream& s, bsta_histogram<T> const& h)
657 {
658 return h.write(s);
659 }
660
661 //: Read from stream
662 template <class T>
operator >>(std::istream & is,bsta_histogram<T> & h)663 std::istream& operator>>(std::istream& is, bsta_histogram<T>& h)
664 {
665 return h.read(is);
666 }
667
668
669 #undef BSTA_HISTOGRAM_INSTANTIATE
670 #define BSTA_HISTOGRAM_INSTANTIATE(T) \
671 template class bsta_histogram<T >; \
672 template T js_divergence(bsta_histogram<T> const& , bsta_histogram<T> const&); \
673 template T bhatt_distance(bsta_histogram<T> const& , bsta_histogram<T> const&); \
674 template bsta_histogram<T> scale(bsta_histogram<T> const&, T ); \
675 template T minimum_js_divergence_scale(bsta_histogram<T> const&, bsta_histogram<T> const&, T); \
676 template T hist_intersect(bsta_histogram<T> const& ha, bsta_histogram<T> const& hb); \
677 template bool merge_hists(bsta_histogram<T> const& ha, bsta_histogram<T> const& hb, bsta_histogram<T>& h_merged); \
678 template std::istream& operator>>(std::istream&, bsta_histogram<T >&); \
679 template std::ostream& operator<<(std::ostream&, bsta_histogram<T > const&)
680
681 #endif // bsta_histogram_hxx_
682