1// Package histogram provides a Go implementation of BigML's histogram package 2// for Clojure/Java. It is currently experimental. 3package histogram 4 5import ( 6 "container/heap" 7 "math" 8 "sort" 9) 10 11type Bin struct { 12 Count int 13 Sum float64 14} 15 16func (b *Bin) Update(x *Bin) { 17 b.Count += x.Count 18 b.Sum += x.Sum 19} 20 21func (b *Bin) Mean() float64 { 22 return b.Sum / float64(b.Count) 23} 24 25type Bins []*Bin 26 27func (bs Bins) Len() int { return len(bs) } 28func (bs Bins) Less(i, j int) bool { return bs[i].Mean() < bs[j].Mean() } 29func (bs Bins) Swap(i, j int) { bs[i], bs[j] = bs[j], bs[i] } 30 31func (bs *Bins) Push(x interface{}) { 32 *bs = append(*bs, x.(*Bin)) 33} 34 35func (bs *Bins) Pop() interface{} { 36 return bs.remove(len(*bs) - 1) 37} 38 39func (bs *Bins) remove(n int) *Bin { 40 if n < 0 || len(*bs) < n { 41 return nil 42 } 43 x := (*bs)[n] 44 *bs = append((*bs)[:n], (*bs)[n+1:]...) 45 return x 46} 47 48type Histogram struct { 49 res *reservoir 50} 51 52func New(maxBins int) *Histogram { 53 return &Histogram{res: newReservoir(maxBins)} 54} 55 56func (h *Histogram) Insert(f float64) { 57 h.res.insert(&Bin{1, f}) 58 h.res.compress() 59} 60 61func (h *Histogram) Bins() Bins { 62 return h.res.bins 63} 64 65type reservoir struct { 66 n int 67 maxBins int 68 bins Bins 69} 70 71func newReservoir(maxBins int) *reservoir { 72 return &reservoir{maxBins: maxBins} 73} 74 75func (r *reservoir) insert(bin *Bin) { 76 r.n += bin.Count 77 i := sort.Search(len(r.bins), func(i int) bool { 78 return r.bins[i].Mean() >= bin.Mean() 79 }) 80 if i < 0 || i == r.bins.Len() { 81 // TODO(blake): Maybe use an .insert(i, bin) instead of 82 // performing the extra work of a heap.Push. 83 heap.Push(&r.bins, bin) 84 return 85 } 86 r.bins[i].Update(bin) 87} 88 89func (r *reservoir) compress() { 90 for r.bins.Len() > r.maxBins { 91 minGapIndex := -1 92 minGap := math.MaxFloat64 93 for i := 0; i < r.bins.Len()-1; i++ { 94 gap := gapWeight(r.bins[i], r.bins[i+1]) 95 if minGap > gap { 96 minGap = gap 97 minGapIndex = i 98 } 99 } 100 prev := r.bins[minGapIndex] 101 next := r.bins.remove(minGapIndex + 1) 102 prev.Update(next) 103 } 104} 105 106func gapWeight(prev, next *Bin) float64 { 107 return next.Mean() - prev.Mean() 108} 109