1// Copyright 2016, Circonus, Inc. All rights reserved.
2// See the LICENSE file.
3
4// Package circllhist provides an implementation of Circonus' fixed log-linear
5// histogram data structure.  This allows tracking of histograms in a
6// composable way such that accurate error can be reasoned about.
7package circonusllhist
8
9import (
10	"bytes"
11	"errors"
12	"fmt"
13	"math"
14	"strconv"
15	"strings"
16	"sync"
17)
18
19const (
20	DEFAULT_HIST_SIZE = uint16(100)
21)
22
23var power_of_ten = [...]float64{
24	1, 10, 100, 1000, 10000, 100000, 1e+06, 1e+07, 1e+08, 1e+09, 1e+10,
25	1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20,
26	1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30,
27	1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40,
28	1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50,
29	1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60,
30	1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70,
31	1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
32	1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90,
33	1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100,
34	1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109,
35	1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118,
36	1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127,
37	1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120,
38	1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111,
39	1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102,
40	1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96,
41	1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86,
42	1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76,
43	1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66,
44	1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56,
45	1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46,
46	1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, 1e-36,
47	1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26,
48	1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, 1e-16,
49	1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06,
50	1e-05, 0.0001, 0.001, 0.01, 0.1,
51}
52
53// A Bracket is a part of a cumulative distribution.
54type Bin struct {
55	val   int8
56	exp   int8
57	count uint64
58}
59
60func NewBinRaw(val int8, exp int8, count uint64) *Bin {
61	return &Bin{
62		val:   val,
63		exp:   exp,
64		count: count,
65	}
66}
67func NewBin() *Bin {
68	return NewBinRaw(0, 0, 0)
69}
70func NewBinFromFloat64(d float64) *Bin {
71	hb := NewBinRaw(0, 0, 0)
72	hb.SetFromFloat64(d)
73	return hb
74}
75
76type FastL2 struct {
77	l1, l2 int
78}
79
80func (hb *Bin) fastl2() FastL2 {
81	return FastL2{l1: int(uint8(hb.exp)), l2: int(uint8(hb.val))}
82}
83func (hb *Bin) SetFromFloat64(d float64) *Bin {
84	hb.val = -1
85	if math.IsInf(d, 0) || math.IsNaN(d) {
86		return hb
87	}
88	if d == 0.0 {
89		hb.val = 0
90		return hb
91	}
92	sign := 1
93	if math.Signbit(d) {
94		sign = -1
95	}
96	d = math.Abs(d)
97	big_exp := int(math.Floor(math.Log10(d)))
98	hb.exp = int8(big_exp)
99	if int(hb.exp) != big_exp { //rolled
100		hb.exp = 0
101		if big_exp < 0 {
102			hb.val = 0
103		}
104		return hb
105	}
106	d = d / hb.PowerOfTen()
107	d = d * 10
108	hb.val = int8(sign * int(math.Floor(d+1e-13)))
109	if hb.val == 100 || hb.val == -100 {
110		if hb.exp < 127 {
111			hb.val = hb.val / 10
112			hb.exp++
113		} else {
114			hb.val = 0
115			hb.exp = 0
116		}
117	}
118	if hb.val == 0 {
119		hb.exp = 0
120		return hb
121	}
122	if !((hb.val >= 10 && hb.val < 100) ||
123		(hb.val <= -10 && hb.val > -100)) {
124		hb.val = -1
125		hb.exp = 0
126	}
127	return hb
128}
129func (hb *Bin) PowerOfTen() float64 {
130	idx := int(uint8(hb.exp))
131	return power_of_ten[idx]
132}
133
134func (hb *Bin) IsNaN() bool {
135	if hb.val > 99 || hb.val < -99 {
136		return true
137	}
138	return false
139}
140func (hb *Bin) Val() int8 {
141	return hb.val
142}
143func (hb *Bin) Exp() int8 {
144	return hb.exp
145}
146func (hb *Bin) Count() uint64 {
147	return hb.count
148}
149func (hb *Bin) Value() float64 {
150	if hb.IsNaN() {
151		return math.NaN()
152	}
153	if hb.val < 10 && hb.val > -10 {
154		return 0.0
155	}
156	return (float64(hb.val) / 10.0) * hb.PowerOfTen()
157}
158func (hb *Bin) BinWidth() float64 {
159	if hb.IsNaN() {
160		return math.NaN()
161	}
162	if hb.val < 10 && hb.val > -10 {
163		return 0.0
164	}
165	return hb.PowerOfTen() / 10.0
166}
167func (hb *Bin) Midpoint() float64 {
168	if hb.IsNaN() {
169		return math.NaN()
170	}
171	out := hb.Value()
172	if out == 0 {
173		return 0
174	}
175	interval := hb.BinWidth()
176	if out < 0 {
177		interval = interval * -1
178	}
179	return out + interval/2.0
180}
181func (hb *Bin) Left() float64 {
182	if hb.IsNaN() {
183		return math.NaN()
184	}
185	out := hb.Value()
186	if out >= 0 {
187		return out
188	}
189	return out - hb.BinWidth()
190}
191
192func (h1 *Bin) Compare(h2 *Bin) int {
193	var v1, v2 int
194
195	//     slide exp positive,
196	//                         shift by size of val
197	//                              multiple by (val != 0)
198	//       then add or subtract val accordingly
199
200	if h1.val >= 0 {
201		v1 = ((int(h1.exp)+256)<<8)*int(((h1.val|(^h1.val+1))>>8)&1) + int(h1.val)
202	} else {
203		v1 = ((int(h1.exp)+256)<<8)*int(((h1.val|(^h1.val+1))>>8)&1) - int(h1.val)
204	}
205	if h2.val >= 0 {
206		v2 = ((int(h2.exp)+256)<<8)*int(((h2.val|(^h2.val+1))>>8)&1) + int(h2.val)
207	} else {
208		v2 = ((int(h2.exp)+256)<<8)*int(((h2.val|(^h2.val+1))>>8)&1) - int(h2.val)
209	}
210
211	// return the difference
212	return v2 - v1
213}
214
215// This histogram structure tracks values are two decimal digits of precision
216// with a bounded error that remains bounded upon composition
217type Histogram struct {
218	bvs    []Bin
219	used   uint16
220	allocd uint16
221
222	lookup [256][]uint16
223
224	mutex    sync.Mutex
225	useLocks bool
226}
227
228// New returns a new Histogram
229func New() *Histogram {
230	return &Histogram{
231		allocd:   DEFAULT_HIST_SIZE,
232		used:     0,
233		bvs:      make([]Bin, DEFAULT_HIST_SIZE),
234		useLocks: true,
235	}
236}
237
238// New returns a Histogram without locking
239func NewNoLocks() *Histogram {
240	return &Histogram{
241		allocd:   DEFAULT_HIST_SIZE,
242		used:     0,
243		bvs:      make([]Bin, DEFAULT_HIST_SIZE),
244		useLocks: false,
245	}
246}
247
248// NewFromStrings returns a Histogram created from DecStrings strings
249func NewFromStrings(strs []string, locks bool) (*Histogram, error) {
250
251	bin, err := stringsToBin(strs)
252	if err != nil {
253		return nil, err
254	}
255
256	return newFromBins(bin, locks), nil
257}
258
259// NewFromBins returns a Histogram created from a bins struct slice
260func newFromBins(bins []Bin, locks bool) *Histogram {
261	return &Histogram{
262		allocd:   uint16(len(bins) + 10), // pad it with 10
263		used:     uint16(len(bins)),
264		bvs:      bins,
265		useLocks: locks,
266	}
267}
268
269// Max returns the approximate maximum recorded value.
270func (h *Histogram) Max() float64 {
271	return h.ValueAtQuantile(1.0)
272}
273
274// Min returns the approximate minimum recorded value.
275func (h *Histogram) Min() float64 {
276	return h.ValueAtQuantile(0.0)
277}
278
279// Mean returns the approximate arithmetic mean of the recorded values.
280func (h *Histogram) Mean() float64 {
281	return h.ApproxMean()
282}
283
284// Reset forgets all bins in the histogram (they remain allocated)
285func (h *Histogram) Reset() {
286	if h.useLocks {
287		h.mutex.Lock()
288		defer h.mutex.Unlock()
289	}
290	for i := 0; i < 256; i++ {
291		if h.lookup[i] != nil {
292			for j := range h.lookup[i] {
293				h.lookup[i][j] = 0
294			}
295		}
296	}
297	h.used = 0
298}
299
300// RecordIntScale records an integer scaler value, returning an error if the
301// value is out of range.
302func (h *Histogram) RecordIntScale(val, scale int) error {
303	return h.RecordIntScales(val, scale, 1)
304}
305
306// RecordValue records the given value, returning an error if the value is out
307// of range.
308func (h *Histogram) RecordValue(v float64) error {
309	return h.RecordValues(v, 1)
310}
311
312// RecordCorrectedValue records the given value, correcting for stalls in the
313// recording process. This only works for processes which are recording values
314// at an expected interval (e.g., doing jitter analysis). Processes which are
315// recording ad-hoc values (e.g., latency for incoming requests) can't take
316// advantage of this.
317// CH Compat
318func (h *Histogram) RecordCorrectedValue(v, expectedInterval int64) error {
319	if err := h.RecordValue(float64(v)); err != nil {
320		return err
321	}
322
323	if expectedInterval <= 0 || v <= expectedInterval {
324		return nil
325	}
326
327	missingValue := v - expectedInterval
328	for missingValue >= expectedInterval {
329		if err := h.RecordValue(float64(missingValue)); err != nil {
330			return err
331		}
332		missingValue -= expectedInterval
333	}
334
335	return nil
336}
337
338// find where a new bin should go
339func (h *Histogram) InternalFind(hb *Bin) (bool, uint16) {
340	if h.used == 0 {
341		return false, 0
342	}
343	f2 := hb.fastl2()
344	if h.lookup[f2.l1] != nil {
345		if idx := h.lookup[f2.l1][f2.l2]; idx != 0 {
346			return true, idx - 1
347		}
348	}
349	rv := -1
350	idx := uint16(0)
351	l := int(0)
352	r := int(h.used - 1)
353	for l < r {
354		check := (r + l) / 2
355		rv = h.bvs[check].Compare(hb)
356		if rv == 0 {
357			l = check
358			r = check
359		} else if rv > 0 {
360			l = check + 1
361		} else {
362			r = check - 1
363		}
364	}
365	if rv != 0 {
366		rv = h.bvs[l].Compare(hb)
367	}
368	idx = uint16(l)
369	if rv == 0 {
370		return true, idx
371	}
372	if rv < 0 {
373		return false, idx
374	}
375	idx++
376	return false, idx
377}
378
379func (h *Histogram) InsertBin(hb *Bin, count int64) uint64 {
380	if h.useLocks {
381		h.mutex.Lock()
382		defer h.mutex.Unlock()
383	}
384	found, idx := h.InternalFind(hb)
385	if !found {
386		if h.used == h.allocd {
387			new_bvs := make([]Bin, h.allocd+DEFAULT_HIST_SIZE)
388			if idx > 0 {
389				copy(new_bvs[0:], h.bvs[0:idx])
390			}
391			if idx < h.used {
392				copy(new_bvs[idx+1:], h.bvs[idx:])
393			}
394			h.allocd = h.allocd + DEFAULT_HIST_SIZE
395			h.bvs = new_bvs
396		} else {
397			copy(h.bvs[idx+1:], h.bvs[idx:h.used])
398		}
399		h.bvs[idx].val = hb.val
400		h.bvs[idx].exp = hb.exp
401		h.bvs[idx].count = uint64(count)
402		h.used++
403		for i := idx; i < h.used; i++ {
404			f2 := h.bvs[i].fastl2()
405			if h.lookup[f2.l1] == nil {
406				h.lookup[f2.l1] = make([]uint16, 256)
407			}
408			h.lookup[f2.l1][f2.l2] = uint16(i) + 1
409		}
410		return h.bvs[idx].count
411	}
412	var newval uint64
413	if count >= 0 {
414		newval = h.bvs[idx].count + uint64(count)
415	} else {
416		newval = h.bvs[idx].count - uint64(-count)
417	}
418	if newval < h.bvs[idx].count { //rolled
419		newval = ^uint64(0)
420	}
421	h.bvs[idx].count = newval
422	return newval - h.bvs[idx].count
423}
424
425// RecordIntScales records n occurrences of the given value, returning an error if
426// the value is out of range.
427func (h *Histogram) RecordIntScales(val, scale int, n int64) error {
428	sign := 1
429	if val == 0 {
430		scale = 0
431	} else {
432		if val < 0 {
433			val = 0 - val
434			sign = -1
435		}
436		if val < 10 {
437			val *= 10
438			scale -= 1
439		}
440		for val > 100 {
441			val /= 10
442			scale++
443		}
444	}
445	if scale < -128 {
446		val = 0
447		scale = 0
448	} else if scale > 127 {
449		val = 0xff
450		scale = 0
451	}
452	val *= sign
453	hb := Bin{val: int8(val), exp: int8(scale), count: 0}
454	h.InsertBin(&hb, n)
455	return nil
456}
457
458// RecordValues records n occurrences of the given value, returning an error if
459// the value is out of range.
460func (h *Histogram) RecordValues(v float64, n int64) error {
461	var hb Bin
462	hb.SetFromFloat64(v)
463	h.InsertBin(&hb, n)
464	return nil
465}
466
467// Approximate mean
468func (h *Histogram) ApproxMean() float64 {
469	if h.useLocks {
470		h.mutex.Lock()
471		defer h.mutex.Unlock()
472	}
473	divisor := 0.0
474	sum := 0.0
475	for i := uint16(0); i < h.used; i++ {
476		midpoint := h.bvs[i].Midpoint()
477		cardinality := float64(h.bvs[i].count)
478		divisor += cardinality
479		sum += midpoint * cardinality
480	}
481	if divisor == 0.0 {
482		return math.NaN()
483	}
484	return sum / divisor
485}
486
487// Approximate sum
488func (h *Histogram) ApproxSum() float64 {
489	if h.useLocks {
490		h.mutex.Lock()
491		defer h.mutex.Unlock()
492	}
493	sum := 0.0
494	for i := uint16(0); i < h.used; i++ {
495		midpoint := h.bvs[i].Midpoint()
496		cardinality := float64(h.bvs[i].count)
497		sum += midpoint * cardinality
498	}
499	return sum
500}
501
502func (h *Histogram) ApproxQuantile(q_in []float64) ([]float64, error) {
503	if h.useLocks {
504		h.mutex.Lock()
505		defer h.mutex.Unlock()
506	}
507	q_out := make([]float64, len(q_in))
508	i_q, i_b := 0, uint16(0)
509	total_cnt, bin_width, bin_left, lower_cnt, upper_cnt := 0.0, 0.0, 0.0, 0.0, 0.0
510	if len(q_in) == 0 {
511		return q_out, nil
512	}
513	// Make sure the requested quantiles are in order
514	for i_q = 1; i_q < len(q_in); i_q++ {
515		if q_in[i_q-1] > q_in[i_q] {
516			return nil, errors.New("out of order")
517		}
518	}
519	// Add up the bins
520	for i_b = 0; i_b < h.used; i_b++ {
521		if !h.bvs[i_b].IsNaN() {
522			total_cnt += float64(h.bvs[i_b].count)
523		}
524	}
525	if total_cnt == 0.0 {
526		return nil, errors.New("empty_histogram")
527	}
528
529	for i_q = 0; i_q < len(q_in); i_q++ {
530		if q_in[i_q] < 0.0 || q_in[i_q] > 1.0 {
531			return nil, errors.New("out of bound quantile")
532		}
533		q_out[i_q] = total_cnt * q_in[i_q]
534	}
535
536	for i_b = 0; i_b < h.used; i_b++ {
537		if h.bvs[i_b].IsNaN() {
538			continue
539		}
540		bin_width = h.bvs[i_b].BinWidth()
541		bin_left = h.bvs[i_b].Left()
542		lower_cnt = upper_cnt
543		upper_cnt = lower_cnt + float64(h.bvs[i_b].count)
544		break
545	}
546	for i_q = 0; i_q < len(q_in); i_q++ {
547		for i_b < (h.used-1) && upper_cnt < q_out[i_q] {
548			i_b++
549			bin_width = h.bvs[i_b].BinWidth()
550			bin_left = h.bvs[i_b].Left()
551			lower_cnt = upper_cnt
552			upper_cnt = lower_cnt + float64(h.bvs[i_b].count)
553		}
554		if lower_cnt == q_out[i_q] {
555			q_out[i_q] = bin_left
556		} else if upper_cnt == q_out[i_q] {
557			q_out[i_q] = bin_left + bin_width
558		} else {
559			if bin_width == 0 {
560				q_out[i_q] = bin_left
561			} else {
562				q_out[i_q] = bin_left + (q_out[i_q]-lower_cnt)/(upper_cnt-lower_cnt)*bin_width
563			}
564		}
565	}
566	return q_out, nil
567}
568
569// ValueAtQuantile returns the recorded value at the given quantile (0..1).
570func (h *Histogram) ValueAtQuantile(q float64) float64 {
571	if h.useLocks {
572		h.mutex.Lock()
573		defer h.mutex.Unlock()
574	}
575	q_in := make([]float64, 1)
576	q_in[0] = q
577	q_out, err := h.ApproxQuantile(q_in)
578	if err == nil && len(q_out) == 1 {
579		return q_out[0]
580	}
581	return math.NaN()
582}
583
584// SignificantFigures returns the significant figures used to create the
585// histogram
586// CH Compat
587func (h *Histogram) SignificantFigures() int64 {
588	return 2
589}
590
591// Equals returns true if the two Histograms are equivalent, false if not.
592func (h *Histogram) Equals(other *Histogram) bool {
593	if h.useLocks {
594		h.mutex.Lock()
595		defer h.mutex.Unlock()
596	}
597	if other.useLocks {
598		other.mutex.Lock()
599		defer other.mutex.Unlock()
600	}
601	switch {
602	case
603		h.used != other.used:
604		return false
605	default:
606		for i := uint16(0); i < h.used; i++ {
607			if h.bvs[i].Compare(&other.bvs[i]) != 0 {
608				return false
609			}
610			if h.bvs[i].count != other.bvs[i].count {
611				return false
612			}
613		}
614	}
615	return true
616}
617
618func (h *Histogram) CopyAndReset() *Histogram {
619	if h.useLocks {
620		h.mutex.Lock()
621		defer h.mutex.Unlock()
622	}
623	newhist := &Histogram{
624		allocd: h.allocd,
625		used:   h.used,
626		bvs:    h.bvs,
627	}
628	h.allocd = DEFAULT_HIST_SIZE
629	h.bvs = make([]Bin, DEFAULT_HIST_SIZE)
630	h.used = 0
631	for i := 0; i < 256; i++ {
632		if h.lookup[i] != nil {
633			for j := range h.lookup[i] {
634				h.lookup[i][j] = 0
635			}
636		}
637	}
638	return newhist
639}
640func (h *Histogram) DecStrings() []string {
641	if h.useLocks {
642		h.mutex.Lock()
643		defer h.mutex.Unlock()
644	}
645	out := make([]string, h.used)
646	for i, bin := range h.bvs[0:h.used] {
647		var buffer bytes.Buffer
648		buffer.WriteString("H[")
649		buffer.WriteString(fmt.Sprintf("%3.1e", bin.Value()))
650		buffer.WriteString("]=")
651		buffer.WriteString(fmt.Sprintf("%v", bin.count))
652		out[i] = buffer.String()
653	}
654	return out
655}
656
657// takes the output of DecStrings and deserializes it into a Bin struct slice
658func stringsToBin(strs []string) ([]Bin, error) {
659
660	bins := make([]Bin, len(strs))
661	for i, str := range strs {
662
663		// H[0.0e+00]=1
664
665		// H[0.0e+00]= <1>
666		countString := strings.Split(str, "=")[1]
667		countInt, err := strconv.ParseInt(countString, 10, 64)
668		if err != nil {
669			return nil, err
670		}
671
672		// H[ <0.0> e+00]=1
673		valString := strings.Split(strings.Split(strings.Split(str, "=")[0], "e")[0], "[")[1]
674		valInt, err := strconv.ParseFloat(valString, 64)
675		if err != nil {
676			return nil, err
677		}
678
679		// H[0.0e <+00> ]=1
680		expString := strings.Split(strings.Split(strings.Split(str, "=")[0], "e")[1], "]")[0]
681		expInt, err := strconv.ParseInt(expString, 10, 8)
682		if err != nil {
683			return nil, err
684		}
685		bins[i] = *NewBinRaw(int8(valInt*10), int8(expInt), uint64(countInt))
686	}
687
688	return bins, nil
689}
690