1// Copyright 2016, Circonus, Inc. All rights reserved.
2// See the LICENSE file.
3
4// Package circllhist provides an implementation of Circonus' fixed log-linear
5// histogram data structure.  This allows tracking of histograms in a
6// composable way such that accurate error can be reasoned about.
7package circonusllhist
8
9import (
10	"bytes"
11	"errors"
12	"fmt"
13	"math"
14	"sync"
15)
16
17const (
18	DEFAULT_HIST_SIZE = int16(100)
19)
20
21var power_of_ten = [...]float64{
22	1, 10, 100, 1000, 10000, 100000, 1e+06, 1e+07, 1e+08, 1e+09, 1e+10,
23	1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20,
24	1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30,
25	1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40,
26	1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50,
27	1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60,
28	1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70,
29	1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
30	1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90,
31	1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100,
32	1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109,
33	1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118,
34	1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127,
35	1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120,
36	1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111,
37	1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102,
38	1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96,
39	1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86,
40	1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76,
41	1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66,
42	1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56,
43	1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46,
44	1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, 1e-36,
45	1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26,
46	1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, 1e-16,
47	1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06,
48	1e-05, 0.0001, 0.001, 0.01, 0.1,
49}
50
51// A Bracket is a part of a cumulative distribution.
52type Bin struct {
53	val   int8
54	exp   int8
55	count uint64
56}
57
58func NewBinRaw(val int8, exp int8, count uint64) *Bin {
59	return &Bin{
60		val:   val,
61		exp:   exp,
62		count: count,
63	}
64}
65func NewBin() *Bin {
66	return NewBinRaw(0, 0, 0)
67}
68func NewBinFromFloat64(d float64) *Bin {
69	hb := NewBinRaw(0, 0, 0)
70	hb.SetFromFloat64(d)
71	return hb
72}
73func (hb *Bin) SetFromFloat64(d float64) *Bin {
74	hb.val = -1
75	if math.IsInf(d, 0) || math.IsNaN(d) {
76		return hb
77	}
78	if d == 0.0 {
79		hb.val = 0
80		return hb
81	}
82	sign := 1
83	if math.Signbit(d) {
84		sign = -1
85	}
86	d = math.Abs(d)
87	big_exp := int(math.Floor(math.Log10(d)))
88	hb.exp = int8(big_exp)
89	if int(hb.exp) != big_exp { //rolled
90		hb.exp = 0
91		if big_exp < 0 {
92			hb.val = 0
93		}
94		return hb
95	}
96	d = d / hb.PowerOfTen()
97	d = d * 10
98	hb.val = int8(sign * int(math.Floor(d+1e-13)))
99	if hb.val == 100 || hb.val == -100 {
100		if hb.exp < 127 {
101			hb.val = hb.val / 10
102			hb.exp++
103		} else {
104			hb.val = 0
105			hb.exp = 0
106		}
107	}
108	if hb.val == 0 {
109		hb.exp = 0
110		return hb
111	}
112	if !((hb.val >= 10 && hb.val < 100) ||
113		(hb.val <= -10 && hb.val > -100)) {
114		hb.val = -1
115		hb.exp = 0
116	}
117	return hb
118}
119func (hb *Bin) PowerOfTen() float64 {
120	idx := int(hb.exp)
121	if idx < 0 {
122		idx = 256 + idx
123	}
124	return power_of_ten[idx]
125}
126
127func (hb *Bin) IsNaN() bool {
128	if hb.val > 99 || hb.val < -99 {
129		return true
130	}
131	return false
132}
133func (hb *Bin) Val() int8 {
134	return hb.val
135}
136func (hb *Bin) Exp() int8 {
137	return hb.exp
138}
139func (hb *Bin) Count() uint64 {
140	return hb.count
141}
142func (hb *Bin) Value() float64 {
143	if hb.IsNaN() {
144		return math.NaN()
145	}
146	if hb.val < 10 && hb.val > -10 {
147		return 0.0
148	}
149	return (float64(hb.val) / 10.0) * hb.PowerOfTen()
150}
151func (hb *Bin) BinWidth() float64 {
152	if hb.IsNaN() {
153		return math.NaN()
154	}
155	if hb.val < 10 && hb.val > -10 {
156		return 0.0
157	}
158	return hb.PowerOfTen() / 10.0
159}
160func (hb *Bin) Midpoint() float64 {
161	if hb.IsNaN() {
162		return math.NaN()
163	}
164	out := hb.Value()
165	if out == 0 {
166		return 0
167	}
168	interval := hb.BinWidth()
169	if out < 0 {
170		interval = interval * -1
171	}
172	return out + interval/2.0
173}
174func (hb *Bin) Left() float64 {
175	if hb.IsNaN() {
176		return math.NaN()
177	}
178	out := hb.Value()
179	if out >= 0 {
180		return out
181	}
182	return out - hb.BinWidth()
183}
184
185func (h1 *Bin) Compare(h2 *Bin) int {
186	if h1.val == h2.val && h1.exp == h2.exp {
187		return 0
188	}
189	if h1.val == -1 {
190		return 1
191	}
192	if h2.val == -1 {
193		return -1
194	}
195	if h1.val == 0 {
196		if h2.val > 0 {
197			return 1
198		}
199		return -1
200	}
201	if h2.val == 0 {
202		if h1.val < 0 {
203			return 1
204		}
205		return -1
206	}
207	if h1.val < 0 && h2.val > 0 {
208		return 1
209	}
210	if h1.val > 0 && h2.val < 0 {
211		return -1
212	}
213	if h1.exp == h2.exp {
214		if h1.val < h2.val {
215			return 1
216		}
217		return -1
218	}
219	if h1.exp > h2.exp {
220		if h1.val < 0 {
221			return 1
222		}
223		return -1
224	}
225	if h1.exp < h2.exp {
226		if h1.val < 0 {
227			return -1
228		}
229		return 1
230	}
231	return 0
232}
233
234// This histogram structure tracks values are two decimal digits of precision
235// with a bounded error that remains bounded upon composition
236type Histogram struct {
237	mutex  sync.Mutex
238	bvs    []Bin
239	used   int16
240	allocd int16
241}
242
243// New returns a new Histogram
244func New() *Histogram {
245	return &Histogram{
246		allocd: DEFAULT_HIST_SIZE,
247		used:   0,
248		bvs:    make([]Bin, DEFAULT_HIST_SIZE),
249	}
250}
251
252// Max returns the approximate maximum recorded value.
253func (h *Histogram) Max() float64 {
254	return h.ValueAtQuantile(1.0)
255}
256
257// Min returns the approximate minimum recorded value.
258func (h *Histogram) Min() float64 {
259	return h.ValueAtQuantile(0.0)
260}
261
262// Mean returns the approximate arithmetic mean of the recorded values.
263func (h *Histogram) Mean() float64 {
264	return h.ApproxMean()
265}
266
267// Reset forgets all bins in the histogram (they remain allocated)
268func (h *Histogram) Reset() {
269	h.mutex.Lock()
270	h.used = 0
271	h.mutex.Unlock()
272}
273
274// RecordValue records the given value, returning an error if the value is out
275// of range.
276func (h *Histogram) RecordValue(v float64) error {
277	return h.RecordValues(v, 1)
278}
279
280// RecordCorrectedValue records the given value, correcting for stalls in the
281// recording process. This only works for processes which are recording values
282// at an expected interval (e.g., doing jitter analysis). Processes which are
283// recording ad-hoc values (e.g., latency for incoming requests) can't take
284// advantage of this.
285// CH Compat
286func (h *Histogram) RecordCorrectedValue(v, expectedInterval int64) error {
287	if err := h.RecordValue(float64(v)); err != nil {
288		return err
289	}
290
291	if expectedInterval <= 0 || v <= expectedInterval {
292		return nil
293	}
294
295	missingValue := v - expectedInterval
296	for missingValue >= expectedInterval {
297		if err := h.RecordValue(float64(missingValue)); err != nil {
298			return err
299		}
300		missingValue -= expectedInterval
301	}
302
303	return nil
304}
305
306// find where a new bin should go
307func (h *Histogram) InternalFind(hb *Bin) (bool, int16) {
308	if h.used == 0 {
309		return false, 0
310	}
311	rv := -1
312	idx := int16(0)
313	l := int16(0)
314	r := h.used - 1
315	for l < r {
316		check := (r + l) / 2
317		rv = h.bvs[check].Compare(hb)
318		if rv == 0 {
319			l = check
320			r = check
321		} else if rv > 0 {
322			l = check + 1
323		} else {
324			r = check - 1
325		}
326	}
327	if rv != 0 {
328		rv = h.bvs[l].Compare(hb)
329	}
330	idx = l
331	if rv == 0 {
332		return true, idx
333	}
334	if rv < 0 {
335		return false, idx
336	}
337	idx++
338	return false, idx
339}
340
341func (h *Histogram) InsertBin(hb *Bin, count int64) uint64 {
342	h.mutex.Lock()
343	defer h.mutex.Unlock()
344	if count == 0 {
345		return 0
346	}
347	found, idx := h.InternalFind(hb)
348	if !found {
349		if h.used == h.allocd {
350			new_bvs := make([]Bin, h.allocd+DEFAULT_HIST_SIZE)
351			if idx > 0 {
352				copy(new_bvs[0:], h.bvs[0:idx])
353			}
354			if idx < h.used {
355				copy(new_bvs[idx+1:], h.bvs[idx:])
356			}
357			h.allocd = h.allocd + DEFAULT_HIST_SIZE
358			h.bvs = new_bvs
359		} else {
360			copy(h.bvs[idx+1:], h.bvs[idx:h.used])
361		}
362		h.bvs[idx].val = hb.val
363		h.bvs[idx].exp = hb.exp
364		h.bvs[idx].count = uint64(count)
365		h.used++
366		return h.bvs[idx].count
367	}
368	var newval uint64
369	if count < 0 {
370		newval = h.bvs[idx].count - uint64(-count)
371	} else {
372		newval = h.bvs[idx].count + uint64(count)
373	}
374	if newval < h.bvs[idx].count { //rolled
375		newval = ^uint64(0)
376	}
377	h.bvs[idx].count = newval
378	return newval - h.bvs[idx].count
379}
380
381// RecordValues records n occurrences of the given value, returning an error if
382// the value is out of range.
383func (h *Histogram) RecordValues(v float64, n int64) error {
384	var hb Bin
385	hb.SetFromFloat64(v)
386	h.InsertBin(&hb, n)
387	return nil
388}
389
390// Approximate mean
391func (h *Histogram) ApproxMean() float64 {
392	h.mutex.Lock()
393	defer h.mutex.Unlock()
394	divisor := 0.0
395	sum := 0.0
396	for i := int16(0); i < h.used; i++ {
397		midpoint := h.bvs[i].Midpoint()
398		cardinality := float64(h.bvs[i].count)
399		divisor += cardinality
400		sum += midpoint * cardinality
401	}
402	if divisor == 0.0 {
403		return math.NaN()
404	}
405	return sum / divisor
406}
407
408// Approximate sum
409func (h *Histogram) ApproxSum() float64 {
410	h.mutex.Lock()
411	defer h.mutex.Unlock()
412	sum := 0.0
413	for i := int16(0); i < h.used; i++ {
414		midpoint := h.bvs[i].Midpoint()
415		cardinality := float64(h.bvs[i].count)
416		sum += midpoint * cardinality
417	}
418	return sum
419}
420
421func (h *Histogram) ApproxQuantile(q_in []float64) ([]float64, error) {
422	h.mutex.Lock()
423	defer h.mutex.Unlock()
424	q_out := make([]float64, len(q_in))
425	i_q, i_b := 0, int16(0)
426	total_cnt, bin_width, bin_left, lower_cnt, upper_cnt := 0.0, 0.0, 0.0, 0.0, 0.0
427	if len(q_in) == 0 {
428		return q_out, nil
429	}
430	// Make sure the requested quantiles are in order
431	for i_q = 1; i_q < len(q_in); i_q++ {
432		if q_in[i_q-1] > q_in[i_q] {
433			return nil, errors.New("out of order")
434		}
435	}
436	// Add up the bins
437	for i_b = 0; i_b < h.used; i_b++ {
438		if !h.bvs[i_b].IsNaN() {
439			total_cnt += float64(h.bvs[i_b].count)
440		}
441	}
442	if total_cnt == 0.0 {
443		return nil, errors.New("empty_histogram")
444	}
445
446	for i_q = 0; i_q < len(q_in); i_q++ {
447		if q_in[i_q] < 0.0 || q_in[i_q] > 1.0 {
448			return nil, errors.New("out of bound quantile")
449		}
450		q_out[i_q] = total_cnt * q_in[i_q]
451	}
452
453	for i_b = 0; i_b < h.used; i_b++ {
454		if h.bvs[i_b].IsNaN() {
455			continue
456		}
457		bin_width = h.bvs[i_b].BinWidth()
458		bin_left = h.bvs[i_b].Left()
459		lower_cnt = upper_cnt
460		upper_cnt = lower_cnt + float64(h.bvs[i_b].count)
461		break
462	}
463	for i_q = 0; i_q < len(q_in); i_q++ {
464		for i_b < (h.used-1) && upper_cnt < q_out[i_q] {
465			i_b++
466			bin_width = h.bvs[i_b].BinWidth()
467			bin_left = h.bvs[i_b].Left()
468			lower_cnt = upper_cnt
469			upper_cnt = lower_cnt + float64(h.bvs[i_b].count)
470		}
471		if lower_cnt == q_out[i_q] {
472			q_out[i_q] = bin_left
473		} else if upper_cnt == q_out[i_q] {
474			q_out[i_q] = bin_left + bin_width
475		} else {
476			if bin_width == 0 {
477				q_out[i_q] = bin_left
478			} else {
479				q_out[i_q] = bin_left + (q_out[i_q]-lower_cnt)/(upper_cnt-lower_cnt)*bin_width
480			}
481		}
482	}
483	return q_out, nil
484}
485
486// ValueAtQuantile returns the recorded value at the given quantile (0..1).
487func (h *Histogram) ValueAtQuantile(q float64) float64 {
488	h.mutex.Lock()
489	defer h.mutex.Unlock()
490	q_in := make([]float64, 1)
491	q_in[0] = q
492	q_out, err := h.ApproxQuantile(q_in)
493	if err == nil && len(q_out) == 1 {
494		return q_out[0]
495	}
496	return math.NaN()
497}
498
499// SignificantFigures returns the significant figures used to create the
500// histogram
501// CH Compat
502func (h *Histogram) SignificantFigures() int64 {
503	return 2
504}
505
506// Equals returns true if the two Histograms are equivalent, false if not.
507func (h *Histogram) Equals(other *Histogram) bool {
508	h.mutex.Lock()
509	other.mutex.Lock()
510	defer h.mutex.Unlock()
511	defer other.mutex.Unlock()
512	switch {
513	case
514		h.used != other.used:
515		return false
516	default:
517		for i := int16(0); i < h.used; i++ {
518			if h.bvs[i].Compare(&other.bvs[i]) != 0 {
519				return false
520			}
521			if h.bvs[i].count != other.bvs[i].count {
522				return false
523			}
524		}
525	}
526	return true
527}
528
529func (h *Histogram) CopyAndReset() *Histogram {
530	h.mutex.Lock()
531	defer h.mutex.Unlock()
532	newhist := &Histogram{
533		allocd: h.allocd,
534		used:   h.used,
535		bvs:    h.bvs,
536	}
537	h.allocd = DEFAULT_HIST_SIZE
538	h.bvs = make([]Bin, DEFAULT_HIST_SIZE)
539	h.used = 0
540	return newhist
541}
542func (h *Histogram) DecStrings() []string {
543	h.mutex.Lock()
544	defer h.mutex.Unlock()
545	out := make([]string, h.used)
546	for i, bin := range h.bvs[0:h.used] {
547		var buffer bytes.Buffer
548		buffer.WriteString("H[")
549		buffer.WriteString(fmt.Sprintf("%3.1e", bin.Value()))
550		buffer.WriteString("]=")
551		buffer.WriteString(fmt.Sprintf("%v", bin.count))
552		out[i] = buffer.String()
553	}
554	return out
555}
556