1// Copyright 2016, Circonus, Inc. All rights reserved. 2// See the LICENSE file. 3 4// Package circllhist provides an implementation of Circonus' fixed log-linear 5// histogram data structure. This allows tracking of histograms in a 6// composable way such that accurate error can be reasoned about. 7package circonusllhist 8 9import ( 10 "bytes" 11 "errors" 12 "fmt" 13 "math" 14 "strconv" 15 "strings" 16 "sync" 17) 18 19const ( 20 DEFAULT_HIST_SIZE = uint16(100) 21) 22 23var power_of_ten = [...]float64{ 24 1, 10, 100, 1000, 10000, 100000, 1e+06, 1e+07, 1e+08, 1e+09, 1e+10, 25 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, 26 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 27 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 28 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 29 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, 30 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 31 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, 32 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 33 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100, 34 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109, 35 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, 36 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127, 37 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, 38 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 39 1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 40 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 41 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 42 1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 43 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 44 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 45 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 46 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 47 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 48 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 49 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06, 50 1e-05, 0.0001, 0.001, 0.01, 0.1, 51} 52 53// A Bracket is a part of a cumulative distribution. 54type Bin struct { 55 val int8 56 exp int8 57 count uint64 58} 59 60func NewBinRaw(val int8, exp int8, count uint64) *Bin { 61 return &Bin{ 62 val: val, 63 exp: exp, 64 count: count, 65 } 66} 67func NewBin() *Bin { 68 return NewBinRaw(0, 0, 0) 69} 70func NewBinFromFloat64(d float64) *Bin { 71 hb := NewBinRaw(0, 0, 0) 72 hb.SetFromFloat64(d) 73 return hb 74} 75 76type FastL2 struct { 77 l1, l2 int 78} 79 80func (hb *Bin) fastl2() FastL2 { 81 return FastL2{l1: int(uint8(hb.exp)), l2: int(uint8(hb.val))} 82} 83func (hb *Bin) SetFromFloat64(d float64) *Bin { 84 hb.val = -1 85 if math.IsInf(d, 0) || math.IsNaN(d) { 86 return hb 87 } 88 if d == 0.0 { 89 hb.val = 0 90 return hb 91 } 92 sign := 1 93 if math.Signbit(d) { 94 sign = -1 95 } 96 d = math.Abs(d) 97 big_exp := int(math.Floor(math.Log10(d))) 98 hb.exp = int8(big_exp) 99 if int(hb.exp) != big_exp { //rolled 100 hb.exp = 0 101 if big_exp < 0 { 102 hb.val = 0 103 } 104 return hb 105 } 106 d = d / hb.PowerOfTen() 107 d = d * 10 108 hb.val = int8(sign * int(math.Floor(d+1e-13))) 109 if hb.val == 100 || hb.val == -100 { 110 if hb.exp < 127 { 111 hb.val = hb.val / 10 112 hb.exp++ 113 } else { 114 hb.val = 0 115 hb.exp = 0 116 } 117 } 118 if hb.val == 0 { 119 hb.exp = 0 120 return hb 121 } 122 if !((hb.val >= 10 && hb.val < 100) || 123 (hb.val <= -10 && hb.val > -100)) { 124 hb.val = -1 125 hb.exp = 0 126 } 127 return hb 128} 129func (hb *Bin) PowerOfTen() float64 { 130 idx := int(uint8(hb.exp)) 131 return power_of_ten[idx] 132} 133 134func (hb *Bin) IsNaN() bool { 135 if hb.val > 99 || hb.val < -99 { 136 return true 137 } 138 return false 139} 140func (hb *Bin) Val() int8 { 141 return hb.val 142} 143func (hb *Bin) Exp() int8 { 144 return hb.exp 145} 146func (hb *Bin) Count() uint64 { 147 return hb.count 148} 149func (hb *Bin) Value() float64 { 150 if hb.IsNaN() { 151 return math.NaN() 152 } 153 if hb.val < 10 && hb.val > -10 { 154 return 0.0 155 } 156 return (float64(hb.val) / 10.0) * hb.PowerOfTen() 157} 158func (hb *Bin) BinWidth() float64 { 159 if hb.IsNaN() { 160 return math.NaN() 161 } 162 if hb.val < 10 && hb.val > -10 { 163 return 0.0 164 } 165 return hb.PowerOfTen() / 10.0 166} 167func (hb *Bin) Midpoint() float64 { 168 if hb.IsNaN() { 169 return math.NaN() 170 } 171 out := hb.Value() 172 if out == 0 { 173 return 0 174 } 175 interval := hb.BinWidth() 176 if out < 0 { 177 interval = interval * -1 178 } 179 return out + interval/2.0 180} 181func (hb *Bin) Left() float64 { 182 if hb.IsNaN() { 183 return math.NaN() 184 } 185 out := hb.Value() 186 if out >= 0 { 187 return out 188 } 189 return out - hb.BinWidth() 190} 191 192func (h1 *Bin) Compare(h2 *Bin) int { 193 var v1, v2 int 194 195 // slide exp positive, 196 // shift by size of val 197 // multiple by (val != 0) 198 // then add or subtract val accordingly 199 200 if h1.val >= 0 { 201 v1 = ((int(h1.exp)+256)<<8)*int(((h1.val|(^h1.val+1))>>8)&1) + int(h1.val) 202 } else { 203 v1 = ((int(h1.exp)+256)<<8)*int(((h1.val|(^h1.val+1))>>8)&1) - int(h1.val) 204 } 205 if h2.val >= 0 { 206 v2 = ((int(h2.exp)+256)<<8)*int(((h2.val|(^h2.val+1))>>8)&1) + int(h2.val) 207 } else { 208 v2 = ((int(h2.exp)+256)<<8)*int(((h2.val|(^h2.val+1))>>8)&1) - int(h2.val) 209 } 210 211 // return the difference 212 return v2 - v1 213} 214 215// This histogram structure tracks values are two decimal digits of precision 216// with a bounded error that remains bounded upon composition 217type Histogram struct { 218 bvs []Bin 219 used uint16 220 allocd uint16 221 222 lookup [256][]uint16 223 224 mutex sync.Mutex 225 useLocks bool 226} 227 228// New returns a new Histogram 229func New() *Histogram { 230 return &Histogram{ 231 allocd: DEFAULT_HIST_SIZE, 232 used: 0, 233 bvs: make([]Bin, DEFAULT_HIST_SIZE), 234 useLocks: true, 235 } 236} 237 238// New returns a Histogram without locking 239func NewNoLocks() *Histogram { 240 return &Histogram{ 241 allocd: DEFAULT_HIST_SIZE, 242 used: 0, 243 bvs: make([]Bin, DEFAULT_HIST_SIZE), 244 useLocks: false, 245 } 246} 247 248// NewFromStrings returns a Histogram created from DecStrings strings 249func NewFromStrings(strs []string, locks bool) (*Histogram, error) { 250 251 bin, err := stringsToBin(strs) 252 if err != nil { 253 return nil, err 254 } 255 256 return newFromBins(bin, locks), nil 257} 258 259// NewFromBins returns a Histogram created from a bins struct slice 260func newFromBins(bins []Bin, locks bool) *Histogram { 261 return &Histogram{ 262 allocd: uint16(len(bins) + 10), // pad it with 10 263 used: uint16(len(bins)), 264 bvs: bins, 265 useLocks: locks, 266 } 267} 268 269// Max returns the approximate maximum recorded value. 270func (h *Histogram) Max() float64 { 271 return h.ValueAtQuantile(1.0) 272} 273 274// Min returns the approximate minimum recorded value. 275func (h *Histogram) Min() float64 { 276 return h.ValueAtQuantile(0.0) 277} 278 279// Mean returns the approximate arithmetic mean of the recorded values. 280func (h *Histogram) Mean() float64 { 281 return h.ApproxMean() 282} 283 284// Reset forgets all bins in the histogram (they remain allocated) 285func (h *Histogram) Reset() { 286 if h.useLocks { 287 h.mutex.Lock() 288 defer h.mutex.Unlock() 289 } 290 for i := 0; i < 256; i++ { 291 if h.lookup[i] != nil { 292 for j := range h.lookup[i] { 293 h.lookup[i][j] = 0 294 } 295 } 296 } 297 h.used = 0 298} 299 300// RecordIntScale records an integer scaler value, returning an error if the 301// value is out of range. 302func (h *Histogram) RecordIntScale(val, scale int) error { 303 return h.RecordIntScales(val, scale, 1) 304} 305 306// RecordValue records the given value, returning an error if the value is out 307// of range. 308func (h *Histogram) RecordValue(v float64) error { 309 return h.RecordValues(v, 1) 310} 311 312// RecordCorrectedValue records the given value, correcting for stalls in the 313// recording process. This only works for processes which are recording values 314// at an expected interval (e.g., doing jitter analysis). Processes which are 315// recording ad-hoc values (e.g., latency for incoming requests) can't take 316// advantage of this. 317// CH Compat 318func (h *Histogram) RecordCorrectedValue(v, expectedInterval int64) error { 319 if err := h.RecordValue(float64(v)); err != nil { 320 return err 321 } 322 323 if expectedInterval <= 0 || v <= expectedInterval { 324 return nil 325 } 326 327 missingValue := v - expectedInterval 328 for missingValue >= expectedInterval { 329 if err := h.RecordValue(float64(missingValue)); err != nil { 330 return err 331 } 332 missingValue -= expectedInterval 333 } 334 335 return nil 336} 337 338// find where a new bin should go 339func (h *Histogram) InternalFind(hb *Bin) (bool, uint16) { 340 if h.used == 0 { 341 return false, 0 342 } 343 f2 := hb.fastl2() 344 if h.lookup[f2.l1] != nil { 345 if idx := h.lookup[f2.l1][f2.l2]; idx != 0 { 346 return true, idx - 1 347 } 348 } 349 rv := -1 350 idx := uint16(0) 351 l := int(0) 352 r := int(h.used - 1) 353 for l < r { 354 check := (r + l) / 2 355 rv = h.bvs[check].Compare(hb) 356 if rv == 0 { 357 l = check 358 r = check 359 } else if rv > 0 { 360 l = check + 1 361 } else { 362 r = check - 1 363 } 364 } 365 if rv != 0 { 366 rv = h.bvs[l].Compare(hb) 367 } 368 idx = uint16(l) 369 if rv == 0 { 370 return true, idx 371 } 372 if rv < 0 { 373 return false, idx 374 } 375 idx++ 376 return false, idx 377} 378 379func (h *Histogram) InsertBin(hb *Bin, count int64) uint64 { 380 if h.useLocks { 381 h.mutex.Lock() 382 defer h.mutex.Unlock() 383 } 384 found, idx := h.InternalFind(hb) 385 if !found { 386 if h.used == h.allocd { 387 new_bvs := make([]Bin, h.allocd+DEFAULT_HIST_SIZE) 388 if idx > 0 { 389 copy(new_bvs[0:], h.bvs[0:idx]) 390 } 391 if idx < h.used { 392 copy(new_bvs[idx+1:], h.bvs[idx:]) 393 } 394 h.allocd = h.allocd + DEFAULT_HIST_SIZE 395 h.bvs = new_bvs 396 } else { 397 copy(h.bvs[idx+1:], h.bvs[idx:h.used]) 398 } 399 h.bvs[idx].val = hb.val 400 h.bvs[idx].exp = hb.exp 401 h.bvs[idx].count = uint64(count) 402 h.used++ 403 for i := idx; i < h.used; i++ { 404 f2 := h.bvs[i].fastl2() 405 if h.lookup[f2.l1] == nil { 406 h.lookup[f2.l1] = make([]uint16, 256) 407 } 408 h.lookup[f2.l1][f2.l2] = uint16(i) + 1 409 } 410 return h.bvs[idx].count 411 } 412 var newval uint64 413 if count >= 0 { 414 newval = h.bvs[idx].count + uint64(count) 415 } else { 416 newval = h.bvs[idx].count - uint64(-count) 417 } 418 if newval < h.bvs[idx].count { //rolled 419 newval = ^uint64(0) 420 } 421 h.bvs[idx].count = newval 422 return newval - h.bvs[idx].count 423} 424 425// RecordIntScales records n occurrences of the given value, returning an error if 426// the value is out of range. 427func (h *Histogram) RecordIntScales(val, scale int, n int64) error { 428 sign := 1 429 if val == 0 { 430 scale = 0 431 } else { 432 if val < 0 { 433 val = 0 - val 434 sign = -1 435 } 436 if val < 10 { 437 val *= 10 438 scale -= 1 439 } 440 for val > 100 { 441 val /= 10 442 scale++ 443 } 444 } 445 if scale < -128 { 446 val = 0 447 scale = 0 448 } else if scale > 127 { 449 val = 0xff 450 scale = 0 451 } 452 val *= sign 453 hb := Bin{val: int8(val), exp: int8(scale), count: 0} 454 h.InsertBin(&hb, n) 455 return nil 456} 457 458// RecordValues records n occurrences of the given value, returning an error if 459// the value is out of range. 460func (h *Histogram) RecordValues(v float64, n int64) error { 461 var hb Bin 462 hb.SetFromFloat64(v) 463 h.InsertBin(&hb, n) 464 return nil 465} 466 467// Approximate mean 468func (h *Histogram) ApproxMean() float64 { 469 if h.useLocks { 470 h.mutex.Lock() 471 defer h.mutex.Unlock() 472 } 473 divisor := 0.0 474 sum := 0.0 475 for i := uint16(0); i < h.used; i++ { 476 midpoint := h.bvs[i].Midpoint() 477 cardinality := float64(h.bvs[i].count) 478 divisor += cardinality 479 sum += midpoint * cardinality 480 } 481 if divisor == 0.0 { 482 return math.NaN() 483 } 484 return sum / divisor 485} 486 487// Approximate sum 488func (h *Histogram) ApproxSum() float64 { 489 if h.useLocks { 490 h.mutex.Lock() 491 defer h.mutex.Unlock() 492 } 493 sum := 0.0 494 for i := uint16(0); i < h.used; i++ { 495 midpoint := h.bvs[i].Midpoint() 496 cardinality := float64(h.bvs[i].count) 497 sum += midpoint * cardinality 498 } 499 return sum 500} 501 502func (h *Histogram) ApproxQuantile(q_in []float64) ([]float64, error) { 503 if h.useLocks { 504 h.mutex.Lock() 505 defer h.mutex.Unlock() 506 } 507 q_out := make([]float64, len(q_in)) 508 i_q, i_b := 0, uint16(0) 509 total_cnt, bin_width, bin_left, lower_cnt, upper_cnt := 0.0, 0.0, 0.0, 0.0, 0.0 510 if len(q_in) == 0 { 511 return q_out, nil 512 } 513 // Make sure the requested quantiles are in order 514 for i_q = 1; i_q < len(q_in); i_q++ { 515 if q_in[i_q-1] > q_in[i_q] { 516 return nil, errors.New("out of order") 517 } 518 } 519 // Add up the bins 520 for i_b = 0; i_b < h.used; i_b++ { 521 if !h.bvs[i_b].IsNaN() { 522 total_cnt += float64(h.bvs[i_b].count) 523 } 524 } 525 if total_cnt == 0.0 { 526 return nil, errors.New("empty_histogram") 527 } 528 529 for i_q = 0; i_q < len(q_in); i_q++ { 530 if q_in[i_q] < 0.0 || q_in[i_q] > 1.0 { 531 return nil, errors.New("out of bound quantile") 532 } 533 q_out[i_q] = total_cnt * q_in[i_q] 534 } 535 536 for i_b = 0; i_b < h.used; i_b++ { 537 if h.bvs[i_b].IsNaN() { 538 continue 539 } 540 bin_width = h.bvs[i_b].BinWidth() 541 bin_left = h.bvs[i_b].Left() 542 lower_cnt = upper_cnt 543 upper_cnt = lower_cnt + float64(h.bvs[i_b].count) 544 break 545 } 546 for i_q = 0; i_q < len(q_in); i_q++ { 547 for i_b < (h.used-1) && upper_cnt < q_out[i_q] { 548 i_b++ 549 bin_width = h.bvs[i_b].BinWidth() 550 bin_left = h.bvs[i_b].Left() 551 lower_cnt = upper_cnt 552 upper_cnt = lower_cnt + float64(h.bvs[i_b].count) 553 } 554 if lower_cnt == q_out[i_q] { 555 q_out[i_q] = bin_left 556 } else if upper_cnt == q_out[i_q] { 557 q_out[i_q] = bin_left + bin_width 558 } else { 559 if bin_width == 0 { 560 q_out[i_q] = bin_left 561 } else { 562 q_out[i_q] = bin_left + (q_out[i_q]-lower_cnt)/(upper_cnt-lower_cnt)*bin_width 563 } 564 } 565 } 566 return q_out, nil 567} 568 569// ValueAtQuantile returns the recorded value at the given quantile (0..1). 570func (h *Histogram) ValueAtQuantile(q float64) float64 { 571 if h.useLocks { 572 h.mutex.Lock() 573 defer h.mutex.Unlock() 574 } 575 q_in := make([]float64, 1) 576 q_in[0] = q 577 q_out, err := h.ApproxQuantile(q_in) 578 if err == nil && len(q_out) == 1 { 579 return q_out[0] 580 } 581 return math.NaN() 582} 583 584// SignificantFigures returns the significant figures used to create the 585// histogram 586// CH Compat 587func (h *Histogram) SignificantFigures() int64 { 588 return 2 589} 590 591// Equals returns true if the two Histograms are equivalent, false if not. 592func (h *Histogram) Equals(other *Histogram) bool { 593 if h.useLocks { 594 h.mutex.Lock() 595 defer h.mutex.Unlock() 596 } 597 if other.useLocks { 598 other.mutex.Lock() 599 defer other.mutex.Unlock() 600 } 601 switch { 602 case 603 h.used != other.used: 604 return false 605 default: 606 for i := uint16(0); i < h.used; i++ { 607 if h.bvs[i].Compare(&other.bvs[i]) != 0 { 608 return false 609 } 610 if h.bvs[i].count != other.bvs[i].count { 611 return false 612 } 613 } 614 } 615 return true 616} 617 618func (h *Histogram) CopyAndReset() *Histogram { 619 if h.useLocks { 620 h.mutex.Lock() 621 defer h.mutex.Unlock() 622 } 623 newhist := &Histogram{ 624 allocd: h.allocd, 625 used: h.used, 626 bvs: h.bvs, 627 } 628 h.allocd = DEFAULT_HIST_SIZE 629 h.bvs = make([]Bin, DEFAULT_HIST_SIZE) 630 h.used = 0 631 for i := 0; i < 256; i++ { 632 if h.lookup[i] != nil { 633 for j := range h.lookup[i] { 634 h.lookup[i][j] = 0 635 } 636 } 637 } 638 return newhist 639} 640func (h *Histogram) DecStrings() []string { 641 if h.useLocks { 642 h.mutex.Lock() 643 defer h.mutex.Unlock() 644 } 645 out := make([]string, h.used) 646 for i, bin := range h.bvs[0:h.used] { 647 var buffer bytes.Buffer 648 buffer.WriteString("H[") 649 buffer.WriteString(fmt.Sprintf("%3.1e", bin.Value())) 650 buffer.WriteString("]=") 651 buffer.WriteString(fmt.Sprintf("%v", bin.count)) 652 out[i] = buffer.String() 653 } 654 return out 655} 656 657// takes the output of DecStrings and deserializes it into a Bin struct slice 658func stringsToBin(strs []string) ([]Bin, error) { 659 660 bins := make([]Bin, len(strs)) 661 for i, str := range strs { 662 663 // H[0.0e+00]=1 664 665 // H[0.0e+00]= <1> 666 countString := strings.Split(str, "=")[1] 667 countInt, err := strconv.ParseInt(countString, 10, 64) 668 if err != nil { 669 return nil, err 670 } 671 672 // H[ <0.0> e+00]=1 673 valString := strings.Split(strings.Split(strings.Split(str, "=")[0], "e")[0], "[")[1] 674 valInt, err := strconv.ParseFloat(valString, 64) 675 if err != nil { 676 return nil, err 677 } 678 679 // H[0.0e <+00> ]=1 680 expString := strings.Split(strings.Split(strings.Split(str, "=")[0], "e")[1], "]")[0] 681 expInt, err := strconv.ParseInt(expString, 10, 8) 682 if err != nil { 683 return nil, err 684 } 685 bins[i] = *NewBinRaw(int8(valInt*10), int8(expInt), uint64(countInt)) 686 } 687 688 return bins, nil 689} 690