1// Copyright 2016, Circonus, Inc. All rights reserved. 2// See the LICENSE file. 3 4// Package circllhist provides an implementation of Circonus' fixed log-linear 5// histogram data structure. This allows tracking of histograms in a 6// composable way such that accurate error can be reasoned about. 7package circonusllhist 8 9import ( 10 "bytes" 11 "errors" 12 "fmt" 13 "math" 14 "sync" 15) 16 17const ( 18 DEFAULT_HIST_SIZE = int16(100) 19) 20 21var power_of_ten = [...]float64{ 22 1, 10, 100, 1000, 10000, 100000, 1e+06, 1e+07, 1e+08, 1e+09, 1e+10, 23 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, 24 1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 25 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40, 26 1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 27 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60, 28 1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 29 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80, 30 1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 31 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100, 32 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109, 33 1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, 34 1e+119, 1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127, 35 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120, 36 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 37 1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 38 1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 39 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 40 1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 41 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 42 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57, 1e-56, 43 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 44 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 45 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 46 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 47 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06, 48 1e-05, 0.0001, 0.001, 0.01, 0.1, 49} 50 51// A Bracket is a part of a cumulative distribution. 52type Bin struct { 53 val int8 54 exp int8 55 count uint64 56} 57 58func NewBinRaw(val int8, exp int8, count uint64) *Bin { 59 return &Bin{ 60 val: val, 61 exp: exp, 62 count: count, 63 } 64} 65func NewBin() *Bin { 66 return NewBinRaw(0, 0, 0) 67} 68func NewBinFromFloat64(d float64) *Bin { 69 hb := NewBinRaw(0, 0, 0) 70 hb.SetFromFloat64(d) 71 return hb 72} 73func (hb *Bin) SetFromFloat64(d float64) *Bin { 74 hb.val = -1 75 if math.IsInf(d, 0) || math.IsNaN(d) { 76 return hb 77 } 78 if d == 0.0 { 79 hb.val = 0 80 return hb 81 } 82 sign := 1 83 if math.Signbit(d) { 84 sign = -1 85 } 86 d = math.Abs(d) 87 big_exp := int(math.Floor(math.Log10(d))) 88 hb.exp = int8(big_exp) 89 if int(hb.exp) != big_exp { //rolled 90 hb.exp = 0 91 if big_exp < 0 { 92 hb.val = 0 93 } 94 return hb 95 } 96 d = d / hb.PowerOfTen() 97 d = d * 10 98 hb.val = int8(sign * int(math.Floor(d+1e-13))) 99 if hb.val == 100 || hb.val == -100 { 100 if hb.exp < 127 { 101 hb.val = hb.val / 10 102 hb.exp++ 103 } else { 104 hb.val = 0 105 hb.exp = 0 106 } 107 } 108 if hb.val == 0 { 109 hb.exp = 0 110 return hb 111 } 112 if !((hb.val >= 10 && hb.val < 100) || 113 (hb.val <= -10 && hb.val > -100)) { 114 hb.val = -1 115 hb.exp = 0 116 } 117 return hb 118} 119func (hb *Bin) PowerOfTen() float64 { 120 idx := int(hb.exp) 121 if idx < 0 { 122 idx = 256 + idx 123 } 124 return power_of_ten[idx] 125} 126 127func (hb *Bin) IsNaN() bool { 128 if hb.val > 99 || hb.val < -99 { 129 return true 130 } 131 return false 132} 133func (hb *Bin) Val() int8 { 134 return hb.val 135} 136func (hb *Bin) Exp() int8 { 137 return hb.exp 138} 139func (hb *Bin) Count() uint64 { 140 return hb.count 141} 142func (hb *Bin) Value() float64 { 143 if hb.IsNaN() { 144 return math.NaN() 145 } 146 if hb.val < 10 && hb.val > -10 { 147 return 0.0 148 } 149 return (float64(hb.val) / 10.0) * hb.PowerOfTen() 150} 151func (hb *Bin) BinWidth() float64 { 152 if hb.IsNaN() { 153 return math.NaN() 154 } 155 if hb.val < 10 && hb.val > -10 { 156 return 0.0 157 } 158 return hb.PowerOfTen() / 10.0 159} 160func (hb *Bin) Midpoint() float64 { 161 if hb.IsNaN() { 162 return math.NaN() 163 } 164 out := hb.Value() 165 if out == 0 { 166 return 0 167 } 168 interval := hb.BinWidth() 169 if out < 0 { 170 interval = interval * -1 171 } 172 return out + interval/2.0 173} 174func (hb *Bin) Left() float64 { 175 if hb.IsNaN() { 176 return math.NaN() 177 } 178 out := hb.Value() 179 if out >= 0 { 180 return out 181 } 182 return out - hb.BinWidth() 183} 184 185func (h1 *Bin) Compare(h2 *Bin) int { 186 if h1.val == h2.val && h1.exp == h2.exp { 187 return 0 188 } 189 if h1.val == -1 { 190 return 1 191 } 192 if h2.val == -1 { 193 return -1 194 } 195 if h1.val == 0 { 196 if h2.val > 0 { 197 return 1 198 } 199 return -1 200 } 201 if h2.val == 0 { 202 if h1.val < 0 { 203 return 1 204 } 205 return -1 206 } 207 if h1.val < 0 && h2.val > 0 { 208 return 1 209 } 210 if h1.val > 0 && h2.val < 0 { 211 return -1 212 } 213 if h1.exp == h2.exp { 214 if h1.val < h2.val { 215 return 1 216 } 217 return -1 218 } 219 if h1.exp > h2.exp { 220 if h1.val < 0 { 221 return 1 222 } 223 return -1 224 } 225 if h1.exp < h2.exp { 226 if h1.val < 0 { 227 return -1 228 } 229 return 1 230 } 231 return 0 232} 233 234// This histogram structure tracks values are two decimal digits of precision 235// with a bounded error that remains bounded upon composition 236type Histogram struct { 237 mutex sync.Mutex 238 bvs []Bin 239 used int16 240 allocd int16 241} 242 243// New returns a new Histogram 244func New() *Histogram { 245 return &Histogram{ 246 allocd: DEFAULT_HIST_SIZE, 247 used: 0, 248 bvs: make([]Bin, DEFAULT_HIST_SIZE), 249 } 250} 251 252// Max returns the approximate maximum recorded value. 253func (h *Histogram) Max() float64 { 254 return h.ValueAtQuantile(1.0) 255} 256 257// Min returns the approximate minimum recorded value. 258func (h *Histogram) Min() float64 { 259 return h.ValueAtQuantile(0.0) 260} 261 262// Mean returns the approximate arithmetic mean of the recorded values. 263func (h *Histogram) Mean() float64 { 264 return h.ApproxMean() 265} 266 267// Reset forgets all bins in the histogram (they remain allocated) 268func (h *Histogram) Reset() { 269 h.mutex.Lock() 270 h.used = 0 271 h.mutex.Unlock() 272} 273 274// RecordValue records the given value, returning an error if the value is out 275// of range. 276func (h *Histogram) RecordValue(v float64) error { 277 return h.RecordValues(v, 1) 278} 279 280// RecordCorrectedValue records the given value, correcting for stalls in the 281// recording process. This only works for processes which are recording values 282// at an expected interval (e.g., doing jitter analysis). Processes which are 283// recording ad-hoc values (e.g., latency for incoming requests) can't take 284// advantage of this. 285// CH Compat 286func (h *Histogram) RecordCorrectedValue(v, expectedInterval int64) error { 287 if err := h.RecordValue(float64(v)); err != nil { 288 return err 289 } 290 291 if expectedInterval <= 0 || v <= expectedInterval { 292 return nil 293 } 294 295 missingValue := v - expectedInterval 296 for missingValue >= expectedInterval { 297 if err := h.RecordValue(float64(missingValue)); err != nil { 298 return err 299 } 300 missingValue -= expectedInterval 301 } 302 303 return nil 304} 305 306// find where a new bin should go 307func (h *Histogram) InternalFind(hb *Bin) (bool, int16) { 308 if h.used == 0 { 309 return false, 0 310 } 311 rv := -1 312 idx := int16(0) 313 l := int16(0) 314 r := h.used - 1 315 for l < r { 316 check := (r + l) / 2 317 rv = h.bvs[check].Compare(hb) 318 if rv == 0 { 319 l = check 320 r = check 321 } else if rv > 0 { 322 l = check + 1 323 } else { 324 r = check - 1 325 } 326 } 327 if rv != 0 { 328 rv = h.bvs[l].Compare(hb) 329 } 330 idx = l 331 if rv == 0 { 332 return true, idx 333 } 334 if rv < 0 { 335 return false, idx 336 } 337 idx++ 338 return false, idx 339} 340 341func (h *Histogram) InsertBin(hb *Bin, count int64) uint64 { 342 h.mutex.Lock() 343 defer h.mutex.Unlock() 344 if count == 0 { 345 return 0 346 } 347 found, idx := h.InternalFind(hb) 348 if !found { 349 if h.used == h.allocd { 350 new_bvs := make([]Bin, h.allocd+DEFAULT_HIST_SIZE) 351 if idx > 0 { 352 copy(new_bvs[0:], h.bvs[0:idx]) 353 } 354 if idx < h.used { 355 copy(new_bvs[idx+1:], h.bvs[idx:]) 356 } 357 h.allocd = h.allocd + DEFAULT_HIST_SIZE 358 h.bvs = new_bvs 359 } else { 360 copy(h.bvs[idx+1:], h.bvs[idx:h.used]) 361 } 362 h.bvs[idx].val = hb.val 363 h.bvs[idx].exp = hb.exp 364 h.bvs[idx].count = uint64(count) 365 h.used++ 366 return h.bvs[idx].count 367 } 368 var newval uint64 369 if count < 0 { 370 newval = h.bvs[idx].count - uint64(-count) 371 } else { 372 newval = h.bvs[idx].count + uint64(count) 373 } 374 if newval < h.bvs[idx].count { //rolled 375 newval = ^uint64(0) 376 } 377 h.bvs[idx].count = newval 378 return newval - h.bvs[idx].count 379} 380 381// RecordValues records n occurrences of the given value, returning an error if 382// the value is out of range. 383func (h *Histogram) RecordValues(v float64, n int64) error { 384 var hb Bin 385 hb.SetFromFloat64(v) 386 h.InsertBin(&hb, n) 387 return nil 388} 389 390// Approximate mean 391func (h *Histogram) ApproxMean() float64 { 392 h.mutex.Lock() 393 defer h.mutex.Unlock() 394 divisor := 0.0 395 sum := 0.0 396 for i := int16(0); i < h.used; i++ { 397 midpoint := h.bvs[i].Midpoint() 398 cardinality := float64(h.bvs[i].count) 399 divisor += cardinality 400 sum += midpoint * cardinality 401 } 402 if divisor == 0.0 { 403 return math.NaN() 404 } 405 return sum / divisor 406} 407 408// Approximate sum 409func (h *Histogram) ApproxSum() float64 { 410 h.mutex.Lock() 411 defer h.mutex.Unlock() 412 sum := 0.0 413 for i := int16(0); i < h.used; i++ { 414 midpoint := h.bvs[i].Midpoint() 415 cardinality := float64(h.bvs[i].count) 416 sum += midpoint * cardinality 417 } 418 return sum 419} 420 421func (h *Histogram) ApproxQuantile(q_in []float64) ([]float64, error) { 422 h.mutex.Lock() 423 defer h.mutex.Unlock() 424 q_out := make([]float64, len(q_in)) 425 i_q, i_b := 0, int16(0) 426 total_cnt, bin_width, bin_left, lower_cnt, upper_cnt := 0.0, 0.0, 0.0, 0.0, 0.0 427 if len(q_in) == 0 { 428 return q_out, nil 429 } 430 // Make sure the requested quantiles are in order 431 for i_q = 1; i_q < len(q_in); i_q++ { 432 if q_in[i_q-1] > q_in[i_q] { 433 return nil, errors.New("out of order") 434 } 435 } 436 // Add up the bins 437 for i_b = 0; i_b < h.used; i_b++ { 438 if !h.bvs[i_b].IsNaN() { 439 total_cnt += float64(h.bvs[i_b].count) 440 } 441 } 442 if total_cnt == 0.0 { 443 return nil, errors.New("empty_histogram") 444 } 445 446 for i_q = 0; i_q < len(q_in); i_q++ { 447 if q_in[i_q] < 0.0 || q_in[i_q] > 1.0 { 448 return nil, errors.New("out of bound quantile") 449 } 450 q_out[i_q] = total_cnt * q_in[i_q] 451 } 452 453 for i_b = 0; i_b < h.used; i_b++ { 454 if h.bvs[i_b].IsNaN() { 455 continue 456 } 457 bin_width = h.bvs[i_b].BinWidth() 458 bin_left = h.bvs[i_b].Left() 459 lower_cnt = upper_cnt 460 upper_cnt = lower_cnt + float64(h.bvs[i_b].count) 461 break 462 } 463 for i_q = 0; i_q < len(q_in); i_q++ { 464 for i_b < (h.used-1) && upper_cnt < q_out[i_q] { 465 i_b++ 466 bin_width = h.bvs[i_b].BinWidth() 467 bin_left = h.bvs[i_b].Left() 468 lower_cnt = upper_cnt 469 upper_cnt = lower_cnt + float64(h.bvs[i_b].count) 470 } 471 if lower_cnt == q_out[i_q] { 472 q_out[i_q] = bin_left 473 } else if upper_cnt == q_out[i_q] { 474 q_out[i_q] = bin_left + bin_width 475 } else { 476 if bin_width == 0 { 477 q_out[i_q] = bin_left 478 } else { 479 q_out[i_q] = bin_left + (q_out[i_q]-lower_cnt)/(upper_cnt-lower_cnt)*bin_width 480 } 481 } 482 } 483 return q_out, nil 484} 485 486// ValueAtQuantile returns the recorded value at the given quantile (0..1). 487func (h *Histogram) ValueAtQuantile(q float64) float64 { 488 h.mutex.Lock() 489 defer h.mutex.Unlock() 490 q_in := make([]float64, 1) 491 q_in[0] = q 492 q_out, err := h.ApproxQuantile(q_in) 493 if err == nil && len(q_out) == 1 { 494 return q_out[0] 495 } 496 return math.NaN() 497} 498 499// SignificantFigures returns the significant figures used to create the 500// histogram 501// CH Compat 502func (h *Histogram) SignificantFigures() int64 { 503 return 2 504} 505 506// Equals returns true if the two Histograms are equivalent, false if not. 507func (h *Histogram) Equals(other *Histogram) bool { 508 h.mutex.Lock() 509 other.mutex.Lock() 510 defer h.mutex.Unlock() 511 defer other.mutex.Unlock() 512 switch { 513 case 514 h.used != other.used: 515 return false 516 default: 517 for i := int16(0); i < h.used; i++ { 518 if h.bvs[i].Compare(&other.bvs[i]) != 0 { 519 return false 520 } 521 if h.bvs[i].count != other.bvs[i].count { 522 return false 523 } 524 } 525 } 526 return true 527} 528 529func (h *Histogram) CopyAndReset() *Histogram { 530 h.mutex.Lock() 531 defer h.mutex.Unlock() 532 newhist := &Histogram{ 533 allocd: h.allocd, 534 used: h.used, 535 bvs: h.bvs, 536 } 537 h.allocd = DEFAULT_HIST_SIZE 538 h.bvs = make([]Bin, DEFAULT_HIST_SIZE) 539 h.used = 0 540 return newhist 541} 542func (h *Histogram) DecStrings() []string { 543 h.mutex.Lock() 544 defer h.mutex.Unlock() 545 out := make([]string, h.used) 546 for i, bin := range h.bvs[0:h.used] { 547 var buffer bytes.Buffer 548 buffer.WriteString("H[") 549 buffer.WriteString(fmt.Sprintf("%3.1e", bin.Value())) 550 buffer.WriteString("]=") 551 buffer.WriteString(fmt.Sprintf("%v", bin.count)) 552 out[i] = buffer.String() 553 } 554 return out 555} 556