1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package trace
6
7// This file implements histogramming for RPC statistics collection.
8
9import (
10	"bytes"
11	"fmt"
12	"html/template"
13	"log"
14	"math"
15	"sync"
16
17	"golang.org/x/net/internal/timeseries"
18)
19
20const (
21	bucketCount = 38
22)
23
24// histogram keeps counts of values in buckets that are spaced
25// out in powers of 2: 0-1, 2-3, 4-7...
26// histogram implements timeseries.Observable
27type histogram struct {
28	sum          int64   // running total of measurements
29	sumOfSquares float64 // square of running total
30	buckets      []int64 // bucketed values for histogram
31	value        int     // holds a single value as an optimization
32	valueCount   int64   // number of values recorded for single value
33}
34
35// AddMeasurement records a value measurement observation to the histogram.
36func (h *histogram) addMeasurement(value int64) {
37	// TODO: assert invariant
38	h.sum += value
39	h.sumOfSquares += float64(value) * float64(value)
40
41	bucketIndex := getBucket(value)
42
43	if h.valueCount == 0 || (h.valueCount > 0 && h.value == bucketIndex) {
44		h.value = bucketIndex
45		h.valueCount++
46	} else {
47		h.allocateBuckets()
48		h.buckets[bucketIndex]++
49	}
50}
51
52func (h *histogram) allocateBuckets() {
53	if h.buckets == nil {
54		h.buckets = make([]int64, bucketCount)
55		h.buckets[h.value] = h.valueCount
56		h.value = 0
57		h.valueCount = -1
58	}
59}
60
61func log2(i int64) int {
62	n := 0
63	for ; i >= 0x100; i >>= 8 {
64		n += 8
65	}
66	for ; i > 0; i >>= 1 {
67		n += 1
68	}
69	return n
70}
71
72func getBucket(i int64) (index int) {
73	index = log2(i) - 1
74	if index < 0 {
75		index = 0
76	}
77	if index >= bucketCount {
78		index = bucketCount - 1
79	}
80	return
81}
82
83// Total returns the number of recorded observations.
84func (h *histogram) total() (total int64) {
85	if h.valueCount >= 0 {
86		total = h.valueCount
87	}
88	for _, val := range h.buckets {
89		total += int64(val)
90	}
91	return
92}
93
94// Average returns the average value of recorded observations.
95func (h *histogram) average() float64 {
96	t := h.total()
97	if t == 0 {
98		return 0
99	}
100	return float64(h.sum) / float64(t)
101}
102
103// Variance returns the variance of recorded observations.
104func (h *histogram) variance() float64 {
105	t := float64(h.total())
106	if t == 0 {
107		return 0
108	}
109	s := float64(h.sum) / t
110	return h.sumOfSquares/t - s*s
111}
112
113// StandardDeviation returns the standard deviation of recorded observations.
114func (h *histogram) standardDeviation() float64 {
115	return math.Sqrt(h.variance())
116}
117
118// PercentileBoundary estimates the value that the given fraction of recorded
119// observations are less than.
120func (h *histogram) percentileBoundary(percentile float64) int64 {
121	total := h.total()
122
123	// Corner cases (make sure result is strictly less than Total())
124	if total == 0 {
125		return 0
126	} else if total == 1 {
127		return int64(h.average())
128	}
129
130	percentOfTotal := round(float64(total) * percentile)
131	var runningTotal int64
132
133	for i := range h.buckets {
134		value := h.buckets[i]
135		runningTotal += value
136		if runningTotal == percentOfTotal {
137			// We hit an exact bucket boundary. If the next bucket has data, it is a
138			// good estimate of the value. If the bucket is empty, we interpolate the
139			// midpoint between the next bucket's boundary and the next non-zero
140			// bucket. If the remaining buckets are all empty, then we use the
141			// boundary for the next bucket as the estimate.
142			j := uint8(i + 1)
143			min := bucketBoundary(j)
144			if runningTotal < total {
145				for h.buckets[j] == 0 {
146					j++
147				}
148			}
149			max := bucketBoundary(j)
150			return min + round(float64(max-min)/2)
151		} else if runningTotal > percentOfTotal {
152			// The value is in this bucket. Interpolate the value.
153			delta := runningTotal - percentOfTotal
154			percentBucket := float64(value-delta) / float64(value)
155			bucketMin := bucketBoundary(uint8(i))
156			nextBucketMin := bucketBoundary(uint8(i + 1))
157			bucketSize := nextBucketMin - bucketMin
158			return bucketMin + round(percentBucket*float64(bucketSize))
159		}
160	}
161	return bucketBoundary(bucketCount - 1)
162}
163
164// Median returns the estimated median of the observed values.
165func (h *histogram) median() int64 {
166	return h.percentileBoundary(0.5)
167}
168
169// Add adds other to h.
170func (h *histogram) Add(other timeseries.Observable) {
171	o := other.(*histogram)
172	if o.valueCount == 0 {
173		// Other histogram is empty
174	} else if h.valueCount >= 0 && o.valueCount > 0 && h.value == o.value {
175		// Both have a single bucketed value, aggregate them
176		h.valueCount += o.valueCount
177	} else {
178		// Two different values necessitate buckets in this histogram
179		h.allocateBuckets()
180		if o.valueCount >= 0 {
181			h.buckets[o.value] += o.valueCount
182		} else {
183			for i := range h.buckets {
184				h.buckets[i] += o.buckets[i]
185			}
186		}
187	}
188	h.sumOfSquares += o.sumOfSquares
189	h.sum += o.sum
190}
191
192// Clear resets the histogram to an empty state, removing all observed values.
193func (h *histogram) Clear() {
194	h.buckets = nil
195	h.value = 0
196	h.valueCount = 0
197	h.sum = 0
198	h.sumOfSquares = 0
199}
200
201// CopyFrom copies from other, which must be a *histogram, into h.
202func (h *histogram) CopyFrom(other timeseries.Observable) {
203	o := other.(*histogram)
204	if o.valueCount == -1 {
205		h.allocateBuckets()
206		copy(h.buckets, o.buckets)
207	}
208	h.sum = o.sum
209	h.sumOfSquares = o.sumOfSquares
210	h.value = o.value
211	h.valueCount = o.valueCount
212}
213
214// Multiply scales the histogram by the specified ratio.
215func (h *histogram) Multiply(ratio float64) {
216	if h.valueCount == -1 {
217		for i := range h.buckets {
218			h.buckets[i] = int64(float64(h.buckets[i]) * ratio)
219		}
220	} else {
221		h.valueCount = int64(float64(h.valueCount) * ratio)
222	}
223	h.sum = int64(float64(h.sum) * ratio)
224	h.sumOfSquares = h.sumOfSquares * ratio
225}
226
227// New creates a new histogram.
228func (h *histogram) New() timeseries.Observable {
229	r := new(histogram)
230	r.Clear()
231	return r
232}
233
234func (h *histogram) String() string {
235	return fmt.Sprintf("%d, %f, %d, %d, %v",
236		h.sum, h.sumOfSquares, h.value, h.valueCount, h.buckets)
237}
238
239// round returns the closest int64 to the argument
240func round(in float64) int64 {
241	return int64(math.Floor(in + 0.5))
242}
243
244// bucketBoundary returns the first value in the bucket.
245func bucketBoundary(bucket uint8) int64 {
246	if bucket == 0 {
247		return 0
248	}
249	return 1 << bucket
250}
251
252// bucketData holds data about a specific bucket for use in distTmpl.
253type bucketData struct {
254	Lower, Upper       int64
255	N                  int64
256	Pct, CumulativePct float64
257	GraphWidth         int
258}
259
260// data holds data about a Distribution for use in distTmpl.
261type data struct {
262	Buckets                 []*bucketData
263	Count, Median           int64
264	Mean, StandardDeviation float64
265}
266
267// maxHTMLBarWidth is the maximum width of the HTML bar for visualizing buckets.
268const maxHTMLBarWidth = 350.0
269
270// newData returns data representing h for use in distTmpl.
271func (h *histogram) newData() *data {
272	// Force the allocation of buckets to simplify the rendering implementation
273	h.allocateBuckets()
274	// We scale the bars on the right so that the largest bar is
275	// maxHTMLBarWidth pixels in width.
276	maxBucket := int64(0)
277	for _, n := range h.buckets {
278		if n > maxBucket {
279			maxBucket = n
280		}
281	}
282	total := h.total()
283	barsizeMult := maxHTMLBarWidth / float64(maxBucket)
284	var pctMult float64
285	if total == 0 {
286		pctMult = 1.0
287	} else {
288		pctMult = 100.0 / float64(total)
289	}
290
291	buckets := make([]*bucketData, len(h.buckets))
292	runningTotal := int64(0)
293	for i, n := range h.buckets {
294		if n == 0 {
295			continue
296		}
297		runningTotal += n
298		var upperBound int64
299		if i < bucketCount-1 {
300			upperBound = bucketBoundary(uint8(i + 1))
301		} else {
302			upperBound = math.MaxInt64
303		}
304		buckets[i] = &bucketData{
305			Lower:         bucketBoundary(uint8(i)),
306			Upper:         upperBound,
307			N:             n,
308			Pct:           float64(n) * pctMult,
309			CumulativePct: float64(runningTotal) * pctMult,
310			GraphWidth:    int(float64(n) * barsizeMult),
311		}
312	}
313	return &data{
314		Buckets:           buckets,
315		Count:             total,
316		Median:            h.median(),
317		Mean:              h.average(),
318		StandardDeviation: h.standardDeviation(),
319	}
320}
321
322func (h *histogram) html() template.HTML {
323	buf := new(bytes.Buffer)
324	if err := distTmpl().Execute(buf, h.newData()); err != nil {
325		buf.Reset()
326		log.Printf("net/trace: couldn't execute template: %v", err)
327	}
328	return template.HTML(buf.String())
329}
330
331var distTmplCache *template.Template
332var distTmplOnce sync.Once
333
334func distTmpl() *template.Template {
335	distTmplOnce.Do(func() {
336		// Input: data
337		distTmplCache = template.Must(template.New("distTmpl").Parse(`
338<table>
339<tr>
340    <td style="padding:0.25em">Count: {{.Count}}</td>
341    <td style="padding:0.25em">Mean: {{printf "%.0f" .Mean}}</td>
342    <td style="padding:0.25em">StdDev: {{printf "%.0f" .StandardDeviation}}</td>
343    <td style="padding:0.25em">Median: {{.Median}}</td>
344</tr>
345</table>
346<hr>
347<table>
348{{range $b := .Buckets}}
349{{if $b}}
350  <tr>
351    <td style="padding:0 0 0 0.25em">[</td>
352    <td style="text-align:right;padding:0 0.25em">{{.Lower}},</td>
353    <td style="text-align:right;padding:0 0.25em">{{.Upper}})</td>
354    <td style="text-align:right;padding:0 0.25em">{{.N}}</td>
355    <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .Pct}}%</td>
356    <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .CumulativePct}}%</td>
357    <td><div style="background-color: blue; height: 1em; width: {{.GraphWidth}};"></div></td>
358  </tr>
359{{end}}
360{{end}}
361</table>
362`))
363	})
364	return distTmplCache
365}
366