// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package trace

import (
	"math"
	"sort"
)

// mud is an updatable mutator utilization distribution.
//
// This is a continuous distribution of duration over mutator
// utilization. For example, the integral from mutator utilization a
// to b is the total duration during which the mutator utilization was
// in the range [a, b].
//
// This distribution is *not* normalized (it is not a probability
// distribution). This makes it easier to work with as it's being
// updated.
//
// It is represented as the sum of scaled uniform distribution
// functions and Dirac delta functions (which are treated as
// degenerate uniform distributions).
type mud struct {
	sorted, unsorted []edge

	// trackMass is the mass for which to track the inverse
	// cumulative sum as the distribution is updated.
	trackMass float64
	// trackBucket is the bucket in which trackMass falls. If the
	// total mass of the distribution is < trackMass, this is
	// len(hist).
	trackBucket int
	// trackSum is the cumulative sum of hist[:trackBucket]. Once
	// trackSum >= trackMass, trackBucket must be recomputed.
	trackSum float64

	// hist is a histogram of distribution mass.
	hist [mudDegree]float64
}
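
// mudExampleSketch is a hypothetical illustration, not part of the
// original file, of what the distribution means. Suppose the mutator
// ran at 100% utilization for 8 units of duration and was fully
// stopped (0% utilization) for 2 units. Both periods are added as
// Dirac deltas, and the integral of the distribution up to
// utilization 0 is the 2 units spent stopped, which invCumulativeSum
// reports.
func mudExampleSketch() (float64, bool) {
	var d mud
	d.add(1, 1, 8) // 8 units of duration at exactly 100% utilization
	d.add(0, 0, 2) // 2 units of duration at exactly 0% utilization
	// Returns (0, true): at least 2 units of duration were spent
	// with mutator utilization <= 0.
	return d.invCumulativeSum(2)
}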

const (
	// mudDegree is the number of buckets in the MUD summary
	// histogram.
	mudDegree = 1024
)

type edge struct {
	// At x, the function increases by delta.
	x, delta float64
	// Additionally at x is a Dirac delta function with area dirac.
	dirac float64
}

// add adds a uniform function over [l, r] scaled so the total weight
// of the uniform is area. If l==r, this adds a Dirac delta function.
func (d *mud) add(l, r, area float64) {
	if area == 0 {
		return
	}

	if r < l {
		l, r = r, l
	}

	// Add the edges.
	if l == r {
		d.unsorted = append(d.unsorted, edge{l, 0, area})
	} else {
		delta := area / (r - l)
		d.unsorted = append(d.unsorted, edge{l, delta, 0}, edge{r, -delta, 0})
	}

	// Update the histogram.
	h := &d.hist
	lbFloat, lf := math.Modf(l * mudDegree)
	lb := int(lbFloat)
	if lb >= mudDegree {
		lb, lf = mudDegree-1, 1
	}
	if l == r {
		h[lb] += area
	} else {
		rbFloat, rf := math.Modf(r * mudDegree)
		rb := int(rbFloat)
		if rb >= mudDegree {
			rb, rf = mudDegree-1, 1
		}
		if lb == rb {
			h[lb] += area
		} else {
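			// Spread the area evenly over the covered buckets,
			// giving each of the two partial end buckets only
			// the fraction of a bucket that [l, r] overlaps.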
			perBucket := area / (r - l) / mudDegree
			h[lb] += perBucket * (1 - lf)
			h[rb] += perBucket * rf
			for i := lb + 1; i < rb; i++ {
				h[i] += perBucket
			}
		}
	}

	// Update mass tracking.
	if thresh := float64(d.trackBucket) / mudDegree; l < thresh {
		if r < thresh {
			d.trackSum += area
		} else {
			d.trackSum += area * (thresh - l) / (r - l)
		}
		if d.trackSum >= d.trackMass {
			// The tracked mass now falls in a different
			// bucket. Recompute the inverse cumulative sum.
			d.setTrackMass(d.trackMass)
		}
	}
}
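
// histMassSketch is a hypothetical helper, not part of the original
// file, illustrating the invariant add maintains on the summary
// histogram: every call deposits its full area into hist, so summing
// the buckets recovers the total duration added so far (up to
// floating-point rounding).
func histMassSketch(d *mud) float64 {
	total := 0.0
	for _, mass := range d.hist {
		total += mass
	}
	return total
}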

// setTrackMass sets the mass to track the inverse cumulative sum for.
//
// Specifically, mass is a cumulative duration, and the mutator
// utilization bounds for this duration can be queried using
// approxInvCumulativeSum.
func (d *mud) setTrackMass(mass float64) {
	d.trackMass = mass

	// Find the bucket currently containing trackMass by computing
	// the cumulative sum.
	sum := 0.0
	for i, val := range d.hist[:] {
		newSum := sum + val
		if newSum > mass {
			// mass falls in bucket i.
			d.trackBucket = i
			d.trackSum = sum
			return
		}
		sum = newSum
	}
	d.trackBucket = len(d.hist)
	d.trackSum = sum
}

// approxInvCumulativeSum is like invCumulativeSum, but specifically
// operates on the tracked mass and returns an upper and lower bound
// approximation of the inverse cumulative sum.
//
// The true inverse cumulative sum will be in the range [lower, upper).
func (d *mud) approxInvCumulativeSum() (float64, float64, bool) {
	if d.trackBucket == len(d.hist) {
		return math.NaN(), math.NaN(), false
	}
	return float64(d.trackBucket) / mudDegree, float64(d.trackBucket+1) / mudDegree, true
}
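
// trackedBoundsSketch is a hypothetical usage sketch of the mass
// tracking API, not part of the original file: register the mass of
// interest with setTrackMass, feed the distribution with add, then
// read back bucket-granularity bounds on the inverse cumulative sum.
func trackedBoundsSketch() (lower, upper float64, ok bool) {
	var d mud
	// Track the utilization below which 1.5 units of duration fall.
	d.setTrackMass(1.5)
	// 1 unit of duration spread uniformly over utilization [0, 1].
	d.add(0, 1, 1)
	// 1 unit of duration concentrated at utilization 0.25 (a Dirac delta).
	d.add(0.25, 0.25, 1)
	// Returns the bounds [0.5, 0.5+1/mudDegree): the Dirac supplies 1
	// unit below utilization 0.25 and the uniform supplies the
	// remaining 0.5 by utilization 0.5.
	return d.approxInvCumulativeSum()
}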

// invCumulativeSum returns x such that the integral of d from -∞ to x
// is y. If the total weight of d is less than y, it returns the
// maximum of the distribution and false.
//
// Specifically, y is a cumulative duration, and invCumulativeSum
// returns the mutator utilization x such that at least y time has
// been spent with mutator utilization <= x.
func (d *mud) invCumulativeSum(y float64) (float64, bool) {
	if len(d.sorted) == 0 && len(d.unsorted) == 0 {
		return math.NaN(), false
	}

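	// New edges from add are buffered in d.unsorted so that add stays
	// cheap; they are sorted and merged into the already-sorted edge
	// list only here, when a query needs the full ordered set.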
	// Sort edges.
	edges := d.unsorted
	sort.Slice(edges, func(i, j int) bool {
		return edges[i].x < edges[j].x
	})
	// Merge with sorted edges.
	d.unsorted = nil
	if d.sorted == nil {
		d.sorted = edges
	} else {
		oldSorted := d.sorted
		newSorted := make([]edge, len(oldSorted)+len(edges))
		i, j := 0, 0
		for o := range newSorted {
			if i >= len(oldSorted) {
				copy(newSorted[o:], edges[j:])
				break
			} else if j >= len(edges) {
				copy(newSorted[o:], oldSorted[i:])
				break
			} else if oldSorted[i].x < edges[j].x {
				newSorted[o] = oldSorted[i]
				i++
			} else {
				newSorted[o] = edges[j]
				j++
			}
		}
		d.sorted = newSorted
	}

	// Traverse edges in order computing a cumulative sum.
	csum, rate, prevX := 0.0, 0.0, 0.0
	for _, e := range d.sorted {
		newCsum := csum + (e.x-prevX)*rate
		if newCsum >= y {
			// y was exceeded between the previous edge
			// and this one.
			if rate == 0 {
				// Anywhere between prevX and
				// e.x will do. We return e.x
				// because that takes care of
				// the y==0 case naturally.
				return e.x, true
			}
			return (y-csum)/rate + prevX, true
		}
		newCsum += e.dirac
		if newCsum >= y {
			// y was exceeded by the Dirac delta at e.x.
			return e.x, true
		}
		csum, prevX = newCsum, e.x
		rate += e.delta
	}
	return prevX, false
}
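
// invCumulativeSumSketch is a hypothetical example, not part of the
// original file, illustrating the query: after spreading 10 units of
// duration uniformly over utilization [0, 1], the utilization below
// which 2.5 units of duration fall is 0.25.
func invCumulativeSumSketch() (float64, bool) {
	var d mud
	d.add(0, 1, 10)
	// Returns (0.25, true).
	return d.invCumulativeSum(2.5)
}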