1// Copyright 2015 The Prometheus Authors
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14package promql
15
16import (
17	"math"
18	"sort"
19
20	"github.com/prometheus/prometheus/pkg/labels"
21)
22
23// Helpers to calculate quantiles.
24
25// excludedLabels are the labels to exclude from signature calculation for
26// quantiles.
27var excludedLabels = []string{
28	labels.MetricName,
29	labels.BucketLabel,
30}
31
32type bucket struct {
33	upperBound float64
34	count      float64
35}
36
37// buckets implements sort.Interface.
38type buckets []bucket
39
40func (b buckets) Len() int           { return len(b) }
41func (b buckets) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
42func (b buckets) Less(i, j int) bool { return b[i].upperBound < b[j].upperBound }
43
44type metricWithBuckets struct {
45	metric  labels.Labels
46	buckets buckets
47}
48
49// bucketQuantile calculates the quantile 'q' based on the given buckets. The
50// buckets will be sorted by upperBound by this function (i.e. no sorting
51// needed before calling this function). The quantile value is interpolated
52// assuming a linear distribution within a bucket. However, if the quantile
53// falls into the highest bucket, the upper bound of the 2nd highest bucket is
54// returned. A natural lower bound of 0 is assumed if the upper bound of the
55// lowest bucket is greater 0. In that case, interpolation in the lowest bucket
56// happens linearly between 0 and the upper bound of the lowest bucket.
57// However, if the lowest bucket has an upper bound less or equal 0, this upper
58// bound is returned if the quantile falls into the lowest bucket.
59//
60// There are a number of special cases (once we have a way to report errors
61// happening during evaluations of AST functions, we should report those
62// explicitly):
63//
64// If 'buckets' has 0 observations, NaN is returned.
65//
66// If 'buckets' has fewer than 2 elements, NaN is returned.
67//
68// If the highest bucket is not +Inf, NaN is returned.
69//
70// If q<0, -Inf is returned.
71//
72// If q>1, +Inf is returned.
73func bucketQuantile(q float64, buckets buckets) float64 {
74	if q < 0 {
75		return math.Inf(-1)
76	}
77	if q > 1 {
78		return math.Inf(+1)
79	}
80	sort.Sort(buckets)
81	if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) {
82		return math.NaN()
83	}
84
85	buckets = coalesceBuckets(buckets)
86	ensureMonotonic(buckets)
87
88	if len(buckets) < 2 {
89		return math.NaN()
90	}
91	observations := buckets[len(buckets)-1].count
92	if observations == 0 {
93		return math.NaN()
94	}
95	rank := q * observations
96	b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })
97
98	if b == len(buckets)-1 {
99		return buckets[len(buckets)-2].upperBound
100	}
101	if b == 0 && buckets[0].upperBound <= 0 {
102		return buckets[0].upperBound
103	}
104	var (
105		bucketStart float64
106		bucketEnd   = buckets[b].upperBound
107		count       = buckets[b].count
108	)
109	if b > 0 {
110		bucketStart = buckets[b-1].upperBound
111		count -= buckets[b-1].count
112		rank -= buckets[b-1].count
113	}
114	return bucketStart + (bucketEnd-bucketStart)*(rank/count)
115}
116
117// coalesceBuckets merges buckets with the same upper bound.
118//
119// The input buckets must be sorted.
120func coalesceBuckets(buckets buckets) buckets {
121	last := buckets[0]
122	i := 0
123	for _, b := range buckets[1:] {
124		if b.upperBound == last.upperBound {
125			last.count += b.count
126		} else {
127			buckets[i] = last
128			last = b
129			i++
130		}
131	}
132	buckets[i] = last
133	return buckets[:i+1]
134}
135
136// The assumption that bucket counts increase monotonically with increasing
137// upperBound may be violated during:
138//
139//   * Recording rule evaluation of histogram_quantile, especially when rate()
140//      has been applied to the underlying bucket timeseries.
141//   * Evaluation of histogram_quantile computed over federated bucket
142//      timeseries, especially when rate() has been applied.
143//
144// This is because scraped data is not made available to rule evaluation or
145// federation atomically, so some buckets are computed with data from the
146// most recent scrapes, but the other buckets are missing data from the most
147// recent scrape.
148//
149// Monotonicity is usually guaranteed because if a bucket with upper bound
150// u1 has count c1, then any bucket with a higher upper bound u > u1 must
151// have counted all c1 observations and perhaps more, so that c  >= c1.
152//
153// Randomly interspersed partial sampling breaks that guarantee, and rate()
154// exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from
155// 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The
156// monotonicity is broken. It is exacerbated by rate() because under normal
157// operation, cumulative counting of buckets will cause the bucket counts to
158// diverge such that small differences from missing samples are not a problem.
159// rate() removes this divergence.)
160//
161// bucketQuantile depends on that monotonicity to do a binary search for the
162// bucket with the φ-quantile count, so breaking the monotonicity
163// guarantee causes bucketQuantile() to return undefined (nonsense) results.
164//
165// As a somewhat hacky solution until ingestion is atomic per scrape, we
166// calculate the "envelope" of the histogram buckets, essentially removing
167// any decreases in the count between successive buckets.
168
169func ensureMonotonic(buckets buckets) {
170	max := buckets[0].count
171	for i := 1; i < len(buckets); i++ {
172		switch {
173		case buckets[i].count > max:
174			max = buckets[i].count
175		case buckets[i].count < max:
176			buckets[i].count = max
177		}
178	}
179}
180
181// quantile calculates the given quantile of a vector of samples.
182//
183// The Vector will be sorted.
184// If 'values' has zero elements, NaN is returned.
185// If q<0, -Inf is returned.
186// If q>1, +Inf is returned.
187func quantile(q float64, values vectorByValueHeap) float64 {
188	if len(values) == 0 {
189		return math.NaN()
190	}
191	if q < 0 {
192		return math.Inf(-1)
193	}
194	if q > 1 {
195		return math.Inf(+1)
196	}
197	sort.Sort(values)
198
199	n := float64(len(values))
200	// When the quantile lies between two samples,
201	// we use a weighted average of the two samples.
202	rank := q * (n - 1)
203
204	lowerIndex := math.Max(0, math.Floor(rank))
205	upperIndex := math.Min(n-1, lowerIndex+1)
206
207	weight := rank - math.Floor(rank)
208	return values[int(lowerIndex)].V*(1-weight) + values[int(upperIndex)].V*weight
209}
210