1// Copyright 2015 The Prometheus Authors 2// Licensed under the Apache License, Version 2.0 (the "License"); 3// you may not use this file except in compliance with the License. 4// You may obtain a copy of the License at 5// 6// http://www.apache.org/licenses/LICENSE-2.0 7// 8// Unless required by applicable law or agreed to in writing, software 9// distributed under the License is distributed on an "AS IS" BASIS, 10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11// See the License for the specific language governing permissions and 12// limitations under the License. 13 14package promql 15 16import ( 17 "math" 18 "sort" 19 20 "github.com/prometheus/prometheus/pkg/labels" 21) 22 23// Helpers to calculate quantiles. 24 25// excludedLabels are the labels to exclude from signature calculation for 26// quantiles. 27var excludedLabels = []string{ 28 labels.MetricName, 29 labels.BucketLabel, 30} 31 32type bucket struct { 33 upperBound float64 34 count float64 35} 36 37// buckets implements sort.Interface. 38type buckets []bucket 39 40func (b buckets) Len() int { return len(b) } 41func (b buckets) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 42func (b buckets) Less(i, j int) bool { return b[i].upperBound < b[j].upperBound } 43 44type metricWithBuckets struct { 45 metric labels.Labels 46 buckets buckets 47} 48 49// bucketQuantile calculates the quantile 'q' based on the given buckets. The 50// buckets will be sorted by upperBound by this function (i.e. no sorting 51// needed before calling this function). The quantile value is interpolated 52// assuming a linear distribution within a bucket. However, if the quantile 53// falls into the highest bucket, the upper bound of the 2nd highest bucket is 54// returned. A natural lower bound of 0 is assumed if the upper bound of the 55// lowest bucket is greater 0. In that case, interpolation in the lowest bucket 56// happens linearly between 0 and the upper bound of the lowest bucket. 57// However, if the lowest bucket has an upper bound less or equal 0, this upper 58// bound is returned if the quantile falls into the lowest bucket. 59// 60// There are a number of special cases (once we have a way to report errors 61// happening during evaluations of AST functions, we should report those 62// explicitly): 63// 64// If 'buckets' has 0 observations, NaN is returned. 65// 66// If 'buckets' has fewer than 2 elements, NaN is returned. 67// 68// If the highest bucket is not +Inf, NaN is returned. 69// 70// If q<0, -Inf is returned. 71// 72// If q>1, +Inf is returned. 73func bucketQuantile(q float64, buckets buckets) float64 { 74 if q < 0 { 75 return math.Inf(-1) 76 } 77 if q > 1 { 78 return math.Inf(+1) 79 } 80 sort.Sort(buckets) 81 if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) { 82 return math.NaN() 83 } 84 85 buckets = coalesceBuckets(buckets) 86 ensureMonotonic(buckets) 87 88 if len(buckets) < 2 { 89 return math.NaN() 90 } 91 observations := buckets[len(buckets)-1].count 92 if observations == 0 { 93 return math.NaN() 94 } 95 rank := q * observations 96 b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank }) 97 98 if b == len(buckets)-1 { 99 return buckets[len(buckets)-2].upperBound 100 } 101 if b == 0 && buckets[0].upperBound <= 0 { 102 return buckets[0].upperBound 103 } 104 var ( 105 bucketStart float64 106 bucketEnd = buckets[b].upperBound 107 count = buckets[b].count 108 ) 109 if b > 0 { 110 bucketStart = buckets[b-1].upperBound 111 count -= buckets[b-1].count 112 rank -= buckets[b-1].count 113 } 114 return bucketStart + (bucketEnd-bucketStart)*(rank/count) 115} 116 117// coalesceBuckets merges buckets with the same upper bound. 118// 119// The input buckets must be sorted. 120func coalesceBuckets(buckets buckets) buckets { 121 last := buckets[0] 122 i := 0 123 for _, b := range buckets[1:] { 124 if b.upperBound == last.upperBound { 125 last.count += b.count 126 } else { 127 buckets[i] = last 128 last = b 129 i++ 130 } 131 } 132 buckets[i] = last 133 return buckets[:i+1] 134} 135 136// The assumption that bucket counts increase monotonically with increasing 137// upperBound may be violated during: 138// 139// * Recording rule evaluation of histogram_quantile, especially when rate() 140// has been applied to the underlying bucket timeseries. 141// * Evaluation of histogram_quantile computed over federated bucket 142// timeseries, especially when rate() has been applied. 143// 144// This is because scraped data is not made available to rule evaluation or 145// federation atomically, so some buckets are computed with data from the 146// most recent scrapes, but the other buckets are missing data from the most 147// recent scrape. 148// 149// Monotonicity is usually guaranteed because if a bucket with upper bound 150// u1 has count c1, then any bucket with a higher upper bound u > u1 must 151// have counted all c1 observations and perhaps more, so that c >= c1. 152// 153// Randomly interspersed partial sampling breaks that guarantee, and rate() 154// exacerbates it. Specifically, suppose bucket le=1000 has a count of 10 from 155// 4 samples but the bucket with le=2000 has a count of 7 from 3 samples. The 156// monotonicity is broken. It is exacerbated by rate() because under normal 157// operation, cumulative counting of buckets will cause the bucket counts to 158// diverge such that small differences from missing samples are not a problem. 159// rate() removes this divergence.) 160// 161// bucketQuantile depends on that monotonicity to do a binary search for the 162// bucket with the φ-quantile count, so breaking the monotonicity 163// guarantee causes bucketQuantile() to return undefined (nonsense) results. 164// 165// As a somewhat hacky solution until ingestion is atomic per scrape, we 166// calculate the "envelope" of the histogram buckets, essentially removing 167// any decreases in the count between successive buckets. 168 169func ensureMonotonic(buckets buckets) { 170 max := buckets[0].count 171 for i := 1; i < len(buckets); i++ { 172 switch { 173 case buckets[i].count > max: 174 max = buckets[i].count 175 case buckets[i].count < max: 176 buckets[i].count = max 177 } 178 } 179} 180 181// quantile calculates the given quantile of a vector of samples. 182// 183// The Vector will be sorted. 184// If 'values' has zero elements, NaN is returned. 185// If q<0, -Inf is returned. 186// If q>1, +Inf is returned. 187func quantile(q float64, values vectorByValueHeap) float64 { 188 if len(values) == 0 { 189 return math.NaN() 190 } 191 if q < 0 { 192 return math.Inf(-1) 193 } 194 if q > 1 { 195 return math.Inf(+1) 196 } 197 sort.Sort(values) 198 199 n := float64(len(values)) 200 // When the quantile lies between two samples, 201 // we use a weighted average of the two samples. 202 rank := q * (n - 1) 203 204 lowerIndex := math.Max(0, math.Floor(rank)) 205 upperIndex := math.Min(n-1, lowerIndex+1) 206 207 weight := rank - math.Floor(rank) 208 return values[int(lowerIndex)].V*(1-weight) + values[int(upperIndex)].V*weight 209} 210