prometheus
450 строк · 14.6 Кб
1// Copyright 2015 The Prometheus Authors
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14package promql
15
16import (
17"math"
18"slices"
19"sort"
20
21"github.com/prometheus/prometheus/model/histogram"
22"github.com/prometheus/prometheus/model/labels"
23"github.com/prometheus/prometheus/util/almost"
24)
25
26// smallDeltaTolerance is the threshold for relative deltas between classic
27// histogram buckets that will be ignored by the histogram_quantile function
28// because they are most likely artifacts of floating point precision issues.
29// Testing on 2 sets of real data with bugs arising from small deltas,
30// the safe ranges were from:
31// - 1e-05 to 1e-15
32// - 1e-06 to 1e-15
33// Anything to the left of that would cause non-query-sharded data to have
34// small deltas ignored (unnecessary and we should avoid this), and anything
35// to the right of that would cause query-sharded data to not have its small
36// deltas ignored (so the problem won't be fixed).
37// For context, query sharding triggers these float precision errors in Mimir.
38// To illustrate, with a relative deviation of 1e-12, we need to have 1e12
39// observations in the bucket so that the change of one observation is small
40// enough to get ignored. With the usual observation rate even of very busy
41// services, this will hardly be reached in timeframes that matters for
42// monitoring.
43const smallDeltaTolerance = 1e-12
44
45// Helpers to calculate quantiles.
46
47// excludedLabels are the labels to exclude from signature calculation for
48// quantiles.
49var excludedLabels = []string{
50labels.MetricName,
51labels.BucketLabel,
52}
53
54type bucket struct {
55upperBound float64
56count float64
57}
58
59// buckets implements sort.Interface.
60type buckets []bucket
61
62type metricWithBuckets struct {
63metric labels.Labels
64buckets buckets
65}
66
67// bucketQuantile calculates the quantile 'q' based on the given buckets. The
68// buckets will be sorted by upperBound by this function (i.e. no sorting
69// needed before calling this function). The quantile value is interpolated
70// assuming a linear distribution within a bucket. However, if the quantile
71// falls into the highest bucket, the upper bound of the 2nd highest bucket is
72// returned. A natural lower bound of 0 is assumed if the upper bound of the
73// lowest bucket is greater 0. In that case, interpolation in the lowest bucket
74// happens linearly between 0 and the upper bound of the lowest bucket.
75// However, if the lowest bucket has an upper bound less or equal 0, this upper
76// bound is returned if the quantile falls into the lowest bucket.
77//
78// There are a number of special cases (once we have a way to report errors
79// happening during evaluations of AST functions, we should report those
80// explicitly):
81//
82// If 'buckets' has 0 observations, NaN is returned.
83//
84// If 'buckets' has fewer than 2 elements, NaN is returned.
85//
86// If the highest bucket is not +Inf, NaN is returned.
87//
88// If q==NaN, NaN is returned.
89//
90// If q<0, -Inf is returned.
91//
92// If q>1, +Inf is returned.
93//
94// We also return a bool to indicate if monotonicity needed to be forced,
95// and another bool to indicate if small differences between buckets (that
96// are likely artifacts of floating point precision issues) have been
97// ignored.
98func bucketQuantile(q float64, buckets buckets) (float64, bool, bool) {
99if math.IsNaN(q) {
100return math.NaN(), false, false
101}
102if q < 0 {
103return math.Inf(-1), false, false
104}
105if q > 1 {
106return math.Inf(+1), false, false
107}
108slices.SortFunc(buckets, func(a, b bucket) int {
109// We don't expect the bucket boundary to be a NaN.
110if a.upperBound < b.upperBound {
111return -1
112}
113if a.upperBound > b.upperBound {
114return +1
115}
116return 0
117})
118if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) {
119return math.NaN(), false, false
120}
121
122buckets = coalesceBuckets(buckets)
123forcedMonotonic, fixedPrecision := ensureMonotonicAndIgnoreSmallDeltas(buckets, smallDeltaTolerance)
124
125if len(buckets) < 2 {
126return math.NaN(), false, false
127}
128observations := buckets[len(buckets)-1].count
129if observations == 0 {
130return math.NaN(), false, false
131}
132rank := q * observations
133b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })
134
135if b == len(buckets)-1 {
136return buckets[len(buckets)-2].upperBound, forcedMonotonic, fixedPrecision
137}
138if b == 0 && buckets[0].upperBound <= 0 {
139return buckets[0].upperBound, forcedMonotonic, fixedPrecision
140}
141var (
142bucketStart float64
143bucketEnd = buckets[b].upperBound
144count = buckets[b].count
145)
146if b > 0 {
147bucketStart = buckets[b-1].upperBound
148count -= buckets[b-1].count
149rank -= buckets[b-1].count
150}
151return bucketStart + (bucketEnd-bucketStart)*(rank/count), forcedMonotonic, fixedPrecision
152}
153
154// histogramQuantile calculates the quantile 'q' based on the given histogram.
155//
156// The quantile value is interpolated assuming a linear distribution within a
157// bucket.
158// TODO(beorn7): Find an interpolation method that is a better fit for
159// exponential buckets (and think about configurable interpolation).
160//
161// A natural lower bound of 0 is assumed if the histogram has only positive
162// buckets. Likewise, a natural upper bound of 0 is assumed if the histogram has
163// only negative buckets.
164// TODO(beorn7): Come to terms if we want that.
165//
166// There are a number of special cases (once we have a way to report errors
167// happening during evaluations of AST functions, we should report those
168// explicitly):
169//
170// If the histogram has 0 observations, NaN is returned.
171//
172// If q<0, -Inf is returned.
173//
174// If q>1, +Inf is returned.
175//
176// If q is NaN, NaN is returned.
177func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
178if q < 0 {
179return math.Inf(-1)
180}
181if q > 1 {
182return math.Inf(+1)
183}
184
185if h.Count == 0 || math.IsNaN(q) {
186return math.NaN()
187}
188
189var (
190bucket histogram.Bucket[float64]
191count float64
192it histogram.BucketIterator[float64]
193rank float64
194)
195
196// if there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator
197// if the q < 0.5, use the forward iterator
198// if the q >= 0.5, use the reverse iterator
199if math.IsNaN(h.Sum) || q < 0.5 {
200it = h.AllBucketIterator()
201rank = q * h.Count
202} else {
203it = h.AllReverseBucketIterator()
204rank = (1 - q) * h.Count
205}
206
207for it.Next() {
208bucket = it.At()
209count += bucket.Count
210if count >= rank {
211break
212}
213}
214if bucket.Lower < 0 && bucket.Upper > 0 {
215switch {
216case len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0:
217// The result is in the zero bucket and the histogram has only
218// positive buckets. So we consider 0 to be the lower bound.
219bucket.Lower = 0
220case len(h.PositiveBuckets) == 0 && len(h.NegativeBuckets) > 0:
221// The result is in the zero bucket and the histogram has only
222// negative buckets. So we consider 0 to be the upper bound.
223bucket.Upper = 0
224}
225}
226// Due to numerical inaccuracies, we could end up with a higher count
227// than h.Count. Thus, make sure count is never higher than h.Count.
228if count > h.Count {
229count = h.Count
230}
231// We could have hit the highest bucket without even reaching the rank
232// (this should only happen if the histogram contains observations of
233// the value NaN), in which case we simply return the upper limit of the
234// highest explicit bucket.
235if count < rank {
236return bucket.Upper
237}
238
239// NaN observations increase h.Count but not the total number of
240// observations in the buckets. Therefore, we have to use the forward
241// iterator to find percentiles. We recognize histograms containing NaN
242// observations by checking if their h.Sum is NaN.
243if math.IsNaN(h.Sum) || q < 0.5 {
244rank -= count - bucket.Count
245} else {
246rank = count - rank
247}
248
249// TODO(codesome): Use a better estimation than linear.
250return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count)
251}
252
253// histogramFraction calculates the fraction of observations between the
254// provided lower and upper bounds, based on the provided histogram.
255//
256// histogramFraction is in a certain way the inverse of histogramQuantile. If
257// histogramQuantile(0.9, h) returns 123.4, then histogramFraction(-Inf, 123.4, h)
258// returns 0.9.
259//
260// The same notes (and TODOs) with regard to interpolation and assumptions about
261// the zero bucket boundaries apply as for histogramQuantile.
262//
263// Whether either boundary is inclusive or exclusive doesn’t actually matter as
264// long as interpolation has to be performed anyway. In the case of a boundary
265// coinciding with a bucket boundary, the inclusive or exclusive nature of the
266// boundary determines the exact behavior of the threshold. With the current
267// implementation, that means that lower is exclusive for positive values and
268// inclusive for negative values, while upper is inclusive for positive values
269// and exclusive for negative values.
270//
271// Special cases:
272//
273// If the histogram has 0 observations, NaN is returned.
274//
275// Use a lower bound of -Inf to get the fraction of all observations below the
276// upper bound.
277//
278// Use an upper bound of +Inf to get the fraction of all observations above the
279// lower bound.
280//
281// If lower or upper is NaN, NaN is returned.
282//
283// If lower >= upper and the histogram has at least 1 observation, zero is returned.
284func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float64 {
285if h.Count == 0 || math.IsNaN(lower) || math.IsNaN(upper) {
286return math.NaN()
287}
288if lower >= upper {
289return 0
290}
291
292var (
293rank, lowerRank, upperRank float64
294lowerSet, upperSet bool
295it = h.AllBucketIterator()
296)
297for it.Next() {
298b := it.At()
299if b.Lower < 0 && b.Upper > 0 {
300switch {
301case len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0:
302// This is the zero bucket and the histogram has only
303// positive buckets. So we consider 0 to be the lower
304// bound.
305b.Lower = 0
306case len(h.PositiveBuckets) == 0 && len(h.NegativeBuckets) > 0:
307// This is in the zero bucket and the histogram has only
308// negative buckets. So we consider 0 to be the upper
309// bound.
310b.Upper = 0
311}
312}
313if !lowerSet && b.Lower >= lower {
314lowerRank = rank
315lowerSet = true
316}
317if !upperSet && b.Lower >= upper {
318upperRank = rank
319upperSet = true
320}
321if lowerSet && upperSet {
322break
323}
324if !lowerSet && b.Lower < lower && b.Upper > lower {
325lowerRank = rank + b.Count*(lower-b.Lower)/(b.Upper-b.Lower)
326lowerSet = true
327}
328if !upperSet && b.Lower < upper && b.Upper > upper {
329upperRank = rank + b.Count*(upper-b.Lower)/(b.Upper-b.Lower)
330upperSet = true
331}
332if lowerSet && upperSet {
333break
334}
335rank += b.Count
336}
337if !lowerSet || lowerRank > h.Count {
338lowerRank = h.Count
339}
340if !upperSet || upperRank > h.Count {
341upperRank = h.Count
342}
343
344return (upperRank - lowerRank) / h.Count
345}
346
347// coalesceBuckets merges buckets with the same upper bound.
348//
349// The input buckets must be sorted.
350func coalesceBuckets(buckets buckets) buckets {
351last := buckets[0]
352i := 0
353for _, b := range buckets[1:] {
354if b.upperBound == last.upperBound {
355last.count += b.count
356} else {
357buckets[i] = last
358last = b
359i++
360}
361}
362buckets[i] = last
363return buckets[:i+1]
364}
365
366// The assumption that bucket counts increase monotonically with increasing
367// upperBound may be violated during:
368//
369// - Circumstances where data is already inconsistent at the target's side.
370// - Ingestion via the remote write receiver that Prometheus implements.
371// - Optimisation of query execution where precision is sacrificed for other
372// benefits, not by Prometheus but by systems built on top of it.
373// - Circumstances where floating point precision errors accumulate.
374//
375// Monotonicity is usually guaranteed because if a bucket with upper bound
376// u1 has count c1, then any bucket with a higher upper bound u > u1 must
377// have counted all c1 observations and perhaps more, so that c >= c1.
378//
379// bucketQuantile depends on that monotonicity to do a binary search for the
380// bucket with the φ-quantile count, so breaking the monotonicity
381// guarantee causes bucketQuantile() to return undefined (nonsense) results.
382//
383// As a somewhat hacky solution, we first silently ignore any numerically
384// insignificant (relative delta below the requested tolerance and likely to
385// be from floating point precision errors) differences between successive
386// buckets regardless of the direction. Then we calculate the "envelope" of
387// the histogram buckets, essentially removing any decreases in the count
388// between successive buckets.
389//
390// We return a bool to indicate if this monotonicity was forced or not, and
391// another bool to indicate if small deltas were ignored or not.
392func ensureMonotonicAndIgnoreSmallDeltas(buckets buckets, tolerance float64) (bool, bool) {
393var forcedMonotonic, fixedPrecision bool
394prev := buckets[0].count
395for i := 1; i < len(buckets); i++ {
396curr := buckets[i].count // Assumed always positive.
397if curr == prev {
398// No correction needed if the counts are identical between buckets.
399continue
400}
401if almost.Equal(prev, curr, tolerance) {
402// Silently correct numerically insignificant differences from floating
403// point precision errors, regardless of direction.
404// Do not update the 'prev' value as we are ignoring the difference.
405buckets[i].count = prev
406fixedPrecision = true
407continue
408}
409if curr < prev {
410// Force monotonicity by removing any decreases regardless of magnitude.
411// Do not update the 'prev' value as we are ignoring the decrease.
412buckets[i].count = prev
413forcedMonotonic = true
414continue
415}
416prev = curr
417}
418return forcedMonotonic, fixedPrecision
419}
420
421// quantile calculates the given quantile of a vector of samples.
422//
423// The Vector will be sorted.
424// If 'values' has zero elements, NaN is returned.
425// If q==NaN, NaN is returned.
426// If q<0, -Inf is returned.
427// If q>1, +Inf is returned.
428func quantile(q float64, values vectorByValueHeap) float64 {
429if len(values) == 0 || math.IsNaN(q) {
430return math.NaN()
431}
432if q < 0 {
433return math.Inf(-1)
434}
435if q > 1 {
436return math.Inf(+1)
437}
438sort.Sort(values)
439
440n := float64(len(values))
441// When the quantile lies between two samples,
442// we use a weighted average of the two samples.
443rank := q * (n - 1)
444
445lowerIndex := math.Max(0, math.Floor(rank))
446upperIndex := math.Min(n-1, lowerIndex+1)
447
448weight := rank - math.Floor(rank)
449return values[int(lowerIndex)].F*(1-weight) + values[int(upperIndex)].F*weight
450}
451