prometheus

quantile.go
450 строк · 14.6 Кб
Перенос по словам
1
// Copyright 2015 The Prometheus Authors
2
// Licensed under the Apache License, Version 2.0 (the "License");
3
// you may not use this file except in compliance with the License.
4
// You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software
9
// distributed under the License is distributed on an "AS IS" BASIS,
10
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
// See the License for the specific language governing permissions and
12
// limitations under the License.
13

14
package promql
15

16
import (
17
	"math"
18
	"slices"
19
	"sort"
20

21
	"github.com/prometheus/prometheus/model/histogram"
22
	"github.com/prometheus/prometheus/model/labels"
23
	"github.com/prometheus/prometheus/util/almost"
24
)
25

26
// smallDeltaTolerance is the threshold for relative deltas between classic
27
// histogram buckets that will be ignored by the histogram_quantile function
28
// because they are most likely artifacts of floating point precision issues.
29
// Testing on 2 sets of real data with bugs arising from small deltas,
30
// the safe ranges were from:
31
// - 1e-05 to 1e-15
32
// - 1e-06 to 1e-15
33
// Anything to the left of that would cause non-query-sharded data to have
34
// small deltas ignored (unnecessary and we should avoid this), and anything
35
// to the right of that would cause query-sharded data to not have its small
36
// deltas ignored (so the problem won't be fixed).
37
// For context, query sharding triggers these float precision errors in Mimir.
38
// To illustrate, with a relative deviation of 1e-12, we need to have 1e12
39
// observations in the bucket so that the change of one observation is small
40
// enough to get ignored. With the usual observation rate even of very busy
41
// services, this will hardly be reached in timeframes that matters for
42
// monitoring.
43
const smallDeltaTolerance = 1e-12
44

45
// Helpers to calculate quantiles.
46

47
// excludedLabels are the labels to exclude from signature calculation for
48
// quantiles.
49
var excludedLabels = []string{
50
	labels.MetricName,
51
	labels.BucketLabel,
52
}
53

54
type bucket struct {
55
	upperBound float64
56
	count      float64
57
}
58

59
// buckets implements sort.Interface.
60
type buckets []bucket
61

62
type metricWithBuckets struct {
63
	metric  labels.Labels
64
	buckets buckets
65
}
66

67
// bucketQuantile calculates the quantile 'q' based on the given buckets. The
68
// buckets will be sorted by upperBound by this function (i.e. no sorting
69
// needed before calling this function). The quantile value is interpolated
70
// assuming a linear distribution within a bucket. However, if the quantile
71
// falls into the highest bucket, the upper bound of the 2nd highest bucket is
72
// returned. A natural lower bound of 0 is assumed if the upper bound of the
73
// lowest bucket is greater 0. In that case, interpolation in the lowest bucket
74
// happens linearly between 0 and the upper bound of the lowest bucket.
75
// However, if the lowest bucket has an upper bound less or equal 0, this upper
76
// bound is returned if the quantile falls into the lowest bucket.
77
//
78
// There are a number of special cases (once we have a way to report errors
79
// happening during evaluations of AST functions, we should report those
80
// explicitly):
81
//
82
// If 'buckets' has 0 observations, NaN is returned.
83
//
84
// If 'buckets' has fewer than 2 elements, NaN is returned.
85
//
86
// If the highest bucket is not +Inf, NaN is returned.
87
//
88
// If q==NaN, NaN is returned.
89
//
90
// If q<0, -Inf is returned.
91
//
92
// If q>1, +Inf is returned.
93
//
94
// We also return a bool to indicate if monotonicity needed to be forced,
95
// and another bool to indicate if small differences between buckets (that
96
// are likely artifacts of floating point precision issues) have been
97
// ignored.
98
func bucketQuantile(q float64, buckets buckets) (float64, bool, bool) {
99
	if math.IsNaN(q) {
100
		return math.NaN(), false, false
101
	}
102
	if q < 0 {
103
		return math.Inf(-1), false, false
104
	}
105
	if q > 1 {
106
		return math.Inf(+1), false, false
107
	}
108
	slices.SortFunc(buckets, func(a, b bucket) int {
109
		// We don't expect the bucket boundary to be a NaN.
110
		if a.upperBound < b.upperBound {
111
			return -1
112
		}
113
		if a.upperBound > b.upperBound {
114
			return +1
115
		}
116
		return 0
117
	})
118
	if !math.IsInf(buckets[len(buckets)-1].upperBound, +1) {
119
		return math.NaN(), false, false
120
	}
121

122
	buckets = coalesceBuckets(buckets)
123
	forcedMonotonic, fixedPrecision := ensureMonotonicAndIgnoreSmallDeltas(buckets, smallDeltaTolerance)
124

125
	if len(buckets) < 2 {
126
		return math.NaN(), false, false
127
	}
128
	observations := buckets[len(buckets)-1].count
129
	if observations == 0 {
130
		return math.NaN(), false, false
131
	}
132
	rank := q * observations
133
	b := sort.Search(len(buckets)-1, func(i int) bool { return buckets[i].count >= rank })
134

135
	if b == len(buckets)-1 {
136
		return buckets[len(buckets)-2].upperBound, forcedMonotonic, fixedPrecision
137
	}
138
	if b == 0 && buckets[0].upperBound <= 0 {
139
		return buckets[0].upperBound, forcedMonotonic, fixedPrecision
140
	}
141
	var (
142
		bucketStart float64
143
		bucketEnd   = buckets[b].upperBound
144
		count       = buckets[b].count
145
	)
146
	if b > 0 {
147
		bucketStart = buckets[b-1].upperBound
148
		count -= buckets[b-1].count
149
		rank -= buckets[b-1].count
150
	}
151
	return bucketStart + (bucketEnd-bucketStart)*(rank/count), forcedMonotonic, fixedPrecision
152
}
153

154
// histogramQuantile calculates the quantile 'q' based on the given histogram.
155
//
156
// The quantile value is interpolated assuming a linear distribution within a
157
// bucket.
158
// TODO(beorn7): Find an interpolation method that is a better fit for
159
// exponential buckets (and think about configurable interpolation).
160
//
161
// A natural lower bound of 0 is assumed if the histogram has only positive
162
// buckets. Likewise, a natural upper bound of 0 is assumed if the histogram has
163
// only negative buckets.
164
// TODO(beorn7): Come to terms if we want that.
165
//
166
// There are a number of special cases (once we have a way to report errors
167
// happening during evaluations of AST functions, we should report those
168
// explicitly):
169
//
170
// If the histogram has 0 observations, NaN is returned.
171
//
172
// If q<0, -Inf is returned.
173
//
174
// If q>1, +Inf is returned.
175
//
176
// If q is NaN, NaN is returned.
177
func histogramQuantile(q float64, h *histogram.FloatHistogram) float64 {
178
	if q < 0 {
179
		return math.Inf(-1)
180
	}
181
	if q > 1 {
182
		return math.Inf(+1)
183
	}
184

185
	if h.Count == 0 || math.IsNaN(q) {
186
		return math.NaN()
187
	}
188

189
	var (
190
		bucket histogram.Bucket[float64]
191
		count  float64
192
		it     histogram.BucketIterator[float64]
193
		rank   float64
194
	)
195

196
	// if there are NaN observations in the histogram (h.Sum is NaN), use the forward iterator
197
	// if the q < 0.5, use the forward iterator
198
	// if the q >= 0.5, use the reverse iterator
199
	if math.IsNaN(h.Sum) || q < 0.5 {
200
		it = h.AllBucketIterator()
201
		rank = q * h.Count
202
	} else {
203
		it = h.AllReverseBucketIterator()
204
		rank = (1 - q) * h.Count
205
	}
206

207
	for it.Next() {
208
		bucket = it.At()
209
		count += bucket.Count
210
		if count >= rank {
211
			break
212
		}
213
	}
214
	if bucket.Lower < 0 && bucket.Upper > 0 {
215
		switch {
216
		case len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0:
217
			// The result is in the zero bucket and the histogram has only
218
			// positive buckets. So we consider 0 to be the lower bound.
219
			bucket.Lower = 0
220
		case len(h.PositiveBuckets) == 0 && len(h.NegativeBuckets) > 0:
221
			// The result is in the zero bucket and the histogram has only
222
			// negative buckets. So we consider 0 to be the upper bound.
223
			bucket.Upper = 0
224
		}
225
	}
226
	// Due to numerical inaccuracies, we could end up with a higher count
227
	// than h.Count. Thus, make sure count is never higher than h.Count.
228
	if count > h.Count {
229
		count = h.Count
230
	}
231
	// We could have hit the highest bucket without even reaching the rank
232
	// (this should only happen if the histogram contains observations of
233
	// the value NaN), in which case we simply return the upper limit of the
234
	// highest explicit bucket.
235
	if count < rank {
236
		return bucket.Upper
237
	}
238

239
	// NaN observations increase h.Count but not the total number of
240
	// observations in the buckets. Therefore, we have to use the forward
241
	// iterator to find percentiles. We recognize histograms containing NaN
242
	// observations by checking if their h.Sum is NaN.
243
	if math.IsNaN(h.Sum) || q < 0.5 {
244
		rank -= count - bucket.Count
245
	} else {
246
		rank = count - rank
247
	}
248

249
	// TODO(codesome): Use a better estimation than linear.
250
	return bucket.Lower + (bucket.Upper-bucket.Lower)*(rank/bucket.Count)
251
}
252

253
// histogramFraction calculates the fraction of observations between the
254
// provided lower and upper bounds, based on the provided histogram.
255
//
256
// histogramFraction is in a certain way the inverse of histogramQuantile.  If
257
// histogramQuantile(0.9, h) returns 123.4, then histogramFraction(-Inf, 123.4, h)
258
// returns 0.9.
259
//
260
// The same notes (and TODOs) with regard to interpolation and assumptions about
261
// the zero bucket boundaries apply as for histogramQuantile.
262
//
263
// Whether either boundary is inclusive or exclusive doesn’t actually matter as
264
// long as interpolation has to be performed anyway. In the case of a boundary
265
// coinciding with a bucket boundary, the inclusive or exclusive nature of the
266
// boundary determines the exact behavior of the threshold. With the current
267
// implementation, that means that lower is exclusive for positive values and
268
// inclusive for negative values, while upper is inclusive for positive values
269
// and exclusive for negative values.
270
//
271
// Special cases:
272
//
273
// If the histogram has 0 observations, NaN is returned.
274
//
275
// Use a lower bound of -Inf to get the fraction of all observations below the
276
// upper bound.
277
//
278
// Use an upper bound of +Inf to get the fraction of all observations above the
279
// lower bound.
280
//
281
// If lower or upper is NaN, NaN is returned.
282
//
283
// If lower >= upper and the histogram has at least 1 observation, zero is returned.
284
func histogramFraction(lower, upper float64, h *histogram.FloatHistogram) float64 {
285
	if h.Count == 0 || math.IsNaN(lower) || math.IsNaN(upper) {
286
		return math.NaN()
287
	}
288
	if lower >= upper {
289
		return 0
290
	}
291

292
	var (
293
		rank, lowerRank, upperRank float64
294
		lowerSet, upperSet         bool
295
		it                         = h.AllBucketIterator()
296
	)
297
	for it.Next() {
298
		b := it.At()
299
		if b.Lower < 0 && b.Upper > 0 {
300
			switch {
301
			case len(h.NegativeBuckets) == 0 && len(h.PositiveBuckets) > 0:
302
				// This is the zero bucket and the histogram has only
303
				// positive buckets. So we consider 0 to be the lower
304
				// bound.
305
				b.Lower = 0
306
			case len(h.PositiveBuckets) == 0 && len(h.NegativeBuckets) > 0:
307
				// This is in the zero bucket and the histogram has only
308
				// negative buckets. So we consider 0 to be the upper
309
				// bound.
310
				b.Upper = 0
311
			}
312
		}
313
		if !lowerSet && b.Lower >= lower {
314
			lowerRank = rank
315
			lowerSet = true
316
		}
317
		if !upperSet && b.Lower >= upper {
318
			upperRank = rank
319
			upperSet = true
320
		}
321
		if lowerSet && upperSet {
322
			break
323
		}
324
		if !lowerSet && b.Lower < lower && b.Upper > lower {
325
			lowerRank = rank + b.Count*(lower-b.Lower)/(b.Upper-b.Lower)
326
			lowerSet = true
327
		}
328
		if !upperSet && b.Lower < upper && b.Upper > upper {
329
			upperRank = rank + b.Count*(upper-b.Lower)/(b.Upper-b.Lower)
330
			upperSet = true
331
		}
332
		if lowerSet && upperSet {
333
			break
334
		}
335
		rank += b.Count
336
	}
337
	if !lowerSet || lowerRank > h.Count {
338
		lowerRank = h.Count
339
	}
340
	if !upperSet || upperRank > h.Count {
341
		upperRank = h.Count
342
	}
343

344
	return (upperRank - lowerRank) / h.Count
345
}
346

347
// coalesceBuckets merges buckets with the same upper bound.
348
//
349
// The input buckets must be sorted.
350
func coalesceBuckets(buckets buckets) buckets {
351
	last := buckets[0]
352
	i := 0
353
	for _, b := range buckets[1:] {
354
		if b.upperBound == last.upperBound {
355
			last.count += b.count
356
		} else {
357
			buckets[i] = last
358
			last = b
359
			i++
360
		}
361
	}
362
	buckets[i] = last
363
	return buckets[:i+1]
364
}
365

366
// The assumption that bucket counts increase monotonically with increasing
367
// upperBound may be violated during:
368
//
369
//   - Circumstances where data is already inconsistent at the target's side.
370
//   - Ingestion via the remote write receiver that Prometheus implements.
371
//   - Optimisation of query execution where precision is sacrificed for other
372
//     benefits, not by Prometheus but by systems built on top of it.
373
//   - Circumstances where floating point precision errors accumulate.
374
//
375
// Monotonicity is usually guaranteed because if a bucket with upper bound
376
// u1 has count c1, then any bucket with a higher upper bound u > u1 must
377
// have counted all c1 observations and perhaps more, so that c >= c1.
378
//
379
// bucketQuantile depends on that monotonicity to do a binary search for the
380
// bucket with the φ-quantile count, so breaking the monotonicity
381
// guarantee causes bucketQuantile() to return undefined (nonsense) results.
382
//
383
// As a somewhat hacky solution, we first silently ignore any numerically
384
// insignificant (relative delta below the requested tolerance and likely to
385
// be from floating point precision errors) differences between successive
386
// buckets regardless of the direction. Then we calculate the "envelope" of
387
// the histogram buckets, essentially removing any decreases in the count
388
// between successive buckets.
389
//
390
// We return a bool to indicate if this monotonicity was forced or not, and
391
// another bool to indicate if small deltas were ignored or not.
392
func ensureMonotonicAndIgnoreSmallDeltas(buckets buckets, tolerance float64) (bool, bool) {
393
	var forcedMonotonic, fixedPrecision bool
394
	prev := buckets[0].count
395
	for i := 1; i < len(buckets); i++ {
396
		curr := buckets[i].count // Assumed always positive.
397
		if curr == prev {
398
			// No correction needed if the counts are identical between buckets.
399
			continue
400
		}
401
		if almost.Equal(prev, curr, tolerance) {
402
			// Silently correct numerically insignificant differences from floating
403
			// point precision errors, regardless of direction.
404
			// Do not update the 'prev' value as we are ignoring the difference.
405
			buckets[i].count = prev
406
			fixedPrecision = true
407
			continue
408
		}
409
		if curr < prev {
410
			// Force monotonicity by removing any decreases regardless of magnitude.
411
			// Do not update the 'prev' value as we are ignoring the decrease.
412
			buckets[i].count = prev
413
			forcedMonotonic = true
414
			continue
415
		}
416
		prev = curr
417
	}
418
	return forcedMonotonic, fixedPrecision
419
}
420

421
// quantile calculates the given quantile of a vector of samples.
422
//
423
// The Vector will be sorted.
424
// If 'values' has zero elements, NaN is returned.
425
// If q==NaN, NaN is returned.
426
// If q<0, -Inf is returned.
427
// If q>1, +Inf is returned.
428
func quantile(q float64, values vectorByValueHeap) float64 {
429
	if len(values) == 0 || math.IsNaN(q) {
430
		return math.NaN()
431
	}
432
	if q < 0 {
433
		return math.Inf(-1)
434
	}
435
	if q > 1 {
436
		return math.Inf(+1)
437
	}
438
	sort.Sort(values)
439

440
	n := float64(len(values))
441
	// When the quantile lies between two samples,
442
	// we use a weighted average of the two samples.
443
	rank := q * (n - 1)
444

445
	lowerIndex := math.Max(0, math.Floor(rank))
446
	upperIndex := math.Min(n-1, lowerIndex+1)
447

448
	weight := rank - math.Floor(rank)
449
	return values[int(lowerIndex)].F*(1-weight) + values[int(upperIndex)].F*weight
450
}
451
prometheus

Использование cookies