cubefs

Форк
0
/
prometheus_metric.go 
257 строк · 8.5 Кб
1
// Copyright 2022 The CubeFS Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12
// implied. See the License for the specific language governing
13
// permissions and limitations under the License.
14

15
package auditlog
16

17
import (
18
	"fmt"
19
	"os"
20
	"sort"
21
	"strconv"
22
	"strings"
23

24
	"github.com/cubefs/cubefs/blobstore/util/log"
25
	"github.com/prometheus/client_golang/prometheus"
26
)
27

28
// Buckets holds the upper bounds of the response-duration histogram
// (the metric is named "..._response_duration_ms").
var Buckets = []float64{1, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000}

// Label names shared by the collectors declared in this file.
var (
	// hostLabel is the label that carries the reporting host name.
	hostLabel = "host"
	// uidKey is the label that carries the user id on the error-code counter.
	uidKey = "uid"
)
34

35
// MetricLabelNames lists label names known to the audit-log metrics.
// NOTE(review): it contains names ("deleteAfterDays", "country", "region",
// "isp") that no collector declared in this file uses; presumably they are
// consumed elsewhere — confirm before trimming.
var MetricLabelNames = []string{
	hostLabel,
	"service",
	"team",
	"tag",
	"api",
	"method",
	"code",
	"reqlength",
	"resplength",
	"deleteAfterDays",
	"idc",
	"xwarn",
	"country",
	"region",
	"isp",
}
36

37
// PrometheusConfig configures the PrometheusSender: static label values and
// switches that select which metrics are recorded for each audit-log line.
type PrometheusConfig struct {
	// Idc is attached to every collector as the constant "idc" label.
	Idc string `json:"idc"`
	// Service is the value of the "service" label.
	Service string `json:"service"`
	// Tag is a comma-separated list of static tags; it is merged with the
	// tags derived from each log entry to form the "tag" label.
	Tag string `json:"tag"`
	// Team is the value of the "team" label.
	Team string `json:"team"`

	// SetDefaultSwitch is not read in this file.
	// NOTE(review): presumably consumed by the code that builds the config.
	SetDefaultSwitch bool `json:"set_default_switch"`
	// EnableHttpMethod is not read in this file.
	// NOTE(review): confirm where (or whether) it takes effect.
	EnableHttpMethod bool `json:"enable_http_method"`
	// DisableApi leaves the "api" label empty when true.
	DisableApi bool `json:"disable_api"`
	// EnableReqLengthCnt turns on the request-length counter.
	EnableReqLengthCnt bool `json:"enable_req_length_cnt"`
	// EnableRespLengthCnt turns on the response-length counter.
	EnableRespLengthCnt bool `json:"enable_resp_length_cnt"`
	// EnableRespDuration turns on the response-duration histogram, which is
	// only observed for 2xx and 499 status codes.
	EnableRespDuration bool `json:"enable_resp_duration"`
	// EnableXWarnCnt turns on the xwarn counter.
	EnableXWarnCnt bool `json:"enable_xwarn_cnt"`
	// MaxApiLevel is forwarded to apiName when the "api" label is computed.
	MaxApiLevel int `json:"max_api_level"`
	// XWarns is the configured xwarn list that an entry's xwarns are matched
	// against.
	XWarns []string `json:"xwarns"`
	// ErrCodes marks the status codes (value true) that are additionally
	// counted by the error-code counter.
	ErrCodes map[string]bool `json:"resp_err_codes"`
	// SizeBuckets are the boundaries used to build the "reqlength" and
	// "resplength" labels. They are binary-searched, so they are expected to
	// be sorted in ascending order.
	SizeBuckets []int64 `json:"size_buckets"`
}
55

56
type PrometheusSender struct {
57
	hostname                string
58
	logParser               func(string) (LogEntry, error)
59
	responseCodeCounter     *prometheus.CounterVec
60
	responseErrCodeCounter  *prometheus.CounterVec
61
	responseDurationCounter *prometheus.HistogramVec
62
	responseLengthCounter   *prometheus.CounterVec
63
	requestLengthCounter    *prometheus.CounterVec
64
	xwarnCounter            *prometheus.CounterVec
65

66
	PrometheusConfig
67
}
68

69
func NewPrometheusSender(conf PrometheusConfig) (ps *PrometheusSender) {
70
	constLabels := map[string]string{"idc": conf.Idc}
71
	subsystem := "service"
72
	responseCodeCounter := getResponseCounterVec(subsystem, constLabels)
73
	responseErrCodeCounter := getResponseErrCounterVec(subsystem, constLabels)
74
	responseDurationCounter := getResponseDurationVec(subsystem, constLabels)
75
	responseLengthCounter := getResponseLengthCounterVec(subsystem, constLabels)
76
	requestLengthCounter := getRequestLengthCounterVec(subsystem, constLabels)
77
	xwarnCounter := getXwarnCountVec(constLabels)
78

79
	prometheus.MustRegister(responseCodeCounter)
80
	prometheus.MustRegister(responseErrCodeCounter)
81
	prometheus.MustRegister(responseDurationCounter)
82
	prometheus.MustRegister(responseLengthCounter)
83
	prometheus.MustRegister(requestLengthCounter)
84
	prometheus.MustRegister(xwarnCounter)
85

86
	hostname, err := os.Hostname()
87
	if err != nil {
88
		panic(fmt.Sprintf("get hostname failed, err: %s", err.Error()))
89
	}
90

91
	ps = &PrometheusSender{
92
		hostname:                hostname,
93
		PrometheusConfig:        conf,
94
		logParser:               ParseReqlog,
95
		responseCodeCounter:     responseCodeCounter,
96
		responseErrCodeCounter:  responseErrCodeCounter,
97
		responseDurationCounter: responseDurationCounter,
98
		responseLengthCounter:   responseLengthCounter,
99
		requestLengthCounter:    requestLengthCounter,
100
		xwarnCounter:            xwarnCounter,
101
	}
102
	return ps
103
}
104

105
// Send inherit from Sender
106
func (ps *PrometheusSender) Send(raw []byte) error {
107
	line := string(raw)
108
	ps.parseLine(line, ps.hostname)
109
	return nil
110
}
111

112
// parseLine parses one audit-log line and updates the collectors according to
// the sender's configuration.
//
// line is the raw log line; host is the value of the "host" label. A line the
// parser rejects is logged at debug level and otherwise ignored.
func (ps *PrometheusSender) parseLine(line, host string) {
	entry, err := ps.logParser(line)
	if err != nil {
		log.Debugf("logParser failed, %s, %s", line, err.Error())
		return
	}

	method := entry.Method()
	service := entry.Service()
	api := ""
	if !ps.DisableApi {
		params := entry.ReqParams()
		api = apiName(service, method, entry.Path(), entry.ReqHost(), params, ps.MaxApiLevel, entry.ApiName())
	}

	requestLength := entry.ReqLength()
	responseLength := entry.RespLength()

	// The "tag" label is the sorted, de-duplicated union of the configured
	// static tags and the tags derived from the entry's xlogs.
	var tags []string
	if ps.Tag != "" {
		tags = strings.Split(ps.Tag, ",")
	}
	tags = append(tags, genXlogTags(service, entry.Xlogs(), responseLength)...)
	tag := strings.Join(sortAndUniq(tags), ",")
	statusCode := entry.Code()

	ps.responseCodeCounter.WithLabelValues(host, ps.Service, ps.Team, tag, api, method, statusCode).Inc()
	if ps.isErrCode(statusCode) {
		uid := strconv.FormatUint(uint64(entry.Uid()), 10)
		ps.responseErrCodeCounter.WithLabelValues(host, ps.Service, ps.Team, tag, api, method, uid, statusCode).Inc()
	}

	if ps.EnableReqLengthCnt {
		ps.requestLengthCounter.WithLabelValues(host, ps.Service, ps.Team, tag, api, method, statusCode).Add(float64(requestLength))
	}
	if ps.EnableRespLengthCnt {
		ps.responseLengthCounter.WithLabelValues(host, ps.Service, ps.Team, tag, api, method, statusCode).Add(float64(responseLength))
	}
	// Durations are only observed for 2xx responses and for status 499.
	if ps.EnableRespDuration && (strings.HasPrefix(statusCode, "2") || statusCode == "499") {
		reqlengthTag := ps.getSizeTag(requestLength)
		resplengthTag := ps.getSizeTag(responseLength)
		// NOTE(review): the 1e4 divisor implies RespTime is in 100ns units — confirm.
		respTimeMs := float64(entry.RespTime()) / 1e4
		ps.responseDurationCounter.WithLabelValues(host, ps.Service, ps.Team, tag, api, method, statusCode, reqlengthTag, resplengthTag).Observe(respTimeMs)
	}

	if !ps.EnableXWarnCnt {
		return
	}
	// XWarns is only available on *RequestRow. Use a checked assertion so a
	// parser returning another LogEntry implementation cannot cause a panic.
	adRow, ok := entry.(*RequestRow)
	if !ok {
		log.Debugf("xwarn metrics skipped, unexpected log entry type %T", entry)
		return
	}
	xwarns := adRow.XWarns()
	if len(xwarns) == 0 {
		return
	}
	for _, hit := range hitXWarns(xwarns, ps.XWarns) {
		ps.xwarnCounter.WithLabelValues(host, ps.Service, ps.Team, hit, statusCode).Add(1)
	}
}
170

171
// isErrCode reports whether code is present in ErrCodes with a true value.
// A missing key, a false value and a nil map all yield false.
func (ps *PrometheusSender) isErrCode(code string) bool {
	enabled, found := ps.ErrCodes[code]
	return found && enabled
}
174

175
// getSizeTag maps size onto a label derived from SizeBuckets, which must be
// sorted in ascending order because it is binary-searched.
//
// Results:
//   - ""     when no buckets are configured;
//   - "0"    when size is not greater than the first bucket;
//   - "a_b"  when a < size <= b for adjacent buckets a and b;
//   - "max_" when size is greater than the last bucket.
func (ps *PrometheusSender) getSizeTag(size int64) string {
	bounds := ps.SizeBuckets
	n := len(bounds)
	if n == 0 {
		return ""
	}

	// idx is the first bucket whose bound is >= size, or n if there is none.
	idx := sort.Search(n, func(k int) bool { return bounds[k] >= size })

	switch {
	case idx == 0:
		return "0"
	case idx >= n:
		return strconv.FormatInt(bounds[n-1], 10) + "_"
	default:
		lower := strconv.FormatInt(bounds[idx-1], 10)
		upper := strconv.FormatInt(bounds[idx], 10)
		return lower + "_" + upper
	}
}
190

191
func getResponseCounterVec(logtype string, constLabels map[string]string) *prometheus.CounterVec {
192
	return prometheus.NewCounterVec(
193
		prometheus.CounterOpts{
194
			Name:        logtype + "_response_code",
195
			Help:        logtype + " response code",
196
			ConstLabels: constLabels,
197
		},
198
		[]string{hostLabel, "service", "team", "tag", "api", "method", "code"},
199
	)
200
}
201

202
func getResponseErrCounterVec(logtype string, constLabels map[string]string) *prometheus.CounterVec {
203
	return prometheus.NewCounterVec(
204
		prometheus.CounterOpts{
205
			Name:        logtype + "_response_err_code",
206
			Help:        logtype + " response err code",
207
			ConstLabels: constLabels,
208
		},
209
		[]string{hostLabel, "service", "team", "tag", "api", "method", uidKey, "code"},
210
	)
211
}
212

213
func getRequestLengthCounterVec(logtype string, constLabels map[string]string) *prometheus.CounterVec {
214
	return prometheus.NewCounterVec(
215
		prometheus.CounterOpts{
216
			Name:        logtype + "_request_length",
217
			Help:        logtype + " request length",
218
			ConstLabels: constLabels,
219
		},
220
		[]string{hostLabel, "service", "team", "tag", "api", "method", "code"},
221
	)
222
}
223

224
func getResponseLengthCounterVec(logtype string, constLabels map[string]string) *prometheus.CounterVec {
225
	return prometheus.NewCounterVec(
226
		prometheus.CounterOpts{
227
			Name:        logtype + "_response_length",
228
			Help:        logtype + " response length",
229
			ConstLabels: constLabels,
230
		},
231
		[]string{hostLabel, "service", "team", "tag", "api", "method", "code"},
232
	)
233
}
234

235
func getResponseDurationVec(logtype string, constLabels map[string]string) *prometheus.HistogramVec {
236
	buckets := Buckets
237
	return prometheus.NewHistogramVec(
238
		prometheus.HistogramOpts{
239
			Name:        logtype + "_response_duration_ms",
240
			Help:        logtype + " response duration ms",
241
			Buckets:     buckets,
242
			ConstLabels: constLabels,
243
		},
244
		[]string{hostLabel, "service", "team", "tag", "api", "method", "code", "reqlength", "resplength"},
245
	)
246
}
247

248
func getXwarnCountVec(constLabels map[string]string) *prometheus.CounterVec {
249
	return prometheus.NewCounterVec(
250
		prometheus.CounterOpts{
251
			Name:        "service_xwarn_count",
252
			Help:        "service xwarn count",
253
			ConstLabels: constLabels,
254
		},
255
		[]string{hostLabel, "service", "team", "xwarn", "code"},
256
	)
257
}
258

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.