1
// Copyright 2022 The CubeFS Authors.
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
7
// http://www.apache.org/licenses/LICENSE-2.0
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12
// implied. See the License for the specific language governing
13
// permissions and limitations under the License.
24
"github.com/cubefs/cubefs/blobstore/util/log"
25
"github.com/prometheus/client_golang/prometheus"
29
// Buckets is a package-level slice of float64 boundaries.
// NOTE(review): presumably these are the histogram bucket upper bounds (in
// milliseconds) for the response-duration metric; the line that wires them
// into the histogram is not visible in this excerpt — confirm.
Buckets = []float64{1, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000}
35
// MetricLabelNames is an exported list of metric label names. The metric
// vectors constructed in this file pass their own literal label lists and do
// not reference this slice.
// NOTE(review): several entries (e.g. "deleteAfterDays", "country", "region",
// "isp") are not used by any constructor visible here — confirm who consumes it.
var MetricLabelNames = []string{hostLabel, "service", "team", "tag", "api", "method", "code", "reqlength", "resplength", "deleteAfterDays", "idc", "xwarn", "country", "region", "isp"}
37
// PrometheusConfig holds the JSON-configurable settings of a PrometheusSender:
// static label values, feature switches for the optional metrics, and the
// lookup tables used while turning a log entry into metric samples.
type PrometheusConfig struct {
38
// Idc is attached to every metric as the constant label "idc".
Idc string `json:"idc"`
39
// Service is used as the "service" label value on every sample.
Service string `json:"service"`
40
// Tag is split on "," to form the initial tag list of each sample; the
// final, sorted and de-duplicated list is joined back into the "tag" label.
Tag string `json:"tag"`
41
// Team is used as the "team" label value on every sample.
Team string `json:"team"`
43
// NOTE(review): no use of SetDefaultSwitch is visible in this excerpt.
SetDefaultSwitch bool `json:"set_default_switch"`
44
// NOTE(review): no use of EnableHttpMethod is visible in this excerpt.
EnableHttpMethod bool `json:"enable_http_method"`
45
// NOTE(review): presumably suppresses the "api" label computation; the
// line that reads it is not visible in this excerpt — confirm.
DisableApi bool `json:"disable_api"`
46
// EnableReqLengthCnt turns on the request-length counter.
EnableReqLengthCnt bool `json:"enable_req_length_cnt"`
47
// EnableRespLengthCnt turns on the response-length counter.
EnableRespLengthCnt bool `json:"enable_resp_length_cnt"`
48
// EnableRespDuration turns on the response-duration histogram.
EnableRespDuration bool `json:"enable_resp_duration"`
49
// EnableXWarnCnt turns on the xwarn counter.
EnableXWarnCnt bool `json:"enable_xwarn_cnt"`
50
// MaxApiLevel is forwarded to apiName when deriving the "api" label.
MaxApiLevel int `json:"max_api_level"`
51
// XWarns is the configured set that an entry's xwarns are matched against
// (via hitXWarns); each hit is counted separately.
XWarns []string `json:"xwarns"`
52
// ErrCodes maps a status-code string to true when samples with that code
// must also be recorded in the error-code counter.
ErrCodes map[string]bool `json:"resp_err_codes"`
53
// SizeBuckets are the size boundaries used to derive the "reqlength" and
// "resplength" labels. They are searched with sort.Search, which requires
// ascending order.
SizeBuckets []int64 `json:"size_buckets"`
56
// PrometheusSender converts parsed request-log lines into Prometheus samples.
// NOTE(review): the constructor also sets a PrometheusConfig member and Send
// reads ps.hostname; those declarations are not visible in this excerpt.
type PrometheusSender struct {
58
// logParser turns one raw log line into a LogEntry.
logParser func(string) (LogEntry, error)
59
// responseCodeCounter counts every parsed request by status code.
responseCodeCounter *prometheus.CounterVec
60
// responseErrCodeCounter counts requests whose code is listed in ErrCodes,
// with an additional per-uid label.
responseErrCodeCounter *prometheus.CounterVec
61
// responseDurationCounter is a histogram (despite the name) of response
// times in milliseconds.
responseDurationCounter *prometheus.HistogramVec
62
// responseLengthCounter accumulates response lengths.
responseLengthCounter *prometheus.CounterVec
63
// requestLengthCounter accumulates request lengths.
requestLengthCounter *prometheus.CounterVec
64
// xwarnCounter counts matched xwarn values.
xwarnCounter *prometheus.CounterVec
69
// NewPrometheusSender builds a PrometheusSender from conf. It creates the six
// metric vectors (each carrying the constant label "idc" = conf.Idc),
// registers them through prometheus.MustRegister, and looks up the local
// hostname.
//
// prometheus.MustRegister panics when registration fails, so constructing a
// second sender in the same process is expected to panic on the duplicate
// collectors. The function also panics when the hostname lookup fails.
// NOTE(review): this excerpt skips some source lines (for example the
// condition guarding the hostname panic); comments describe only what is
// visible.
func NewPrometheusSender(conf PrometheusConfig) (ps *PrometheusSender) {
70
constLabels := map[string]string{"idc": conf.Idc}
71
// subsystem becomes the metric-name prefix, e.g. "service_response_code".
subsystem := "service"
72
responseCodeCounter := getResponseCounterVec(subsystem, constLabels)
73
responseErrCodeCounter := getResponseErrCounterVec(subsystem, constLabels)
74
responseDurationCounter := getResponseDurationVec(subsystem, constLabels)
75
responseLengthCounter := getResponseLengthCounterVec(subsystem, constLabels)
76
requestLengthCounter := getRequestLengthCounterVec(subsystem, constLabels)
77
// The xwarn metric name is fixed inside getXwarnCountVec, so no prefix is passed.
xwarnCounter := getXwarnCountVec(constLabels)
79
// All vectors are registered unconditionally, independent of the Enable*
// switches in conf.
prometheus.MustRegister(responseCodeCounter)
80
prometheus.MustRegister(responseErrCodeCounter)
81
prometheus.MustRegister(responseDurationCounter)
82
prometheus.MustRegister(responseLengthCounter)
83
prometheus.MustRegister(requestLengthCounter)
84
prometheus.MustRegister(xwarnCounter)
86
hostname, err := os.Hostname()
88
panic(fmt.Sprintf("get hostname failed, err: %s", err.Error()))
91
ps = &PrometheusSender{
93
PrometheusConfig: conf,
94
// ParseReqlog is the log parser used for every line.
logParser: ParseReqlog,
95
responseCodeCounter: responseCodeCounter,
96
responseErrCodeCounter: responseErrCodeCounter,
97
responseDurationCounter: responseDurationCounter,
98
responseLengthCounter: responseLengthCounter,
99
requestLengthCounter: requestLengthCounter,
100
xwarnCounter: xwarnCounter,
105
// Send implements the Sender interface: it feeds log data from raw into
// parseLine, labelling the resulting samples with the sender's hostname.
// NOTE(review): the code that derives `line` from raw and the return
// statement are not visible in this excerpt.
106
func (ps *PrometheusSender) Send(raw []byte) error {
108
ps.parseLine(line, ps.hostname)
112
// parseLine parses one request-log line and updates the metrics for it.
// host is used as the first label value of every sample.
//
// It returns nothing; a parse failure is reported only through a debug log.
// NOTE(review): this excerpt skips some source lines (error guard, api
// declaration, some conditionals); comments describe only what is visible.
func (ps *PrometheusSender) parseLine(line, host string) {
113
entry, err := ps.logParser(line)
115
log.Debugf("logParser failed, %s, %s", line, err.Error())
119
method := entry.Method()
120
service := entry.Service()
123
params := entry.ReqParams()
124
api = apiName(service, method, entry.Path(), entry.ReqHost(), params, ps.MaxApiLevel, entry.ApiName())
129
// Start from the statically configured, comma-separated tags ...
tags = strings.Split(ps.Tag, ",")
131
// ... add the tags derived from the entry's xlogs, then sort and
// de-duplicate so the joined "tag" label value is stable.
tmp := genXlogTags(service, entry.Xlogs(), entry.RespLength())
132
tags = append(tags, tmp...)
133
tags = sortAndUniq(tags)
134
tag := strings.Join(tags, ",")
135
statusCode := entry.Code()
137
// Every parsed entry is counted by status code.
ps.responseCodeCounter.WithLabelValues(host, ps.Service, ps.Team, tag, api, method, statusCode).Inc()
138
// Configured error codes are additionally counted per uid.
if ps.isErrCode(statusCode) {
139
uid := strconv.FormatUint(uint64(entry.Uid()), 10)
140
ps.responseErrCodeCounter.WithLabelValues(host, ps.Service, ps.Team, tag, api, method, uid, statusCode).Inc()
143
requestLength := entry.ReqLength()
144
responseLength := entry.RespLength()
145
if ps.EnableReqLengthCnt {
146
ps.requestLengthCounter.WithLabelValues(host, ps.Service, ps.Team, tag, api, method, statusCode).Add(float64(requestLength))
148
if ps.EnableRespLengthCnt {
149
ps.responseLengthCounter.WithLabelValues(host, ps.Service, ps.Team, tag, api, method, statusCode).Add(float64(responseLength))
151
// Durations are observed only for status codes starting with "2" and for "499".
if ps.EnableRespDuration && (strings.HasPrefix(statusCode, "2") || statusCode == "499") {
152
reqlengthTag := ps.getSizeTag(requestLength)
153
resplengthTag := ps.getSizeTag(responseLength)
154
// NOTE(review): dividing by 1e4 to get milliseconds implies RespTime is
// in 100-nanosecond units — confirm against the LogEntry implementation.
respTimeMs := float64(entry.RespTime()) / 1e4
155
ps.responseDurationCounter.WithLabelValues(host, ps.Service, ps.Team, tag, api, method, statusCode, reqlengthTag, resplengthTag).Observe(respTimeMs)
157
if ps.EnableXWarnCnt {
158
// Single-value type assertion: this panics if entry's concrete type is
// not *RequestRow.
adRow := entry.(*RequestRow)
159
xwarns := adRow.XWarns()
160
if len(xwarns) != 0 {
161
hits := hitXWarns(xwarns, ps.XWarns)
163
// One increment per matched xwarn value.
for _, hit := range hits {
164
ps.xwarnCounter.WithLabelValues(host, ps.Service, ps.Team, hit, statusCode).Add(1)
171
// isErrCode reports whether code is marked true in the configured ErrCodes
// map. A missing key, or a nil map, yields false.
func (ps *PrometheusSender) isErrCode(code string) bool {
172
return ps.ErrCodes[code]
175
// getSizeTag maps size to a label string describing which configured size
// bucket it falls into.
//
// It takes a separate path when no SizeBuckets are configured. Otherwise it
// finds the first bucket boundary that is >= size. A size above the last
// boundary yields "<last>_"; a size between two boundaries yields
// "<previous>_<current>".
// NOTE(review): the lines between the search and the upper-bound check are
// not visible in this excerpt; they presumably handle i == 0, which would
// otherwise index SizeBuckets[-1] — confirm.
func (ps *PrometheusSender) getSizeTag(size int64) string {
176
if len(ps.SizeBuckets) == 0 {
179
// sort.Search requires SizeBuckets to be sorted in ascending order.
i := sort.Search(len(ps.SizeBuckets), func(i int) bool {
180
return ps.SizeBuckets[i] >= size
185
// size is larger than every boundary: open-ended bucket.
if i >= len(ps.SizeBuckets) {
186
return strconv.FormatInt(ps.SizeBuckets[i-1], 10) + "_"
188
return strconv.FormatInt(ps.SizeBuckets[i-1], 10) + "_" + strconv.FormatInt(ps.SizeBuckets[i], 10)
191
// getResponseCounterVec creates (without registering) the CounterVec named
// logtype + "_response_code", carrying constLabels as constant labels and the
// variable labels host, service, team, tag, api, method and code.
func getResponseCounterVec(logtype string, constLabels map[string]string) *prometheus.CounterVec {
192
return prometheus.NewCounterVec(
193
prometheus.CounterOpts{
194
Name: logtype + "_response_code",
195
Help: logtype + " response code",
196
ConstLabels: constLabels,
198
// The order here must match the WithLabelValues call sites.
[]string{hostLabel, "service", "team", "tag", "api", "method", "code"},
202
// getResponseErrCounterVec creates (without registering) the CounterVec named
// logtype + "_response_err_code". It has the same labels as the response-code
// counter plus the uidKey label, placed before "code".
func getResponseErrCounterVec(logtype string, constLabels map[string]string) *prometheus.CounterVec {
203
return prometheus.NewCounterVec(
204
prometheus.CounterOpts{
205
Name: logtype + "_response_err_code",
206
Help: logtype + " response err code",
207
ConstLabels: constLabels,
209
// The order here must match the WithLabelValues call sites.
[]string{hostLabel, "service", "team", "tag", "api", "method", uidKey, "code"},
213
// getRequestLengthCounterVec creates (without registering) the CounterVec
// named logtype + "_request_length", carrying constLabels as constant labels
// and the variable labels host, service, team, tag, api, method and code.
func getRequestLengthCounterVec(logtype string, constLabels map[string]string) *prometheus.CounterVec {
214
return prometheus.NewCounterVec(
215
prometheus.CounterOpts{
216
Name: logtype + "_request_length",
217
Help: logtype + " request length",
218
ConstLabels: constLabels,
220
// The order here must match the WithLabelValues call sites.
[]string{hostLabel, "service", "team", "tag", "api", "method", "code"},
224
// getResponseLengthCounterVec creates (without registering) the CounterVec
// named logtype + "_response_length", carrying constLabels as constant labels
// and the variable labels host, service, team, tag, api, method and code.
func getResponseLengthCounterVec(logtype string, constLabels map[string]string) *prometheus.CounterVec {
225
return prometheus.NewCounterVec(
226
prometheus.CounterOpts{
227
Name: logtype + "_response_length",
228
Help: logtype + " response length",
229
ConstLabels: constLabels,
231
// The order here must match the WithLabelValues call sites.
[]string{hostLabel, "service", "team", "tag", "api", "method", "code"},
235
// getResponseDurationVec creates (without registering) the HistogramVec named
// logtype + "_response_duration_ms". On top of the common labels it carries
// "reqlength" and "resplength", which bucket the request and response sizes.
// NOTE(review): the histogram's bucket configuration is not visible in this
// excerpt — confirm which boundaries are used.
func getResponseDurationVec(logtype string, constLabels map[string]string) *prometheus.HistogramVec {
237
return prometheus.NewHistogramVec(
238
prometheus.HistogramOpts{
239
Name: logtype + "_response_duration_ms",
240
Help: logtype + " response duration ms",
242
ConstLabels: constLabels,
244
// The order here must match the WithLabelValues call sites.
[]string{hostLabel, "service", "team", "tag", "api", "method", "code", "reqlength", "resplength"},
248
// getXwarnCountVec creates (without registering) the CounterVec with the
// fixed name "service_xwarn_count". Unlike the other constructors it takes no
// name prefix. Its variable labels are host, service, team, xwarn and code.
func getXwarnCountVec(constLabels map[string]string) *prometheus.CounterVec {
249
return prometheus.NewCounterVec(
250
prometheus.CounterOpts{
251
Name: "service_xwarn_count",
252
Help: "service xwarn count",
253
ConstLabels: constLabels,
255
// The order here must match the WithLabelValues call sites.
[]string{hostLabel, "service", "team", "xwarn", "code"},