Dragonfly2
454 строки · 15.6 Кб
/*
 * Copyright 2020 The Dragonfly Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package metrics18
19import (20"net/http"21
22grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"23"github.com/prometheus/client_golang/prometheus"24"github.com/prometheus/client_golang/prometheus/promauto"25"github.com/prometheus/client_golang/prometheus/promhttp"26"google.golang.org/grpc"27
28"d7y.io/dragonfly/v2/pkg/types"29"d7y.io/dragonfly/v2/scheduler/config"30"d7y.io/dragonfly/v2/version"31)
32
// Traffic type names for host traffic metrics.
var (
	// HostTrafficUploadType is the upload traffic type for host traffic metrics.
	HostTrafficUploadType = "upload"

	// HostTrafficDownloadType is the download traffic type for host traffic metrics.
	HostTrafficDownloadType = "download"
)
40
41// Variables declared for metrics.
42var (43AnnouncePeerCount = promauto.NewCounter(prometheus.CounterOpts{44Namespace: types.MetricsNamespace,45Subsystem: types.SchedulerMetricsName,46Name: "announce_peer_total",47Help: "Counter of the number of the announcing peer.",48})49
50AnnouncePeerFailureCount = promauto.NewCounter(prometheus.CounterOpts{51Namespace: types.MetricsNamespace,52Subsystem: types.SchedulerMetricsName,53Name: "announce_peer_failure_total",54Help: "Counter of the number of failed of the announcing peer.",55})56
57StatPeerCount = promauto.NewCounter(prometheus.CounterOpts{58Namespace: types.MetricsNamespace,59Subsystem: types.SchedulerMetricsName,60Name: "stat_peer_total",61Help: "Counter of the number of the stat peer.",62})63
64StatPeerFailureCount = promauto.NewCounter(prometheus.CounterOpts{65Namespace: types.MetricsNamespace,66Subsystem: types.SchedulerMetricsName,67Name: "stat_peer_failure_total",68Help: "Counter of the number of failed of the stat peer.",69})70
71LeavePeerCount = promauto.NewCounter(prometheus.CounterOpts{72Namespace: types.MetricsNamespace,73Subsystem: types.SchedulerMetricsName,74Name: "leave_peer_total",75Help: "Counter of the number of the leaving peer.",76})77
78LeavePeerFailureCount = promauto.NewCounter(prometheus.CounterOpts{79Namespace: types.MetricsNamespace,80Subsystem: types.SchedulerMetricsName,81Name: "leave_peer_failure_total",82Help: "Counter of the number of failed of the leaving peer.",83})84
85ExchangePeerCount = promauto.NewCounter(prometheus.CounterOpts{86Namespace: types.MetricsNamespace,87Subsystem: types.SchedulerMetricsName,88Name: "exchange_peer_total",89Help: "Counter of the number of the exchanging peer.",90})91
92ExchangePeerFailureCount = promauto.NewCounter(prometheus.CounterOpts{93Namespace: types.MetricsNamespace,94Subsystem: types.SchedulerMetricsName,95Name: "exchange_peer_failure_total",96Help: "Counter of the number of failed of the exchanging peer.",97})98
99RegisterPeerCount = promauto.NewCounterVec(prometheus.CounterOpts{100Namespace: types.MetricsNamespace,101Subsystem: types.SchedulerMetricsName,102Name: "register_peer_total",103Help: "Counter of the number of the register peer.",104}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})105
106RegisterPeerFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{107Namespace: types.MetricsNamespace,108Subsystem: types.SchedulerMetricsName,109Name: "register_peer_failure_total",110Help: "Counter of the number of failed of the register peer.",111}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})112
113DownloadPeerStartedCount = promauto.NewCounterVec(prometheus.CounterOpts{114Namespace: types.MetricsNamespace,115Subsystem: types.SchedulerMetricsName,116Name: "download_peer_started_total",117Help: "Counter of the number of the download peer started.",118}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})119
120DownloadPeerStartedFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{121Namespace: types.MetricsNamespace,122Subsystem: types.SchedulerMetricsName,123Name: "download_peer_started_failure_total",124Help: "Counter of the number of failed of the download peer started.",125}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})126
127DownloadPeerBackToSourceStartedCount = promauto.NewCounterVec(prometheus.CounterOpts{128Namespace: types.MetricsNamespace,129Subsystem: types.SchedulerMetricsName,130Name: "download_peer_back_to_source_started_total",131Help: "Counter of the number of the download peer back-to-source started.",132}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})133
134DownloadPeerBackToSourceStartedFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{135Namespace: types.MetricsNamespace,136Subsystem: types.SchedulerMetricsName,137Name: "download_peer_back_to_source_started_failure_total",138Help: "Counter of the number of failed of the download peer back-to-source started.",139}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})140
141DownloadPeerCount = promauto.NewCounterVec(prometheus.CounterOpts{142Namespace: types.MetricsNamespace,143Subsystem: types.SchedulerMetricsName,144Name: "download_peer_finished_total",145Help: "Counter of the number of the download peer.",146}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})147
148DownloadPeerFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{149Namespace: types.MetricsNamespace,150Subsystem: types.SchedulerMetricsName,151Name: "download_peer_finished_failure_total",152Help: "Counter of the number of failed of the download peer.",153}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})154
155DownloadPeerBackToSourceFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{156Namespace: types.MetricsNamespace,157Subsystem: types.SchedulerMetricsName,158Name: "download_peer_back_to_source_finished_failure_total",159Help: "Counter of the number of failed of the download peer back-to-source.",160}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})161
162DownloadPieceCount = promauto.NewCounterVec(prometheus.CounterOpts{163Namespace: types.MetricsNamespace,164Subsystem: types.SchedulerMetricsName,165Name: "download_piece_finished_total",166Help: "Counter of the number of the download piece.",167}, []string{"traffic_type", "task_type", "task_tag", "task_app", "host_type"})168
169DownloadPieceFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{170Namespace: types.MetricsNamespace,171Subsystem: types.SchedulerMetricsName,172Name: "download_piece_finished_failure_total",173Help: "Counter of the number of failed of the download piece.",174}, []string{"traffic_type", "task_type", "task_tag", "task_app", "host_type"})175
176DownloadPieceBackToSourceFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{177Namespace: types.MetricsNamespace,178Subsystem: types.SchedulerMetricsName,179Name: "download_piece_back_to_source_finished_failure_total",180Help: "Counter of the number of failed of the download piece back-to-source.",181}, []string{"traffic_type", "task_type", "task_tag", "task_app", "host_type"})182
183StatTaskCount = promauto.NewCounter(prometheus.CounterOpts{184Namespace: types.MetricsNamespace,185Subsystem: types.SchedulerMetricsName,186Name: "stat_task_total",187Help: "Counter of the number of the stat task.",188})189
190StatTaskFailureCount = promauto.NewCounter(prometheus.CounterOpts{191Namespace: types.MetricsNamespace,192Subsystem: types.SchedulerMetricsName,193Name: "stat_task_failure_total",194Help: "Counter of the number of failed of the stat task.",195})196
197AnnounceHostCount = promauto.NewCounterVec(prometheus.CounterOpts{198Namespace: types.MetricsNamespace,199Subsystem: types.SchedulerMetricsName,200Name: "announce_host_total",201Help: "Counter of the number of the announce host.",202}, []string{"os", "platform", "platform_family", "platform_version",203"kernel_version", "git_version", "git_commit", "go_version", "build_platform"})204
205AnnounceHostFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{206Namespace: types.MetricsNamespace,207Subsystem: types.SchedulerMetricsName,208Name: "announce_host_failure_total",209Help: "Counter of the number of failed of the announce host.",210}, []string{"os", "platform", "platform_family", "platform_version",211"kernel_version", "git_version", "git_commit", "go_version", "build_platform"})212
213LeaveHostCount = promauto.NewCounter(prometheus.CounterOpts{214Namespace: types.MetricsNamespace,215Subsystem: types.SchedulerMetricsName,216Name: "leave_host_total",217Help: "Counter of the number of the leaving host.",218})219
220LeaveHostFailureCount = promauto.NewCounter(prometheus.CounterOpts{221Namespace: types.MetricsNamespace,222Subsystem: types.SchedulerMetricsName,223Name: "leave_host_failure_total",224Help: "Counter of the number of failed of the leaving host.",225})226
227SyncProbesCount = promauto.NewCounter(prometheus.CounterOpts{228Namespace: types.MetricsNamespace,229Subsystem: types.SchedulerMetricsName,230Name: "sync_probes_total",231Help: "Counter of the number of the synchronizing probes.",232})233
234SyncProbesFailureCount = promauto.NewCounter(prometheus.CounterOpts{235Namespace: types.MetricsNamespace,236Subsystem: types.SchedulerMetricsName,237Name: "sync_probes_failure_total",238Help: "Counter of the number of failed of the synchronizing probes.",239})240
241Traffic = promauto.NewCounterVec(prometheus.CounterOpts{242Namespace: types.MetricsNamespace,243Subsystem: types.SchedulerMetricsName,244Name: "traffic",245Help: "Counter of the number of traffic.",246}, []string{"type", "task_type", "task_tag", "task_app", "host_type"})247
248HostTraffic = promauto.NewCounterVec(prometheus.CounterOpts{249Namespace: types.MetricsNamespace,250Subsystem: types.SchedulerMetricsName,251Name: "host_traffic",252Help: "Counter of the number of per host traffic.",253}, []string{"type", "task_type", "task_tag", "task_app", "host_type", "host_id", "host_ip", "host_name"})254
255DownloadPeerDuration = promauto.NewSummaryVec(prometheus.SummaryOpts{256Namespace: types.MetricsNamespace,257Subsystem: types.SchedulerMetricsName,258Name: "download_peer_duration_milliseconds",259Help: "Summary of the time each peer downloading.",260Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},261}, []string{"task_size_level"})262
263ConcurrentScheduleGauge = promauto.NewGauge(prometheus.GaugeOpts{264Namespace: types.MetricsNamespace,265Subsystem: types.SchedulerMetricsName,266Name: "concurrent_schedule_total",267Help: "Gauge of the number of concurrent of the scheduling.",268})269
270VersionGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{271Namespace: types.MetricsNamespace,272Subsystem: types.SchedulerMetricsName,273Name: "version",274Help: "Version info of the service.",275}, []string{"major", "minor", "git_version", "git_commit", "platform", "build_time", "go_version", "go_tags", "go_gcflags"})276)
277
278func New(cfg *config.MetricsConfig, svr *grpc.Server) *http.Server {279grpc_prometheus.Register(svr)280
281mux := http.NewServeMux()282mux.Handle("/metrics", promhttp.Handler())283
284VersionGauge.WithLabelValues(version.Major, version.Minor, version.GitVersion, version.GitCommit, version.Platform, version.BuildTime, version.GoVersion, version.Gotags, version.Gogcflags).Set(1)285return &http.Server{286Addr: cfg.Addr,287Handler: mux,288}289}
290
// TaskSizeLevel is the level of the task size.
type TaskSizeLevel int

// taskSizeLevelLabels holds the string form of every defined level, indexed
// by the level's numeric value (0 through 20).
var taskSizeLevelLabels = [...]string{
	"0", "1", "2", "3", "4", "5", "6",
	"7", "8", "9", "10", "11", "12", "13",
	"14", "15", "16", "17", "18", "19", "20",
}

// String returns the string representation of the TaskSizeLevel.
//
// Levels 0 through 20 are rendered as their decimal value; any value outside
// that range is rendered as "0".
func (t TaskSizeLevel) String() string {
	if t < 0 || int(t) >= len(taskSizeLevelLabels) {
		return "0"
	}

	return taskSizeLevelLabels[t]
}
343
344const (345// TaskSizeLevel0 represents unknow size.346TaskSizeLevel0 TaskSizeLevel = iota347
348// TaskSizeLevel0 represents size range is from 0 to 1M.349TaskSizeLevel1
350
351// TaskSizeLevel1 represents size range is from 1M to 4M.352TaskSizeLevel2
353
354// TaskSizeLevel2 represents size range is from 4M to 8M.355TaskSizeLevel3
356
357// TaskSizeLevel3 represents size range is from 8M to 16M.358TaskSizeLevel4
359
360// TaskSizeLevel4 represents size range is from 16M to 32M.361TaskSizeLevel5
362
363// TaskSizeLevel5 represents size range is from 32M to 64M.364TaskSizeLevel6
365
366// TaskSizeLevel6 represents size range is from 64M to 128M.367TaskSizeLevel7
368
369// TaskSizeLevel7 represents size range is from 128M to 256M.370TaskSizeLevel8
371
372// TaskSizeLevel8 represents size range is from 256M to 512M.373TaskSizeLevel9
374
375// TaskSizeLevel9 represents size range is from 512M to 1G.376TaskSizeLevel10
377
378// TaskSizeLevel10 represents size range is from 1G to 4G.379TaskSizeLevel11
380
381// TaskSizeLevel11 represents size range is from 4G to 8G.382TaskSizeLevel12
383
384// TaskSizeLevel12 represents size range is from 8G to 16G.385TaskSizeLevel13
386
387// TaskSizeLevel13 represents size range is from 16G to 32G.388TaskSizeLevel14
389
390// TaskSizeLevel14 represents size range is from 32G to 64G.391TaskSizeLevel15
392
393// TaskSizeLevel15 represents size range is from 64G to 128G.394TaskSizeLevel16
395
396// TaskSizeLevel16 represents size range is from 128G to 256G.397TaskSizeLevel17
398
399// TaskSizeLevel17 represents size range is from 256G to 512G.400TaskSizeLevel18
401
402// TaskSizeLevel18 represents size range is from 512G to 1T.403TaskSizeLevel19
404
405// TaskSizeLevel20 represents size is greater than 1T.406TaskSizeLevel20
407)
408
409// CalculateSizeLevel calculates the size level according to the size.
410func CalculateSizeLevel(size int64) TaskSizeLevel {411if size <= 0 {412return TaskSizeLevel0413} else if size < 1024*1024 {414return TaskSizeLevel1415} else if size < 4*1024*1024 {416return TaskSizeLevel2417} else if size < 8*1024*1024 {418return TaskSizeLevel3419} else if size < 16*1024*1024 {420return TaskSizeLevel4421} else if size < 32*1024*1024 {422return TaskSizeLevel5423} else if size < 64*1024*1024 {424return TaskSizeLevel6425} else if size < 128*1024*1024 {426return TaskSizeLevel7427} else if size < 256*1024*1024 {428return TaskSizeLevel8429} else if size < 512*1024*1024 {430return TaskSizeLevel9431} else if size < 1024*1024*1024 {432return TaskSizeLevel10433} else if size < 4*1024*1024*1024 {434return TaskSizeLevel11435} else if size < 8*1024*1024*1024 {436return TaskSizeLevel12437} else if size < 16*1024*1024*1024 {438return TaskSizeLevel13439} else if size < 32*1024*1024*1024 {440return TaskSizeLevel14441} else if size < 64*1024*1024*1024 {442return TaskSizeLevel15443} else if size < 128*1024*1024*1024 {444return TaskSizeLevel16445} else if size < 256*1024*1024*1024 {446return TaskSizeLevel17447} else if size < 512*1024*1024*1024 {448return TaskSizeLevel18449} else if size < 1024*1024*1024*1024 {450return TaskSizeLevel19451} else {452return TaskSizeLevel20453}454}
455