Dragonfly2

Форк
0
454 строки · 15.6 Кб
1
/*
2
 *     Copyright 2020 The Dragonfly Authors
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at
7
 *
8
 *      http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16

17
package metrics
18

19
import (
20
	"net/http"
21

22
	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
23
	"github.com/prometheus/client_golang/prometheus"
24
	"github.com/prometheus/client_golang/prometheus/promauto"
25
	"github.com/prometheus/client_golang/prometheus/promhttp"
26
	"google.golang.org/grpc"
27

28
	"d7y.io/dragonfly/v2/pkg/types"
29
	"d7y.io/dragonfly/v2/scheduler/config"
30
	"d7y.io/dragonfly/v2/version"
31
)
32

33
// HostTrafficUploadType is the traffic type value that marks upload traffic
// in the host traffic metrics.
var HostTrafficUploadType = "upload"

// HostTrafficDownloadType is the traffic type value that marks download
// traffic in the host traffic metrics.
var HostTrafficDownloadType = "download"
40

41
// Variables declared for metrics.
42
var (
43
	AnnouncePeerCount = promauto.NewCounter(prometheus.CounterOpts{
44
		Namespace: types.MetricsNamespace,
45
		Subsystem: types.SchedulerMetricsName,
46
		Name:      "announce_peer_total",
47
		Help:      "Counter of the number of the announcing peer.",
48
	})
49

50
	AnnouncePeerFailureCount = promauto.NewCounter(prometheus.CounterOpts{
51
		Namespace: types.MetricsNamespace,
52
		Subsystem: types.SchedulerMetricsName,
53
		Name:      "announce_peer_failure_total",
54
		Help:      "Counter of the number of failed of the announcing peer.",
55
	})
56

57
	StatPeerCount = promauto.NewCounter(prometheus.CounterOpts{
58
		Namespace: types.MetricsNamespace,
59
		Subsystem: types.SchedulerMetricsName,
60
		Name:      "stat_peer_total",
61
		Help:      "Counter of the number of the stat peer.",
62
	})
63

64
	StatPeerFailureCount = promauto.NewCounter(prometheus.CounterOpts{
65
		Namespace: types.MetricsNamespace,
66
		Subsystem: types.SchedulerMetricsName,
67
		Name:      "stat_peer_failure_total",
68
		Help:      "Counter of the number of failed of the stat peer.",
69
	})
70

71
	LeavePeerCount = promauto.NewCounter(prometheus.CounterOpts{
72
		Namespace: types.MetricsNamespace,
73
		Subsystem: types.SchedulerMetricsName,
74
		Name:      "leave_peer_total",
75
		Help:      "Counter of the number of the leaving peer.",
76
	})
77

78
	LeavePeerFailureCount = promauto.NewCounter(prometheus.CounterOpts{
79
		Namespace: types.MetricsNamespace,
80
		Subsystem: types.SchedulerMetricsName,
81
		Name:      "leave_peer_failure_total",
82
		Help:      "Counter of the number of failed of the leaving peer.",
83
	})
84

85
	ExchangePeerCount = promauto.NewCounter(prometheus.CounterOpts{
86
		Namespace: types.MetricsNamespace,
87
		Subsystem: types.SchedulerMetricsName,
88
		Name:      "exchange_peer_total",
89
		Help:      "Counter of the number of the exchanging peer.",
90
	})
91

92
	ExchangePeerFailureCount = promauto.NewCounter(prometheus.CounterOpts{
93
		Namespace: types.MetricsNamespace,
94
		Subsystem: types.SchedulerMetricsName,
95
		Name:      "exchange_peer_failure_total",
96
		Help:      "Counter of the number of failed of the exchanging peer.",
97
	})
98

99
	RegisterPeerCount = promauto.NewCounterVec(prometheus.CounterOpts{
100
		Namespace: types.MetricsNamespace,
101
		Subsystem: types.SchedulerMetricsName,
102
		Name:      "register_peer_total",
103
		Help:      "Counter of the number of the register peer.",
104
	}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})
105

106
	RegisterPeerFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{
107
		Namespace: types.MetricsNamespace,
108
		Subsystem: types.SchedulerMetricsName,
109
		Name:      "register_peer_failure_total",
110
		Help:      "Counter of the number of failed of the register peer.",
111
	}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})
112

113
	DownloadPeerStartedCount = promauto.NewCounterVec(prometheus.CounterOpts{
114
		Namespace: types.MetricsNamespace,
115
		Subsystem: types.SchedulerMetricsName,
116
		Name:      "download_peer_started_total",
117
		Help:      "Counter of the number of the download peer started.",
118
	}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})
119

120
	DownloadPeerStartedFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{
121
		Namespace: types.MetricsNamespace,
122
		Subsystem: types.SchedulerMetricsName,
123
		Name:      "download_peer_started_failure_total",
124
		Help:      "Counter of the number of failed of the download peer started.",
125
	}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})
126

127
	DownloadPeerBackToSourceStartedCount = promauto.NewCounterVec(prometheus.CounterOpts{
128
		Namespace: types.MetricsNamespace,
129
		Subsystem: types.SchedulerMetricsName,
130
		Name:      "download_peer_back_to_source_started_total",
131
		Help:      "Counter of the number of the download peer back-to-source started.",
132
	}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})
133

134
	DownloadPeerBackToSourceStartedFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{
135
		Namespace: types.MetricsNamespace,
136
		Subsystem: types.SchedulerMetricsName,
137
		Name:      "download_peer_back_to_source_started_failure_total",
138
		Help:      "Counter of the number of failed of the download peer back-to-source started.",
139
	}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})
140

141
	DownloadPeerCount = promauto.NewCounterVec(prometheus.CounterOpts{
142
		Namespace: types.MetricsNamespace,
143
		Subsystem: types.SchedulerMetricsName,
144
		Name:      "download_peer_finished_total",
145
		Help:      "Counter of the number of the download peer.",
146
	}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})
147

148
	DownloadPeerFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{
149
		Namespace: types.MetricsNamespace,
150
		Subsystem: types.SchedulerMetricsName,
151
		Name:      "download_peer_finished_failure_total",
152
		Help:      "Counter of the number of failed of the download peer.",
153
	}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})
154

155
	DownloadPeerBackToSourceFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{
156
		Namespace: types.MetricsNamespace,
157
		Subsystem: types.SchedulerMetricsName,
158
		Name:      "download_peer_back_to_source_finished_failure_total",
159
		Help:      "Counter of the number of failed of the download peer back-to-source.",
160
	}, []string{"priority", "task_type", "task_tag", "task_app", "host_type"})
161

162
	DownloadPieceCount = promauto.NewCounterVec(prometheus.CounterOpts{
163
		Namespace: types.MetricsNamespace,
164
		Subsystem: types.SchedulerMetricsName,
165
		Name:      "download_piece_finished_total",
166
		Help:      "Counter of the number of the download piece.",
167
	}, []string{"traffic_type", "task_type", "task_tag", "task_app", "host_type"})
168

169
	DownloadPieceFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{
170
		Namespace: types.MetricsNamespace,
171
		Subsystem: types.SchedulerMetricsName,
172
		Name:      "download_piece_finished_failure_total",
173
		Help:      "Counter of the number of failed of the download piece.",
174
	}, []string{"traffic_type", "task_type", "task_tag", "task_app", "host_type"})
175

176
	DownloadPieceBackToSourceFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{
177
		Namespace: types.MetricsNamespace,
178
		Subsystem: types.SchedulerMetricsName,
179
		Name:      "download_piece_back_to_source_finished_failure_total",
180
		Help:      "Counter of the number of failed of the download piece back-to-source.",
181
	}, []string{"traffic_type", "task_type", "task_tag", "task_app", "host_type"})
182

183
	StatTaskCount = promauto.NewCounter(prometheus.CounterOpts{
184
		Namespace: types.MetricsNamespace,
185
		Subsystem: types.SchedulerMetricsName,
186
		Name:      "stat_task_total",
187
		Help:      "Counter of the number of the stat task.",
188
	})
189

190
	StatTaskFailureCount = promauto.NewCounter(prometheus.CounterOpts{
191
		Namespace: types.MetricsNamespace,
192
		Subsystem: types.SchedulerMetricsName,
193
		Name:      "stat_task_failure_total",
194
		Help:      "Counter of the number of failed of the stat task.",
195
	})
196

197
	AnnounceHostCount = promauto.NewCounterVec(prometheus.CounterOpts{
198
		Namespace: types.MetricsNamespace,
199
		Subsystem: types.SchedulerMetricsName,
200
		Name:      "announce_host_total",
201
		Help:      "Counter of the number of the announce host.",
202
	}, []string{"os", "platform", "platform_family", "platform_version",
203
		"kernel_version", "git_version", "git_commit", "go_version", "build_platform"})
204

205
	AnnounceHostFailureCount = promauto.NewCounterVec(prometheus.CounterOpts{
206
		Namespace: types.MetricsNamespace,
207
		Subsystem: types.SchedulerMetricsName,
208
		Name:      "announce_host_failure_total",
209
		Help:      "Counter of the number of failed of the announce host.",
210
	}, []string{"os", "platform", "platform_family", "platform_version",
211
		"kernel_version", "git_version", "git_commit", "go_version", "build_platform"})
212

213
	LeaveHostCount = promauto.NewCounter(prometheus.CounterOpts{
214
		Namespace: types.MetricsNamespace,
215
		Subsystem: types.SchedulerMetricsName,
216
		Name:      "leave_host_total",
217
		Help:      "Counter of the number of the leaving host.",
218
	})
219

220
	LeaveHostFailureCount = promauto.NewCounter(prometheus.CounterOpts{
221
		Namespace: types.MetricsNamespace,
222
		Subsystem: types.SchedulerMetricsName,
223
		Name:      "leave_host_failure_total",
224
		Help:      "Counter of the number of failed of the leaving host.",
225
	})
226

227
	SyncProbesCount = promauto.NewCounter(prometheus.CounterOpts{
228
		Namespace: types.MetricsNamespace,
229
		Subsystem: types.SchedulerMetricsName,
230
		Name:      "sync_probes_total",
231
		Help:      "Counter of the number of the synchronizing probes.",
232
	})
233

234
	SyncProbesFailureCount = promauto.NewCounter(prometheus.CounterOpts{
235
		Namespace: types.MetricsNamespace,
236
		Subsystem: types.SchedulerMetricsName,
237
		Name:      "sync_probes_failure_total",
238
		Help:      "Counter of the number of failed of the synchronizing probes.",
239
	})
240

241
	Traffic = promauto.NewCounterVec(prometheus.CounterOpts{
242
		Namespace: types.MetricsNamespace,
243
		Subsystem: types.SchedulerMetricsName,
244
		Name:      "traffic",
245
		Help:      "Counter of the number of traffic.",
246
	}, []string{"type", "task_type", "task_tag", "task_app", "host_type"})
247

248
	HostTraffic = promauto.NewCounterVec(prometheus.CounterOpts{
249
		Namespace: types.MetricsNamespace,
250
		Subsystem: types.SchedulerMetricsName,
251
		Name:      "host_traffic",
252
		Help:      "Counter of the number of per host traffic.",
253
	}, []string{"type", "task_type", "task_tag", "task_app", "host_type", "host_id", "host_ip", "host_name"})
254

255
	DownloadPeerDuration = promauto.NewSummaryVec(prometheus.SummaryOpts{
256
		Namespace:  types.MetricsNamespace,
257
		Subsystem:  types.SchedulerMetricsName,
258
		Name:       "download_peer_duration_milliseconds",
259
		Help:       "Summary of the time each peer downloading.",
260
		Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
261
	}, []string{"task_size_level"})
262

263
	ConcurrentScheduleGauge = promauto.NewGauge(prometheus.GaugeOpts{
264
		Namespace: types.MetricsNamespace,
265
		Subsystem: types.SchedulerMetricsName,
266
		Name:      "concurrent_schedule_total",
267
		Help:      "Gauge of the number of concurrent of the scheduling.",
268
	})
269

270
	VersionGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{
271
		Namespace: types.MetricsNamespace,
272
		Subsystem: types.SchedulerMetricsName,
273
		Name:      "version",
274
		Help:      "Version info of the service.",
275
	}, []string{"major", "minor", "git_version", "git_commit", "platform", "build_time", "go_version", "go_tags", "go_gcflags"})
276
)
277

278
func New(cfg *config.MetricsConfig, svr *grpc.Server) *http.Server {
279
	grpc_prometheus.Register(svr)
280

281
	mux := http.NewServeMux()
282
	mux.Handle("/metrics", promhttp.Handler())
283

284
	VersionGauge.WithLabelValues(version.Major, version.Minor, version.GitVersion, version.GitCommit, version.Platform, version.BuildTime, version.GoVersion, version.Gotags, version.Gogcflags).Set(1)
285
	return &http.Server{
286
		Addr:    cfg.Addr,
287
		Handler: mux,
288
	}
289
}
290

291
// TaskSizeLevel is the bucket a task falls into according to its size.
type TaskSizeLevel int

const (
	// TaskSizeLevel0 represents an unknown size; CalculateSizeLevel returns it
	// for sizes that are zero or negative.
	TaskSizeLevel0 TaskSizeLevel = iota

	// TaskSizeLevel1 represents a size below 1M.
	TaskSizeLevel1

	// TaskSizeLevel2 represents a size from 1M up to, but not including, 4M.
	TaskSizeLevel2

	// TaskSizeLevel3 represents a size from 4M up to, but not including, 8M.
	TaskSizeLevel3

	// TaskSizeLevel4 represents a size from 8M up to, but not including, 16M.
	TaskSizeLevel4

	// TaskSizeLevel5 represents a size from 16M up to, but not including, 32M.
	TaskSizeLevel5

	// TaskSizeLevel6 represents a size from 32M up to, but not including, 64M.
	TaskSizeLevel6

	// TaskSizeLevel7 represents a size from 64M up to, but not including, 128M.
	TaskSizeLevel7

	// TaskSizeLevel8 represents a size from 128M up to, but not including, 256M.
	TaskSizeLevel8

	// TaskSizeLevel9 represents a size from 256M up to, but not including, 512M.
	TaskSizeLevel9

	// TaskSizeLevel10 represents a size from 512M up to, but not including, 1G.
	TaskSizeLevel10

	// TaskSizeLevel11 represents a size from 1G up to, but not including, 4G.
	TaskSizeLevel11

	// TaskSizeLevel12 represents a size from 4G up to, but not including, 8G.
	TaskSizeLevel12

	// TaskSizeLevel13 represents a size from 8G up to, but not including, 16G.
	TaskSizeLevel13

	// TaskSizeLevel14 represents a size from 16G up to, but not including, 32G.
	TaskSizeLevel14

	// TaskSizeLevel15 represents a size from 32G up to, but not including, 64G.
	TaskSizeLevel15

	// TaskSizeLevel16 represents a size from 64G up to, but not including, 128G.
	TaskSizeLevel16

	// TaskSizeLevel17 represents a size from 128G up to, but not including, 256G.
	TaskSizeLevel17

	// TaskSizeLevel18 represents a size from 256G up to, but not including, 512G.
	TaskSizeLevel18

	// TaskSizeLevel19 represents a size from 512G up to, but not including, 1T.
	TaskSizeLevel19

	// TaskSizeLevel20 represents a size of 1T or more.
	TaskSizeLevel20
)

// taskSizeLevelLabels holds the string form of each level, indexed by the
// level's numeric value.
var taskSizeLevelLabels = [...]string{
	TaskSizeLevel0:  "0",
	TaskSizeLevel1:  "1",
	TaskSizeLevel2:  "2",
	TaskSizeLevel3:  "3",
	TaskSizeLevel4:  "4",
	TaskSizeLevel5:  "5",
	TaskSizeLevel6:  "6",
	TaskSizeLevel7:  "7",
	TaskSizeLevel8:  "8",
	TaskSizeLevel9:  "9",
	TaskSizeLevel10: "10",
	TaskSizeLevel11: "11",
	TaskSizeLevel12: "12",
	TaskSizeLevel13: "13",
	TaskSizeLevel14: "14",
	TaskSizeLevel15: "15",
	TaskSizeLevel16: "16",
	TaskSizeLevel17: "17",
	TaskSizeLevel18: "18",
	TaskSizeLevel19: "19",
	TaskSizeLevel20: "20",
}

// String returns the decimal number of the level ("0" through "20"). Any value
// outside the declared levels is reported as "0".
func (t TaskSizeLevel) String() string {
	if t < TaskSizeLevel0 || t > TaskSizeLevel20 {
		return "0"
	}

	return taskSizeLevelLabels[t]
}
408

409
// CalculateSizeLevel calculates the size level according to the size.
410
func CalculateSizeLevel(size int64) TaskSizeLevel {
411
	if size <= 0 {
412
		return TaskSizeLevel0
413
	} else if size < 1024*1024 {
414
		return TaskSizeLevel1
415
	} else if size < 4*1024*1024 {
416
		return TaskSizeLevel2
417
	} else if size < 8*1024*1024 {
418
		return TaskSizeLevel3
419
	} else if size < 16*1024*1024 {
420
		return TaskSizeLevel4
421
	} else if size < 32*1024*1024 {
422
		return TaskSizeLevel5
423
	} else if size < 64*1024*1024 {
424
		return TaskSizeLevel6
425
	} else if size < 128*1024*1024 {
426
		return TaskSizeLevel7
427
	} else if size < 256*1024*1024 {
428
		return TaskSizeLevel8
429
	} else if size < 512*1024*1024 {
430
		return TaskSizeLevel9
431
	} else if size < 1024*1024*1024 {
432
		return TaskSizeLevel10
433
	} else if size < 4*1024*1024*1024 {
434
		return TaskSizeLevel11
435
	} else if size < 8*1024*1024*1024 {
436
		return TaskSizeLevel12
437
	} else if size < 16*1024*1024*1024 {
438
		return TaskSizeLevel13
439
	} else if size < 32*1024*1024*1024 {
440
		return TaskSizeLevel14
441
	} else if size < 64*1024*1024*1024 {
442
		return TaskSizeLevel15
443
	} else if size < 128*1024*1024*1024 {
444
		return TaskSizeLevel16
445
	} else if size < 256*1024*1024*1024 {
446
		return TaskSizeLevel17
447
	} else if size < 512*1024*1024*1024 {
448
		return TaskSizeLevel18
449
	} else if size < 1024*1024*1024*1024 {
450
		return TaskSizeLevel19
451
	} else {
452
		return TaskSizeLevel20
453
	}
454
}
455

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.