moira
/
datatypes.go
750 строк · 22.2 Кб
1package moira
2
3import (
4"bytes"
5"encoding/json"
6"fmt"
7"reflect"
8"sort"
9"strings"
10"time"
11)
12
// Default moira trigger states.
const (
	OK        = "OK"
	WARN      = "WARN"
	ERROR     = "ERROR"
	NODATA    = "NODATA"
	EXCEPTION = "EXCEPTION"
	DEL       = "DEL"
	TEST      = "TEST"
)
23
// Reasons why an event was muted; the values are consecutive integers
// starting at zero.
const (
	EventMutedNone        = iota // 0
	EventMutedSchedule           // 1
	EventMutedMaintenance        // 2
	EventMutedSilent             // 3
)
31
// Fixed timezone offset, expressed in minutes and in seconds.
const (
	FixedTzOffsetMinutes int64 = -180
	FixedTzOffsetSeconds       = FixedTzOffsetMinutes * 60
)
37
// WildcardMetric is used as the metric name that stands for the whole trigger.
const WildcardMetric = "*"
42
// Limits (in seconds) of a single trigger check iteration.
const (
	TriggerCheckLimit     = 40
	TriggerCheckThreshold = 30
)
48
49const (
50SatTakeScreen SaturationType = "take-screenshot"
51
52SatGetCMDBDeviceData SaturationType = "cmdb-device"
53SatCheckPort SaturationType = "check-port"
54SatGetDBaaSService SaturationType = "dbaas-storage-owner-service"
55SatGetDBaaSUnit SaturationType = "dbaas-storage-owner-unit"
56SatGetDeploys SaturationType = "get-service-deploys"
57SatGetAllDeployStatuses SaturationType = "get-all-deploy-statuses"
58SatRenderDescription SaturationType = "render-description"
59)
60
61var (
62eventStates = [...]string{OK, WARN, ERROR, NODATA, TEST}
63
64scores = map[string]int64{
65OK: 0,
66DEL: 0,
67WARN: 1,
68ERROR: 100,
69NODATA: 0,
70EXCEPTION: 100000,
71}
72)
73
74// NotificationEvent represents trigger state changes event
75type NotificationEvent struct {
76ID string `json:"id"`
77IsForceSent bool `json:"force_sent"`
78IsTriggerEvent bool `json:"trigger_event"`
79Timestamp int64 `json:"timestamp"`
80Metric string `json:"metric"`
81State string `json:"state"`
82OldState string `json:"old_state"`
83Value *float64 `json:"value,omitempty"`
84OldValue *float64 `json:"old_value,omitempty"`
85TriggerID string `json:"trigger_id"`
86SubscriptionID *string `json:"sub_id,omitempty"`
87ContactID string `json:"contactId,omitempty"`
88Message *string `json:"msg,omitempty"`
89Batch *EventsBatch `json:"batch"`
90
91HasSaturations bool `json:"has_saturations,omitempty"`
92
93OverriddenByAncestor bool `json:"overridden,omitempty"`
94DelayedForAncestor bool `json:"delayed_for_ancestor,omitempty"`
95AncestorTriggerID string `json:"ancestor_trigger_id,omitempty"`
96AncestorMetric string `json:"ancestor_metric,omitempty"`
97
98// properties related to Fan
99FanTaskID string `json:"fan_task_id,omitempty"`
100WaitingForFanSince int64 `json:"waiting_for_fan_since"` // this is a timestamp
101Context *NotificationEventContext `json:"context,omitempty"`
102}
103
104// IdempotencyKey is supposed to rule out the possibility of repeated processing NotificationEvent
105func (event *NotificationEvent) IdempotencyKey() string {
106var metric string
107if event.IsTriggerEvent {
108metric = WildcardMetric
109} else {
110metric = event.Metric
111}
112return fmt.Sprintf(
113"%s:%d:%s",
114event.TriggerID,
115event.Timestamp,
116metric,
117)
118}
119
120// Matches implements gomock.Matcher
121func (event *NotificationEvent) Matches(x interface{}) bool {
122other, ok := x.(*NotificationEvent)
123if !ok {
124return false
125}
126
127val1 := *event
128val1.Batch = nil
129
130val2 := *other
131val2.Batch = nil
132
133// 2 events must be equal, except for the Batch
134return reflect.DeepEqual(val1, val2)
135}
136
137func (event *NotificationEvent) String() string {
138return fmt.Sprintf(
139"TriggerId: %s, Metric: %s\nValue: %v, OldValue: %v\nState: %s, OldState: %s\nMessage: %s\nTimestamp: %v",
140event.TriggerID, event.Metric,
141UseFloat64(event.Value), UseFloat64(event.OldValue),
142event.State, event.OldState,
143UseString(event.Message),
144event.Timestamp,
145)
146}
147
// EventsBatch is the grouping attribute shared by the NotificationEvents
// that were pushed during a single trigger check.
type EventsBatch struct {
	ID   string `json:"id"`
	Time int64  `json:"ts"`
}
155
156func NewEventsBatch(ts int64) *EventsBatch {
157return &EventsBatch{
158ID: NewStrID(),
159Time: ts,
160}
161}
162
// NotificationEventContext carries extra data attached to a NotificationEvent.
//
// Note: omitempty has no effect on struct values in encoding/json, so the
// "serviceChannels" key is always emitted.
type NotificationEventContext struct {
	Deployers       []string           `json:"deployers,omitempty"`
	Images          []contextImageData `json:"images,omitempty"`
	DeployStatuses  string             `json:"deployStatuses,omitempty"`
	ServiceChannels struct {
		DBaaS []serviceChannel `json:"dbaas,omitempty"`
	} `json:"serviceChannels,omitempty"`
}

// contextImageData describes one image of a NotificationEventContext.
type contextImageData struct {
	URL       string `json:"url"`
	SourceURL string `json:"sourceURL"`
	Caption   string `json:"caption,omitempty"`
}

// serviceChannel pairs a service name with a slack channel.
type serviceChannel struct {
	ServiceName  string `json:"serviceName"`
	SlackChannel string `json:"slackChannel"`
}
180
181func (context *NotificationEventContext) UnmarshalJSON(data []byte) error {
182type tmp NotificationEventContext
183if err := json.Unmarshal(data, (*tmp)(context)); err != nil {
184return err
185}
186sort.Strings(context.Deployers)
187return nil
188}
189
190func (context *NotificationEventContext) MustMarshal() string {
191if context == nil {
192return ""
193}
194result, err := json.Marshal(context)
195if err != nil {
196panic(err)
197}
198return string(result)
199}
200
201// NotificationEvents represents slice of NotificationEvent
202type NotificationEvents []NotificationEvent
203
204// GetContext returns the context of the events
205// we assume that all events have the same context
206func (events NotificationEvents) GetContext() *NotificationEventContext {
207if len(events) == 0 {
208return nil
209}
210return events[0].Context
211}
212
213// GetSubjectState returns the most critical state of events
214func (events NotificationEvents) GetSubjectState() string {
215result := ""
216states := make(map[string]bool)
217for _, event := range events {
218states[event.State] = true
219}
220for _, state := range eventStates {
221if states[state] {
222result = state
223}
224}
225return result
226}
227
228// TriggerData represents trigger object
229type TriggerData struct {
230ID string `json:"id"`
231Name string `json:"name"`
232Desc string `json:"desc"`
233Targets []string `json:"targets"`
234Parents []string `json:"parents"`
235WarnValue float64 `json:"warn_value"`
236ErrorValue float64 `json:"error_value"`
237Tags []string `json:"__notifier_trigger_tags"`
238Dashboard string `json:"dashboard"`
239Saturation []Saturation `json:"saturation"`
240}
241
242// GetTags returns "[tag1][tag2]...[tagN]" string
243func (trigger *TriggerData) GetTags() string {
244var buffer bytes.Buffer
245for _, tag := range trigger.Tags {
246buffer.WriteString(fmt.Sprintf("[%s]", tag))
247}
248return buffer.String()
249}
250
// Saturation is one saturation entry of a trigger. ExtraParameters is kept as
// raw JSON and is not decoded by this type.
type Saturation struct {
	Type            SaturationType  `json:"type"`
	Fallback        string          `json:"fallback,omitempty"`
	ExtraParameters json.RawMessage `json:"extra_parameters,omitempty"`
}

// SaturationType is the string identifier of a saturation kind.
type SaturationType string
258
// ContactData represents the contact object.
// Expiration has no JSON tag, so it is serialized under the key "Expiration".
type ContactData struct {
	Type          string `json:"type"`
	Value         string `json:"value"`
	FallbackValue string `json:"fallback_value,omitempty"`
	ID            string `json:"id"`
	User          string `json:"user"` // User is the user that _created_ the contact
	Expiration    *time.Time
}

// NeedsFallbackValue reports whether the contact is a slack contact whose
// Value starts with an underscore.
//
// An empty Value yields false; it used to panic on the out-of-range index.
func (cd *ContactData) NeedsFallbackValue() bool {
	return cd.Type == "slack" && strings.HasPrefix(cd.Value, "_")
}
272
// SilentPatternData describes a silent pattern and what kind of entity
// (metric or tag) its Pattern applies to.
type SilentPatternData struct {
	ID      string            `json:"id"`
	Login   string            `json:"login"`
	Pattern string            `json:"pattern"`
	Created int64             `json:"created_at"`
	Until   int64             `json:"until"`
	Type    SilentPatternType `json:"type"`
}

// IsMetric reports whether the pattern type is SPTMetric.
func (spd *SilentPatternData) IsMetric() bool {
	return SPTMetric == spd.Type
}

// IsTag reports whether the pattern type is SPTTag.
func (spd *SilentPatternData) IsTag() bool {
	return SPTTag == spd.Type
}

// SilentPatternType enumerates the kinds of silent patterns.
type SilentPatternType int

// Supported silent pattern types.
const (
	SPTMetric SilentPatternType = iota // 0
	SPTTag                             // 1
)
296
// EscalationData represents the escalation object: a list of contacts and an
// offset expressed in minutes.
type EscalationData struct {
	ID              string   `json:"id"`
	Contacts        []string `json:"contacts"`
	OffsetInMinutes int64    `json:"offset_in_minutes"`
}
303
304// SubscriptionData represent user subscription
305type SubscriptionData struct {
306Contacts []string `json:"contacts"`
307Tags []string `json:"tags"`
308Schedule ScheduleData `json:"sched"`
309ID string `json:"id"`
310Enabled bool `json:"enabled"`
311ThrottlingEnabled bool `json:"throttling"`
312User string `json:"user"`
313Escalations []EscalationData `json:"escalations"`
314}
315
316// ScheduleData represent subscription schedule
317type ScheduleData struct {
318Days []ScheduleDataDay `json:"days"`
319TimezoneOffset int64 `json:"tzOffset"`
320StartOffset int64 `json:"startOffset"`
321EndOffset int64 `json:"endOffset"`
322}
323
324// GetFixedTzOffset returns Moscow tz offset in minutes
325func (schedule *ScheduleData) GetFixedTzOffset() int64 {
326return int64(-180)
327}
328
329// IsScheduleAllows check if the time is in the allowed schedule interval
330func (schedule *ScheduleData) IsScheduleAllows(eventTs int64) bool {
331if schedule == nil {
332return true
333}
334
335eventTs = eventTs - eventTs%60 - FixedTzOffsetSeconds // truncate to minutes
336eventTime := time.Unix(eventTs, 0).UTC()
337eventWeekday := eventTime.Weekday()
338
339// points converted to seconds relative to the day
340eventTs = eventTs % 86400
341scheduleStart := schedule.StartOffset * 60
342scheduleEnd := schedule.EndOffset * 60
343
344if scheduleStart > scheduleEnd { // "inverted" schedule, e.g. 22:00 - 08:00
345// there are 2 possible ways of moments' disposition:
346// 1) schedule start -> event -> midnight -> schedule end
347// 2) schedule start -> midnight -> event -> schedule end
348isEventPastMidnight := eventTs < scheduleEnd
349// if event happened after midnight (the 2nd case) then the previous day enable flag is taken
350if !schedule.isScheduleDaysAllows(eventWeekday, isEventPastMidnight) {
351return false
352}
353
354return (scheduleStart <= eventTs && !isEventPastMidnight) || (eventTs <= scheduleEnd && isEventPastMidnight)
355} else { // "regular" schedule, e.g. 09:00 - 18:00
356if !schedule.isScheduleDaysAllows(eventWeekday, false) {
357return false
358}
359
360return scheduleStart <= eventTs && eventTs <= scheduleEnd
361}
362}
363
364// isScheduleDaysAllows can tell if the particular day of the week is enabled by the schedule
365// dayBefore indicates that the day before must be considered instead of the given day
366func (schedule *ScheduleData) isScheduleDaysAllows(weekday time.Weekday, dayBefore bool) bool {
367var (
368daysOffset int
369)
370
371if dayBefore {
372daysOffset = 1
373} else {
374daysOffset = 0
375}
376
377return schedule.Days[(int(weekday+6)-daysOffset)%7].Enabled
378}
379
// ScheduleDataDay represents one week day of a schedule.
type ScheduleDataDay struct {
	Enabled bool   `json:"enabled"`
	Name    string `json:"name,omitempty"`
}
385
386// ScheduledNotification represent notification object
387type ScheduledNotification struct {
388Event NotificationEvent `json:"event"`
389Trigger TriggerData `json:"trigger"`
390Contact ContactData `json:"contact"`
391Timestamp int64 `json:"timestamp"`
392SendFail int `json:"send_fail"`
393NeedAck bool `json:"need_ack"`
394Throttled bool `json:"throttled"`
395}
396
397// GetKey return notification key to prevent duplication to the same contact
398func (notification *ScheduledNotification) GetKey() string {
399var prefix string
400if notification.Event.AncestorTriggerID == "" {
401prefix = fmt.Sprintf(
402"%s:%s",
403notification.Contact.Type,
404notification.Contact.Value,
405)
406} else {
407// if the notification event has an ancestor, we ignore the contact name
408prefix = fmt.Sprintf(
409"%s",
410notification.Contact.Type,
411)
412}
413return fmt.Sprintf("%s:%s:%s:%s:%d:%f:%d:%d",
414prefix,
415notification.Event.TriggerID,
416notification.Event.Metric,
417notification.Event.State,
418notification.Event.Timestamp,
419UseFloat64(notification.Event.Value),
420notification.SendFail,
421notification.Timestamp,
422)
423}
424
425// TagStats wraps trigger ids and subscriptions which are related to the given tag
426type TagStats struct {
427Name string `json:"name"`
428Triggers []string `json:"triggers"`
429Subscriptions []SubscriptionData `json:"subscriptions"`
430}
431
// MatchedMetric represents parsed and matched metric data.
// The fields carry no JSON tags, so they serialize under their Go names.
type MatchedMetric struct {
	Metric             string
	Patterns           []string
	Value              float64
	Timestamp          int64
	RetentionTimestamp int64
	Retention          int
}
441
// MetricValue represents metric data. RetentionTimestamp is serialized as
// "step" and omitted when zero.
type MetricValue struct {
	RetentionTimestamp int64   `json:"step,omitempty"`
	Timestamp          int64   `json:"ts"`
	Value              float64 `json:"value"`
}
448
449// Trigger represents trigger data object
450type Trigger struct {
451ID string `json:"id"`
452Name string `json:"name"`
453Desc *string `json:"desc,omitempty"`
454Targets []string `json:"targets"`
455Parents []string `json:"parents"`
456WarnValue *float64 `json:"warn_value"`
457ErrorValue *float64 `json:"error_value"`
458Tags []string `json:"tags"`
459TTLState *string `json:"ttl_state,omitempty"`
460TTL int64 `json:"ttl,omitempty"`
461Schedule *ScheduleData `json:"sched,omitempty"`
462Expression *string `json:"expression,omitempty"`
463PythonExpression *string `json:"python_expression,omitempty"`
464Patterns []string `json:"patterns"`
465IsPullType bool `json:"is_pull_type"`
466Dashboard string `json:"dashboard"`
467PendingInterval int64 `json:"pending_interval"`
468Saturation []Saturation `json:"saturation"`
469}
470
471// IsSimple checks triggers patterns
472// If patterns more than one or it contains standard graphite wildcard symbols,
473// when this target can contain more then one metrics, and is it not simple trigger
474func (trigger *Trigger) IsSimple() bool {
475if len(trigger.Targets) > 1 || len(trigger.Patterns) > 1 {
476return false
477}
478for _, pattern := range trigger.Patterns {
479if strings.ContainsAny(pattern, "*{?[") {
480return false
481}
482}
483return true
484}
485
486// TriggerCheck represent trigger data with last check data and check timestamp
487type TriggerCheck struct {
488Trigger
489Throttling int64 `json:"throttling"`
490LastCheck *CheckData `json:"last_check"`
491}
492
493// CheckData represent last trigger check data
494type CheckData struct {
495IsPending bool `json:"is_pending"`
496Message string `json:"msg,omitempty"`
497Timestamp int64 `json:"timestamp,omitempty"`
498EventTimestamp int64 `json:"event_timestamp,omitempty"`
499Score int64 `json:"score"`
500State string `json:"state"`
501Suppressed bool `json:"suppressed,omitempty"`
502Maintenance int64 `json:"maintenance,omitempty"`
503MaintenanceMetric map[string]int64 `json:"maintenance_metric,omitempty"`
504Metrics map[string]*MetricState `json:"metrics"`
505Version int `json:"version"`
506}
507
508// GetEventTimestamp gets event timestamp for given check
509func (checkData CheckData) GetEventTimestamp() int64 {
510if checkData.EventTimestamp == 0 {
511return checkData.Timestamp
512}
513return checkData.EventTimestamp
514}
515
516// GetOrCreateMetricState gets metric state from check data or create new if CheckData has no state for given metric
517func (checkData *CheckData) GetOrCreateMetricState(metric string, emptyTimestampValue int64) *MetricState {
518_, ok := checkData.Metrics[metric]
519if !ok {
520checkData.Metrics[metric] = &MetricState{
521IsNoData: true,
522State: NODATA,
523Timestamp: emptyTimestampValue,
524}
525}
526return checkData.Metrics[metric]
527}
528
529// UpdateScore update and return checkData score, based on metric states and checkData state
530func (checkData *CheckData) UpdateScore() int64 {
531checkData.Score = scores[checkData.State]
532for _, metricData := range checkData.Metrics {
533checkData.Score += scores[metricData.State]
534}
535return checkData.Score
536}
537
// MetricState represents the state data of a metric for a given timestamp.
type MetricState struct {
	EventTimestamp int64    `json:"event_timestamp"`
	State          string   `json:"state"`
	Suppressed     bool     `json:"suppressed"`
	Timestamp      int64    `json:"timestamp"`
	Value          *float64 `json:"value,omitempty"`
	Maintenance    int64    `json:"maintenance,omitempty"`
	IsPending      bool     `json:"is_pending"`

	IsNoData bool `json:"is_no_data"`
	IsForced bool `json:"is_forced,omitempty"`
}
551
552// GetCheckPoint gets check point for given MetricState
553// CheckPoint is the timestamp from which to start checking the current state of the metric
554func (metricState *MetricState) GetCheckPoint(checkPointGap int64) int64 {
555return MaxI64(metricState.Timestamp-checkPointGap, metricState.EventTimestamp)
556}
557
558// GetEventTimestamp gets event timestamp for given metric
559func (metricState *MetricState) GetEventTimestamp() int64 {
560if metricState.EventTimestamp == 0 {
561return metricState.Timestamp
562}
563return metricState.EventTimestamp
564}
565
// MetricEvent represents a filter metric event: a metric and a pattern.
type MetricEvent struct {
	Metric  string `json:"metric"`
	Pattern string `json:"pattern"`
}
571
// maintenanceInterval is one maintenance interval of a metric or of the whole
// trigger, bounded by the From and Until timestamps.
type maintenanceInterval struct {
	From  int64 `json:"from"`
	Until int64 `json:"until"`
}

// Maintenance is the history of maintenance intervals for each metric of the
// trigger. The whole-trigger maintenance is stored under WildcardMetric.
type Maintenance map[string][]maintenanceInterval

// NewMaintenance creates a blank, ready-to-use Maintenance instance.
func NewMaintenance() Maintenance {
	return Maintenance{}
}
586
587// NewMaintenanceFromCheckData migrates CheckData maintenance to the new Maintenance instance
588// only maintenanceInterval.Until values can be filled
589func NewMaintenanceFromCheckData(data *CheckData) Maintenance {
590result := make(Maintenance, len(data.MaintenanceMetric)+1)
591if data.Maintenance > 0 {
592result[WildcardMetric] = []maintenanceInterval{{Until: data.Maintenance}}
593}
594for metric, maintenance := range data.MaintenanceMetric {
595result[metric] = []maintenanceInterval{{Until: maintenance}}
596}
597return result
598}
599
600// Add adds maintenance for the given metric
601func (maintenance Maintenance) Add(metric string, until int64) {
602now := time.Now().Unix()
603history, ok := maintenance[metric]
604
605if ok {
606last := &history[len(history)-1]
607if last.Until > now {
608// last maintenance isn't over yet, extend it
609last.Until = until
610} else {
611// append new maintenance
612history = append(history, maintenanceInterval{
613From: now,
614Until: until,
615})
616maintenance[metric] = history
617}
618} else {
619maintenance[metric] = []maintenanceInterval{{
620From: now,
621Until: until,
622}}
623}
624}
625
626// Get returns maintenance state for the given metric at the given timestamp
627func (maintenance Maintenance) Get(metric string, ts int64) (maintained bool, until int64) {
628// check the metric is present in the first place
629history, ok := maintenance[metric]
630if !ok {
631return false, 0
632}
633qty := len(history)
634
635// look for the interval where Until exceeds ts
636pos := sort.Search(qty, func(i int) bool {
637return history[i].Until >= ts
638})
639if pos == qty { // sort.Search returns collection's length if nothing has been found
640return false, 0
641}
642return true, history[pos].Until
643}
644
645// Del terminates maintenance for the given metric
646// it does nothing if the metric doesn't exist
647func (maintenance Maintenance) Del(metric string) {
648if history, ok := maintenance[metric]; ok {
649history[len(history)-1].Until = time.Now().Unix()
650maintenance[metric] = history
651}
652}
653
654// Clean removes all outdated maintenance intervals
655func (maintenance Maintenance) Clean() {
656const housekeepingRange = 30 * 24 * 60 * 60 // 30 days
657margin := time.Now().Unix() - housekeepingRange
658
659for metric, history := range maintenance {
660// find first non-outdated interval
661qty := len(history)
662pos := sort.Search(qty, func(i int) bool {
663return history[i].From > margin
664})
665
666if pos == qty {
667// all intervals are outdated -- just delete metric entry
668delete(maintenance, metric)
669} else if pos > 0 {
670// truncate intervals
671maintenance[metric] = history[pos:]
672}
673}
674}
675
676// Snapshot returns map of metrics to their actual maintenance on the given timestamp
677func (maintenance Maintenance) Snapshot(ts int64) map[string]int64 {
678result := make(map[string]int64, len(maintenance))
679for metric, history := range maintenance {
680qty := len(history)
681pos := sort.Search(qty, func(i int) bool {
682return history[i].Until >= ts
683})
684
685if pos < qty {
686result[metric] = history[pos].Until
687}
688}
689return result
690}
691
692func (maintenance Maintenance) SnapshotNow() map[string]int64 {
693return maintenance.Snapshot(time.Now().Unix())
694}
695
696// ScheduledEscalationEvent represent escalated notification event
697type ScheduledEscalationEvent struct {
698Escalation EscalationData `json:"escalation"`
699Event NotificationEvent `json:"event"`
700Trigger TriggerData `json:"trigger"`
701IsFinal bool `json:"is_final"`
702IsResolution bool `json:"is_resolution"`
703}
704
// NotificationsDisabledSettings holds the "notifications disabled" flag and
// its author.
type NotificationsDisabledSettings struct {
	Author   string `json:"author"`
	Disabled bool   `json:"disabled"`
}

// GlobalSettings groups the global settings; currently only the
// notifications section.
type GlobalSettings struct {
	Notifications NotificationsDisabledSettings `json:"notifications"`
}
713
// DutyItem is one duty entry: a login and an optional duty end time.
// Login has no JSON tag and serializes under its Go name.
type DutyItem struct {
	Login   string
	DutyEnd *time.Time `json:"duty_end"`
}

// DutyData is a list of duty items with a timestamp.
// Neither field has a JSON tag.
type DutyData struct {
	Duty      []DutyItem
	Timestamp time.Time
}
723
// RateLimit holds an accept rate and a threads quantity.
type RateLimit struct {
	AcceptRate float64
	ThreadsQty int
}
728
729type SlackDelayedAction struct {
730Action string `json:"action"`
731EncodedArgs json.RawMessage `json:"encodedArgs"`
732FailCount int `json:"failCount"`
733ScheduledAt time.Time `json:"scheduledAt"`
734
735// these fields are only used for logging
736Contact ContactData `json:"contact"`
737}
738
// SlackUserGroup represents a slack user group macro (used to mention several
// users at a time).
type SlackUserGroup struct {
	Id         string    `json:"id"`
	Handle     string    `json:"handle"` // macro
	Name       string    `json:"name"`   // human-readable name
	DateCreate time.Time `json:"date_create"`
	DateUpdate time.Time `json:"date_update"`
	UserIds    []string  `json:"user_ids"` // sadly, not names, but slack accepts it
}

// SlackUserGroupsCache maps SlackUserGroup.Handle to SlackUserGroup.
type SlackUserGroupsCache map[string]SlackUserGroup
751