prometheus

Форк
0
387 строк · 12.0 Кб
1
// Copyright 2016 The Prometheus Authors
2
// Licensed under the Apache License, Version 2.0 (the "License");
3
// you may not use this file except in compliance with the License.
4
// You may obtain a copy of the License at
5
//
6
// http://www.apache.org/licenses/LICENSE-2.0
7
//
8
// Unless required by applicable law or agreed to in writing, software
9
// distributed under the License is distributed on an "AS IS" BASIS,
10
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
// See the License for the specific language governing permissions and
12
// limitations under the License.
13

14
package dns
15

16
import (
17
	"context"
18
	"errors"
19
	"fmt"
20
	"net"
21
	"strconv"
22
	"strings"
23
	"sync"
24
	"time"
25

26
	"github.com/go-kit/log"
27
	"github.com/go-kit/log/level"
28
	"github.com/miekg/dns"
29
	"github.com/prometheus/client_golang/prometheus"
30
	"github.com/prometheus/common/model"
31

32
	"github.com/prometheus/prometheus/discovery"
33
	"github.com/prometheus/prometheus/discovery/refresh"
34
	"github.com/prometheus/prometheus/discovery/targetgroup"
35
)
36

37
const (
38
	resolvConf = "/etc/resolv.conf"
39

40
	dnsNameLabel            = model.MetaLabelPrefix + "dns_name"
41
	dnsSrvRecordPrefix      = model.MetaLabelPrefix + "dns_srv_record_"
42
	dnsSrvRecordTargetLabel = dnsSrvRecordPrefix + "target"
43
	dnsSrvRecordPortLabel   = dnsSrvRecordPrefix + "port"
44
	dnsMxRecordPrefix       = model.MetaLabelPrefix + "dns_mx_record_"
45
	dnsMxRecordTargetLabel  = dnsMxRecordPrefix + "target"
46
	dnsNsRecordPrefix       = model.MetaLabelPrefix + "dns_ns_record_"
47
	dnsNsRecordTargetLabel  = dnsNsRecordPrefix + "target"
48

49
	// Constants for instrumentation.
50
	namespace = "prometheus"
51
)
52

53
// DefaultSDConfig is the default DNS SD configuration.
54
var DefaultSDConfig = SDConfig{
55
	RefreshInterval: model.Duration(30 * time.Second),
56
	Type:            "SRV",
57
}
58

59
func init() {
60
	discovery.RegisterConfig(&SDConfig{})
61
}
62

63
// SDConfig is the configuration for DNS based service discovery.
64
type SDConfig struct {
65
	Names           []string       `yaml:"names"`
66
	RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`
67
	Type            string         `yaml:"type"`
68
	Port            int            `yaml:"port"` // Ignored for SRV records
69
}
70

71
// NewDiscovererMetrics implements discovery.Config.
72
func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
73
	return newDiscovererMetrics(reg, rmi)
74
}
75

76
// Name returns the name of the Config.
77
func (*SDConfig) Name() string { return "dns" }
78

79
// NewDiscoverer returns a Discoverer for the Config.
80
func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
81
	return NewDiscovery(*c, opts.Logger, opts.Metrics)
82
}
83

84
// UnmarshalYAML implements the yaml.Unmarshaler interface.
85
func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
86
	*c = DefaultSDConfig
87
	type plain SDConfig
88
	err := unmarshal((*plain)(c))
89
	if err != nil {
90
		return err
91
	}
92
	if len(c.Names) == 0 {
93
		return errors.New("DNS-SD config must contain at least one SRV record name")
94
	}
95
	switch strings.ToUpper(c.Type) {
96
	case "SRV":
97
	case "A", "AAAA", "MX", "NS":
98
		if c.Port == 0 {
99
			return errors.New("a port is required in DNS-SD configs for all record types except SRV")
100
		}
101
	default:
102
		return fmt.Errorf("invalid DNS-SD records type %s", c.Type)
103
	}
104
	return nil
105
}
106

107
// Discovery periodically performs DNS-SD requests. It implements
108
// the Discoverer interface.
109
type Discovery struct {
110
	*refresh.Discovery
111
	names   []string
112
	port    int
113
	qtype   uint16
114
	logger  log.Logger
115
	metrics *dnsMetrics
116

117
	lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
118
}
119

120
// NewDiscovery returns a new Discovery which periodically refreshes its targets.
121
func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
122
	m, ok := metrics.(*dnsMetrics)
123
	if !ok {
124
		return nil, fmt.Errorf("invalid discovery metrics type")
125
	}
126

127
	if logger == nil {
128
		logger = log.NewNopLogger()
129
	}
130

131
	qtype := dns.TypeSRV
132
	switch strings.ToUpper(conf.Type) {
133
	case "A":
134
		qtype = dns.TypeA
135
	case "AAAA":
136
		qtype = dns.TypeAAAA
137
	case "SRV":
138
		qtype = dns.TypeSRV
139
	case "MX":
140
		qtype = dns.TypeMX
141
	case "NS":
142
		qtype = dns.TypeNS
143
	}
144
	d := &Discovery{
145
		names:    conf.Names,
146
		qtype:    qtype,
147
		port:     conf.Port,
148
		logger:   logger,
149
		lookupFn: lookupWithSearchPath,
150
		metrics:  m,
151
	}
152

153
	d.Discovery = refresh.NewDiscovery(
154
		refresh.Options{
155
			Logger:              logger,
156
			Mech:                "dns",
157
			Interval:            time.Duration(conf.RefreshInterval),
158
			RefreshF:            d.refresh,
159
			MetricsInstantiator: m.refreshMetrics,
160
		},
161
	)
162

163
	return d, nil
164
}
165

166
func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
167
	var (
168
		wg  sync.WaitGroup
169
		ch  = make(chan *targetgroup.Group)
170
		tgs = make([]*targetgroup.Group, 0, len(d.names))
171
	)
172

173
	wg.Add(len(d.names))
174
	for _, name := range d.names {
175
		go func(n string) {
176
			if err := d.refreshOne(ctx, n, ch); err != nil && !errors.Is(err, context.Canceled) {
177
				level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err)
178
			}
179
			wg.Done()
180
		}(name)
181
	}
182

183
	go func() {
184
		wg.Wait()
185
		close(ch)
186
	}()
187

188
	for tg := range ch {
189
		tgs = append(tgs, tg)
190
	}
191
	return tgs, nil
192
}
193

194
func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targetgroup.Group) error {
195
	response, err := d.lookupFn(name, d.qtype, d.logger)
196
	d.metrics.dnsSDLookupsCount.Inc()
197
	if err != nil {
198
		d.metrics.dnsSDLookupFailuresCount.Inc()
199
		return err
200
	}
201

202
	tg := &targetgroup.Group{}
203
	hostPort := func(a string, p int) model.LabelValue {
204
		return model.LabelValue(net.JoinHostPort(a, strconv.Itoa(p)))
205
	}
206

207
	for _, record := range response.Answer {
208
		var target, dnsSrvRecordTarget, dnsSrvRecordPort, dnsMxRecordTarget, dnsNsRecordTarget model.LabelValue
209

210
		switch addr := record.(type) {
211
		case *dns.SRV:
212
			dnsSrvRecordTarget = model.LabelValue(addr.Target)
213
			dnsSrvRecordPort = model.LabelValue(strconv.Itoa(int(addr.Port)))
214

215
			// Remove the final dot from rooted DNS names to make them look more usual.
216
			addr.Target = strings.TrimRight(addr.Target, ".")
217

218
			target = hostPort(addr.Target, int(addr.Port))
219
		case *dns.MX:
220
			dnsMxRecordTarget = model.LabelValue(addr.Mx)
221

222
			// Remove the final dot from rooted DNS names to make them look more usual.
223
			addr.Mx = strings.TrimRight(addr.Mx, ".")
224

225
			target = hostPort(addr.Mx, d.port)
226
		case *dns.NS:
227
			dnsNsRecordTarget = model.LabelValue(addr.Ns)
228

229
			// Remove the final dot from rooted DNS names to make them look more usual.
230
			addr.Ns = strings.TrimRight(addr.Ns, ".")
231

232
			target = hostPort(addr.Ns, d.port)
233
		case *dns.A:
234
			target = hostPort(addr.A.String(), d.port)
235
		case *dns.AAAA:
236
			target = hostPort(addr.AAAA.String(), d.port)
237
		case *dns.CNAME:
238
			// CNAME responses can occur with "Type: A" dns_sd_config requests.
239
			continue
240
		default:
241
			level.Warn(d.logger).Log("msg", "Invalid record", "record", record)
242
			continue
243
		}
244
		tg.Targets = append(tg.Targets, model.LabelSet{
245
			model.AddressLabel:      target,
246
			dnsNameLabel:            model.LabelValue(name),
247
			dnsSrvRecordTargetLabel: dnsSrvRecordTarget,
248
			dnsSrvRecordPortLabel:   dnsSrvRecordPort,
249
			dnsMxRecordTargetLabel:  dnsMxRecordTarget,
250
			dnsNsRecordTargetLabel:  dnsNsRecordTarget,
251
		})
252
	}
253

254
	tg.Source = name
255
	select {
256
	case <-ctx.Done():
257
		return ctx.Err()
258
	case ch <- tg:
259
	}
260

261
	return nil
262
}
263

264
// lookupWithSearchPath tries to get an answer for various permutations of
265
// the given name, appending the system-configured search path as necessary.
266
//
267
// There are three possible outcomes:
268
//
269
//  1. One of the permutations of the given name is recognized as
270
//     "valid" by the DNS, in which case we consider ourselves "done"
271
//     and that answer is returned.  Note that, due to the way the DNS
272
//     handles "name has resource records, but none of the specified type",
273
//     the answer received may have an empty set of results.
274
//
275
//  2. All of the permutations of the given name are responded to by one of
276
//     the servers in the "nameservers" list with the answer "that name does
277
//     not exist" (NXDOMAIN).  In that case, it can be considered
278
//     pseudo-authoritative that there are no records for that name.
279
//
280
//  3. One or more of the names was responded to by all servers with some
281
//     sort of error indication.  In that case, we can't know if, in fact,
282
//     there are records for the name or not, so whatever state the
283
//     configuration is in, we should keep it that way until we know for
284
//     sure (by, presumably, all the names getting answers in the future).
285
//
286
// Outcomes 1 and 2 are indicated by a valid response message (possibly an
287
// empty one) and no error.  Outcome 3 is indicated by an error return.  The
288
// error will be generic-looking, because trying to return all the errors
289
// returned by the combination of all name permutations and servers is a
290
// nightmare.
291
func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
292
	conf, err := dns.ClientConfigFromFile(resolvConf)
293
	if err != nil {
294
		return nil, fmt.Errorf("could not load resolv.conf: %w", err)
295
	}
296

297
	allResponsesValid := true
298

299
	for _, lname := range conf.NameList(name) {
300
		response, err := lookupFromAnyServer(lname, qtype, conf, logger)
301

302
		switch {
303
		case err != nil:
304
			// We can't go home yet, because a later name
305
			// may give us a valid, successful answer.  However
306
			// we can no longer say "this name definitely doesn't
307
			// exist", because we did not get that answer for
308
			// at least one name.
309
			allResponsesValid = false
310
		case response.Rcode == dns.RcodeSuccess:
311
			// Outcome 1: GOLD!
312
			return response, nil
313
		}
314
	}
315

316
	if allResponsesValid {
317
		// Outcome 2: everyone says NXDOMAIN, that's good enough for me.
318
		return &dns.Msg{}, nil
319
	}
320
	// Outcome 3: boned.
321
	return nil, fmt.Errorf("could not resolve %q: all servers responded with errors to at least one search domain", name)
322
}
323

324
// lookupFromAnyServer uses all configured servers to try and resolve a specific
325
// name.  If a viable answer is received from a server, then it is
326
// immediately returned, otherwise the other servers in the config are
327
// tried, and if none of them return a viable answer, an error is returned.
328
//
329
// A "viable answer" is one which indicates either:
330
//
331
//  1. "yes, I know that name, and here are its records of the requested type"
332
//     (RCODE==SUCCESS, ANCOUNT > 0);
333
//  2. "yes, I know that name, but it has no records of the requested type"
334
//     (RCODE==SUCCESS, ANCOUNT==0); or
335
//  3. "I know that name doesn't exist" (RCODE==NXDOMAIN).
336
//
337
// A non-viable answer is "anything else", which encompasses both various
338
// system-level problems (like network timeouts) and also
339
// valid-but-unexpected DNS responses (SERVFAIL, REFUSED, etc).
340
func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger log.Logger) (*dns.Msg, error) {
341
	client := &dns.Client{}
342

343
	for _, server := range conf.Servers {
344
		servAddr := net.JoinHostPort(server, conf.Port)
345
		msg, err := askServerForName(name, qtype, client, servAddr, true)
346
		if err != nil {
347
			level.Warn(logger).Log("msg", "DNS resolution failed", "server", server, "name", name, "err", err)
348
			continue
349
		}
350

351
		if msg.Rcode == dns.RcodeSuccess || msg.Rcode == dns.RcodeNameError {
352
			// We have our answer.  Time to go home.
353
			return msg, nil
354
		}
355
	}
356

357
	return nil, fmt.Errorf("could not resolve %s: no servers returned a viable answer", name)
358
}
359

360
// askServerForName makes a request to a specific DNS server for a specific
361
// name (and qtype).  Retries with TCP in the event of response truncation,
362
// but otherwise just sends back whatever the server gave, whether that be a
363
// valid-looking response, or an error.
364
func askServerForName(name string, queryType uint16, client *dns.Client, servAddr string, edns bool) (*dns.Msg, error) {
365
	msg := &dns.Msg{}
366

367
	msg.SetQuestion(dns.Fqdn(name), queryType)
368
	if edns {
369
		msg.SetEdns0(dns.DefaultMsgSize, false)
370
	}
371

372
	response, _, err := client.Exchange(msg, servAddr)
373
	if err != nil {
374
		return nil, err
375
	}
376

377
	if response.Truncated {
378
		if client.Net == "tcp" {
379
			return nil, errors.New("got truncated message on TCP (64kiB limit exceeded?)")
380
		}
381

382
		client.Net = "tcp"
383
		return askServerForName(name, queryType, client, servAddr, false)
384
	}
385

386
	return response, nil
387
}
388

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.