prometheus
387 строк · 12.0 Кб
1// Copyright 2016 The Prometheus Authors
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14package dns
15
16import (
17"context"
18"errors"
19"fmt"
20"net"
21"strconv"
22"strings"
23"sync"
24"time"
25
26"github.com/go-kit/log"
27"github.com/go-kit/log/level"
28"github.com/miekg/dns"
29"github.com/prometheus/client_golang/prometheus"
30"github.com/prometheus/common/model"
31
32"github.com/prometheus/prometheus/discovery"
33"github.com/prometheus/prometheus/discovery/refresh"
34"github.com/prometheus/prometheus/discovery/targetgroup"
35)
36
37const (
38resolvConf = "/etc/resolv.conf"
39
40dnsNameLabel = model.MetaLabelPrefix + "dns_name"
41dnsSrvRecordPrefix = model.MetaLabelPrefix + "dns_srv_record_"
42dnsSrvRecordTargetLabel = dnsSrvRecordPrefix + "target"
43dnsSrvRecordPortLabel = dnsSrvRecordPrefix + "port"
44dnsMxRecordPrefix = model.MetaLabelPrefix + "dns_mx_record_"
45dnsMxRecordTargetLabel = dnsMxRecordPrefix + "target"
46dnsNsRecordPrefix = model.MetaLabelPrefix + "dns_ns_record_"
47dnsNsRecordTargetLabel = dnsNsRecordPrefix + "target"
48
49// Constants for instrumentation.
50namespace = "prometheus"
51)
52
53// DefaultSDConfig is the default DNS SD configuration.
54var DefaultSDConfig = SDConfig{
55RefreshInterval: model.Duration(30 * time.Second),
56Type: "SRV",
57}
58
59func init() {
60discovery.RegisterConfig(&SDConfig{})
61}
62
// SDConfig is the configuration for DNS based service discovery.
type SDConfig struct {
	// Names lists the DNS names to look up. UnmarshalYAML rejects a
	// configuration in which this list is empty.
	Names []string `yaml:"names"`

	// RefreshInterval is the period between two refreshes of the target
	// list. DefaultSDConfig sets it to 30 seconds.
	RefreshInterval model.Duration `yaml:"refresh_interval,omitempty"`

	// Type is the DNS record type to query. UnmarshalYAML accepts SRV, A,
	// AAAA, MX and NS, compared case-insensitively. DefaultSDConfig sets it
	// to SRV.
	Type string `yaml:"type"`

	// Port is the port combined with the discovered host to form the target
	// address. UnmarshalYAML requires it to be non-zero for every record
	// type except SRV.
	Port int `yaml:"port"` // Ignored for SRV records
}
70
71// NewDiscovererMetrics implements discovery.Config.
72func (*SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics {
73return newDiscovererMetrics(reg, rmi)
74}
75
76// Name returns the name of the Config.
77func (*SDConfig) Name() string { return "dns" }
78
79// NewDiscoverer returns a Discoverer for the Config.
80func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) {
81return NewDiscovery(*c, opts.Logger, opts.Metrics)
82}
83
84// UnmarshalYAML implements the yaml.Unmarshaler interface.
85func (c *SDConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
86*c = DefaultSDConfig
87type plain SDConfig
88err := unmarshal((*plain)(c))
89if err != nil {
90return err
91}
92if len(c.Names) == 0 {
93return errors.New("DNS-SD config must contain at least one SRV record name")
94}
95switch strings.ToUpper(c.Type) {
96case "SRV":
97case "A", "AAAA", "MX", "NS":
98if c.Port == 0 {
99return errors.New("a port is required in DNS-SD configs for all record types except SRV")
100}
101default:
102return fmt.Errorf("invalid DNS-SD records type %s", c.Type)
103}
104return nil
105}
106
// Discovery periodically performs DNS-SD requests. It implements
// the Discoverer interface.
type Discovery struct {
	// Discovery is the embedded refresh-based discoverer; NewDiscovery
	// configures it to call the refresh method of this type.
	*refresh.Discovery

	// names are the DNS names looked up on every refresh.
	names []string

	// port is the configured port, used for MX, NS, A and AAAA records. SRV
	// records use the port contained in the record instead.
	port int

	// qtype is the DNS query type derived from the configured record type.
	qtype uint16

	// logger receives lookup errors and warnings about unexpected records.
	logger log.Logger

	// metrics holds the lookup and lookup-failure counters.
	metrics *dnsMetrics

	// lookupFn resolves a single name. NewDiscovery sets it to
	// lookupWithSearchPath.
	lookupFn func(name string, qtype uint16, logger log.Logger) (*dns.Msg, error)
}
119
120// NewDiscovery returns a new Discovery which periodically refreshes its targets.
121func NewDiscovery(conf SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) {
122m, ok := metrics.(*dnsMetrics)
123if !ok {
124return nil, fmt.Errorf("invalid discovery metrics type")
125}
126
127if logger == nil {
128logger = log.NewNopLogger()
129}
130
131qtype := dns.TypeSRV
132switch strings.ToUpper(conf.Type) {
133case "A":
134qtype = dns.TypeA
135case "AAAA":
136qtype = dns.TypeAAAA
137case "SRV":
138qtype = dns.TypeSRV
139case "MX":
140qtype = dns.TypeMX
141case "NS":
142qtype = dns.TypeNS
143}
144d := &Discovery{
145names: conf.Names,
146qtype: qtype,
147port: conf.Port,
148logger: logger,
149lookupFn: lookupWithSearchPath,
150metrics: m,
151}
152
153d.Discovery = refresh.NewDiscovery(
154refresh.Options{
155Logger: logger,
156Mech: "dns",
157Interval: time.Duration(conf.RefreshInterval),
158RefreshF: d.refresh,
159MetricsInstantiator: m.refreshMetrics,
160},
161)
162
163return d, nil
164}
165
166func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) {
167var (
168wg sync.WaitGroup
169ch = make(chan *targetgroup.Group)
170tgs = make([]*targetgroup.Group, 0, len(d.names))
171)
172
173wg.Add(len(d.names))
174for _, name := range d.names {
175go func(n string) {
176if err := d.refreshOne(ctx, n, ch); err != nil && !errors.Is(err, context.Canceled) {
177level.Error(d.logger).Log("msg", "Error refreshing DNS targets", "err", err)
178}
179wg.Done()
180}(name)
181}
182
183go func() {
184wg.Wait()
185close(ch)
186}()
187
188for tg := range ch {
189tgs = append(tgs, tg)
190}
191return tgs, nil
192}
193
194func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targetgroup.Group) error {
195response, err := d.lookupFn(name, d.qtype, d.logger)
196d.metrics.dnsSDLookupsCount.Inc()
197if err != nil {
198d.metrics.dnsSDLookupFailuresCount.Inc()
199return err
200}
201
202tg := &targetgroup.Group{}
203hostPort := func(a string, p int) model.LabelValue {
204return model.LabelValue(net.JoinHostPort(a, strconv.Itoa(p)))
205}
206
207for _, record := range response.Answer {
208var target, dnsSrvRecordTarget, dnsSrvRecordPort, dnsMxRecordTarget, dnsNsRecordTarget model.LabelValue
209
210switch addr := record.(type) {
211case *dns.SRV:
212dnsSrvRecordTarget = model.LabelValue(addr.Target)
213dnsSrvRecordPort = model.LabelValue(strconv.Itoa(int(addr.Port)))
214
215// Remove the final dot from rooted DNS names to make them look more usual.
216addr.Target = strings.TrimRight(addr.Target, ".")
217
218target = hostPort(addr.Target, int(addr.Port))
219case *dns.MX:
220dnsMxRecordTarget = model.LabelValue(addr.Mx)
221
222// Remove the final dot from rooted DNS names to make them look more usual.
223addr.Mx = strings.TrimRight(addr.Mx, ".")
224
225target = hostPort(addr.Mx, d.port)
226case *dns.NS:
227dnsNsRecordTarget = model.LabelValue(addr.Ns)
228
229// Remove the final dot from rooted DNS names to make them look more usual.
230addr.Ns = strings.TrimRight(addr.Ns, ".")
231
232target = hostPort(addr.Ns, d.port)
233case *dns.A:
234target = hostPort(addr.A.String(), d.port)
235case *dns.AAAA:
236target = hostPort(addr.AAAA.String(), d.port)
237case *dns.CNAME:
238// CNAME responses can occur with "Type: A" dns_sd_config requests.
239continue
240default:
241level.Warn(d.logger).Log("msg", "Invalid record", "record", record)
242continue
243}
244tg.Targets = append(tg.Targets, model.LabelSet{
245model.AddressLabel: target,
246dnsNameLabel: model.LabelValue(name),
247dnsSrvRecordTargetLabel: dnsSrvRecordTarget,
248dnsSrvRecordPortLabel: dnsSrvRecordPort,
249dnsMxRecordTargetLabel: dnsMxRecordTarget,
250dnsNsRecordTargetLabel: dnsNsRecordTarget,
251})
252}
253
254tg.Source = name
255select {
256case <-ctx.Done():
257return ctx.Err()
258case ch <- tg:
259}
260
261return nil
262}
263
264// lookupWithSearchPath tries to get an answer for various permutations of
265// the given name, appending the system-configured search path as necessary.
266//
267// There are three possible outcomes:
268//
269// 1. One of the permutations of the given name is recognized as
270// "valid" by the DNS, in which case we consider ourselves "done"
271// and that answer is returned. Note that, due to the way the DNS
272// handles "name has resource records, but none of the specified type",
273// the answer received may have an empty set of results.
274//
275// 2. All of the permutations of the given name are responded to by one of
276// the servers in the "nameservers" list with the answer "that name does
277// not exist" (NXDOMAIN). In that case, it can be considered
278// pseudo-authoritative that there are no records for that name.
279//
280// 3. One or more of the names was responded to by all servers with some
281// sort of error indication. In that case, we can't know if, in fact,
282// there are records for the name or not, so whatever state the
283// configuration is in, we should keep it that way until we know for
284// sure (by, presumably, all the names getting answers in the future).
285//
286// Outcomes 1 and 2 are indicated by a valid response message (possibly an
287// empty one) and no error. Outcome 3 is indicated by an error return. The
288// error will be generic-looking, because trying to return all the errors
289// returned by the combination of all name permutations and servers is a
290// nightmare.
291func lookupWithSearchPath(name string, qtype uint16, logger log.Logger) (*dns.Msg, error) {
292conf, err := dns.ClientConfigFromFile(resolvConf)
293if err != nil {
294return nil, fmt.Errorf("could not load resolv.conf: %w", err)
295}
296
297allResponsesValid := true
298
299for _, lname := range conf.NameList(name) {
300response, err := lookupFromAnyServer(lname, qtype, conf, logger)
301
302switch {
303case err != nil:
304// We can't go home yet, because a later name
305// may give us a valid, successful answer. However
306// we can no longer say "this name definitely doesn't
307// exist", because we did not get that answer for
308// at least one name.
309allResponsesValid = false
310case response.Rcode == dns.RcodeSuccess:
311// Outcome 1: GOLD!
312return response, nil
313}
314}
315
316if allResponsesValid {
317// Outcome 2: everyone says NXDOMAIN, that's good enough for me.
318return &dns.Msg{}, nil
319}
320// Outcome 3: boned.
321return nil, fmt.Errorf("could not resolve %q: all servers responded with errors to at least one search domain", name)
322}
323
324// lookupFromAnyServer uses all configured servers to try and resolve a specific
325// name. If a viable answer is received from a server, then it is
326// immediately returned, otherwise the other servers in the config are
327// tried, and if none of them return a viable answer, an error is returned.
328//
329// A "viable answer" is one which indicates either:
330//
331// 1. "yes, I know that name, and here are its records of the requested type"
332// (RCODE==SUCCESS, ANCOUNT > 0);
333// 2. "yes, I know that name, but it has no records of the requested type"
334// (RCODE==SUCCESS, ANCOUNT==0); or
335// 3. "I know that name doesn't exist" (RCODE==NXDOMAIN).
336//
337// A non-viable answer is "anything else", which encompasses both various
338// system-level problems (like network timeouts) and also
339// valid-but-unexpected DNS responses (SERVFAIL, REFUSED, etc).
340func lookupFromAnyServer(name string, qtype uint16, conf *dns.ClientConfig, logger log.Logger) (*dns.Msg, error) {
341client := &dns.Client{}
342
343for _, server := range conf.Servers {
344servAddr := net.JoinHostPort(server, conf.Port)
345msg, err := askServerForName(name, qtype, client, servAddr, true)
346if err != nil {
347level.Warn(logger).Log("msg", "DNS resolution failed", "server", server, "name", name, "err", err)
348continue
349}
350
351if msg.Rcode == dns.RcodeSuccess || msg.Rcode == dns.RcodeNameError {
352// We have our answer. Time to go home.
353return msg, nil
354}
355}
356
357return nil, fmt.Errorf("could not resolve %s: no servers returned a viable answer", name)
358}
359
360// askServerForName makes a request to a specific DNS server for a specific
361// name (and qtype). Retries with TCP in the event of response truncation,
362// but otherwise just sends back whatever the server gave, whether that be a
363// valid-looking response, or an error.
364func askServerForName(name string, queryType uint16, client *dns.Client, servAddr string, edns bool) (*dns.Msg, error) {
365msg := &dns.Msg{}
366
367msg.SetQuestion(dns.Fqdn(name), queryType)
368if edns {
369msg.SetEdns0(dns.DefaultMsgSize, false)
370}
371
372response, _, err := client.Exchange(msg, servAddr)
373if err != nil {
374return nil, err
375}
376
377if response.Truncated {
378if client.Net == "tcp" {
379return nil, errors.New("got truncated message on TCP (64kiB limit exceeded?)")
380}
381
382client.Net = "tcp"
383return askServerForName(name, queryType, client, servAddr, false)
384}
385
386return response, nil
387}
388