inspektor-gadget

Форк
0
354 строки · 10.8 Кб
1
// SPDX-License-Identifier: GPL-2.0
2
/* Copyright (c) 2021 The Inspektor Gadget authors */
3

4
#include <linux/bpf.h>
5
#include <linux/if_ether.h>
6
#include <linux/ip.h>
7
#include <linux/in.h>
8
#include <linux/udp.h>
9
#include <sys/socket.h>
10

11
#include <bpf/bpf_helpers.h>
12
#include <bpf/bpf_endian.h>
13

14
#define GADGET_TYPE_NETWORKING
15
#include <gadget/sockets-map.h>
16

17
#include "dns-common.h"
18

19
// Offset of the DNS header from the start of the frame. Assumes an Ethernet
// header followed by a fixed-size struct iphdr and a UDP header.
#define DNS_OFF (ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr))

// Record class and type values. Each definition is kept on a single line:
// with a backslash continuation the macro value is whatever happens to be on
// the next physical line.
// https://datatracker.ietf.org/doc/html/rfc1035#section-3.2.4
#define DNS_CLASS_IN 1
// https://datatracker.ietf.org/doc/html/rfc1035#section-3.2.2
#define DNS_TYPE_A 1
// https://www.rfc-editor.org/rfc/rfc3596#section-2.1
#define DNS_TYPE_AAAA 28

// Values compared against skb->pkt_type. Only defined here when no included
// header already provides them.
#ifndef PACKET_HOST
#define PACKET_HOST 0x0
#endif

#ifndef PACKET_OUTGOING
#define PACKET_OUTGOING 0x4
#endif

// Values of the QR bit in the DNS header flags.
#define DNS_QR_QUERY 0
#define DNS_QR_RESP 1
37

38
// Not referenced by any code in this file: the declaration exists only so
// that the compiler doesn't remove struct event_t.
const struct event_t *unusedevent __attribute__((unused));
40

41
struct {
42
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
43
} events SEC(".maps");
44

45
// https://datatracker.ietf.org/doc/html/rfc1035#section-4.1.1
46
union dnsflags {
47
	struct {
48
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
49
		__u8 rcode : 4; // response code
50
		__u8 z : 3; // reserved
51
		__u8 ra : 1; // recursion available
52
		__u8 rd : 1; // recursion desired
53
		__u8 tc : 1; // truncation
54
		__u8 aa : 1; // authoritive answer
55
		__u8 opcode : 4; // kind of query
56
		__u8 qr : 1; // 0=query; 1=response
57
#elif __BYTE_ORDER == __ORDER_BIG_ENDIAN__
58
		__u8 qr : 1; // 0=query; 1=response
59
		__u8 opcode : 4; // kind of query
60
		__u8 aa : 1; // authoritive answer
61
		__u8 tc : 1; // truncation
62
		__u8 rd : 1; // recursion desired
63
		__u8 ra : 1; // recursion available
64
		__u8 z : 3; // reserved
65
		__u8 rcode : 4; // response code
66
#else
67
#error "Fix your compiler's __BYTE_ORDER__?!"
68
#endif
69
	};
70
	__u16 flags;
71
};
72

73
struct dnshdr {
74
	__u16 id;
75

76
	union dnsflags flags;
77

78
	__u16 qdcount; // number of question entries
79
	__u16 ancount; // number of answer entries
80
	__u16 nscount; // number of authority records
81
	__u16 arcount; // number of additional records
82
};
83

84
// DNS resource record
85
// https://datatracker.ietf.org/doc/html/rfc1035#section-4.1.3
86
#pragma pack(2)
87
struct dnsrr {
88
	__u16 name; // Two octets when using message compression, see https://datatracker.ietf.org/doc/html/rfc1035#section-4.1.4
89
	__u16 type;
90
	__u16 class;
91
	__u32 ttl;
92
	__u16 rdlength;
93
	// Followed by rdata
94
};
95

96
// The stack is limited, so use a map to build the event
97
struct {
98
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
99
	__uint(max_entries, 1);
100
	__type(key, __u32);
101
	__type(value, struct event_t);
102
} tmp_event SEC(".maps");
103

104
// Map of DNS query to timestamp so we can calculate latency from query sent to answer received.
105
struct query_key_t {
106
	__u64 pid_tgid;
107
	__u16 id;
108
};
109

110
struct {
111
	__uint(type, BPF_MAP_TYPE_HASH);
112
	__type(key, struct query_key_t);
113
	__type(value, __u64); // timestamp of the query
114
	__uint(max_entries, 1024);
115
} query_map SEC(".maps");
116

117
// Compute the length of the name that starts right after the DNS header.
//
// The name is walked label by label (one length octet followed by that many
// bytes) until a zero length octet is found or MAX_DNS_NAME bytes have been
// visited.
//
// Returns the number of bytes that precede the zero length octet, capped at
// MAX_DNS_NAME.
static __always_inline __u32 dns_name_length(struct __sk_buff *skb)
{
	unsigned int pos;
	unsigned int pending = 0; // bytes of the current label still to step over

	for (pos = 0; pos < MAX_DNS_NAME; pos++) {
		if (pending > 0) {
			pending--;
			continue;
		}

		int len_octet = load_byte(
			skb, DNS_OFF + sizeof(struct dnshdr) + pos);
		if (len_octet == 0)
			break;

		// Advancing pos by len_octet directly gives verifier errors,
		// so the label is stepped over one byte per iteration.
		pending = len_octet;
	}

	if (pos >= MAX_DNS_NAME)
		return MAX_DNS_NAME;
	return pos;
}
139

140
// Save the IPv4 and IPv6 addresses in event->anaddr. Returns the number of saved addresses.
141
static __always_inline int load_addresses(struct __sk_buff *skb, int ancount,
142
					  int anoffset, struct event_t *event)
143
{
144
	int rroffset = anoffset;
145
	int index = 0;
146
	for (int i = 0; i < ancount && i < MAX_ADDR_ANSWERS; i++) {
147
		__u16 rrname =
148
			load_byte(skb, rroffset + offsetof(struct dnsrr, name));
149

150
		// In most cases, the name will be compressed to two octets (indicated by first two bits 0b11).
151
		// The offset calculations below assume compression, so exit early if the name isn't compressed.
152
		if ((rrname & 0xf0) != 0xc0)
153
			return 0;
154

155
		// Safe to assume that all answers refer to the same domain name
156
		// because we verified earlier that there's exactly one question.
157

158
		__u16 rrtype =
159
			load_half(skb, rroffset + offsetof(struct dnsrr, type));
160
		__u16 rrclass = load_half(skb, rroffset + offsetof(struct dnsrr,
161
								   class));
162
		__u16 rdlength = load_half(
163
			skb, rroffset + offsetof(struct dnsrr, rdlength));
164

165
		if (rrtype == DNS_TYPE_A && rrclass == DNS_CLASS_IN &&
166
		    rdlength == 4) {
167
			// A record contains an IPv4 address.
168
			// Encode this as IPv4-mapped-IPv6 in the BPF event (::ffff:<ipv4>)
169
			// https://datatracker.ietf.org/doc/html/rfc4291#section-2.5.5.2
170
			__builtin_memset(&event->anaddr[index][0], 0x0, 10);
171
			__builtin_memset(&event->anaddr[index][10], 0xff, 2);
172
			bpf_skb_load_bytes(skb, rroffset + sizeof(struct dnsrr),
173
					   &event->anaddr[index][12], rdlength);
174
			index++;
175
		} else if (rrtype == DNS_TYPE_AAAA && rrclass == DNS_CLASS_IN &&
176
			   rdlength == 16) {
177
			// AAAA record contains an IPv6 address.
178
			bpf_skb_load_bytes(skb, rroffset + sizeof(struct dnsrr),
179
					   &event->anaddr[index][0], rdlength);
180
			index++;
181
		}
182
		rroffset += sizeof(struct dnsrr) + rdlength;
183
	}
184
	return index;
185
}
186

187
static __always_inline int output_dns_event(struct __sk_buff *skb,
188
					    union dnsflags flags,
189
					    __u32 name_len, __u16 ancount)
190
{
191
	__u32 zero = 0;
192
	struct event_t *event = bpf_map_lookup_elem(&tmp_event, &zero);
193
	if (!event)
194
		return 0;
195

196
	__builtin_memset(event, 0, sizeof(*event));
197

198
	event->netns = skb->cb[0]; // cb[0] initialized by dispatcher.bpf.c
199
	event->timestamp = bpf_ktime_get_boot_ns();
200
	event->id = load_half(skb, DNS_OFF + offsetof(struct dnshdr, id));
201
	event->af = AF_INET;
202
	event->daddr_v4 =
203
		load_word(skb, ETH_HLEN + offsetof(struct iphdr, daddr));
204
	event->saddr_v4 =
205
		load_word(skb, ETH_HLEN + offsetof(struct iphdr, saddr));
206
	// load_word converts from network to host endianness. Convert back to
207
	// network endianness because inet_ntop() requires it.
208
	event->daddr_v4 = bpf_htonl(event->daddr_v4);
209
	event->saddr_v4 = bpf_htonl(event->saddr_v4);
210

211
	// Check network protocol.
212
	// This only works with IPv4.
213
	// For IPv6, gadget_socket_lookup() in sockets-map.h
214
	// provides an example how to parse ip/ports on IPv6.
215
	event->proto =
216
		load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
217
	if (event->proto == IPPROTO_TCP) {
218
		event->sport =
219
			load_half(skb, ETH_HLEN + sizeof(struct iphdr) +
220
					       offsetof(struct tcphdr, source));
221
		event->dport =
222
			load_half(skb, ETH_HLEN + sizeof(struct iphdr) +
223
					       offsetof(struct tcphdr, dest));
224
	} else if (event->proto == IPPROTO_UDP) {
225
		event->sport =
226
			load_half(skb, ETH_HLEN + sizeof(struct iphdr) +
227
					       offsetof(struct udphdr, source));
228
		event->dport =
229
			load_half(skb, ETH_HLEN + sizeof(struct iphdr) +
230
					       offsetof(struct udphdr, dest));
231
	}
232

233
	event->qr = flags.qr;
234

235
	if (flags.qr == 1) {
236
		// Response code set only for replies.
237
		event->rcode = flags.rcode;
238
	}
239

240
	bpf_skb_load_bytes(skb, DNS_OFF + sizeof(struct dnshdr), event->name,
241
			   name_len);
242

243
	event->pkt_type = skb->pkt_type;
244

245
	// Read QTYPE right after the QNAME (name_len + the zero length octet)
246
	// https://datatracker.ietf.org/doc/html/rfc1035#section-4.1.2
247
	event->qtype =
248
		load_half(skb, DNS_OFF + sizeof(struct dnshdr) + name_len + 1);
249

250
	// Enrich event with process metadata
251
	struct sockets_value *skb_val = gadget_socket_lookup(skb);
252
	if (skb_val != NULL) {
253
		event->mount_ns_id = skb_val->mntns;
254
		event->pid = skb_val->pid_tgid >> 32;
255
		event->tid = (__u32)skb_val->pid_tgid;
256
		__builtin_memcpy(&event->task, skb_val->task,
257
				 sizeof(event->task));
258
		event->uid = (__u32)skb_val->uid_gid;
259
		event->gid = (__u32)(skb_val->uid_gid >> 32);
260
	}
261

262
	event->ancount = ancount;
263

264
	// DNS answers start immediately after qname (name_len octets)
265
	// + the zero length octet + qtype (2 octets) + qclass (2 octets).
266
	int anoffset = DNS_OFF + sizeof(struct dnshdr) + name_len + 5;
267
	int anaddrcount = load_addresses(skb, ancount, anoffset, event);
268
	event->anaddrcount = anaddrcount;
269

270
	// Calculate latency:
271
	//
272
	// Track the latency from when a query is sent from a container
273
	// to when a response to the query is received by that same container.
274
	//
275
	// * On DNS query sent from a container namespace (qr == DNS_QR_QUERY and pkt_type == OUTGOING),
276
	//   store the query timestamp in a map.
277
	//
278
	// * On DNS response received in the same container namespace (qr == DNS_QR_RESP and pkt_type == HOST)
279
	//   retrieve/delete the query timestamp and set the latency field on the event.
280
	//
281
	// A garbage collection thread running in userspace periodically scans for keys with old timestamps
282
	// to free space occupied by queries that never receive a response.
283
	//
284
	// Skip this if skb_val == NULL (gadget_socket_lookup did not set pid_tgid we use in the query key)
285
	// or if event->timestamp == 0 (kernels before 5.8 don't support bpf_ktime_get_boot_ns, and the patched
286
	// version IG injects always returns zero).
287
	if (skb_val != NULL && event->timestamp > 0) {
288
		struct query_key_t query_key = {
289
			.pid_tgid = skb_val->pid_tgid,
290
			.id = event->id,
291
		};
292
		if (event->qr == DNS_QR_QUERY &&
293
		    event->pkt_type == PACKET_OUTGOING) {
294
			bpf_map_update_elem(&query_map, &query_key,
295
					    &event->timestamp, BPF_NOEXIST);
296
		} else if (event->qr == DNS_QR_RESP &&
297
			   event->pkt_type == PACKET_HOST) {
298
			__u64 *query_ts =
299
				bpf_map_lookup_elem(&query_map, &query_key);
300
			if (query_ts != NULL) {
301
				// query ts should always be less than the event ts, but check anyway to be safe.
302
				if (*query_ts < event->timestamp) {
303
					event->latency_ns =
304
						event->timestamp - *query_ts;
305
				}
306
				bpf_map_delete_elem(&query_map, &query_key);
307
			}
308
		}
309
	}
310

311
	// size of full structure - addresses + only used addresses
312
	unsigned long long size =
313
		sizeof(*event) - MAX_ADDR_ANSWERS * 16 + anaddrcount * 16;
314
	bpf_perf_event_output(skb, &events, BPF_F_CURRENT_CPU, event, size);
315

316
	return 0;
317
}
318

319
// Program entry point: decides whether the packet is handled as a DNS
// message and, if so, hands it to output_dns_event().
//
// A packet is handled only when it is IPv4, UDP, has exactly one question
// and a non-empty name. Queries (QR == 0) that carry answer or authority
// records are ignored.
//
// Returns 0 for ignored packets, otherwise the result of
// output_dns_event().
SEC("socket1")
int ig_trace_dns(struct __sk_buff *skb)
{
	// Skip non-IP packets
	__u16 ethertype = load_half(skb, offsetof(struct ethhdr, h_proto));
	if (ethertype != ETH_P_IP)
		return 0;

	// Skip non-UDP packets
	__u8 l4_proto =
		load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
	if (l4_proto != IPPROTO_UDP)
		return 0;

	union dnsflags flags;
	flags.flags = load_half(skb, DNS_OFF + offsetof(struct dnshdr, flags));

	// Only handle DNS packets with exactly one question
	__u16 qdcount =
		load_half(skb, DNS_OFF + offsetof(struct dnshdr, qdcount));
	if (qdcount != 1)
		return 0;

	__u16 ancount =
		load_half(skb, DNS_OFF + offsetof(struct dnshdr, ancount));
	__u16 nscount =
		load_half(skb, DNS_OFF + offsetof(struct dnshdr, nscount));

	// Skip DNS queries with answers
	if (flags.qr == DNS_QR_QUERY && (ancount != 0 || nscount != 0))
		return 0;

	__u32 name_len = dns_name_length(skb);
	if (name_len == 0)
		return 0;

	return output_dns_event(skb, flags, name_len, ancount);
}
353

354
// License string of this program, placed via SEC() under the name "license".
char _license[] SEC("license") = "GPL";
355

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.