// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Anton Protopopov
//
// Based on tcpconnect(8) from BCC by Brendan Gregg
#include <vmlinux.h>

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>

#include <gadget/maps.bpf.h>
#include "tcpconnect.h"
#include <gadget/mntns_filter.h>

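// Filtering parameters set by userspace at load time. They are const
// volatile so the loader can rewrite them in .rodata while the verifier
// still treats them as constants (enabling dead-code elimination).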
const volatile int filter_ports[MAX_PORTS];
const volatile int filter_ports_len = 0;
const volatile uid_t filter_uid = -1;
const volatile pid_t filter_pid = 0;
const volatile bool do_count = 0;
const volatile bool calculate_latency = false;
const volatile __u64 targ_min_latency_ns = 0;

/* Defined here because of conflicts with the include files */
#define AF_INET 2
#define AF_INET6 10

// we need this to make sure the compiler doesn't remove our struct
const struct event *unusedevent __attribute__((unused));

// sockets_per_process keeps track of the sockets between:
// - kprobe enter_tcp_connect
// - kretprobe exit_tcp_connect
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, u32); // tid
	__type(value, struct sock *);
} sockets_per_process SEC(".maps");

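// Context captured at connect() time; used once the SYN-ACK arrives to
// build the latency event.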
struct piddata {
	char comm[TASK_COMM_LEN];
	u64 ts;
	u32 pid;
	u32 tid;
	u64 mntns_id;
};

// sockets_latency keeps track of sockets to calculate the latency between:
// - enter_tcp_connect (where the socket is added to the map)
// - handle_tcp_rcv_state_process (where the socket is removed from the map)
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 4096);
	__type(key, struct sock *);
	__type(value, struct piddata);
} sockets_latency SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct ipv4_flow_key);
	__type(value, u64);
} ipv4_count SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct ipv6_flow_key);
	__type(value, u64);
} ipv6_count SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");

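// Returns true when the event should be dropped, i.e. the destination port
// is not in filter_ports. With an empty list, no ports are filtered.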
static __always_inline bool filter_port(__u16 port)
{
	int i;

	if (filter_ports_len == 0)
		return false;

	// This loop was written a bit differently than the upstream one
	// to avoid a verifier error.
	for (i = 0; i < MAX_PORTS; i++) {
		if (i >= filter_ports_len)
			break;
		if (port == filter_ports[i])
			return false;
	}
	return true;
}

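// Shared by the tcp_v4_connect and tcp_v6_connect kprobes: apply the
// pid/uid/mount-namespace filters, then remember the socket either for the
// kretprobe (tracing/counting) or for the SYN-ACK path (latency).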
static __always_inline int enter_tcp_connect(struct pt_regs *ctx,
					     struct sock *sk)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	__u64 uid_gid = bpf_get_current_uid_gid();
	__u32 pid = pid_tgid >> 32;
	__u32 tid = pid_tgid;
	__u64 mntns_id;
	__u32 uid = (u32)uid_gid;
	struct piddata piddata = {};

	if (filter_pid && pid != filter_pid)
		return 0;

	if (filter_uid != (uid_t)-1 && uid != filter_uid)
		return 0;

	mntns_id = gadget_get_mntns_id();

	if (gadget_should_discard_mntns_id(mntns_id))
		return 0;

	if (calculate_latency) {
		bpf_get_current_comm(&piddata.comm, sizeof(piddata.comm));
		piddata.ts = bpf_ktime_get_ns();
		piddata.tid = tid;
		piddata.pid = pid;
		piddata.mntns_id = mntns_id;
		bpf_map_update_elem(&sockets_latency, &sk, &piddata, 0);
	} else {
		bpf_map_update_elem(&sockets_per_process, &tid, &sk, 0);
	}
	return 0;
}

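// The per-flow counters below are shared by all CPUs: entries are created
// on first use via bpf_map_lookup_or_try_init() and bumped atomically.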
static __always_inline void count_v4(struct sock *sk, __u16 dport)
{
	struct ipv4_flow_key key = {};
	static __u64 zero;
	__u64 *val;

	BPF_CORE_READ_INTO(&key.saddr, sk, __sk_common.skc_rcv_saddr);
	BPF_CORE_READ_INTO(&key.daddr, sk, __sk_common.skc_daddr);
	key.dport = dport;
	val = bpf_map_lookup_or_try_init(&ipv4_count, &key, &zero);
	if (val)
		__atomic_add_fetch(val, 1, __ATOMIC_RELAXED);
}

static __always_inline void count_v6(struct sock *sk, __u16 dport)
{
	struct ipv6_flow_key key = {};
	static const __u64 zero;
	__u64 *val;

	BPF_CORE_READ_INTO(&key.saddr, sk,
			   __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
	BPF_CORE_READ_INTO(&key.daddr, sk,
			   __sk_common.skc_v6_daddr.in6_u.u6_addr32);
	key.dport = dport;

	val = bpf_map_lookup_or_try_init(&ipv6_count, &key, &zero);
	if (val)
		__atomic_add_fetch(val, 1, __ATOMIC_RELAXED);
}

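// Byte-order note: in struct sock_common, skc_num (the source port) is kept
// in host byte order, while skc_dport stays in network byte order.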
static __always_inline void trace_v4(struct pt_regs *ctx, pid_t pid,
				     struct sock *sk, __u16 dport,
				     __u64 mntns_id)
{
	struct event event = {};

	__u64 uid_gid = bpf_get_current_uid_gid();

	event.af = AF_INET;
	event.pid = pid;
	event.uid = (u32)uid_gid;
	event.gid = (u32)(uid_gid >> 32);
	BPF_CORE_READ_INTO(&event.saddr_v4, sk, __sk_common.skc_rcv_saddr);
	BPF_CORE_READ_INTO(&event.daddr_v4, sk, __sk_common.skc_daddr);
	event.dport = dport;
	event.sport = BPF_CORE_READ(sk, __sk_common.skc_num);
	event.mntns_id = mntns_id;
	bpf_get_current_comm(event.task, sizeof(event.task));
	event.timestamp = bpf_ktime_get_boot_ns();

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			      sizeof(event));
}

static __always_inline void trace_v6(struct pt_regs *ctx, pid_t pid,
				     struct sock *sk, __u16 dport,
				     __u64 mntns_id)
{
	struct event event = {};

	__u64 uid_gid = bpf_get_current_uid_gid();

	event.af = AF_INET6;
	event.pid = pid;
	event.uid = (u32)uid_gid;
	event.gid = (u32)(uid_gid >> 32);
	event.mntns_id = mntns_id;
	BPF_CORE_READ_INTO(&event.saddr_v6, sk,
			   __sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
	BPF_CORE_READ_INTO(&event.daddr_v6, sk,
			   __sk_common.skc_v6_daddr.in6_u.u6_addr32);
	event.dport = dport;
	event.sport = BPF_CORE_READ(sk, __sk_common.skc_num);
	bpf_get_current_comm(event.task, sizeof(event.task));
	event.timestamp = bpf_ktime_get_boot_ns();

	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			      sizeof(event));
}

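// Shared by the kretprobes: look up the socket saved by enter_tcp_connect
// for this thread and, if connect() did not fail, count or report the
// connection. The map entry is always deleted so no stale socket is left.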
static __always_inline int exit_tcp_connect(struct pt_regs *ctx, int ret,
					    int ip_ver)
{
	__u64 pid_tgid = bpf_get_current_pid_tgid();
	__u32 pid = pid_tgid >> 32;
	__u32 tid = pid_tgid;
	struct sock **skpp;
	struct sock *sk;
	u64 mntns_id;
	__u16 dport;

	skpp = bpf_map_lookup_elem(&sockets_per_process, &tid);
	if (!skpp)
		return 0;

	if (ret)
		goto end;

	sk = *skpp;

	BPF_CORE_READ_INTO(&dport, sk, __sk_common.skc_dport);
	if (filter_port(dport))
		goto end;

	if (do_count) {
		if (ip_ver == 4)
			count_v4(sk, dport);
		else
			count_v6(sk, dport);
	} else {
		mntns_id = gadget_get_mntns_id();

		if (ip_ver == 4)
			trace_v4(ctx, pid, sk, dport, mntns_id);
		else
			trace_v6(ctx, pid, sk, dport, mntns_id);
	}

end:
	bpf_map_delete_elem(&sockets_per_process, &tid);
	return 0;
}

static __always_inline int cleanup_sockets_latency_map(const struct sock *sk)
{
	bpf_map_delete_elem(&sockets_latency, &sk);
	return 0;
}

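// Runs on tcp_rcv_state_process(): a socket still in TCP_SYN_SENT here is
// processing its SYN-ACK, so the connect() latency is the delta between now
// and the timestamp recorded in enter_tcp_connect.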
static __always_inline int handle_tcp_rcv_state_process(void *ctx,
							 struct sock *sk)
{
	struct piddata *piddatap;
	struct event event = {};
	u64 ts;

	if (BPF_CORE_READ(sk, __sk_common.skc_state) != TCP_SYN_SENT)
		return 0;

	piddatap = bpf_map_lookup_elem(&sockets_latency, &sk);
	if (!piddatap)
		return 0;

	ts = bpf_ktime_get_ns();
	if (ts < piddatap->ts)
		goto cleanup;

	event.latency = ts - piddatap->ts;
	if (targ_min_latency_ns && event.latency < targ_min_latency_ns)
		goto cleanup;
	__builtin_memcpy(&event.task, piddatap->comm, sizeof(event.task));
	event.pid = piddatap->pid;
	event.mntns_id = piddatap->mntns_id;
	event.sport = BPF_CORE_READ(sk, __sk_common.skc_num);
	event.dport = BPF_CORE_READ(sk, __sk_common.skc_dport);
	event.af = BPF_CORE_READ(sk, __sk_common.skc_family);
	if (event.af == AF_INET) {
		event.saddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr);
		event.daddr_v4 = BPF_CORE_READ(sk, __sk_common.skc_daddr);
	} else {
		BPF_CORE_READ_INTO(
			&event.saddr_v6, sk,
			__sk_common.skc_v6_rcv_saddr.in6_u.u6_addr32);
		BPF_CORE_READ_INTO(&event.daddr_v6, sk,
				   __sk_common.skc_v6_daddr.in6_u.u6_addr32);
	}
	event.timestamp = bpf_ktime_get_boot_ns();
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			      sizeof(event));

cleanup:
	return cleanup_sockets_latency_map(sk);
}

SEC("kprobe/tcp_v4_connect")
int BPF_KPROBE(ig_tcpc_v4_co_e, struct sock *sk)
{
	return enter_tcp_connect(ctx, sk);
}

// This kretprobe is only attached if calculate_latency is false
SEC("kretprobe/tcp_v4_connect")
int BPF_KRETPROBE(ig_tcpc_v4_co_x, int ret)
{
	return exit_tcp_connect(ctx, ret, 4);
}

SEC("kprobe/tcp_v6_connect")
int BPF_KPROBE(ig_tcpc_v6_co_e, struct sock *sk)
{
	return enter_tcp_connect(ctx, sk);
}

// This kretprobe is only attached if calculate_latency is false
SEC("kretprobe/tcp_v6_connect")
int BPF_KRETPROBE(ig_tcpc_v6_co_x, int ret)
{
	return exit_tcp_connect(ctx, ret, 6);
}

// This kprobe is only attached if calculate_latency is true
SEC("kprobe/tcp_rcv_state_process")
int BPF_KPROBE(ig_tcp_rsp, struct sock *sk)
{
	return handle_tcp_rcv_state_process(ctx, sk);
}

// tcp_destroy_sock is fired for ipv4 and ipv6.
// This tracepoint is only attached if calculate_latency is true
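// It removes any entry left in sockets_latency by connections that never
// completed the handshake, so the map does not fill up with stale sockets.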
SEC("tracepoint/tcp/tcp_destroy_sock")
int ig_tcp_destroy(struct trace_event_raw_tcp_event_sk *ctx)
{
	return cleanup_sockets_latency_map(ctx->skaddr);
}

char LICENSE[] SEC("license") = "GPL";