3
#include <bpf/bpf_helpers.h>
4
#include <bpf/bpf_core_read.h>
5
#include <bpf/bpf_tracing.h>
6
#include <gadget/mntns_filter.h>
13
#if defined(__TARGET_ARCH_arm64)
14
#define __NR_rt_sigreturn 139
15
#define __NR_exit_group 94
17
#elif defined(__TARGET_ARCH_x86)
18
#define __NR_rt_sigreturn 15
19
#define __NR_exit_group 231
22
#error "Traceloop is not supported on your architecture."
26
/*
 * Compile-time logging toggles. When SHOW_DEBUG / SHOW_ERROR are defined the
 * macros forward to bpf_printk() (visible in the kernel trace pipe);
 * otherwise they expand to nothing, so disabled logging has zero cost.
 * NOTE(review): the #else/#endif lines were missing from this extract — the
 * two back-to-back definitions of each macro make the intended conditional
 * structure unambiguous.
 */
#if defined(SHOW_DEBUG)
#define bpf_debug_printk(fmt, ...) bpf_printk(fmt, ##__VA_ARGS__)
#else
#define bpf_debug_printk(fmt, ...)
#endif

#if defined(SHOW_ERROR)
#define bpf_error_printk(fmt, ...) bpf_printk(fmt, ##__VA_ARGS__)
#else
#define bpf_error_printk(fmt, ...)
#endif
39
const volatile bool filter_syscall = false;
41
const struct syscall_event_t *unused_event __attribute__((unused));
42
const struct syscall_event_cont_t *unused_event_cont __attribute__((unused));
48
static const struct pt_regs empty;
49
static struct syscall_def_t default_definition;
52
__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
57
__uint(key_size, sizeof(u64));
58
__uint(value_size, sizeof(u32));
59
__uint(max_entries, 1024);
62
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
63
__uint(key_size, sizeof(u32));
64
__uint(value_size, sizeof(u32));
66
} map_of_perf_buffers SEC(".maps");
69
__uint(type, BPF_MAP_TYPE_HASH);
70
__uint(key_size, sizeof(u64));
71
__uint(value_size, sizeof(struct syscall_def_t));
76
__uint(max_entries, 512);
77
} syscalls SEC(".maps");
80
__uint(type, BPF_MAP_TYPE_HASH);
81
__uint(key_size, sizeof(u64));
86
__uint(value_size, sizeof(bool));
87
__uint(map_flags, BPF_F_NO_PREALLOC);
88
__uint(max_entries, SYSCALL_FILTERS);
89
} syscall_filters SEC(".maps");
97
__uint(type, BPF_MAP_TYPE_HASH);
98
__uint(key_size, sizeof(u64));
99
__uint(value_size, sizeof(struct remembered_args));
100
__uint(max_entries, 1024);
101
} probe_at_sys_exit SEC(".maps");
104
__uint(type, BPF_MAP_TYPE_HASH);
105
__uint(key_size, sizeof(u64));
106
__uint(value_size, sizeof(struct pt_regs));
107
__uint(max_entries, 1024);
108
} regs_map SEC(".maps");
110
static __always_inline int skip_exit_probe(int nr)
112
return !!(nr == __NR_exit || nr == __NR_exit_group ||
113
nr == __NR_rt_sigreturn);
120
/* Fetch syscall argument i (1-based, 1..6) from a saved pt_regs using the
 * CO-RE PT_REGS_PARMn_CORE_SYSCALL accessors; returns 0 and logs an error
 * for any other index.
 * NOTE(review): the switch/case skeleton was missing from this extract and
 * has been reconstructed from the six visible PARMn returns and the error
 * message fragment — confirm against the original file. */
static __always_inline u64 get_arg(struct pt_regs *regs, int i)
{
	switch (i) {
	case 1:
		return PT_REGS_PARM1_CORE_SYSCALL(regs);
	case 2:
		return PT_REGS_PARM2_CORE_SYSCALL(regs);
	case 3:
		return PT_REGS_PARM3_CORE_SYSCALL(regs);
	case 4:
		return PT_REGS_PARM4_CORE_SYSCALL(regs);
	case 5:
		return PT_REGS_PARM5_CORE_SYSCALL(regs);
	case 6:
		return PT_REGS_PARM6_CORE_SYSCALL(regs);
	default:
		bpf_error_printk(
			"There is no PT_REGS_PARM%d_SYSCALL macro, check the argument!\n",
			i);
		return 0;
	}
}
143
/* True when filtering is enabled and this syscall number is NOT in the
 * syscall_filters allow-list; such events are dropped by the probes.
 * NOTE(review): braces reconstructed; the body was missing its `{`/`}` in
 * this extract. */
static __always_inline bool should_filter_out_syscall(u64 syscall_nr)
{
	return filter_syscall &&
	       bpf_map_lookup_elem(&syscall_filters, &syscall_nr) == NULL;
}
155
/*
 * NOTE(review): this span is truncated by extraction — the interleaved bare
 * numbers are original line numbers, and many statements (braces, if bodies,
 * declarations of ret/i/mntns_id/perf_buffer, early returns, #endif-style
 * closers) are missing. The code below is kept byte-identical; do NOT treat
 * it as compilable. Comments describe only what the visible fragments show.
 *
 * sys_enter handler: emits a SYSCALL_EVENT_TYPE_ENTER record (plus one
 * "continued" record per traced argument) to the per-mount-namespace perf
 * buffer, and remembers argument values for the matching sys_exit probe.
 */
SEC("raw_tracepoint/sys_enter")
156
int ig_traceloop_e(struct bpf_raw_tracepoint_args *ctx)
158
struct remembered_args remembered = {};
159
/* pid_tgid is used as the per-task key for regs_map / probe_at_sys_exit. */
u64 pid = bpf_get_current_pid_tgid();
160
struct syscall_def_t *syscall_def;
165
struct syscall_event_t sc = {};
166
struct task_struct *task;
167
/* For raw sys_enter tracepoints, args[1] is the syscall number. */
u64 nr = ctx->args[1];
168
struct pt_regs *args;
174
/* Drop the event when filtering is on and nr is not allow-listed
 * (the guarded statement is not visible in this extract). */
if (should_filter_out_syscall(nr))
183
/* Two clocks: boot time (comparable across boots/userspace) and
 * monotonic time, the latter also used to correlate enter/exit. */
u64 boot_ts = bpf_ktime_get_boot_ns();
188
u64 monotonic_ts = bpf_ktime_get_ns();
190
sc.boot_timestamp = boot_ts;
191
sc.monotonic_timestamp = monotonic_ts;
193
sc.cpu = bpf_get_smp_processor_id();
195
sc.typ = SYSCALL_EVENT_TYPE_ENTER;
198
remembered.monotonic_timestamp = monotonic_ts;
201
/* Look up per-syscall argument descriptors; fall back to the empty
 * default so the loops below still run. */
syscall_def = bpf_map_lookup_elem(&syscalls, &nr);
209
if (syscall_def == NULL)
210
syscall_def = &default_definition;
212
/* Resolve the task's mount-namespace inode id to pick the right
 * per-container perf buffer. */
task = (struct task_struct *)bpf_get_current_task();
213
mntns_id = (u64)BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum);
215
perf_buffer = bpf_map_lookup_elem(&map_of_perf_buffers, &mntns_id);
219
bpf_get_current_comm(sc.comm, sizeof(sc.comm));
221
/* NOTE(review): "®s_map" is mojibake — "&reg" was decoded as "®";
 * this should read "&regs_map" (same on the lines below). Reserve a
 * pt_regs slot with the zero value, then look it up to fill it. */
ret = bpf_map_update_elem(®s_map, &pid, &empty, BPF_NOEXIST);
224
"enter: there should not be any pt_regs for key %lu: %d\n",
230
args = bpf_map_lookup_elem(®s_map, &pid);
233
"enter: there should be a pt_regs for key %lu\n", pid);
238
/* ctx->args[0] is the pt_regs pointer for raw tracepoints; copy it
 * into map storage since pt_regs exceeds the BPF stack budget. */
bpf_probe_read(args, sizeof(*args), (void *)ctx->args[0]);
240
/* First pass: remember all raw argument values for the exit probe. */
for (i = 0; i < SYSCALL_ARGS; i++) {
242
u64 arg = get_arg(args, i + 1);
244
remembered.args[i] = arg;
245
if (syscall_def->args_len[i])
250
"Perf event output: sc.id: %d; sc.comm: %s; sizeof(sc): %d\n",
251
sc.id, sc.comm, sizeof(sc));
252
ret = bpf_perf_event_output(ctx, perf_buffer, BPF_F_CURRENT_CPU, &sc,
255
bpf_error_printk("Problem outputting perf event: %d", ret);
262
/* exit/exit_group/rt_sigreturn never return: don't arm the exit probe. */
if (!skip_exit_probe(nr))
263
bpf_map_update_elem(&probe_at_sys_exit, &pid, &remembered,
269
/* Second pass: emit a "continued" record for each argument whose
 * content is to be captured at enter time. */
for (i = 0; i < SYSCALL_ARGS; i++) {
270
__u64 arg_len = syscall_def->args_len[i];
272
/* Skip untraced args, exit-time args, and ret-length args. */
if (!arg_len || (arg_len & PARAM_PROBE_AT_EXIT_MASK) ||
273
arg_len == USE_RET_AS_PARAM_LENGTH)
276
bool null_terminated = false;
277
struct syscall_event_cont_t sc_cont = {};
279
sc_cont.monotonic_timestamp = monotonic_ts;
281
sc_cont.failed = false;
283
if (arg_len == USE_NULL_BYTE_LENGTH) {
284
null_terminated = true;
286
} else if (arg_len >= USE_ARG_INDEX_AS_PARAM_LENGTH) {
287
/* Length lives in another argument; decode its index. */
__u64 idx = arg_len &
288
USE_ARG_INDEX_AS_PARAM_LENGTH_MASK;
296
struct remembered_args *remembered_ctx_workaround;
297
if (idx < SYSCALL_ARGS) {
298
remembered_ctx_workaround = bpf_map_lookup_elem(
299
&probe_at_sys_exit, &pid);
300
if (remembered_ctx_workaround)
301
arg_len = remembered_ctx_workaround
310
/* Clamp to the record's param buffer. */
if (arg_len > sizeof(sc_cont.param))
311
arg_len = sizeof(sc_cont.param);
314
sc_cont.length = USE_NULL_BYTE_LENGTH;
316
sc_cont.length = arg_len;
319
u64 arg = get_arg(args, i + 1);
323
&& bpf_probe_read_user_str(sc_cont.param, PARAM_LEN,
325
sc_cont.failed = true;
326
else if (sizeof(u8) <= arg_len &&
329
&& bpf_probe_read_user(sc_cont.param, arg_len,
331
sc_cont.failed = true;
332
else if (bpf_probe_read_user(
333
sc_cont.param, PARAM_LEN,
335
sc_cont.failed = true;
338
"Perf event output: sc_cont.index: %d; sizeof(sc_cont): %d\n",
339
sc_cont.index, sizeof(sc_cont));
340
ret = bpf_perf_event_output(ctx, perf_buffer, BPF_F_CURRENT_CPU,
341
&sc_cont, sizeof(sc_cont));
344
"Problem outputting continued perf event: %d",
350
/* Release the per-task pt_regs scratch slot (mojibake: "&regs_map"). */
bpf_map_delete_elem(®s_map, &pid);
362
static __always_inline int syscall_get_nr(struct pt_regs *regs)
364
#if defined(__TARGET_ARCH_arm64)
365
return regs->syscallno;
366
#elif defined(__TARGET_ARCH_x86)
367
return regs->orig_ax;
369
#error "Traceloop is not supported on your architecture."
380
/*
 * NOTE(review): this span is truncated by extraction — interleaved bare
 * numbers are original line numbers; braces, if bodies, declarations of
 * r/i/nr/mntns_id/perf_buffer and early returns are missing. Code is kept
 * byte-identical; do NOT treat it as compilable.
 *
 * sys_exit handler: emits the "continued" records for exit-time arguments
 * (using values remembered at sys_enter), then the SYSCALL_EVENT_TYPE_EXIT
 * record itself, and cleans up the per-task map entries.
 */
SEC("raw_tracepoint/sys_exit")
381
int ig_traceloop_x(struct bpf_raw_tracepoint_args *ctx)
383
u64 pid = bpf_get_current_pid_tgid();
384
struct remembered_args *remembered;
385
struct syscall_def_t *syscall_def;
386
struct task_struct *task;
387
/* For raw sys_exit tracepoints, args[1] is the syscall return value. */
long ret = ctx->args[1];
388
struct pt_regs *args;
394
/* NOTE(review): "®s_map" is mojibake — "&reg" decoded as "®"; should
 * read "&regs_map" (same below). Reserve then fill a pt_regs slot. */
r = bpf_map_update_elem(®s_map, &pid, &empty, BPF_NOEXIST);
397
"exit: there should not be any pt_regs for key %lu: %d\n",
403
args = bpf_map_lookup_elem(®s_map, &pid);
406
"exit: there should be a pt_regs for key %lu\n", pid);
411
/* ctx->args[0] is the pt_regs pointer; the syscall number must be
 * recovered from the registers since sys_exit does not pass it. */
bpf_probe_read(args, sizeof(*args), (void *)ctx->args[0]);
412
nr = syscall_get_nr(args);
417
struct syscall_event_t sc = {
418
.boot_timestamp = bpf_ktime_get_boot_ns(),
419
.cpu = bpf_get_smp_processor_id(),
421
.typ = SYSCALL_EVENT_TYPE_EXIT,
426
/* Fall back to the empty definition when the syscall is unknown. */
syscall_def = bpf_map_lookup_elem(&syscalls, &nr);
427
if (syscall_def == NULL)
428
syscall_def = &default_definition;
430
/* Pick the per-container perf buffer by mount-namespace inode id. */
task = (struct task_struct *)bpf_get_current_task();
431
mntns_id = (u64)BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum);
433
perf_buffer = bpf_map_lookup_elem(&map_of_perf_buffers, &mntns_id);
437
remembered = bpf_map_lookup_elem(&probe_at_sys_exit, &pid);
445
/* Reuse the enter timestamp so userspace can pair enter/exit. */
sc.monotonic_timestamp = remembered->monotonic_timestamp;
447
for (i = 0; i < SYSCALL_ARGS; i++) {
448
__u64 arg_len = syscall_def->args_len[i];
450
/* Only handle arguments flagged for capture at exit time. */
if (!arg_len || !(arg_len & PARAM_PROBE_AT_EXIT_MASK))
453
bool null_terminated = false;
454
struct syscall_event_cont_t sc_cont = {
455
.monotonic_timestamp = remembered->monotonic_timestamp,
460
arg_len &= ~PARAM_PROBE_AT_EXIT_MASK;
462
if (arg_len == USE_RET_AS_PARAM_LENGTH) {
463
/* A failed syscall returns <0: nothing valid to read. */
if ((signed long)ret < 0)
467
} else if (arg_len == USE_NULL_BYTE_LENGTH) {
468
null_terminated = true;
470
} else if (arg_len >= USE_ARG_INDEX_AS_PARAM_LENGTH) {
471
/* Length lives in another argument remembered at enter. */
__u64 idx = arg_len &
472
USE_ARG_INDEX_AS_PARAM_LENGTH_MASK;
473
if (idx < SYSCALL_ARGS)
474
arg_len = remembered->args[idx];
479
/* Clamp to the record's param buffer. */
if (arg_len > sizeof(sc_cont.param))
480
arg_len = sizeof(sc_cont.param);
483
sc_cont.length = USE_NULL_BYTE_LENGTH;
485
sc_cont.length = arg_len;
487
if (arg_len == 0 && null_terminated) {
488
if (bpf_probe_read_user_str(
489
sc_cont.param, PARAM_LEN,
490
(void *)(remembered->args[i])) < 0)
491
sc_cont.failed = true;
492
} else if (sizeof(u8) <= arg_len && arg_len <= sizeof(u64) &&
493
bpf_probe_read_user(sc_cont.param, arg_len,
494
(void *)(remembered->args[i]))) {
495
sc_cont.failed = true;
496
} else if (bpf_probe_read_user(sc_cont.param, PARAM_LEN,
497
(void *)(remembered->args[i]))) {
498
sc_cont.failed = true;
502
"Perf event output (exit): sc_cont.index: %d; sizeof(sc_cont): %d\n",
503
sc_cont.index, sizeof(sc_cont));
504
r = bpf_perf_event_output(ctx, perf_buffer, BPF_F_CURRENT_CPU,
505
&sc_cont, sizeof(sc_cont));
508
"Problem outputting continued perf event: %d",
512
/* The enter-time state is consumed; drop it. */
bpf_map_delete_elem(&probe_at_sys_exit, &pid);
515
bpf_get_current_comm(sc.comm, sizeof(sc.comm));
518
"Perf event output (exit): sc.id: %d; sc.comm: %s; sizeof(sc): %d\n",
519
sc.id, sc.comm, sizeof(sc));
520
r = bpf_perf_event_output(ctx, perf_buffer, BPF_F_CURRENT_CPU, &sc,
523
/* NOTE(review): this logs `ret` (the syscall's return value) but the
 * perf-output status here is `r` — likely should print r; confirm. */
bpf_error_printk("Problem outputting perf event: %d", ret);
526
/* Release the pt_regs scratch slot (mojibake: "&regs_map"). */
bpf_map_delete_elem(®s_map, &pid);
531
/* BPF programs must declare a GPL-compatible license to use GPL-only kernel
 * helpers (e.g. bpf_probe_read). */
char LICENSE[] SEC("license") = "GPL";