inspektor-gadget

execsnoop.bpf.c
264 строки · 7.2 Кб
Перенос по словам
1
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
#include <vmlinux.h>
3
#include <bpf/bpf_helpers.h>
4
#include <bpf/bpf_core_read.h>
5
#ifdef __TARGET_ARCH_arm64
6
#include <bpf/bpf_tracing.h>
7
#endif /* __TARGET_ARCH_arm64 */
8

9
#include <gadget/mntns_filter.h>
10
#ifdef WITH_CWD
11
#include <gadget/filesystem.h>
12
#endif
13
#include "execsnoop.h"
14

15
// Defined in include/uapi/linux/magic.h
16
#define OVERLAYFS_SUPER_MAGIC 0x794c7630
17

18
const volatile bool ignore_failed = true;
19
const volatile uid_t targ_uid = INVALID_UID;
20
const volatile int max_args = DEFAULT_MAXARGS;
21

22
static const struct event empty_event = {};
23

24
struct {
25
	__uint(type, BPF_MAP_TYPE_HASH);
26
#ifdef WITH_CWD
27
	__uint(max_entries, 1024);
28
#else /* !WITH_CWD */
29
	__uint(max_entries, 10240);
30
#endif /* !WITH_CWD */
31
	__type(key, pid_t);
32
	__type(value, struct event);
33
} execs SEC(".maps");
34

35
struct {
36
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
37
	__uint(key_size, sizeof(u32));
38
	__uint(value_size, sizeof(u32));
39
} events SEC(".maps");
40

41
// man clone(2):
42
//   If any of the threads in a thread group performs an
43
//   execve(2), then all threads other than the thread group
44
//   leader are terminated, and the new program is executed in
45
//   the thread group leader.
46
//
47
// sys_enter_execve might be called from a thread and the corresponding
48
// sys_exit_execve will be called from the thread group leader in case of
49
// execve success, or from the same thread in case of execve failure. So we
50
// need to lookup the pid from the tgid in sys_exit_execve.
51
//
52
// We don't know in advance which execve(2) will succeed, so we need to keep
53
// track of all tgid<->pid mappings in a BPF map.
54
//
55
// We don't want to use bpf_for_each_map_elem() because it requires Linux 5.13.
56
//
57
// If several execve(2) are performed in parallel from different threads, only
58
// one can succeed. The kernel will run the tracepoint syscalls/sys_exit_execve
59
// for the failing execve(2) first and then for the successful one last.
60
//
61
// So we can insert a tgid->pid mapping in the same hash entry by modulo adding
62
// the pid in value and removing it by subtracting. By the time we need to
63
// lookup the pid by the tgid, there will be only one pid left in the hash entry.
64
struct {
65
	__uint(type, BPF_MAP_TYPE_HASH);
66
	__type(key, pid_t); // tgid
67
	__type(value, u64); // sum of pids
68
	__uint(max_entries, 1024);
69
} pid_by_tgid SEC(".maps");
70

71
/* Returns true unless uid is the INVALID_UID sentinel. */
static __always_inline bool valid_uid(uid_t uid)
{
	if (uid == INVALID_UID)
		return false;

	return true;
}
75

76
/*
 * Handler for the sys_enter_execve tracepoint.
 *
 * Creates an entry in the execs map for the calling thread, registers the
 * thread in pid_by_tgid and fills the entry with process metadata, the
 * (optional) current working directory and the argument strings. Nothing is
 * emitted here; ig_execve_x emits and removes the entry.
 *
 * ctx->args[0] is read as the user pointer to the file name and
 * ctx->args[1] as the user pointer to the argv array.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_enter_execve")
int ig_execve_e(struct trace_event_raw_sys_enter *ctx)
{
	u64 id;
#ifdef WITH_CWD
	char *cwd;
	struct fs_struct *fs;
#endif
	pid_t pid, tgid;
	u64 zero64 = 0;
	u64 *pid_sum;
	struct event *event;
	struct task_struct *task;
	/*
	 * Deliberately unsigned: a negative error code returned by
	 * bpf_probe_read_user_str() becomes a value larger than ARGSIZE, so
	 * the "> ARGSIZE" checks below also catch read failures.
	 */
	unsigned int ret;
	const char **args = (const char **)(ctx->args[1]);
	const char *argp;
	int i;
	u64 mntns_id;
	u64 uid_gid = bpf_get_current_uid_gid();
	u32 uid = (u32)uid_gid;
	u32 gid = (u32)(uid_gid >> 32);

	if (valid_uid(targ_uid) && targ_uid != uid)
		return 0;

	task = (struct task_struct *)bpf_get_current_task();
	mntns_id = (u64)BPF_CORE_READ(task, nsproxy, mnt_ns, ns.inum);

	if (gadget_should_discard_mntns_id(mntns_id))
		return 0;

	id = bpf_get_current_pid_tgid();
	pid = (pid_t)id;
	tgid = id >> 32;
	/* BPF_NOEXIST: bail out if this thread already has an entry. */
	if (bpf_map_update_elem(&execs, &pid, &empty_event, BPF_NOEXIST))
		return 0;

	event = bpf_map_lookup_elem(&execs, &pid);
	if (!event)
		return 0;

	/* Make sure an accumulator exists; an already existing one is kept. */
	bpf_map_update_elem(&pid_by_tgid, &tgid, &zero64, BPF_NOEXIST);

	pid_sum = bpf_map_lookup_elem(&pid_by_tgid, &tgid);
	if (!pid_sum) {
		/*
		 * No accumulator could be created (e.g. pid_by_tgid is full).
		 * ig_execve_x returns before touching execs when pid_by_tgid
		 * has no entry for the tgid, so the entry inserted above would
		 * never be removed and would make later execve() calls from
		 * this pid fail the BPF_NOEXIST insert. Drop it now.
		 */
		bpf_map_delete_elem(&execs, &pid);
		return 0;
	}

	__atomic_add_fetch(pid_sum, (u64)pid, __ATOMIC_RELAXED);

	event->timestamp = bpf_ktime_get_boot_ns();
	event->pid = tgid;
	event->uid = uid;
	event->gid = gid;
	// loginuid is only available when CONFIG_AUDIT is set
	if (bpf_core_field_exists(task->loginuid))
		event->loginuid = BPF_CORE_READ(task, loginuid.val);
	else
		event->loginuid = 4294967295; // -1 or "no user id"
	// sessionid is only available when CONFIG_AUDIT is set
	if (bpf_core_field_exists(task->sessionid))
		event->sessionid = BPF_CORE_READ(task, sessionid);

	event->ppid = (pid_t)BPF_CORE_READ(task, real_parent, tgid);
	event->args_count = 0;
	event->args_size = 0;
	event->mntns_id = mntns_id;

#ifdef WITH_CWD
	fs = BPF_CORE_READ(task, fs);
	cwd = get_path_str(&fs->pwd);
	bpf_probe_read_kernel_str(event->cwd, MAX_STRING_SIZE, cwd);
#endif

	/* The first recorded "argument" is the file name passed to execve(). */
	ret = bpf_probe_read_user_str(event->args, ARGSIZE,
				      (const char *)ctx->args[0]);
	if (ret <= ARGSIZE) {
		event->args_size += ret;
	} else {
		/* write an empty string */
		event->args[0] = '\0';
		event->args_size++;
	}

	event->args_count++;
#pragma unroll
	for (i = 1; i < TOTAL_MAX_ARGS && i < max_args; i++) {
		bpf_probe_read_user(&argp, sizeof(argp), &args[i]);
		/* A NULL pointer terminates argv. */
		if (!argp)
			return 0;

		/* Stop once there is no room left for another argument. */
		if (event->args_size > LAST_ARG)
			return 0;

		ret = bpf_probe_read_user_str(&event->args[event->args_size],
					      ARGSIZE, argp);
		if (ret > ARGSIZE)
			return 0;

		event->args_count++;
		event->args_size += ret;
	}
	/* try to read one more argument to check if there is one */
	bpf_probe_read_user(&argp, sizeof(argp), &args[max_args]);
	if (!argp)
		return 0;

	/* pointer to max_args+1 isn't null, assume we have more arguments */
	event->args_count++;
	return 0;
}
184

185
/*
 * Reports whether the executable of the current task lives on overlayfs
 * and its inode has a non-NULL upper dentry.
 *
 * Returns false when the executable's inode cannot be resolved or its
 * superblock magic is not OVERLAYFS_SUPER_MAGIC.
 */
static __always_inline bool has_upper_layer()
{
	struct task_struct *current_task;
	struct inode *exe_inode;
	struct dentry *upper;
	unsigned long magic;

	current_task = (struct task_struct *)bpf_get_current_task();
	exe_inode = BPF_CORE_READ(current_task, mm, exe_file, f_inode);
	if (!exe_inode)
		return false;

	magic = BPF_CORE_READ(exe_inode, i_sb, s_magic);
	if (magic != OVERLAYFS_SUPER_MAGIC)
		return false;

	/*
	 * struct ovl_inode (fs/overlayfs/ovl_entry.h) is not exported to
	 * vmlinux.h and not available in /sys/kernel/btf/vmlinux, see
	 * https://github.com/cilium/ebpf/pull/1300. Only the relative
	 * position of vfs_inode and __upperdentry is relied upon: the
	 * pointer is read from the address right after the struct inode.
	 */
	bpf_probe_read_kernel(&upper, sizeof(upper),
			      ((void *)exe_inode) +
				      bpf_core_type_size(struct inode));

	if (upper == NULL)
		return false;

	return true;
}
210

211
/*
 * Handler for the sys_exit_execve tracepoint.
 *
 * Finds the event recorded by ig_execve_e, completes it with the return
 * value, the task comm and (on success) the overlayfs upper-layer flag,
 * writes it to the events perf array and removes it from execs.
 * Failed calls are not reported while ignore_failed is set.
 *
 * Always returns 0.
 */
SEC("tracepoint/syscalls/sys_exit_execve")
int ig_execve_x(struct trace_event_raw_sys_exit *ctx)
{
	u32 cur_uid = (u32)bpf_get_current_uid_gid();
	u64 pid_tgid;
	pid_t tid, tgid;
	pid_t execs_key;
	u64 *sum;
	int retval;
	struct event *event;
	size_t out_len;

	if (valid_uid(targ_uid) && targ_uid != cur_uid)
		return 0;

	pid_tgid = bpf_get_current_pid_tgid();
	tid = (pid_t)pid_tgid;
	tgid = pid_tgid >> 32;
	retval = ctx->ret;

	sum = bpf_map_lookup_elem(&pid_by_tgid, &tgid);
	if (!sum)
		return 0;

	// Enter and exit may run on different threads: on success the key is
	// the pid left in the per-tgid sum, on failure it is the current pid.
	if (retval == 0)
		execs_key = (pid_t)*sum;
	else
		execs_key = tid;
	event = bpf_map_lookup_elem(&execs, &execs_key);

	// Take the current pid out of the sum; drop the tgid entry once the
	// sum is back to 0 or the execve() call succeeded.
	__atomic_add_fetch(sum, (u64)-tid, __ATOMIC_RELAXED);
	if (*sum == 0 || retval == 0)
		bpf_map_delete_elem(&pid_by_tgid, &tgid);

	if (!event)
		return 0;

	// Failed calls are only reported when ignore_failed is off.
	if (!(ignore_failed && retval < 0)) {
		if (retval == 0)
			event->upper_layer = has_upper_layer();

		event->retval = retval;
		bpf_get_current_comm(&event->comm, sizeof(event->comm));

		// Never hand out more bytes than the event structure holds.
		out_len = EVENT_SIZE(event);
		if (out_len <= sizeof(*event))
			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
					      event, out_len);
	}

	// The entry is removed whether or not it was reported.
	bpf_map_delete_elem(&execs, &execs_key);
	return 0;
}
263

264
/*
 * License string stored in the "license" ELF section.
 * NOTE(review): presumably "GPL" is needed for the GPL-only BPF helpers
 * used in this file -- confirm.
 */
char LICENSE[] SEC("license") = "GPL";
265
inspektor-gadget

Использование cookies