inspektor-gadget

Форк
0
788 строк · 21.9 Кб
1
// Copyright 2023 The Inspektor Gadget authors
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
// Package containerhook detects when a container is created or terminated.
16
//
17
// It uses two mechanisms to detect new containers:
18
//  1. fanotify with FAN_OPEN_EXEC_PERM.
19
//  2. ebpf on the sys_enter_execve tracepoint to get the execve arguments.
20
//
21
// Using fanotify with FAN_OPEN_EXEC_PERM allows to call a callback function
22
// while the container is being created. The container is paused until the
23
// callback function returns.
24
//
25
// Using ebpf on the sys_enter_execve tracepoint allows to get the execve
26
// arguments without the need to read /proc/$pid/cmdline or /proc/$pid/comm.
27
// Reading /proc/$pid/cmdline is not possible using only fanotify when the
28
// tracer is not in the same pidns as the process being traced. This is the
29
// case when Inspektor Gadget is started with hostPID=false.
30
//
31
// https://github.com/inspektor-gadget/inspektor-gadget/blob/main/docs/devel/fanotify-ebpf.png
32
package containerhook
33

34
import (
35
	"encoding/json"
36
	"errors"
37
	"fmt"
38
	"io"
39
	"math"
40
	"os"
41
	"path/filepath"
42
	"strconv"
43
	"strings"
44
	"sync"
45
	"sync/atomic"
46
	"time"
47

48
	"github.com/cilium/ebpf"
49
	"github.com/cilium/ebpf/link"
50
	ocispec "github.com/opencontainers/runtime-spec/specs-go"
51
	"github.com/s3rj1k/go-fanotify/fanotify"
52
	log "github.com/sirupsen/logrus"
53
	"golang.org/x/sys/unix"
54

55
	"github.com/inspektor-gadget/inspektor-gadget/pkg/btfgen"
56
	"github.com/inspektor-gadget/inspektor-gadget/pkg/gadgets"
57
	"github.com/inspektor-gadget/inspektor-gadget/pkg/utils/host"
58
)
59

60
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target $TARGET -cc clang -cflags ${CFLAGS} -no-global-types -type record execruntime ./bpf/execruntime.bpf.c -- -I./bpf/
61

62
// EventType identifies the kind of lifecycle change carried by a
// ContainerEvent.
type EventType int

const (
	// EventTypeAddContainer is emitted when a new container has been detected.
	EventTypeAddContainer EventType = iota

	// EventTypeRemoveContainer is emitted when a watched container is gone.
	EventTypeRemoveContainer
)
68

69
// ContainerEvent is the notification for container creation or termination
70
type ContainerEvent struct {
71
	// Type is whether the container was added or removed
72
	Type EventType
73

74
	// ContainerID is the container id, typically a 64 hexadecimal string
75
	ContainerID string
76

77
	// ContainerName is the container name, typically two words with an underscore
78
	ContainerName string
79

80
	// ContainerPID is the process id of the container
81
	ContainerPID uint32
82

83
	// Container's configuration is the config.json from the OCI runtime
84
	// spec
85
	ContainerConfig *ocispec.Spec
86

87
	// Bundle is the directory containing the config.json from the OCI
88
	// runtime spec
89
	// See https://github.com/opencontainers/runtime-spec/blob/main/bundle.md
90
	Bundle string
91
}
92

93
// ContainerNotifyFunc is the callback signature invoked by ContainerNotifier
// for every container event it produces.
type ContainerNotifyFunc func(notif ContainerEvent)
94

95
// watchedContainer is a container whose process is being polled for
// termination.
type watchedContainer struct {
	// id is the container ID.
	id string
	// pid is the process id looked up to decide whether the container is
	// still running.
	pid int
}
99

100
// futureContainer holds what is known about a container that was announced
// on a command line before the OCI runtime created it.
type futureContainer struct {
	// id is the container ID.
	id string
	// name is the container name.
	name string
	// bundleDir is the bundle directory given on the command line.
	bundleDir string
	// pidFile is the path of the container pid file given on the command line.
	pidFile string
}
106

107
type ContainerNotifier struct {
108
	runtimeBinaryNotify *fanotify.NotifyFD
109
	callback            ContainerNotifyFunc
110

111
	// containers is the set of containers that are being watched for
112
	// termination. This prevents duplicate calls to
113
	// AddWatchContainerTermination.
114
	//
115
	// Keys: Container ID
116
	containers   map[string]*watchedContainer
117
	containersMu sync.Mutex
118

119
	// futureContainers is the set of containers that are detected before
120
	// oci-runtime (runc/crun) creates the container e.g. detected via conmon
121
	//
122
	// Keys: Container ID
123
	futureContainers map[string]*futureContainer
124
	futureMu         sync.Mutex
125

126
	objs  execruntimeObjects
127
	links []link.Link
128

129
	// set to true when the notifier is closed is closed
130
	closed atomic.Bool
131
	// this channel is used in watchContainersTermination() to avoid having to wait for the
132
	// ticker to trigger before returning
133
	done chan bool
134

135
	wg sync.WaitGroup
136
}
137

138
// runtimePaths enumerates the locations where a container runtime binary
// (runc, crun) or conmon may be installed; the location varies between Linux
// distributions.
//
// The entries are host paths: install() joins each of them with
// host.HostRoot before using it.
var runtimePaths = []string{
	// runc
	"/bin/runc",
	"/usr/bin/runc",
	"/usr/sbin/runc",
	"/usr/local/bin/runc",
	"/usr/local/sbin/runc",
	"/usr/lib/cri-o-runc/sbin/runc",
	"/run/torcx/unpack/docker/bin/runc",

	// crun
	"/usr/bin/crun",

	// conmon
	"/usr/bin/conmon",
}
155

156
// initFanotify initializes the fanotify API with the flags we need
157
func initFanotify() (*fanotify.NotifyFD, error) {
158
	fanotifyFlags := uint(unix.FAN_CLOEXEC | unix.FAN_CLASS_CONTENT | unix.FAN_UNLIMITED_QUEUE | unix.FAN_UNLIMITED_MARKS | unix.FAN_NONBLOCK)
159
	openFlags := os.O_RDONLY | unix.O_LARGEFILE | unix.O_CLOEXEC
160
	return fanotify.Initialize(fanotifyFlags, openFlags)
161
}
162

163
// Supported detects if RuncNotifier is supported in the current environment
164
func Supported() bool {
165
	notifier, err := NewContainerNotifier(func(notif ContainerEvent) {})
166
	if notifier != nil {
167
		notifier.Close()
168
	}
169
	if err != nil {
170
		log.Warnf("ContainerNotifier: not supported: %s", err)
171
	}
172
	return err == nil
173
}
174

175
// NewContainerNotifier uses fanotify and ebpf to detect when a container is
176
// created or terminated, and call the callback on such event.
177
//
178
// Limitations:
179
// - the container runtime must be installed in one of the paths listed by runtimePaths
180
func NewContainerNotifier(callback ContainerNotifyFunc) (*ContainerNotifier, error) {
181
	n := &ContainerNotifier{
182
		callback:         callback,
183
		containers:       make(map[string]*watchedContainer),
184
		futureContainers: make(map[string]*futureContainer),
185
		done:             make(chan bool),
186
	}
187

188
	if err := n.install(); err != nil {
189
		n.Close()
190
		return nil, err
191
	}
192

193
	return n, nil
194
}
195

196
func (n *ContainerNotifier) installEbpf(fanotifyFd int) error {
197
	spec, err := loadExecruntime()
198
	if err != nil {
199
		return fmt.Errorf("load ebpf program for container-hook: %w", err)
200
	}
201

202
	fanotifyPrivateData, err := readPrivateDataFromFd(fanotifyFd)
203
	if err != nil {
204
		return fmt.Errorf("readPrivateDataFromFd: %w", err)
205
	}
206

207
	consts := map[string]interface{}{
208
		"tracer_group": fanotifyPrivateData,
209
	}
210
	if err := spec.RewriteConstants(consts); err != nil {
211
		return fmt.Errorf("RewriteConstants: %w", err)
212
	}
213

214
	opts := ebpf.CollectionOptions{
215
		Programs: ebpf.ProgramOptions{
216
			KernelTypes: btfgen.GetBTFSpec(),
217
		},
218
	}
219

220
	if err := spec.LoadAndAssign(&n.objs, &opts); err != nil {
221
		return fmt.Errorf("loading maps and programs: %w", err)
222
	}
223

224
	// Attach ebpf programs
225
	l, err := link.Kprobe("fsnotify_remove_first_event", n.objs.IgFaPickE, nil)
226
	if err != nil {
227
		return fmt.Errorf("attaching kprobe fsnotify_remove_first_event: %w", err)
228
	}
229
	n.links = append(n.links, l)
230

231
	l, err = link.Kretprobe("fsnotify_remove_first_event", n.objs.IgFaPickX, nil)
232
	if err != nil {
233
		return fmt.Errorf("attaching kretprobe fsnotify_remove_first_event: %w", err)
234
	}
235
	n.links = append(n.links, l)
236

237
	l, err = link.Tracepoint("syscalls", "sys_enter_execve", n.objs.IgExecveE, nil)
238
	if err != nil {
239
		return fmt.Errorf("attaching tracepoint: %w", err)
240
	}
241
	n.links = append(n.links, l)
242

243
	l, err = link.Tracepoint("syscalls", "sys_exit_execve", n.objs.IgExecveX, nil)
244
	if err != nil {
245
		return fmt.Errorf("attaching tracepoint: %w", err)
246
	}
247
	n.links = append(n.links, l)
248

249
	return nil
250
}
251

252
// install sets up everything the notifier needs: the fanotify group, the ebpf
// programs, the fanotify marks on the runtime binaries and the two background
// goroutines.
//
// The runtime binary is taken from the RUNTIME_PATH environment variable when
// it is set; in that case a missing file or a failed mark is an error.
// Otherwise every entry of runtimePaths (joined with host.HostRoot) is tried
// and failures on individual entries are only logged. An error is returned
// when no binary could be marked.
func (n *ContainerNotifier) install() error {
	// Start fanotify
	notify, err := initFanotify()
	if err != nil {
		return err
	}
	n.runtimeBinaryNotify = notify

	// Load, initialize and attach ebpf program
	if err := n.installEbpf(notify.Fd); err != nil {
		return err
	}

	// markExec asks fanotify for a permission event each time path is
	// opened for execution.
	markExec := func(path string) error {
		return notify.Mark(unix.FAN_MARK_ADD, unix.FAN_OPEN_EXEC_PERM, unix.AT_FDCWD, path)
	}

	// Attach fanotify to various runtime binaries
	marked := false
	if custom := os.Getenv("RUNTIME_PATH"); custom != "" {
		log.Debugf("container-hook: trying runtime from RUNTIME_PATH env variable at %s", custom)

		if _, statErr := os.Stat(custom); errors.Is(statErr, os.ErrNotExist) {
			return statErr
		}
		if markErr := markExec(custom); markErr != nil {
			return fmt.Errorf("fanotify marking of %s: %w", custom, markErr)
		}
		marked = true
	} else {
		for _, candidate := range runtimePaths {
			hostPath := filepath.Join(host.HostRoot, candidate)
			log.Debugf("container-hook: trying runtime at %s", hostPath)

			_, statErr := os.Stat(hostPath)
			if errors.Is(statErr, os.ErrNotExist) {
				log.Debugf("container-hook: runc at %s not found", hostPath)
				continue
			}

			if markErr := markExec(hostPath); markErr != nil {
				log.Warnf("container-hook: failed to fanotify mark: %s", markErr)
				continue
			}
			marked = true
		}
	}

	if !marked {
		notify.File.Close()
		return fmt.Errorf("no container runtime can be monitored with fanotify. The following paths were tested: %s. You can use the RUNTIME_PATH env variable to specify a custom path. If you are successful doing so, please open a PR to add your custom path to runtimePaths", strings.Join(runtimePaths, ","))
	}

	// Both goroutines call n.wg.Done() when they return.
	n.wg.Add(2)
	go n.watchContainersTermination()
	go n.watchRuntimeBinary()

	return nil
}
311

312
// AddWatchContainerTermination watches a container for termination and
313
// generates an event on the notifier. This is automatically called for new
314
// containers detected by ContainerNotifier, but it can also be called for
315
// containers detected externally such as initial containers.
316
func (n *ContainerNotifier) AddWatchContainerTermination(containerID string, containerPID int) error {
317
	n.containersMu.Lock()
318
	defer n.containersMu.Unlock()
319

320
	if _, ok := n.containers[containerID]; ok {
321
		// This container is already being watched for termination
322
		return nil
323
	}
324

325
	n.containers[containerID] = &watchedContainer{
326
		id:  containerID,
327
		pid: containerPID,
328
	}
329

330
	return nil
331
}
332

333
// watchContainerTermination waits until the container terminates
334
func (n *ContainerNotifier) watchContainersTermination() {
335
	defer n.wg.Done()
336

337
	ticker := time.NewTicker(time.Second)
338
	defer ticker.Stop()
339

340
	for {
341
		select {
342
		case <-n.done:
343
			return
344
		case <-ticker.C:
345
			if n.closed.Load() {
346
				return
347
			}
348

349
			dirEntries, err := os.ReadDir(host.HostProcFs)
350
			if err != nil {
351
				log.Errorf("reading /proc: %s", err)
352
				return
353
			}
354
			pids := make(map[int]bool)
355
			for _, entry := range dirEntries {
356
				pid, err := strconv.Atoi(entry.Name())
357
				if err != nil {
358
					// entry is not a process directory. Ignore.
359
					continue
360
				}
361
				pids[pid] = true
362
			}
363

364
			n.containersMu.Lock()
365
			for _, c := range n.containers {
366
				if pids[c.pid] {
367
					// container still running
368
					continue
369
				}
370

371
				go n.callback(ContainerEvent{
372
					Type:         EventTypeRemoveContainer,
373
					ContainerID:  c.id,
374
					ContainerPID: uint32(c.pid),
375
				})
376

377
				delete(n.containers, c.id)
378
			}
379
			n.containersMu.Unlock()
380
		}
381
	}
382
}
383

384
func (n *ContainerNotifier) watchPidFileIterate(
385
	pidFileDirNotify *fanotify.NotifyFD,
386
	bundleDir string,
387
	configJSONPath string,
388
	pidFile string,
389
	pidFileDir string,
390
) (bool, error) {
391
	// Get the next event from fanotify.
392
	// Even though the API allows to pass skipPIDs, we cannot use
393
	// it here because ResponseAllow would not be called.
394
	data, err := pidFileDirNotify.GetEvent()
395
	if err != nil {
396
		return false, fmt.Errorf("%w", err)
397
	}
398

399
	// data can be nil if the event received is from a process in skipPIDs.
400
	// In that case, skip and get the next event.
401
	if data == nil {
402
		return false, nil
403
	}
404

405
	// Don't leak the fd received by GetEvent
406
	defer data.Close()
407
	dataFile := data.File()
408
	defer dataFile.Close()
409

410
	if !data.MatchMask(unix.FAN_ACCESS_PERM) {
411
		// This should not happen: FAN_ACCESS_PERM is the only mask Marked
412
		return false, fmt.Errorf("fanotify: unknown event on runc: mask=%d pid=%d", data.Mask, data.Pid)
413
	}
414

415
	// This unblocks whoever is accessing the pidfile
416
	defer pidFileDirNotify.ResponseAllow(data)
417

418
	path, err := data.GetPath()
419
	if err != nil {
420
		return false, err
421
	}
422
	path = filepath.Join(host.HostRoot, path)
423

424
	// Consider files identical if they have the same device/inode,
425
	// even if the paths differ due to symlinks (for example,
426
	// the event's path is /run/... but the runc --pid-file argument
427
	// uses /var/run/..., where /var/run is a symlink to /run).
428
	filesAreIdentical, err := checkFilesAreIdentical(path, pidFile)
429
	if err != nil {
430
		return false, err
431
	} else if !filesAreIdentical {
432
		return false, nil
433
	}
434

435
	pidFileContent, err := io.ReadAll(dataFile)
436
	if err != nil {
437
		return false, err
438
	}
439
	if len(pidFileContent) == 0 {
440
		return false, fmt.Errorf("empty pid file")
441
	}
442
	containerPID, err := strconv.Atoi(string(pidFileContent))
443
	if err != nil {
444
		return false, err
445
	}
446

447
	// Unfortunately, Linux 5.4 doesn't respect ignore masks
448
	// See fix in Linux 5.9:
449
	// https://github.com/torvalds/linux/commit/497b0c5a7c0688c1b100a9c2e267337f677c198e
450
	// Workaround: remove parent mask. We don't need it anymore :)
451
	err = pidFileDirNotify.Mark(unix.FAN_MARK_REMOVE, unix.FAN_ACCESS_PERM|unix.FAN_EVENT_ON_CHILD, unix.AT_FDCWD, pidFileDir)
452
	if err != nil {
453
		return false, nil
454
	}
455

456
	bundleConfigJSON, err := os.ReadFile(configJSONPath)
457
	if err != nil {
458
		return false, err
459
	}
460
	containerConfig := &ocispec.Spec{}
461
	err = json.Unmarshal(bundleConfigJSON, containerConfig)
462
	if err != nil {
463
		return false, err
464
	}
465

466
	// cri-o appends userdata to bundleDir,
467
	// so we trim it here to get the correct containerID
468
	containerID := filepath.Base(filepath.Clean(strings.TrimSuffix(bundleDir, "userdata")))
469

470
	err = n.AddWatchContainerTermination(containerID, containerPID)
471
	if err != nil {
472
		log.Errorf("container %s with pid %d terminated before we could watch it: %s", containerID, containerPID, err)
473
		return true, nil
474
	}
475

476
	if containerPID > math.MaxUint32 {
477
		log.Errorf("Container PID (%d) exceeds math.MaxUint32 (%d)", containerPID, math.MaxUint32)
478
		return true, nil
479
	}
480

481
	var containerName string
482
	n.futureMu.Lock()
483
	fc, ok := n.futureContainers[containerID]
484
	if ok {
485
		containerName = fc.name
486
	}
487
	delete(n.futureContainers, containerID)
488
	n.futureMu.Unlock()
489

490
	n.callback(ContainerEvent{
491
		Type:            EventTypeAddContainer,
492
		ContainerID:     containerID,
493
		ContainerPID:    uint32(containerPID),
494
		ContainerConfig: containerConfig,
495
		Bundle:          bundleDir,
496
		ContainerName:   containerName,
497
	})
498

499
	return true, nil
500
}
501

502
// checkFilesAreIdentical reports whether path1 and path2 designate the same
// file according to os.SameFile.
//
// Paths with different base names are reported as different without touching
// the filesystem, so no error is returned for them even if they do not exist.
// This matters because fanotify masks don't work on Linux 5.4: a notification
// for an unrelated file can arrive before the pid file is created. See the
// fix in Linux 5.9:
// https://github.com/torvalds/linux/commit/497b0c5a7c0688c1b100a9c2e267337f677c198e
//
// When the base names match, an error from os.Stat on either path is
// returned as is.
func checkFilesAreIdentical(path1, path2 string) (bool, error) {
	if name1, name2 := filepath.Base(path1), filepath.Base(path2); name1 != name2 {
		return false, nil
	}

	var infos [2]os.FileInfo
	for i, p := range [2]string{path1, path2} {
		info, err := os.Stat(p)
		if err != nil {
			return false, err
		}
		infos[i] = info
	}

	return os.SameFile(infos[0], infos[1]), nil
}
524

525
// monitorRuntimeInstance starts watching for the pid file that a runtime
// invocation is about to write.
//
// It creates a dedicated fanotify group, marks the directory of pidFile and
// starts a goroutine that calls watchPidFileIterate until it asks to stop,
// fails, or the notifier is closed. The goroutine is tracked by n.wg and
// closes the fanotify group when it returns.
//
// bundleDir is the bundle directory of the container; config.json is looked
// up in it and then in its "userdata" subdirectory. An error is returned, and
// the fanotify group is closed, when the group cannot be set up or no
// config.json is found.
func (n *ContainerNotifier) monitorRuntimeInstance(bundleDir string, pidFile string) error {
	pidFileDirNotify, err := fanotify.Initialize(
		uint(unix.FAN_CLOEXEC|unix.FAN_CLASS_CONTENT|unix.FAN_UNLIMITED_QUEUE|unix.FAN_UNLIMITED_MARKS),
		os.O_RDONLY|unix.O_LARGEFILE|unix.O_CLOEXEC,
	)
	if err != nil {
		return err
	}

	// Until the watcher goroutine takes ownership of the group, every
	// return path has to close it.
	handedOff := false
	defer func() {
		if !handedOff {
			pidFileDirNotify.File.Close()
		}
	}()

	// The pidfile does not exist yet, so we cannot monitor it directly.
	// Instead we monitor its parent directory with FAN_EVENT_ON_CHILD to
	// get events on the directory's children.
	pidFileDir := filepath.Dir(pidFile)
	err = pidFileDirNotify.Mark(unix.FAN_MARK_ADD, unix.FAN_ACCESS_PERM|unix.FAN_EVENT_ON_CHILD, unix.AT_FDCWD, pidFileDir)
	if err != nil {
		return fmt.Errorf("marking %s: %w", pidFileDir, err)
	}

	// ignoreAccess adds an ignore mask so that accesses to path do not
	// generate events.
	ignoreAccess := func(path string) error {
		return pidFileDirNotify.Mark(unix.FAN_MARK_ADD|unix.FAN_MARK_IGNORED_MASK, unix.FAN_ACCESS_PERM, unix.AT_FDCWD, path)
	}

	// watchPidFileIterate() will read config.json and it might be in the
	// same directory as the pid file. To avoid getting events unrelated to
	// the pidfile, add an ignore mask.
	//
	// This is best effort because the ignore mask is unfortunately not
	// respected until a fix in Linux 5.9:
	// https://github.com/torvalds/linux/commit/497b0c5a7c0688c1b100a9c2e267337f677c198e
	configJSONPath := filepath.Join(bundleDir, "config.json")
	if _, statErr := os.Stat(configJSONPath); errors.Is(statErr, os.ErrNotExist) {
		// podman might install config.json in the userdata directory
		configJSONPath = filepath.Join(bundleDir, "userdata", "config.json")
		if _, statErr := os.Stat(configJSONPath); errors.Is(statErr, os.ErrNotExist) {
			return fmt.Errorf("config not found at %s", configJSONPath)
		}
	}
	if err := ignoreAccess(configJSONPath); err != nil {
		return fmt.Errorf("marking %s: %w", configJSONPath, err)
	}

	// similar to config.json, we ignore passwd file if it exists
	passwdPath := filepath.Join(bundleDir, "passwd")
	if _, statErr := os.Stat(passwdPath); !errors.Is(statErr, os.ErrNotExist) {
		if err := ignoreAccess(passwdPath); err != nil {
			return fmt.Errorf("marking passwd path: %w", err)
		}
	}

	handedOff = true
	n.wg.Add(1)
	go func() {
		defer n.wg.Done()
		defer pidFileDirNotify.File.Close()

		for {
			stop, err := n.watchPidFileIterate(pidFileDirNotify, bundleDir, configJSONPath, pidFile, pidFileDir)
			switch {
			case n.closed.Load():
				// The notifier is shutting down: do not report errors.
				return
			case err != nil:
				log.Warnf("error watching pid: %v\n", err)
				return
			case stop:
				return
			}
		}
	}()

	return nil
}
597

598
// watchRuntimeBinary processes fanotify events on the runtime binaries until
// the notifier is closed or watchRuntimeIterate asks to stop. Before
// returning it closes the fanotify file of the runtime binaries.
//
// Errors from watchRuntimeIterate are logged, except when the notifier has
// been closed in the meantime.
func (n *ContainerNotifier) watchRuntimeBinary() {
	defer n.wg.Done()

	for {
		stop, err := n.watchRuntimeIterate()

		closed := n.closed.Load()
		if !closed && err != nil {
			log.Errorf("error watching runtime binary: %v\n", err)
		}
		if closed || stop {
			n.runtimeBinaryNotify.File.Close()
			return
		}
	}
}
616

617
func (n *ContainerNotifier) parseConmonCmdline(cmdlineArr []string) {
618
	containerName := ""
619
	containerID := ""
620
	bundleDir := ""
621
	pidFile := ""
622

623
	for i := 0; i < len(cmdlineArr); i++ {
624
		verb := cmdlineArr[i]
625
		arg := ""
626
		if i+1 < len(cmdlineArr) {
627
			arg = cmdlineArr[i+1]
628
		}
629
		switch verb {
630
		case "-n", "--name":
631
			containerName = arg
632
			i++
633
		case "-c", "--cid":
634
			containerID = arg
635
			i++
636
		case "-b", "--bundle":
637
			bundleDir = arg
638
			i++
639
		case "-p", "--container-pidfile":
640
			pidFile = arg
641
			i++
642
		}
643
	}
644

645
	if containerName == "" || containerID == "" || bundleDir == "" || pidFile == "" {
646
		return
647
	}
648

649
	n.futureMu.Lock()
650
	n.futureContainers[containerID] = &futureContainer{
651
		id:        containerID,
652
		pidFile:   pidFile,
653
		bundleDir: bundleDir,
654
		name:      containerName,
655
	}
656
	n.futureMu.Unlock()
657
}
658

659
// parseOCIRuntime inspects an oci-runtime (runc/crun) command line and, when
// it is a "create" invocation carrying both --bundle and --pid-file, starts
// monitoring the pid file with monitorRuntimeInstance. Both paths are joined
// with host.HostRoot first. A monitoring failure is logged, not returned.
//
// comm is currently unused.
func (n *ContainerNotifier) parseOCIRuntime(comm string, cmdlineArr []string) {
	var (
		isCreate  bool
		bundleDir string
		pidFile   string
	)

	for i := 0; i < len(cmdlineArr); i++ {
		hasValue := i+1 < len(cmdlineArr)

		switch arg := cmdlineArr[i]; {
		case arg == "create":
			isCreate = true
		case arg == "--bundle" && hasValue:
			i++
			bundleDir = filepath.Join(host.HostRoot, cmdlineArr[i])
		case arg == "--pid-file" && hasValue:
			i++
			pidFile = filepath.Join(host.HostRoot, cmdlineArr[i])
		}
	}

	if !isCreate || bundleDir == "" || pidFile == "" {
		return
	}

	if err := n.monitorRuntimeInstance(bundleDir, pidFile); err != nil {
		log.Errorf("error monitoring runtime instance: %v\n", err)
	}
}
689

690
func (n *ContainerNotifier) watchRuntimeIterate() (bool, error) {
691
	// Get the next event from fanotify.
692
	// Even though the API allows to pass skipPIDs, we cannot use it here
693
	// because ResponseAllow would not be called.
694
	data, err := n.runtimeBinaryNotify.GetEvent()
695
	if err != nil {
696
		return true, err
697
	}
698

699
	// data can be nil if the event received is from a process in skipPIDs.
700
	// In that case, skip and get the next event.
701
	if data == nil {
702
		return false, nil
703
	}
704

705
	// Don't leak the fd received by GetEvent
706
	defer data.Close()
707

708
	if !data.MatchMask(unix.FAN_OPEN_EXEC_PERM) {
709
		// This should not happen: FAN_OPEN_EXEC_PERM is the only mask Marked
710
		return false, fmt.Errorf("fanotify: unknown event on runc: mask=%d pid=%d", data.Mask, data.Pid)
711
	}
712

713
	// This unblocks the execution
714
	defer n.runtimeBinaryNotify.ResponseAllow(data)
715

716
	// Lookup entry in ebpf map ig_fa_records
717
	var record execruntimeRecord
718
	err = n.objs.IgFaRecords.LookupAndDelete(nil, &record)
719
	if err != nil {
720
		return false, fmt.Errorf("lookup record: %w", err)
721
	}
722

723
	// Skip empty record
724
	// This can happen when the ebpf code didn't find the exec args
725
	if record.Pid == 0 {
726
		log.Debugf("skip event with pid=0")
727
		return false, nil
728
	}
729
	if record.ArgsSize == 0 {
730
		log.Debugf("skip event without args")
731
		return false, nil
732
	}
733

734
	callerComm := strings.TrimRight(string(record.CallerComm[:]), "\x00")
735

736
	cmdlineArr := []string{}
737
	calleeComm := ""
738
	for _, arg := range strings.Split(string(record.Args[0:record.ArgsSize]), "\x00") {
739
		if arg != "" {
740
			cmdlineArr = append(cmdlineArr, arg)
741
		}
742
	}
743
	if len(cmdlineArr) == 0 {
744
		log.Debugf("cannot get cmdline for pid %d", record.Pid)
745
		return false, nil
746
	}
747
	if len(cmdlineArr) > 0 {
748
		calleeComm = filepath.Base(cmdlineArr[0])
749
	}
750

751
	log.Debugf("got event with pid=%d caller=%q callee=%q args=%v",
752
		record.Pid,
753
		callerComm, calleeComm,
754
		cmdlineArr)
755

756
	// runc is executing itself with unix.Exec(), so fanotify receives two
757
	// FAN_OPEN_EXEC_PERM events:
758
	//   1. from containerd-shim (or similar)
759
	//   2. from runc, by this re-execution.
760
	// This filter takes the first one.
761

762
	switch calleeComm {
763
	case "conmon":
764
		// Calling sequence: crio/podman -> conmon -> runc/crun
765
		n.parseConmonCmdline(cmdlineArr)
766
	case "runc", "crun":
767
		n.parseOCIRuntime(calleeComm, cmdlineArr)
768
	default:
769
		return false, nil
770
	}
771

772
	return false, nil
773
}
774

775
// Close stops the notifier and releases its resources.
//
// It marks the notifier as closed, closes the done channel, closes the
// fanotify file of the runtime binaries, waits for the goroutines tracked by
// n.wg, then closes the ebpf links and objects.
//
// Close is idempotent: only the first call does the work, later calls return
// immediately. Without this guard a second call would panic when closing the
// already closed done channel.
func (n *ContainerNotifier) Close() {
	// Swap returns the previous value: true means Close already ran (or is
	// running).
	if n.closed.Swap(true) {
		return
	}

	close(n.done)
	if n.runtimeBinaryNotify != nil {
		n.runtimeBinaryNotify.File.Close()
	}
	n.wg.Wait()

	for _, l := range n.links {
		gadgets.CloseLink(l)
	}
	n.links = nil
	n.objs.Close()
}
789

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.