tetragon
1// SPDX-License-Identifier: Apache-2.0
2// Copyright Authors of Tetragon
3
4package caps5
6import (7"fmt"8"os"9"path/filepath"10"strconv"11"strings"12"sync"13
14"github.com/cilium/tetragon/api/v1/tetragon"15"github.com/cilium/tetragon/pkg/api/processapi"16"github.com/cilium/tetragon/pkg/logger"17"github.com/cilium/tetragon/pkg/option"18"github.com/cilium/tetragon/pkg/reader/namespace"19"golang.org/x/sys/unix"20)
21
22var (23// Set default last capability based on upstream unix go library24cap_last_cap = int32(unix.CAP_LAST_CAP)25lastCapOnce sync.Once26)
27
28// GetLastCap() Returns unix.CAP_LAST_CAP unless the kernel
29// defines another last cap which is the case for old kernels.
30func GetLastCap() int32 {31lastCapOnce.Do(func() {32d, err := os.ReadFile(filepath.Join(option.Config.ProcFS, "/sys/kernel/cap_last_cap"))33if err != nil {34logger.GetLogger().WithError(err).Warnf("Could not read kernel cap_last_cap, using default '%d' as cap_last_cap", cap_last_cap)35}36val, err := strconv.ParseInt(strings.TrimRight(string(d), "\n"), 10, 32)37if err != nil {38logger.GetLogger().WithError(err).Warnf("Could not parse cap_last_cap, using default '%d' as cap_last_cap", cap_last_cap)39return40}41// just silence some CodeQL42if val >= 0 && val < unix.CAP_LAST_CAP {43cap_last_cap = int32(val)44}45})46return cap_last_cap47}
48
49func isCapValid(capInt int32) bool {50if capInt >= 0 && capInt <= unix.CAP_LAST_CAP {51return true52}53
54return false55}
56
// AreSubset reports whether "a" is a subset of "set".
// It returns true when every capability bit set in "a" is also set in "set",
// and false as soon as "a" carries a bit that "set" lacks.
func AreSubset(a uint64, set uint64) bool {
	// Bits of "a" that are not covered by "set".
	extra := a &^ set
	return extra == 0
}
63
// capToMask returns the single-bit mask of capability "cap" inside its u32
// word. Only the low five bits of "cap" select the bit position.
func capToMask(cap int32) uint32 {
	bit := uint32(cap) & 31
	return uint32(1) << bit
}
68
69// GetCapsFullSet() Returns up to date (go unix library) full set.
70func GetCapsFullSet() uint64 {71// Get last u32 bits72caps := uint64(capToMask(GetLastCap()+1)-1) << 3273// Get first u32 bits74caps |= uint64(^uint32(0))75
76return caps77}
78
79func GetCapability(capInt int32) (string, error) {80if !isCapValid(capInt) {81return "", fmt.Errorf("invalid capability value %d", capInt)82}83
84str, ok := capabilitiesString[uint64(capInt)]85if !ok {86return "", fmt.Errorf("could not map capability value %d", capInt)87}88
89return str, nil90}
91
92func GetCapabilities(capInt uint64) string {93var caps []string94for i := uint64(0); i < 64; i++ {95if (1<<i)&capInt != 0 {96caps = append(caps, capabilitiesString[i])97}98}99return strings.Join(caps, " ")100}
101
// GetCapabilitiesHex formats capInt as a lowercase hexadecimal string that is
// zero-padded to 16 digits.
func GetCapabilitiesHex(capInt uint64) string {
	// A uint64 needs at most 16 hex digits.
	const hexWidth = 16
	return fmt.Sprintf("%0*x", hexWidth, capInt)
}
105
// capabilitiesString maps a capability number to its name. The entries and
// their descriptions follow uapi/linux/capability.h.
var capabilitiesString = map[uint64]string{
	/* In a system with the [_POSIX_CHOWN_RESTRICTED] option defined, this
	   overrides the restriction of changing file ownership and group
	   ownership. */
	0: "CAP_CHOWN",

	/* Override all DAC access, including ACL execute access if
	   [_POSIX_ACL] is defined. Excluding DAC access covered by
	   CAP_LINUX_IMMUTABLE. */
	1: "CAP_DAC_OVERRIDE",

	/* Overrides all DAC restrictions regarding read and search on files
	   and directories, including ACL restrictions if [_POSIX_ACL] is
	   defined. Excluding DAC access covered by CAP_LINUX_IMMUTABLE. */
	2: "CAP_DAC_READ_SEARCH",

	/* Overrides all restrictions about allowed operations on files, where
	   file owner ID must be equal to the user ID, except where CAP_FSETID
	   is applicable. It doesn't override MAC and DAC restrictions. */
	3: "CAP_FOWNER",

	/* Overrides the following restrictions that the effective user ID
	   shall match the file owner ID when setting the S_ISUID and S_ISGID
	   bits on that file; that the effective group ID (or one of the
	   supplementary group IDs) shall match the file owner ID when setting
	   the S_ISGID bit on that file; that the S_ISUID and S_ISGID bits are
	   cleared on successful return from chown(2) (not implemented). */
	4: "CAP_FSETID",

	/* Overrides the restriction that the real or effective user ID of a
	   process sending a signal must match the real or effective user ID
	   of the process receiving the signal. */
	5: "CAP_KILL",

	/* Allows setgid(2) manipulation */
	/* Allows setgroups(2) */
	/* Allows forged gids on socket credentials passing. */
	6: "CAP_SETGID",

	/* Allows set*uid(2) manipulation (including fsuid). */
	/* Allows forged pids on socket credentials passing. */
	7: "CAP_SETUID",

	/**
	 ** Linux-specific capabilities
	 **/

	/* Without VFS support for capabilities:
	 *   Transfer any capability in your permitted set to any pid,
	 *   remove any capability in your permitted set from any pid
	 * With VFS support for capabilities (neither of above, but)
	 *   Add any capability from current's capability bounding set
	 *       to the current process' inheritable set
	 *   Allow taking bits out of capability bounding set
	 *   Allow modification of the securebits for a process
	 */
	8: "CAP_SETPCAP",

	/* Allow modification of S_IMMUTABLE and S_APPEND file attributes */
	9: "CAP_LINUX_IMMUTABLE",

	/* Allows binding to TCP/UDP sockets below 1024 */
	/* Allows binding to ATM VCIs below 32 */
	10: "CAP_NET_BIND_SERVICE",

	/* Allow broadcasting, listen to multicast */
	11: "CAP_NET_BROADCAST",

	/* Allow interface configuration */
	/* Allow administration of IP firewall, masquerading and accounting */
	/* Allow setting debug option on sockets */
	/* Allow modification of routing tables */
	/* Allow setting arbitrary process / process group ownership on
	   sockets */
	/* Allow binding to any address for transparent proxying (also via NET_RAW) */
	/* Allow setting TOS (type of service) */
	/* Allow setting promiscuous mode */
	/* Allow clearing driver statistics */
	/* Allow multicasting */
	/* Allow read/write of device-specific registers */
	/* Allow activation of ATM control sockets */
	12: "CAP_NET_ADMIN",

	/* Allow use of RAW sockets */
	/* Allow use of PACKET sockets */
	/* Allow binding to any address for transparent proxying (also via NET_ADMIN) */
	13: "CAP_NET_RAW",

	/* Allow locking of shared memory segments */
	/* Allow mlock and mlockall (which doesn't really have anything to do
	   with IPC) */
	14: "CAP_IPC_LOCK",

	/* Override IPC ownership checks */
	15: "CAP_IPC_OWNER",

	/* Insert and remove kernel modules - modify kernel without limit */
	16: "CAP_SYS_MODULE",

	/* Allow ioperm/iopl access */
	/* Allow sending USB messages to any device via /dev/bus/usb */
	17: "CAP_SYS_RAWIO",

	/* Allow use of chroot() */
	18: "CAP_SYS_CHROOT",

	/* Allow ptrace() of any process */
	19: "CAP_SYS_PTRACE",

	/* Allow configuration of process accounting */
	20: "CAP_SYS_PACCT",

	/* Allow configuration of the secure attention key */
	/* Allow administration of the random device */
	/* Allow examination and configuration of disk quotas */
	/* Allow setting the domainname */
	/* Allow setting the hostname */
	/* Allow calling bdflush() */
	/* Allow mount() and umount(), setting up new smb connection */
	/* Allow some autofs root ioctls */
	/* Allow nfsservctl */
	/* Allow VM86_REQUEST_IRQ */
	/* Allow to read/write pci config on alpha */
	/* Allow irix_prctl on mips (setstacksize) */
	/* Allow flushing all cache on m68k (sys_cacheflush) */
	/* Allow removing semaphores */
	/* Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores
	   and shared memory */
	/* Allow locking/unlocking of shared memory segment */
	/* Allow turning swap on/off */
	/* Allow forged pids on socket credentials passing */
	/* Allow setting readahead and flushing buffers on block devices */
	/* Allow setting geometry in floppy driver */
	/* Allow turning DMA on/off in xd driver */
	/* Allow administration of md devices (mostly the above, but some
	   extra ioctls) */
	/* Allow tuning the ide driver */
	/* Allow access to the nvram device */
	/* Allow administration of apm_bios, serial and bttv (TV) device */
	/* Allow manufacturer commands in isdn CAPI support driver */
	/* Allow reading non-standardized portions of pci configuration space */
	/* Allow DDI debug ioctl on sbpcd driver */
	/* Allow setting up serial ports */
	/* Allow sending raw qic-117 commands */
	/* Allow enabling/disabling tagged queuing on SCSI controllers and sending
	   arbitrary SCSI commands */
	/* Allow setting encryption key on loopback filesystem */
	/* Allow setting zone reclaim policy */
	/* Allow everything under CAP_BPF and CAP_PERFMON for backward compatibility */
	21: "CAP_SYS_ADMIN",

	/* Allow use of reboot() */
	22: "CAP_SYS_BOOT",

	/* Allow raising priority and setting priority on other (different
	   UID) processes */
	/* Allow use of FIFO and round-robin (realtime) scheduling on own
	   processes and setting the scheduling algorithm used by another
	   process. */
	/* Allow setting cpu affinity on other processes */
	23: "CAP_SYS_NICE",

	/* Override resource limits. Set resource limits. */
	/* Override quota limits. */
	/* Override reserved space on ext2 filesystem */
	/* Modify data journaling mode on ext3 filesystem (uses journaling
	   resources) */
	/* NOTE: ext2 honors fsuid when checking for resource overrides, so
	   you can override using fsuid too */
	/* Override size restrictions on IPC message queues */
	/* Allow more than 64hz interrupts from the real-time clock */
	/* Override max number of consoles on console allocation */
	/* Override max number of keymaps */
	/* Control memory reclaim behavior */
	24: "CAP_SYS_RESOURCE",

	/* Allow manipulation of system clock */
	/* Allow irix_stime on mips */
	/* Allow setting the real-time clock */
	25: "CAP_SYS_TIME",

	/* Allow configuration of tty devices */
	/* Allow vhangup() of tty */
	26: "CAP_SYS_TTY_CONFIG",

	/* Allow the privileged aspects of mknod() */
	27: "CAP_MKNOD",

	/* Allow taking of leases on files */
	28: "CAP_LEASE",

	/* Allow writing the audit log via unicast netlink socket */
	29: "CAP_AUDIT_WRITE",

	/* Allow configuration of audit via unicast netlink socket */
	30: "CAP_AUDIT_CONTROL",

	/* Set or remove capabilities on files */
	31: "CAP_SETFCAP",

	/* Override MAC access.
	   The base kernel enforces no MAC policy.
	   An LSM may enforce a MAC policy, and if it does and it chooses
	   to implement capability based overrides of that policy, this is
	   the capability it should use to do so. */
	32: "CAP_MAC_OVERRIDE",

	/* Allow MAC configuration or state changes.
	   The base kernel requires no MAC configuration.
	   An LSM may enforce a MAC policy, and if it does and it chooses
	   to implement capability based checks on modifications to that
	   policy or the data required to maintain it, this is the
	   capability it should use to do so. */
	33: "CAP_MAC_ADMIN",

	/* Allow configuring the kernel's syslog (printk behaviour) */
	34: "CAP_SYSLOG",

	/* Allow triggering something that will wake the system */
	35: "CAP_WAKE_ALARM",

	/* Allow preventing system suspends */
	36: "CAP_BLOCK_SUSPEND",

	/* Allow reading the audit log via multicast netlink socket */
	37: "CAP_AUDIT_READ",

	/*
	 * Allow system performance and observability privileged operations
	 * using perf_events, i915_perf and other kernel subsystems
	 */
	38: "CAP_PERFMON",

	/*
	 * CAP_BPF allows the following BPF operations:
	 * - Creating all types of BPF maps
	 * - Advanced verifier features
	 *   - Indirect variable access
	 *   - Bounded loops
	 *   - BPF to BPF function calls
	 *   - Scalar precision tracking
	 *   - Larger complexity limits
	 *   - Dead code elimination
	 *   - And potentially other features
	 * - Loading BPF Type Format (BTF) data
	 * - Retrieve xlated and JITed code of BPF programs
	 * - Use bpf_spin_lock() helper
	 *
	 * CAP_PERFMON relaxes the verifier checks further:
	 * - BPF progs can use of pointer-to-integer conversions
	 * - speculation attack hardening measures are bypassed
	 * - bpf_probe_read to read arbitrary kernel memory is allowed
	 * - bpf_trace_printk to print kernel memory is allowed
	 *
	 * CAP_SYS_ADMIN is required to use bpf_probe_write_user.
	 *
	 * CAP_SYS_ADMIN is required to iterate system wide loaded
	 * programs, maps, links, BTFs and convert their IDs to file descriptors.
	 *
	 * CAP_PERFMON and CAP_BPF are required to load tracing programs.
	 * CAP_NET_ADMIN and CAP_BPF are required to load networking programs.
	 */
	39: "CAP_BPF",

	/* Allow checkpoint/restore related operations */
	/* Allow PID selection during clone3() */
	/* Allow writing to ns_last_pid */
	40: "CAP_CHECKPOINT_RESTORE",
}
411
412func GetPIDCaps(filename string) (uint32, uint64, uint64, uint64) {413pid := uint32(0)414permitted := uint64(0)415effective := uint64(0)416inheritable := uint64(0)417
418getValue64Hex := func(line string) (uint64, error) {419fields := strings.Fields(line)420if len(fields) < 2 {421return 0, fmt.Errorf("Fields to few arguments")422}423pidField := fields[len(fields)-1]424pid, err := strconv.ParseUint(pidField, 16, 64)425return pid, err426}427
428getValue32Int := func(line string) (uint32, error) {429fields := strings.Fields(line)430if len(fields) < 2 {431return 0, fmt.Errorf("Fields to few arguments")432}433pidField := fields[len(fields)-1]434pid, err := strconv.ParseUint(pidField, 10, 32)435return uint32(pid), err436}437
438file, err := os.ReadFile(filename)439if err != nil {440logger.GetLogger().WithError(err).Warnf("ReadFile failed: %s", filename)441return 0, 0, 0, 0442}443statuslines := strings.Split(string(file), "\n")444for _, line := range statuslines {445err = nil446if strings.Contains(line, "NStgid:") {447pid, err = getValue32Int(line)448}449if strings.Contains(line, "CapPrm:") {450permitted, err = getValue64Hex(line)451}452if strings.Contains(line, "CapEff:") {453effective, err = getValue64Hex(line)454}455if strings.Contains(line, "CapInh:") {456inheritable, err = getValue64Hex(line)457}458if err != nil {459logger.GetLogger().WithError(err).Warnf("ReadFile (%s) error: %s", line, filename)460}461}462return pid, permitted, effective, inheritable463}
464
465func GetCapabilitiesTypes(capInt uint64) []tetragon.CapabilitiesType {466var caps []tetragon.CapabilitiesType467for i := uint64(0); i < 64; i++ {468if (1<<i)&capInt != 0 {469e := tetragon.CapabilitiesType(i)470caps = append(caps, e)471}472}473return caps474}
475
476func GetMsgCapabilities(caps processapi.MsgCapabilities) *tetragon.Capabilities {477return &tetragon.Capabilities{478Permitted: GetCapabilitiesTypes(caps.Permitted),479Effective: GetCapabilitiesTypes(caps.Effective),480Inheritable: GetCapabilitiesTypes(caps.Inheritable),481}482}
483
484func GetCurrentCapabilities() *tetragon.Capabilities {485pidStr := strconv.Itoa(int(namespace.GetMyPidG()))486procCaps := filepath.Join(option.Config.ProcFS, pidStr, "status")487_, permitted, effective, inheritable := GetPIDCaps(procCaps)488
489return &tetragon.Capabilities{490Permitted: GetCapabilitiesTypes(permitted),491Effective: GetCapabilitiesTypes(effective),492Inheritable: GetCapabilitiesTypes(inheritable),493}494}
495
496func GetSecureBitsTypes(secBit uint32) []tetragon.SecureBitsType {497if secBit == 0 {498return nil499}500
501var bits []tetragon.SecureBitsType502
503if secBit&uint32(tetragon.SecureBitsType_SecBitNoRoot) != 0 {504bits = append(bits, tetragon.SecureBitsType_SecBitNoRoot)505}506
507if secBit&uint32(tetragon.SecureBitsType_SecBitNoRootLocked) != 0 {508bits = append(bits, tetragon.SecureBitsType_SecBitNoRootLocked)509}510
511if secBit&uint32(tetragon.SecureBitsType_SecBitNoSetUidFixup) != 0 {512bits = append(bits, tetragon.SecureBitsType_SecBitNoSetUidFixup)513}514
515if secBit&uint32(tetragon.SecureBitsType_SecBitNoSetUidFixupLocked) != 0 {516bits = append(bits, tetragon.SecureBitsType_SecBitNoSetUidFixupLocked)517}518
519if secBit&uint32(tetragon.SecureBitsType_SecBitKeepCaps) != 0 {520bits = append(bits, tetragon.SecureBitsType_SecBitKeepCaps)521}522
523if secBit&uint32(tetragon.SecureBitsType_SecBitKeepCapsLocked) != 0 {524bits = append(bits, tetragon.SecureBitsType_SecBitKeepCapsLocked)525}526
527if secBit&uint32(tetragon.SecureBitsType_SecBitNoCapAmbientRaise) != 0 {528bits = append(bits, tetragon.SecureBitsType_SecBitNoCapAmbientRaise)529}530
531if secBit&uint32(tetragon.SecureBitsType_SecBitNoCapAmbientRaiseLocked) != 0 {532bits = append(bits, tetragon.SecureBitsType_SecBitNoCapAmbientRaiseLocked)533}534
535return bits536}
537
538func GetPrivilegesChangedReasons(reasons uint32) []tetragon.ProcessPrivilegesChanged {539if reasons == 0 {540return nil541}542
543var bits []tetragon.ProcessPrivilegesChanged544
545if reasons&uint32(processapi.ExecveFileCaps) != 0 {546bits = append(bits, tetragon.ProcessPrivilegesChanged_PRIVILEGES_RAISED_EXEC_FILE_CAP)547}548
549if reasons&uint32(processapi.ExecveSetuidRoot) != 0 {550bits = append(bits, tetragon.ProcessPrivilegesChanged_PRIVILEGES_RAISED_EXEC_FILE_SETUID)551}552
553if reasons&uint32(processapi.ExecveSetgidRoot) != 0 {554bits = append(bits, tetragon.ProcessPrivilegesChanged_PRIVILEGES_RAISED_EXEC_FILE_SETGID)555}556
557if len(bits) > 0 {558return bits559}560
561return nil562}
563