podman

Форк
0
/
rootless_linux.go 
567 строк · 15.9 Кб
1
//go:build linux && cgo
2

3
package rootless
4

5
import (
6
	"bytes"
7
	"errors"
8
	"fmt"
9
	"os"
10
	"os/exec"
11
	gosignal "os/signal"
12
	"os/user"
13
	"runtime"
14
	"strconv"
15
	"strings"
16
	"sync"
17
	"unsafe"
18

19
	"github.com/containers/podman/v5/pkg/errorhandling"
20
	"github.com/containers/storage/pkg/idtools"
21
	pmount "github.com/containers/storage/pkg/mount"
22
	"github.com/containers/storage/pkg/unshare"
23
	"github.com/opencontainers/runtime-spec/specs-go"
24
	"github.com/sirupsen/logrus"
25
	"github.com/syndtr/gocapability/capability"
26
	"golang.org/x/sys/unix"
27
)
28

29
/*
30
#cgo remote CFLAGS: -Wall -Werror -DDISABLE_JOIN_SHORTCUT
31
#include <stdlib.h>
32
#include <sys/types.h>
33
extern uid_t rootless_uid();
34
extern uid_t rootless_gid();
35
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
36
extern int reexec_in_user_namespace_wait(int pid, int options);
37
extern int reexec_userns_join(int pid, char *pause_pid_file_path);
38
extern int is_fd_inherited(int fd);
39
*/
40
import "C"
41

42
// numSig is the upper bound (exclusive) of the signal numbers that are
// enumerated when building the list of signals to proxy.
const numSig = 65 // max number of signals
45

46
func init() {
47
	rootlessUIDInit := int(C.rootless_uid())
48
	rootlessGIDInit := int(C.rootless_gid())
49
	if rootlessUIDInit != 0 {
50
		// we need this if we joined the user+mount namespace from the C code.
51
		if err := os.Setenv("_CONTAINERS_USERNS_CONFIGURED", "done"); err != nil {
52
			logrus.Errorf("Failed to set environment variable %s as %s", "_CONTAINERS_USERNS_CONFIGURED", "done")
53
		}
54
		if err := os.Setenv("_CONTAINERS_ROOTLESS_UID", strconv.Itoa(rootlessUIDInit)); err != nil {
55
			logrus.Errorf("Failed to set environment variable %s as %d", "_CONTAINERS_ROOTLESS_UID", rootlessUIDInit)
56
		}
57
		if err := os.Setenv("_CONTAINERS_ROOTLESS_GID", strconv.Itoa(rootlessGIDInit)); err != nil {
58
			logrus.Errorf("Failed to set environment variable %s as %d", "_CONTAINERS_ROOTLESS_GID", rootlessGIDInit)
59
		}
60
	}
61
}
62

63
// runInUser marks the user namespace as configured by setting
// _CONTAINERS_USERNS_CONFIGURED to "done" and returns the os.Setenv error.
func runInUser() error {
	const key, value = "_CONTAINERS_USERNS_CONFIGURED", "done"
	return os.Setenv(key, value)
}
66

67
// NOTE(review): neither variable is referenced by the code visible in this
// file; they look like leftovers of a cached IsRootless — confirm before
// removing.

// isRootlessOnce is a sync.Once presumably meant to guard a one-time
// computation of isRootless.
var isRootlessOnce sync.Once

// isRootless is a package-level boolean that defaults to false.
var isRootless bool
71

72
// IsRootless tells us if we are running in rootless mode
73
func IsRootless() bool {
74
	// unshare.IsRootless() is used to check if a user namespace is required.
75
	// Here we need to make sure that nested podman instances act
76
	// as if they have root privileges and pick paths on the host
77
	// that would normally be used for root.
78
	return unshare.IsRootless() && unshare.GetRootlessUID() > 0
79
}
80

81
// GetRootlessUID returns the UID of the user in the parent userNS
82
func GetRootlessUID() int {
83
	return unshare.GetRootlessUID()
84
}
85

86
// GetRootlessGID returns the GID of the user in the parent userNS
87
func GetRootlessGID() int {
88
	return unshare.GetRootlessGID()
89
}
90

91
func tryMappingTool(uid bool, pid int, hostID int, mappings []idtools.IDMap) error {
92
	var tool = "newuidmap"
93
	mode := os.ModeSetuid
94
	cap := capability.CAP_SETUID
95
	idtype := "setuid"
96
	if !uid {
97
		tool = "newgidmap"
98
		mode = os.ModeSetgid
99
		cap = capability.CAP_SETGID
100
		idtype = "setgid"
101
	}
102
	path, err := exec.LookPath(tool)
103
	if err != nil {
104
		return fmt.Errorf("command required for rootless mode with multiple IDs: %w", err)
105
	}
106

107
	appendTriplet := func(l []string, a, b, c int) []string {
108
		return append(l, strconv.Itoa(a), strconv.Itoa(b), strconv.Itoa(c))
109
	}
110

111
	args := []string{path, strconv.Itoa(pid)}
112
	args = appendTriplet(args, 0, hostID, 1)
113
	for _, i := range mappings {
114
		if hostID >= i.HostID && hostID < i.HostID+i.Size {
115
			what := "UID"
116
			where := "/etc/subuid"
117
			if !uid {
118
				what = "GID"
119
				where = "/etc/subgid"
120
			}
121
			return fmt.Errorf("invalid configuration: the specified mapping %d:%d in %q includes the user %s", i.HostID, i.Size, where, what)
122
		}
123
		args = appendTriplet(args, i.ContainerID+1, i.HostID, i.Size)
124
	}
125
	cmd := exec.Cmd{
126
		Path: path,
127
		Args: args,
128
	}
129

130
	if output, err := cmd.CombinedOutput(); err != nil {
131
		logrus.Errorf("running `%s`: %s", strings.Join(args, " "), output)
132
		errorStr := fmt.Sprintf("cannot set up namespace using %q", path)
133
		if isSet, err := unshare.IsSetID(cmd.Path, mode, cap); err != nil {
134
			logrus.Errorf("Failed to check for %s on %s: %v", idtype, path, err)
135
		} else if !isSet {
136
			errorStr = fmt.Sprintf("%s: should have %s or have filecaps %s", errorStr, idtype, idtype)
137
		}
138
		return fmt.Errorf("%v: %w", errorStr, err)
139
	}
140
	return nil
141
}
142

143
// joinUserAndMountNS re-exec podman in a new userNS and join the user and mount
144
// namespace of the specified PID without looking up its parent.  Useful to join directly
145
// the conmon process.
146
func joinUserAndMountNS(pid uint, pausePid string) (bool, int, error) {
147
	hasCapSysAdmin, err := unshare.HasCapSysAdmin()
148
	if err != nil {
149
		return false, 0, err
150
	}
151
	if (os.Geteuid() == 0 && hasCapSysAdmin) || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
152
		return false, 0, nil
153
	}
154

155
	cPausePid := C.CString(pausePid)
156
	defer C.free(unsafe.Pointer(cPausePid))
157

158
	pidC := C.reexec_userns_join(C.int(pid), cPausePid)
159
	if int(pidC) < 0 {
160
		return false, -1, fmt.Errorf("cannot re-exec process to join the existing user namespace")
161
	}
162

163
	return waitAndProxySignalsToChild(pidC)
164
}
165

166
// GetConfiguredMappings returns the additional IDs configured for the current user.
167
func GetConfiguredMappings(quiet bool) ([]idtools.IDMap, []idtools.IDMap, error) {
168
	var uids, gids []idtools.IDMap
169
	username := os.Getenv("USER")
170
	if username == "" {
171
		var id string
172
		if os.Geteuid() == 0 {
173
			id = strconv.Itoa(GetRootlessUID())
174
		} else {
175
			id = strconv.Itoa(os.Geteuid())
176
		}
177
		userID, err := user.LookupId(id)
178
		if err == nil {
179
			username = userID.Username
180
		}
181
	}
182
	mappings, err := idtools.NewIDMappings(username, username)
183
	if err != nil {
184
		logLevel := logrus.ErrorLevel
185
		if quiet || (os.Geteuid() == 0 && GetRootlessUID() == 0) {
186
			logLevel = logrus.DebugLevel
187
		}
188
		logrus.StandardLogger().Logf(logLevel, "cannot find UID/GID for user %s: %v - check rootless mode in man pages.", username, err)
189
	} else {
190
		uids = mappings.UIDs()
191
		gids = mappings.GIDs()
192
	}
193
	return uids, gids, nil
194
}
195

196
// copyMappings writes the ID mappings read from the file from into the file
// to. It is refused with an error unless the effective UID is 0.
//
// If the source mentions 4294967295, the fixed two-range mapping
// "0 0 1" / "1 1 4294967294" is written instead of the source content.
func copyMappings(from, to string) error {
	// when running as non-root always go through the newuidmap/newgidmap
	// configuration since this is the expectation when running on Kubernetes
	if os.Geteuid() != 0 {
		return errors.New("copying mappings is allowed only for root")
	}

	mapping, readErr := os.ReadFile(from)
	if readErr != nil {
		return readErr
	}

	// Both runc and crun check whether the current process is in a user
	// namespace by looking up 4294967295 in /proc/self/uid_map. If the
	// mappings would be copied as they are, the check in the OCI runtimes
	// would fail. So just split it in two different ranges.
	fullRange := []byte("4294967295")
	if bytes.Contains(mapping, fullRange) {
		mapping = []byte("0 0 1\n1 1 4294967294\n")
	}

	return os.WriteFile(to, mapping, 0600)
}
215

216
func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ bool, _ int, retErr error) {
217
	hasCapSysAdmin, err := unshare.HasCapSysAdmin()
218
	if err != nil {
219
		return false, 0, err
220
	}
221

222
	if (os.Geteuid() == 0 && hasCapSysAdmin) || os.Getenv("_CONTAINERS_USERNS_CONFIGURED") != "" {
223
		if os.Getenv("_CONTAINERS_USERNS_CONFIGURED") == "init" {
224
			return false, 0, runInUser()
225
		}
226
		return false, 0, nil
227
	}
228

229
	if _, inContainer := os.LookupEnv("container"); !inContainer {
230
		if mounts, err := pmount.GetMounts(); err == nil {
231
			for _, m := range mounts {
232
				if m.Mountpoint == "/" {
233
					isShared := false
234
					for _, o := range strings.Split(m.Optional, ",") {
235
						if strings.HasPrefix(o, "shared:") {
236
							isShared = true
237
							break
238
						}
239
					}
240
					if !isShared {
241
						logrus.Warningf("%q is not a shared mount, this could cause issues or missing mounts with rootless containers", m.Mountpoint)
242
					}
243
					break
244
				}
245
			}
246
		}
247
	}
248

249
	cPausePid := C.CString(pausePid)
250
	defer C.free(unsafe.Pointer(cPausePid))
251

252
	cFileToRead := C.CString(fileToRead)
253
	defer C.free(unsafe.Pointer(cFileToRead))
254
	var fileOutputFD C.int
255
	if fileOutput != nil {
256
		fileOutputFD = C.int(fileOutput.Fd())
257
	}
258

259
	runtime.LockOSThread()
260
	defer runtime.UnlockOSThread()
261

262
	fds, err := unix.Socketpair(unix.AF_UNIX, unix.SOCK_DGRAM, 0)
263
	if err != nil {
264
		return false, -1, err
265
	}
266
	r, w := os.NewFile(uintptr(fds[0]), "sync host"), os.NewFile(uintptr(fds[1]), "sync child")
267

268
	var pid int
269

270
	defer errorhandling.CloseQuiet(r)
271
	defer errorhandling.CloseQuiet(w)
272
	defer func() {
273
		toWrite := []byte("0")
274
		if retErr != nil {
275
			toWrite = []byte("1")
276
		}
277
		if _, err := w.Write(toWrite); err != nil {
278
			logrus.Errorf("Failed to write byte 0: %q", err)
279
		}
280
		if retErr != nil && pid > 0 {
281
			if err := unix.Kill(pid, unix.SIGKILL); err != nil {
282
				if err != unix.ESRCH {
283
					logrus.Errorf("Failed to clean up process %d: %v", pid, err)
284
				}
285
			}
286
			C.reexec_in_user_namespace_wait(C.int(pid), 0)
287
		}
288
	}()
289

290
	pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD)
291
	pid = int(pidC)
292
	if pid < 0 {
293
		return false, -1, fmt.Errorf("cannot re-exec process")
294
	}
295

296
	uids, gids, err := GetConfiguredMappings(false)
297
	if err != nil {
298
		return false, -1, err
299
	}
300

301
	uidMap := fmt.Sprintf("/proc/%d/uid_map", pid)
302
	gidMap := fmt.Sprintf("/proc/%d/gid_map", pid)
303

304
	uidsMapped := false
305

306
	if err := copyMappings("/proc/self/uid_map", uidMap); err == nil {
307
		uidsMapped = true
308
	}
309

310
	if uids != nil && !uidsMapped {
311
		err := tryMappingTool(true, pid, os.Geteuid(), uids)
312
		// If some mappings were specified, do not ignore the error
313
		if err != nil && len(uids) > 0 {
314
			return false, -1, err
315
		}
316
		uidsMapped = err == nil
317
	}
318
	if !uidsMapped {
319
		logrus.Warnf("Using rootless single mapping into the namespace. This might break some images. Check /etc/subuid and /etc/subgid for adding sub*ids if not using a network user")
320
		setgroups := fmt.Sprintf("/proc/%d/setgroups", pid)
321
		err = os.WriteFile(setgroups, []byte("deny\n"), 0666)
322
		if err != nil {
323
			return false, -1, fmt.Errorf("cannot write setgroups file: %w", err)
324
		}
325
		logrus.Debugf("write setgroups file exited with 0")
326

327
		err = os.WriteFile(uidMap, []byte(fmt.Sprintf("%d %d 1\n", 0, os.Geteuid())), 0666)
328
		if err != nil {
329
			return false, -1, fmt.Errorf("cannot write uid_map: %w", err)
330
		}
331
		logrus.Debugf("write uid_map exited with 0")
332
	}
333

334
	gidsMapped := false
335
	if err := copyMappings("/proc/self/gid_map", gidMap); err == nil {
336
		gidsMapped = true
337
	}
338
	if gids != nil && !gidsMapped {
339
		err := tryMappingTool(false, pid, os.Getegid(), gids)
340
		// If some mappings were specified, do not ignore the error
341
		if err != nil && len(gids) > 0 {
342
			return false, -1, err
343
		}
344
		gidsMapped = err == nil
345
	}
346
	if !gidsMapped {
347
		err = os.WriteFile(gidMap, []byte(fmt.Sprintf("%d %d 1\n", 0, os.Getegid())), 0666)
348
		if err != nil {
349
			return false, -1, fmt.Errorf("cannot write gid_map: %w", err)
350
		}
351
	}
352

353
	_, err = w.WriteString("0")
354
	if err != nil {
355
		return false, -1, fmt.Errorf("write to sync pipe: %w", err)
356
	}
357

358
	b := make([]byte, 1)
359
	_, err = w.Read(b)
360
	if err != nil {
361
		return false, -1, fmt.Errorf("read from sync pipe: %w", err)
362
	}
363

364
	if fileOutput != nil {
365
		ret := C.reexec_in_user_namespace_wait(pidC, 0)
366
		if ret < 0 {
367
			return false, -1, errors.New("waiting for the re-exec process")
368
		}
369
		return true, 0, nil
370
	}
371

372
	if b[0] == '2' {
373
		// We have lost the race for writing the PID file, as probably another
374
		// process created a namespace and wrote the PID.
375
		// Try to join it.
376
		data, err := os.ReadFile(pausePid)
377
		if err == nil {
378
			var pid uint64
379
			pid, err = strconv.ParseUint(string(data), 10, 0)
380
			if err == nil {
381
				return joinUserAndMountNS(uint(pid), "")
382
			}
383
		}
384
		return false, -1, fmt.Errorf("setting up the process: %w", err)
385
	}
386

387
	if b[0] != '0' {
388
		return false, -1, errors.New("setting up the process")
389
	}
390

391
	return waitAndProxySignalsToChild(pidC)
392
}
393

394
// waitAndProxySignalsToChild forwards signals received by this process to
// the child pid and blocks until reexec_in_user_namespace_wait returns for
// that child.
//
// Every signal number in [0, numSig) except SIGTSTP is subscribed to;
// SIGCHLD and SIGPIPE are received but not forwarded. It returns
// (true, <value returned by the wait helper>, nil) on success, or
// (false, -1, error) when the wait helper returns a negative value.
func waitAndProxySignalsToChild(pid C.int) (bool, int, error) {
	forwarded := []os.Signal{}
	for n := 0; n < numSig; n++ {
		if n != int(unix.SIGTSTP) {
			forwarded = append(forwarded, unix.Signal(n))
		}
	}

	// Disable all existing signal handlers, from now forward everything to the child and let
	// it deal with it. All we do is to wait and propagate the exit code from the child to our parent.
	gosignal.Reset()
	sigCh := make(chan os.Signal, len(forwarded))
	gosignal.Notify(sigCh, forwarded...)
	go func() {
		for s := range sigCh {
			if s == unix.SIGCHLD || s == unix.SIGPIPE {
				continue
			}

			// ESRCH only means the child is already gone; not worth logging.
			err := unix.Kill(int(pid), s.(unix.Signal))
			if err != nil && err != unix.ESRCH {
				logrus.Errorf("Failed to propagate signal to child process %d: %v", int(pid), err)
			}
		}
	}()

	exitCode := C.reexec_in_user_namespace_wait(pid, 0)
	// child exited reset our signal proxy handler
	gosignal.Reset()
	if exitCode < 0 {
		return false, -1, errors.New("waiting for the re-exec process")
	}

	return true, int(exitCode), nil
}
431

432
// BecomeRootInUserNS re-exec podman in a new userNS.  It returns whether podman was re-executed
433
// into a new user namespace and the return code from the re-executed podman process.
434
// If podman was re-executed the caller needs to propagate the error code returned by the child
435
// process.
436
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
437
	return becomeRootInUserNS(pausePid, "", nil)
438
}
439

440
// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
441
// This is useful when there are already running containers and we
442
// don't have a pause process yet.  We can use the paths to the conmon
443
// processes to attempt joining their namespaces.
444
// If needNewNamespace is set, the file is read from a temporary user
445
// namespace, this is useful for containers that are running with a
446
// different uidmap and the unprivileged user has no way to read the
447
// file owned by the root in the container.
448
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
449
	var lastErr error
450
	var pausePid int
451

452
	for _, path := range paths {
453
		if !needNewNamespace {
454
			data, err := os.ReadFile(path)
455
			if err != nil {
456
				lastErr = err
457
				continue
458
			}
459

460
			pausePid, err = strconv.Atoi(string(data))
461
			if err != nil {
462
				lastErr = fmt.Errorf("cannot parse file %q: %w", path, err)
463
				continue
464
			}
465
		} else {
466
			r, w, err := os.Pipe()
467
			if err != nil {
468
				lastErr = err
469
				continue
470
			}
471

472
			defer errorhandling.CloseQuiet(r)
473

474
			if _, _, err := becomeRootInUserNS("", path, w); err != nil {
475
				w.Close()
476
				lastErr = err
477
				continue
478
			}
479

480
			if err := w.Close(); err != nil {
481
				return false, 0, err
482
			}
483
			defer func() {
484
				C.reexec_in_user_namespace_wait(-1, 0)
485
			}()
486

487
			b := make([]byte, 32)
488

489
			n, err := r.Read(b)
490
			if err != nil {
491
				lastErr = fmt.Errorf("cannot read %q: %w", path, err)
492
				continue
493
			}
494

495
			pausePid, err = strconv.Atoi(string(b[:n]))
496
			if err != nil {
497
				lastErr = err
498
				continue
499
			}
500
		}
501

502
		if pausePid > 0 && unix.Kill(pausePid, 0) == nil {
503
			joined, pid, err := joinUserAndMountNS(uint(pausePid), pausePidPath)
504
			if err == nil {
505
				return joined, pid, nil
506
			}
507
			lastErr = err
508
		}
509
	}
510
	if lastErr != nil {
511
		return false, 0, lastErr
512
	}
513
	return false, 0, fmt.Errorf("could not find any running process: %w", unix.ESRCH)
514
}
515

516
// matches reports whether the current ID mappings of the user namespace
// correspond to the configured additional IDs.
//
// The first entry of currentIDs must map exactly the single host ID id. The
// remaining entries must agree one by one with configuredIDs on HostID and
// Size. An empty currentIDs never matches.
func matches(id int, configuredIDs []idtools.IDMap, currentIDs []specs.LinuxIDMapping) bool {
	// Without any mapping there is no host-user entry to compare against;
	// indexing currentIDs[0] below would panic.
	if len(currentIDs) == 0 {
		return false
	}

	// The first mapping is the host user, handle it separately.
	hostUser := currentIDs[0]
	if hostUser.HostID != uint32(id) || hostUser.Size != 1 {
		return false
	}

	additional := currentIDs[1:]
	if len(additional) != len(configuredIDs) {
		return false
	}

	// It is fine to iterate sequentially as both slices are sorted.
	for i, cur := range additional {
		if cur.HostID != uint32(configuredIDs[i].HostID) || cur.Size != uint32(configuredIDs[i].Size) {
			return false
		}
	}

	return true
}
539

540
// ConfigurationMatches checks whether the additional uids/gids configured for the user
541
// match the current user namespace.
542
func ConfigurationMatches() (bool, error) {
543
	if !IsRootless() || os.Geteuid() != 0 {
544
		return true, nil
545
	}
546

547
	uids, gids, err := GetConfiguredMappings(false)
548
	if err != nil {
549
		return false, err
550
	}
551

552
	currentUIDs, currentGIDs, err := unshare.GetHostIDMappings("")
553
	if err != nil {
554
		return false, err
555
	}
556

557
	if !matches(GetRootlessUID(), uids, currentUIDs) {
558
		return false, err
559
	}
560

561
	return matches(GetRootlessGID(), gids, currentGIDs), nil
562
}
563

564
// IsFdInherited checks whether the fd is opened and valid to use
565
func IsFdInherited(fd int) bool {
566
	return int(C.is_fd_inherited(C.int(fd))) > 0
567
}
568

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.