podman

Форк
0
711 строк · 25.4 Кб
1
//go:build linux
2
// +build linux
3

4
package chroot
5

6
import (
7
	"errors"
8
	"fmt"
9
	"os"
10
	"os/exec"
11
	"path/filepath"
12
	"strings"
13
	"syscall"
14
	"time"
15

16
	"github.com/containers/buildah/copier"
17
	"github.com/containers/storage/pkg/mount"
18
	"github.com/containers/storage/pkg/unshare"
19
	"github.com/opencontainers/runc/libcontainer/apparmor"
20
	"github.com/opencontainers/runtime-spec/specs-go"
21
	"github.com/sirupsen/logrus"
22
	"github.com/syndtr/gocapability/capability"
23
	"golang.org/x/sys/unix"
24
)
25

26
var (
27
	rlimitsMap = map[string]int{
28
		"RLIMIT_AS":         unix.RLIMIT_AS,
29
		"RLIMIT_CORE":       unix.RLIMIT_CORE,
30
		"RLIMIT_CPU":        unix.RLIMIT_CPU,
31
		"RLIMIT_DATA":       unix.RLIMIT_DATA,
32
		"RLIMIT_FSIZE":      unix.RLIMIT_FSIZE,
33
		"RLIMIT_LOCKS":      unix.RLIMIT_LOCKS,
34
		"RLIMIT_MEMLOCK":    unix.RLIMIT_MEMLOCK,
35
		"RLIMIT_MSGQUEUE":   unix.RLIMIT_MSGQUEUE,
36
		"RLIMIT_NICE":       unix.RLIMIT_NICE,
37
		"RLIMIT_NOFILE":     unix.RLIMIT_NOFILE,
38
		"RLIMIT_NPROC":      unix.RLIMIT_NPROC,
39
		"RLIMIT_RSS":        unix.RLIMIT_RSS,
40
		"RLIMIT_RTPRIO":     unix.RLIMIT_RTPRIO,
41
		"RLIMIT_RTTIME":     unix.RLIMIT_RTTIME,
42
		"RLIMIT_SIGPENDING": unix.RLIMIT_SIGPENDING,
43
		"RLIMIT_STACK":      unix.RLIMIT_STACK,
44
	}
45
	rlimitsReverseMap = map[int]string{}
46
)
47

48
type runUsingChrootSubprocOptions struct {
49
	Spec        *specs.Spec
50
	BundlePath  string
51
	UIDMappings []syscall.SysProcIDMap
52
	GIDMappings []syscall.SysProcIDMap
53
}
54

55
// setPlatformUnshareOptions fills in the namespace flags, ID mappings, and OOM
// score adjustment on cmd, based on spec.
//
// New UTS and mount namespaces are always requested.  A user namespace is
// requested as well when the spec carries UID or GID mappings or lists a user
// namespace; in that case the spec's mappings are applied, and whichever of the
// two maps the spec leaves empty is borrowed from the host's mappings with
// every range mapped onto itself.  The only error returned is a failure to read
// the host's ID mappings.
func setPlatformUnshareOptions(spec *specs.Spec, cmd *unshare.Cmd) error {
	hostUIDs, hostGIDs, err := unshare.GetHostIDMappings("")
	if err != nil {
		return err
	}

	// borrow copies our parent's ranges and points each one at itself, so the
	// caller's slice is never modified.
	borrow := func(parent []specs.LinuxIDMapping) []specs.LinuxIDMapping {
		own := append([]specs.LinuxIDMapping{}, parent...)
		for i := range own {
			own[i].HostID = own[i].ContainerID
		}
		return own
	}

	uidmap := spec.Linux.UIDMappings
	if len(uidmap) == 0 {
		// No UID mappings are configured for the container.
		uidmap = borrow(hostUIDs)
	}
	gidmap := spec.Linux.GIDMappings
	if len(gidmap) == 0 {
		// No GID mappings are configured for the container.
		gidmap = borrow(hostGIDs)
	}

	// Configured mappings imply a user namespace; otherwise one has to be
	// listed explicitly.
	wantUserNS := len(spec.Linux.UIDMappings) > 0 || len(spec.Linux.GIDMappings) > 0
	if !wantUserNS {
		for _, ns := range spec.Linux.Namespaces {
			if ns.Type == specs.UserNamespace {
				wantUserNS = true
				break
			}
		}
	}

	flags := syscall.CLONE_NEWUTS | syscall.CLONE_NEWNS
	if wantUserNS {
		flags |= syscall.CLONE_NEWUSER
		cmd.UidMappings = uidmap
		cmd.GidMappings = gidmap
		cmd.GidMappingsEnableSetgroups = true
	}
	cmd.UnshareFlags = flags
	cmd.OOMScoreAdj = spec.Process.OOMScoreAdj
	return nil
}
93

94
func setContainerHostname(name string) {
95
	if err := unix.Sethostname([]byte(name)); err != nil {
96
		logrus.Debugf("failed to set hostname %q for process: %v", name, err)
97
	}
98
}
99

100
// logNamespaceDiagnostics knows which namespaces we want to create.
101
// Output debug messages when that differs from what we're being asked to do.
102
func logNamespaceDiagnostics(spec *specs.Spec) {
103
	sawMountNS := false
104
	sawUTSNS := false
105
	for _, ns := range spec.Linux.Namespaces {
106
		switch ns.Type {
107
		case specs.CgroupNamespace:
108
			if ns.Path != "" {
109
				logrus.Debugf("unable to join cgroup namespace, sorry about that")
110
			} else {
111
				logrus.Debugf("unable to create cgroup namespace, sorry about that")
112
			}
113
		case specs.IPCNamespace:
114
			if ns.Path != "" {
115
				logrus.Debugf("unable to join IPC namespace, sorry about that")
116
			} else {
117
				logrus.Debugf("unable to create IPC namespace, sorry about that")
118
			}
119
		case specs.MountNamespace:
120
			if ns.Path != "" {
121
				logrus.Debugf("unable to join mount namespace %q, creating a new one", ns.Path)
122
			}
123
			sawMountNS = true
124
		case specs.NetworkNamespace:
125
			if ns.Path != "" {
126
				logrus.Debugf("unable to join network namespace, sorry about that")
127
			} else {
128
				logrus.Debugf("unable to create network namespace, sorry about that")
129
			}
130
		case specs.PIDNamespace:
131
			if ns.Path != "" {
132
				logrus.Debugf("unable to join PID namespace, sorry about that")
133
			} else {
134
				logrus.Debugf("unable to create PID namespace, sorry about that")
135
			}
136
		case specs.UserNamespace:
137
			if ns.Path != "" {
138
				logrus.Debugf("unable to join user namespace, sorry about that")
139
			}
140
		case specs.UTSNamespace:
141
			if ns.Path != "" {
142
				logrus.Debugf("unable to join UTS namespace %q, creating a new one", ns.Path)
143
			}
144
			sawUTSNS = true
145
		}
146
	}
147
	if !sawMountNS {
148
		logrus.Debugf("mount namespace not requested, but creating a new one anyway")
149
	}
150
	if !sawUTSNS {
151
		logrus.Debugf("UTS namespace not requested, but creating a new one anyway")
152
	}
153
}
154

155
// setApparmorProfile sets the apparmor profile for ourselves, and hopefully any child processes that we'll start.
156
func setApparmorProfile(spec *specs.Spec) error {
157
	if !apparmor.IsEnabled() || spec.Process.ApparmorProfile == "" {
158
		return nil
159
	}
160
	if err := apparmor.ApplyProfile(spec.Process.ApparmorProfile); err != nil {
161
		return fmt.Errorf("setting apparmor profile to %q: %w", spec.Process.ApparmorProfile, err)
162
	}
163
	return nil
164
}
165

166
// setCapabilities sets capabilities for ourselves, to be more or less inherited by any processes that we'll start.
167
func setCapabilities(spec *specs.Spec, keepCaps ...string) error {
168
	currentCaps, err := capability.NewPid2(0)
169
	if err != nil {
170
		return fmt.Errorf("reading capabilities of current process: %w", err)
171
	}
172
	if err := currentCaps.Load(); err != nil {
173
		return fmt.Errorf("loading capabilities: %w", err)
174
	}
175
	caps, err := capability.NewPid2(0)
176
	if err != nil {
177
		return fmt.Errorf("reading capabilities of current process: %w", err)
178
	}
179
	capMap := map[capability.CapType][]string{
180
		capability.BOUNDING:    spec.Process.Capabilities.Bounding,
181
		capability.EFFECTIVE:   spec.Process.Capabilities.Effective,
182
		capability.INHERITABLE: []string{},
183
		capability.PERMITTED:   spec.Process.Capabilities.Permitted,
184
		capability.AMBIENT:     spec.Process.Capabilities.Ambient,
185
	}
186
	knownCaps := capability.List()
187
	noCap := capability.Cap(-1)
188
	for capType, capList := range capMap {
189
		for _, capToSet := range capList {
190
			cap := noCap
191
			for _, c := range knownCaps {
192
				if strings.EqualFold("CAP_"+c.String(), capToSet) {
193
					cap = c
194
					break
195
				}
196
			}
197
			if cap == noCap {
198
				return fmt.Errorf("mapping capability %q to a number", capToSet)
199
			}
200
			caps.Set(capType, cap)
201
		}
202
		for _, capToSet := range keepCaps {
203
			cap := noCap
204
			for _, c := range knownCaps {
205
				if strings.EqualFold("CAP_"+c.String(), capToSet) {
206
					cap = c
207
					break
208
				}
209
			}
210
			if cap == noCap {
211
				return fmt.Errorf("mapping capability %q to a number", capToSet)
212
			}
213
			if currentCaps.Get(capType, cap) {
214
				caps.Set(capType, cap)
215
			}
216
		}
217
	}
218
	if err = caps.Apply(capability.CAPS | capability.BOUNDS | capability.AMBS); err != nil {
219
		return fmt.Errorf("setting capabilities: %w", err)
220
	}
221
	return nil
222
}
223

224
// makeRlimit converts a runtime-spec rlimit into a unix.Rlimit, using the soft
// limit as the current value and the hard limit as the maximum.
func makeRlimit(limit specs.POSIXRlimit) unix.Rlimit {
	var rl unix.Rlimit
	rl.Cur, rl.Max = limit.Soft, limit.Hard
	return rl
}
227

228
// createPlatformContainer always fails: it ignores options and returns an
// "unsupported" error.
func createPlatformContainer(options runUsingChrootExecSubprocOptions) error {
	unsupported := errors.New("unsupported createPlatformContainer")
	return unsupported
}
231

232
func mountFlagsForFSFlags(fsFlags uintptr) uintptr {
233
	var mountFlags uintptr
234
	for _, mapping := range []struct {
235
		fsFlag    uintptr
236
		mountFlag uintptr
237
	}{
238
		{unix.ST_MANDLOCK, unix.MS_MANDLOCK},
239
		{unix.ST_NOATIME, unix.MS_NOATIME},
240
		{unix.ST_NODEV, unix.MS_NODEV},
241
		{unix.ST_NODIRATIME, unix.MS_NODIRATIME},
242
		{unix.ST_NOEXEC, unix.MS_NOEXEC},
243
		{unix.ST_NOSUID, unix.MS_NOSUID},
244
		{unix.ST_RDONLY, unix.MS_RDONLY},
245
		{unix.ST_RELATIME, unix.MS_RELATIME},
246
		{unix.ST_SYNCHRONOUS, unix.MS_SYNCHRONOUS},
247
	} {
248
		if fsFlags&mapping.fsFlag == mapping.fsFlag {
249
			mountFlags |= mapping.mountFlag
250
		}
251
	}
252
	return mountFlags
253
}
254

255
func makeReadOnly(mntpoint string, flags uintptr) error {
256
	var fs unix.Statfs_t
257
	// Make sure it's read-only.
258
	if err := unix.Statfs(mntpoint, &fs); err != nil {
259
		return fmt.Errorf("checking if directory %q was bound read-only: %w", mntpoint, err)
260
	}
261
	if fs.Flags&unix.ST_RDONLY == 0 {
262
		// All callers currently pass MS_RDONLY in "flags", but in case they stop doing
263
		// that at some point in the future...
264
		if err := unix.Mount(mntpoint, mntpoint, "bind", flags|unix.MS_RDONLY|unix.MS_REMOUNT|unix.MS_BIND, ""); err != nil {
265
			return fmt.Errorf("remounting %s in mount namespace read-only: %w", mntpoint, err)
266
		}
267
	}
268
	return nil
269
}
270

271
// setupChrootBindMounts actually bind mounts things under the rootfs, and returns a
272
// callback that will clean up its work.
273
func setupChrootBindMounts(spec *specs.Spec, bundlePath string) (undoBinds func() error, err error) {
274
	var fs unix.Statfs_t
275
	undoBinds = func() error {
276
		if err2 := unix.Unmount(spec.Root.Path, unix.MNT_DETACH); err2 != nil {
277
			retries := 0
278
			for (err2 == unix.EBUSY || err2 == unix.EAGAIN) && retries < 50 {
279
				time.Sleep(50 * time.Millisecond)
280
				err2 = unix.Unmount(spec.Root.Path, unix.MNT_DETACH)
281
				retries++
282
			}
283
			if err2 != nil {
284
				logrus.Warnf("pkg/chroot: error unmounting %q (retried %d times): %v", spec.Root.Path, retries, err2)
285
				if err == nil {
286
					err = err2
287
				}
288
			}
289
		}
290
		return err
291
	}
292

293
	// Now bind mount all of those things to be under the rootfs's location in this
294
	// mount namespace.
295
	commonFlags := uintptr(unix.MS_BIND | unix.MS_REC | unix.MS_PRIVATE)
296
	bindFlags := commonFlags
297
	devFlags := commonFlags | unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_RDONLY
298
	procFlags := devFlags | unix.MS_NODEV
299
	sysFlags := devFlags | unix.MS_NODEV
300

301
	// Bind /dev read-only.
302
	subDev := filepath.Join(spec.Root.Path, "/dev")
303
	if err := unix.Mount("/dev", subDev, "bind", devFlags, ""); err != nil {
304
		if errors.Is(err, os.ErrNotExist) {
305
			err = os.Mkdir(subDev, 0755)
306
			if err == nil {
307
				err = unix.Mount("/dev", subDev, "bind", devFlags, "")
308
			}
309
		}
310
		if err != nil {
311
			return undoBinds, fmt.Errorf("bind mounting /dev from host into mount namespace: %w", err)
312
		}
313
	}
314
	// Make sure it's read-only.
315
	if err = unix.Statfs(subDev, &fs); err != nil {
316
		return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", subDev, err)
317
	}
318
	if fs.Flags&unix.ST_RDONLY == 0 {
319
		if err := unix.Mount(subDev, subDev, "bind", devFlags|unix.MS_REMOUNT|unix.MS_BIND, ""); err != nil {
320
			return undoBinds, fmt.Errorf("remounting /dev in mount namespace read-only: %w", err)
321
		}
322
	}
323
	logrus.Debugf("bind mounted %q to %q", "/dev", filepath.Join(spec.Root.Path, "/dev"))
324

325
	// Bind /proc read-only.
326
	subProc := filepath.Join(spec.Root.Path, "/proc")
327
	if err := unix.Mount("/proc", subProc, "bind", procFlags, ""); err != nil {
328
		if errors.Is(err, os.ErrNotExist) {
329
			err = os.Mkdir(subProc, 0755)
330
			if err == nil {
331
				err = unix.Mount("/proc", subProc, "bind", procFlags, "")
332
			}
333
		}
334
		if err != nil {
335
			return undoBinds, fmt.Errorf("bind mounting /proc from host into mount namespace: %w", err)
336
		}
337
	}
338
	logrus.Debugf("bind mounted %q to %q", "/proc", filepath.Join(spec.Root.Path, "/proc"))
339

340
	// Bind /sys read-only.
341
	subSys := filepath.Join(spec.Root.Path, "/sys")
342
	if err := unix.Mount("/sys", subSys, "bind", sysFlags, ""); err != nil {
343
		if errors.Is(err, os.ErrNotExist) {
344
			err = os.Mkdir(subSys, 0755)
345
			if err == nil {
346
				err = unix.Mount("/sys", subSys, "bind", sysFlags, "")
347
			}
348
		}
349
		if err != nil {
350
			return undoBinds, fmt.Errorf("bind mounting /sys from host into mount namespace: %w", err)
351
		}
352
	}
353
	if err := makeReadOnly(subSys, sysFlags); err != nil {
354
		return undoBinds, err
355
	}
356

357
	mnts, _ := mount.GetMounts()
358
	for _, m := range mnts {
359
		if !strings.HasPrefix(m.Mountpoint, "/sys/") &&
360
			m.Mountpoint != "/sys" {
361
			continue
362
		}
363
		subSys := filepath.Join(spec.Root.Path, m.Mountpoint)
364
		if err := unix.Mount(m.Mountpoint, subSys, "bind", sysFlags, ""); err != nil {
365
			msg := fmt.Sprintf("could not bind mount %q, skipping: %v", m.Mountpoint, err)
366
			if strings.HasPrefix(m.Mountpoint, "/sys") {
367
				logrus.Infof(msg)
368
			} else {
369
				logrus.Warningf(msg)
370
			}
371
			continue
372
		}
373
		if err := makeReadOnly(subSys, sysFlags); err != nil {
374
			return undoBinds, err
375
		}
376
	}
377
	logrus.Debugf("bind mounted %q to %q", "/sys", filepath.Join(spec.Root.Path, "/sys"))
378

379
	// Bind, overlay, or tmpfs mount everything we've been asked to mount.
380
	for _, m := range spec.Mounts {
381
		// Skip anything that we just mounted.
382
		switch m.Destination {
383
		case "/dev", "/proc", "/sys":
384
			logrus.Debugf("already bind mounted %q on %q", m.Destination, filepath.Join(spec.Root.Path, m.Destination))
385
			continue
386
		default:
387
			if strings.HasPrefix(m.Destination, "/dev/") {
388
				continue
389
			}
390
			if strings.HasPrefix(m.Destination, "/proc/") {
391
				continue
392
			}
393
			if strings.HasPrefix(m.Destination, "/sys/") {
394
				continue
395
			}
396
		}
397
		// Skip anything that isn't a bind or overlay or tmpfs mount.
398
		if m.Type != "bind" && m.Type != "tmpfs" && m.Type != "overlay" {
399
			logrus.Debugf("skipping mount of type %q on %q", m.Type, m.Destination)
400
			continue
401
		}
402
		// If the target is already there, we can just mount over it.
403
		var srcinfo os.FileInfo
404
		switch m.Type {
405
		case "bind":
406
			srcinfo, err = os.Stat(m.Source)
407
			if err != nil {
408
				return undoBinds, fmt.Errorf("examining %q for mounting in mount namespace: %w", m.Source, err)
409
			}
410
		case "overlay", "tmpfs":
411
			srcinfo, err = os.Stat("/")
412
			if err != nil {
413
				return undoBinds, fmt.Errorf("examining / to use as a template for a %s mount: %w", m.Type, err)
414
			}
415
		}
416
		target := filepath.Join(spec.Root.Path, m.Destination)
417
		// Check if target is a symlink.
418
		stat, err := os.Lstat(target)
419
		// If target is a symlink, follow the link and ensure the destination exists.
420
		if err == nil && stat != nil && (stat.Mode()&os.ModeSymlink != 0) {
421
			target, err = copier.Eval(spec.Root.Path, m.Destination, copier.EvalOptions{})
422
			if err != nil {
423
				return nil, fmt.Errorf("evaluating symlink %q: %w", target, err)
424
			}
425
			// Stat the destination of the evaluated symlink.
426
			_, err = os.Stat(target)
427
		}
428
		if err != nil {
429
			// If the target can't be stat()ted, check the error.
430
			if !errors.Is(err, os.ErrNotExist) {
431
				return undoBinds, fmt.Errorf("examining %q for mounting in mount namespace: %w", target, err)
432
			}
433
			// The target isn't there yet, so create it.  If the source is a directory,
434
			// we need a directory, otherwise we need a non-directory (i.e., a file).
435
			if srcinfo.IsDir() {
436
				if err = os.MkdirAll(target, 0755); err != nil {
437
					return undoBinds, fmt.Errorf("creating mountpoint %q in mount namespace: %w", target, err)
438
				}
439
			} else {
440
				if err = os.MkdirAll(filepath.Dir(target), 0755); err != nil {
441
					return undoBinds, fmt.Errorf("ensuring parent of mountpoint %q (%q) is present in mount namespace: %w", target, filepath.Dir(target), err)
442
				}
443
				var file *os.File
444
				if file, err = os.OpenFile(target, os.O_WRONLY|os.O_CREATE, 0755); err != nil {
445
					return undoBinds, fmt.Errorf("creating mountpoint %q in mount namespace: %w", target, err)
446
				}
447
				file.Close()
448
			}
449
		}
450
		// Sort out which flags we're asking for, and what statfs() should be telling us
451
		// if we successfully mounted with them.
452
		requestFlags := uintptr(0)
453
		expectedImportantFlags := uintptr(0)
454
		importantFlags := uintptr(0)
455
		possibleImportantFlags := uintptr(unix.ST_NODEV | unix.ST_NOEXEC | unix.ST_NOSUID | unix.ST_RDONLY)
456
		for _, option := range m.Options {
457
			switch option {
458
			case "nodev":
459
				requestFlags |= unix.MS_NODEV
460
				importantFlags |= unix.ST_NODEV
461
				expectedImportantFlags |= unix.ST_NODEV
462
			case "dev":
463
				requestFlags &= ^uintptr(unix.MS_NODEV)
464
				importantFlags |= unix.ST_NODEV
465
				expectedImportantFlags &= ^uintptr(unix.ST_NODEV)
466
			case "noexec":
467
				requestFlags |= unix.MS_NOEXEC
468
				importantFlags |= unix.ST_NOEXEC
469
				expectedImportantFlags |= unix.ST_NOEXEC
470
			case "exec":
471
				requestFlags &= ^uintptr(unix.MS_NOEXEC)
472
				importantFlags |= unix.ST_NOEXEC
473
				expectedImportantFlags &= ^uintptr(unix.ST_NOEXEC)
474
			case "nosuid":
475
				requestFlags |= unix.MS_NOSUID
476
				importantFlags |= unix.ST_NOSUID
477
				expectedImportantFlags |= unix.ST_NOSUID
478
			case "suid":
479
				requestFlags &= ^uintptr(unix.MS_NOSUID)
480
				importantFlags |= unix.ST_NOSUID
481
				expectedImportantFlags &= ^uintptr(unix.ST_NOSUID)
482
			case "ro":
483
				requestFlags |= unix.MS_RDONLY
484
				importantFlags |= unix.ST_RDONLY
485
				expectedImportantFlags |= unix.ST_RDONLY
486
			case "rw":
487
				requestFlags &= ^uintptr(unix.MS_RDONLY)
488
				importantFlags |= unix.ST_RDONLY
489
				expectedImportantFlags &= ^uintptr(unix.ST_RDONLY)
490
			}
491
		}
492
		switch m.Type {
493
		case "bind":
494
			// Do the initial bind mount.  We'll worry about the flags in a bit.
495
			logrus.Debugf("bind mounting %q on %q %v", m.Destination, filepath.Join(spec.Root.Path, m.Destination), m.Options)
496
			if err = unix.Mount(m.Source, target, "", bindFlags|requestFlags, ""); err != nil {
497
				return undoBinds, fmt.Errorf("bind mounting %q from host to %q in mount namespace (%q): %w", m.Source, m.Destination, target, err)
498
			}
499
			logrus.Debugf("bind mounted %q to %q", m.Source, target)
500
		case "tmpfs":
501
			// Mount a tmpfs.  We'll worry about the flags in a bit.
502
			if err = mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil {
503
				return undoBinds, fmt.Errorf("mounting tmpfs to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(append(m.Options, "private"), ","), err)
504
			}
505
			logrus.Debugf("mounted a tmpfs to %q", target)
506
		case "overlay":
507
			// Mount an overlay.  We'll worry about the flags in a bit.
508
			if err = mount.Mount(m.Source, target, m.Type, strings.Join(append(m.Options, "private"), ",")); err != nil {
509
				return undoBinds, fmt.Errorf("mounting overlay to %q in mount namespace (%q, %q): %w", m.Destination, target, strings.Join(append(m.Options, "private"), ","), err)
510
			}
511
			logrus.Debugf("mounted a overlay to %q", target)
512
		}
513
		// Time to worry about the flags.
514
		if err = unix.Statfs(target, &fs); err != nil {
515
			return undoBinds, fmt.Errorf("checking if volume %q was mounted with requested flags: %w", target, err)
516
		}
517
		effectiveImportantFlags := uintptr(fs.Flags) & importantFlags
518
		if effectiveImportantFlags != expectedImportantFlags {
519
			// Do a remount to try to get the desired flags to stick.
520
			effectiveUnimportantFlags := uintptr(fs.Flags) & ^possibleImportantFlags
521
			if err = unix.Mount(target, target, m.Type, unix.MS_REMOUNT|bindFlags|requestFlags|mountFlagsForFSFlags(effectiveUnimportantFlags), ""); err != nil {
522
				return undoBinds, fmt.Errorf("remounting %q in mount namespace with flags %#x instead of %#x: %w", target, requestFlags, effectiveImportantFlags, err)
523
			}
524
			// Check if the desired flags stuck.
525
			if err = unix.Statfs(target, &fs); err != nil {
526
				return undoBinds, fmt.Errorf("checking if directory %q was remounted with requested flags %#x instead of %#x: %w", target, requestFlags, effectiveImportantFlags, err)
527
			}
528
			newEffectiveImportantFlags := uintptr(fs.Flags) & importantFlags
529
			if newEffectiveImportantFlags != expectedImportantFlags {
530
				return undoBinds, fmt.Errorf("unable to remount %q with requested flags %#x instead of %#x, just got %#x back", target, requestFlags, effectiveImportantFlags, newEffectiveImportantFlags)
531
			}
532
		}
533
	}
534

535
	// Set up any read-only paths that we need to.  If we're running inside
536
	// of a container, some of these locations will already be read-only, in
537
	// which case can declare victory and move on.
538
	for _, roPath := range spec.Linux.ReadonlyPaths {
539
		r := filepath.Join(spec.Root.Path, roPath)
540
		target, err := filepath.EvalSymlinks(r)
541
		if err != nil {
542
			if errors.Is(err, os.ErrNotExist) {
543
				// No target, no problem.
544
				continue
545
			}
546
			return undoBinds, fmt.Errorf("checking %q for symlinks before marking it read-only: %w", r, err)
547
		}
548
		// Check if the location is already read-only.
549
		var fs unix.Statfs_t
550
		if err = unix.Statfs(target, &fs); err != nil {
551
			if errors.Is(err, os.ErrNotExist) {
552
				// No target, no problem.
553
				continue
554
			}
555
			return undoBinds, fmt.Errorf("checking if directory %q is already read-only: %w", target, err)
556
		}
557
		if fs.Flags&unix.ST_RDONLY == unix.ST_RDONLY {
558
			continue
559
		}
560
		// Mount the location over itself, so that we can remount it as read-only, making
561
		// sure to preserve any combination of nodev/noexec/nosuid that's already in play.
562
		roFlags := mountFlagsForFSFlags(uintptr(fs.Flags)) | unix.MS_RDONLY
563
		if err := unix.Mount(target, target, "", bindFlags|roFlags, ""); err != nil {
564
			if errors.Is(err, os.ErrNotExist) {
565
				// No target, no problem.
566
				continue
567
			}
568
			return undoBinds, fmt.Errorf("bind mounting %q onto itself in preparation for making it read-only: %w", target, err)
569
		}
570
		// Remount the location read-only.
571
		if err = unix.Statfs(target, &fs); err != nil {
572
			return undoBinds, fmt.Errorf("checking if directory %q was bound read-only: %w", target, err)
573
		}
574
		if fs.Flags&unix.ST_RDONLY == 0 {
575
			if err := unix.Mount(target, target, "", unix.MS_REMOUNT|unix.MS_RDONLY|bindFlags|mountFlagsForFSFlags(uintptr(fs.Flags)), ""); err != nil {
576
				return undoBinds, fmt.Errorf("remounting %q in mount namespace read-only: %w", target, err)
577
			}
578
		}
579
		// Check again.
580
		if err = unix.Statfs(target, &fs); err != nil {
581
			return undoBinds, fmt.Errorf("checking if directory %q was remounted read-only: %w", target, err)
582
		}
583
		if fs.Flags&unix.ST_RDONLY == 0 {
584
			// Still not read only.
585
			return undoBinds, fmt.Errorf("verifying that %q in mount namespace was remounted read-only: %w", target, err)
586
		}
587
	}
588

589
	// Create an empty directory for to use for masking directories.
590
	roEmptyDir := filepath.Join(bundlePath, "empty")
591
	if len(spec.Linux.MaskedPaths) > 0 {
592
		if err := os.Mkdir(roEmptyDir, 0700); err != nil {
593
			return undoBinds, fmt.Errorf("creating empty directory %q: %w", roEmptyDir, err)
594
		}
595
	}
596

597
	// Set up any masked paths that we need to.  If we're running inside of
598
	// a container, some of these locations will already be read-only tmpfs
599
	// filesystems or bind mounted to os.DevNull.  If we're not running
600
	// inside of a container, and nobody else has done that, we'll do it.
601
	for _, masked := range spec.Linux.MaskedPaths {
602
		t := filepath.Join(spec.Root.Path, masked)
603
		target, err := filepath.EvalSymlinks(t)
604
		if err != nil {
605
			target = t
606
		}
607
		// Get some info about the target.
608
		targetinfo, err := os.Stat(target)
609
		if err != nil {
610
			if errors.Is(err, os.ErrNotExist) {
611
				// No target, no problem.
612
				continue
613
			}
614
			return undoBinds, fmt.Errorf("examining %q for masking in mount namespace: %w", target, err)
615
		}
616
		if targetinfo.IsDir() {
617
			// The target's a directory.  Check if it's a read-only filesystem.
618
			var statfs unix.Statfs_t
619
			if err = unix.Statfs(target, &statfs); err != nil {
620
				return undoBinds, fmt.Errorf("checking if directory %q is a mountpoint: %w", target, err)
621
			}
622
			isReadOnly := statfs.Flags&unix.ST_RDONLY == unix.ST_RDONLY
623
			// Check if any of the IDs we're mapping could read it.
624
			var stat unix.Stat_t
625
			if err = unix.Stat(target, &stat); err != nil {
626
				return undoBinds, fmt.Errorf("checking permissions on directory %q: %w", target, err)
627
			}
628
			isAccessible := false
629
			if stat.Mode&unix.S_IROTH|unix.S_IXOTH != 0 {
630
				isAccessible = true
631
			}
632
			if !isAccessible && stat.Mode&unix.S_IROTH|unix.S_IXOTH != 0 {
633
				if len(spec.Linux.GIDMappings) > 0 {
634
					for _, mapping := range spec.Linux.GIDMappings {
635
						if stat.Gid >= mapping.ContainerID && stat.Gid < mapping.ContainerID+mapping.Size {
636
							isAccessible = true
637
							break
638
						}
639
					}
640
				}
641
			}
642
			if !isAccessible && stat.Mode&unix.S_IRUSR|unix.S_IXUSR != 0 {
643
				if len(spec.Linux.UIDMappings) > 0 {
644
					for _, mapping := range spec.Linux.UIDMappings {
645
						if stat.Uid >= mapping.ContainerID && stat.Uid < mapping.ContainerID+mapping.Size {
646
							isAccessible = true
647
							break
648
						}
649
					}
650
				}
651
			}
652
			// Check if it's empty.
653
			hasContent := false
654
			directory, err := os.Open(target)
655
			if err != nil {
656
				if !os.IsPermission(err) {
657
					return undoBinds, fmt.Errorf("opening directory %q: %w", target, err)
658
				}
659
			} else {
660
				names, err := directory.Readdirnames(0)
661
				directory.Close()
662
				if err != nil {
663
					return undoBinds, fmt.Errorf("reading contents of directory %q: %w", target, err)
664
				}
665
				hasContent = false
666
				for _, name := range names {
667
					switch name {
668
					case ".", "..":
669
						continue
670
					default:
671
						hasContent = true
672
					}
673
					if hasContent {
674
						break
675
					}
676
				}
677
			}
678
			// The target's a directory, so read-only bind mount an empty directory on it.
679
			roFlags := uintptr(syscall.MS_BIND | syscall.MS_NOSUID | syscall.MS_NODEV | syscall.MS_NOEXEC | syscall.MS_RDONLY)
680
			if !isReadOnly || (hasContent && isAccessible) {
681
				if err = unix.Mount(roEmptyDir, target, "bind", roFlags, ""); err != nil {
682
					return undoBinds, fmt.Errorf("masking directory %q in mount namespace: %w", target, err)
683
				}
684
				if err = unix.Statfs(target, &fs); err != nil {
685
					return undoBinds, fmt.Errorf("checking if masked directory %q was mounted read-only in mount namespace: %w", target, err)
686
				}
687
				if fs.Flags&unix.ST_RDONLY == 0 {
688
					if err = unix.Mount(target, target, "", syscall.MS_REMOUNT|roFlags|mountFlagsForFSFlags(uintptr(fs.Flags)), ""); err != nil {
689
						return undoBinds, fmt.Errorf("making sure masked directory %q in mount namespace is read only: %w", target, err)
690
					}
691
				}
692
			}
693
		} else {
694
			// If the target's is not a directory or os.DevNull, bind mount os.DevNull over it.
695
			if !isDevNull(targetinfo) {
696
				if err = unix.Mount(os.DevNull, target, "", uintptr(syscall.MS_BIND|syscall.MS_RDONLY|syscall.MS_PRIVATE), ""); err != nil {
697
					return undoBinds, fmt.Errorf("masking non-directory %q in mount namespace: %w", target, err)
698
				}
699
			}
700
		}
701
	}
702
	return undoBinds, nil
703
}
704

705
// setPdeathsig sets SIGKILL as the parent-death signal in cmd's process
// attributes, allocating the attributes if cmd has none yet.
func setPdeathsig(cmd *exec.Cmd) {
	attr := cmd.SysProcAttr
	if attr == nil {
		attr = &syscall.SysProcAttr{}
		cmd.SysProcAttr = attr
	}
	attr.Pdeathsig = syscall.SIGKILL
}
712

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.