podman

container_internal_linux.go
807 строк · 23.3 Кб
Перенос по словам
1
//go:build !remote
2

3
package libpod
4

5
import (
6
	"errors"
7
	"fmt"
8
	"io/fs"
9
	"os"
10
	"path"
11
	"path/filepath"
12
	"strings"
13
	"sync"
14
	"syscall"
15
	"time"
16

17
	"github.com/containers/common/libnetwork/slirp4netns"
18
	"github.com/containers/common/libnetwork/types"
19
	"github.com/containers/common/pkg/cgroups"
20
	"github.com/containers/common/pkg/config"
21
	"github.com/containers/podman/v5/libpod/define"
22
	"github.com/containers/podman/v5/pkg/rootless"
23
	spec "github.com/opencontainers/runtime-spec/specs-go"
24
	"github.com/opencontainers/runtime-tools/generate"
25
	"github.com/opencontainers/selinux/go-selinux/label"
26
	"github.com/sirupsen/logrus"
27
	"golang.org/x/sys/unix"
28
)
29

30
var (
31
	bindOptions = []string{define.TypeBind, "rprivate"}
32
)
33

34
func (c *Container) mountSHM(shmOptions string) error {
35
	contextType := "context"
36
	if c.config.LabelNested {
37
		contextType = "rootcontext"
38
	}
39

40
	if err := unix.Mount("shm", c.config.ShmDir, define.TypeTmpfs, unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV,
41
		label.FormatMountLabelByType(shmOptions, c.config.MountLabel, contextType)); err != nil {
42
		return fmt.Errorf("failed to mount shm tmpfs %q: %w", c.config.ShmDir, err)
43
	}
44
	return nil
45
}
46

47
// unmountSHM unmounts the SHM mount at the given path. EINVAL and ENOENT
// results are only logged at debug level and are not reported to the caller;
// any other unmount failure is returned wrapped with the container ID and
// mount path.
func (c *Container) unmountSHM(mount string) error {
	err := unix.Unmount(mount, 0)
	switch err {
	case nil:
		return nil
	case syscall.EINVAL, syscall.ENOENT:
		// If it's just an EINVAL or ENOENT, debug logs only
		logrus.Debugf("Container %s failed to unmount %s : %v", c.ID(), mount, err)
		return nil
	default:
		return fmt.Errorf("unmounting container %s SHM mount %s: %w", c.ID(), mount, err)
	}
}
57

58
// prepare mounts the container and sets up other required resources like net
59
// namespaces
60
func (c *Container) prepare() error {
61
	var (
62
		wg                              sync.WaitGroup
63
		netNS                           string
64
		networkStatus                   map[string]types.StatusBlock
65
		createNetNSErr, mountStorageErr error
66
		mountPoint                      string
67
		tmpStateLock                    sync.Mutex
68
	)
69

70
	wg.Add(2)
71

72
	go func() {
73
		defer wg.Done()
74
		// Set up network namespace if not already set up
75
		noNetNS := c.state.NetNS == ""
76
		if c.config.CreateNetNS && noNetNS && !c.config.PostConfigureNetNS {
77
			netNS, networkStatus, createNetNSErr = c.runtime.createNetNS(c)
78
			if createNetNSErr != nil {
79
				return
80
			}
81

82
			tmpStateLock.Lock()
83
			defer tmpStateLock.Unlock()
84

85
			// Assign NetNS attributes to container
86
			c.state.NetNS = netNS
87
			c.state.NetworkStatus = networkStatus
88
		}
89
	}()
90
	// Mount storage if not mounted
91
	go func() {
92
		defer wg.Done()
93
		mountPoint, mountStorageErr = c.mountStorage()
94

95
		if mountStorageErr != nil {
96
			return
97
		}
98

99
		tmpStateLock.Lock()
100
		defer tmpStateLock.Unlock()
101

102
		// Finish up mountStorage
103
		c.state.Mounted = true
104
		c.state.Mountpoint = mountPoint
105

106
		logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint)
107
	}()
108

109
	wg.Wait()
110

111
	var createErr error
112
	if createNetNSErr != nil {
113
		createErr = createNetNSErr
114
	}
115
	if mountStorageErr != nil {
116
		if createErr != nil {
117
			logrus.Errorf("Preparing container %s: %v", c.ID(), createErr)
118
		}
119
		createErr = mountStorageErr
120
	}
121

122
	// Only trigger storage cleanup if mountStorage was successful.
123
	// Otherwise, we may mess up mount counters.
124
	if createNetNSErr != nil && mountStorageErr == nil {
125
		if err := c.cleanupStorage(); err != nil {
126
			// createErr is guaranteed non-nil, so print
127
			// unconditionally
128
			logrus.Errorf("Preparing container %s: %v", c.ID(), createErr)
129
			createErr = fmt.Errorf("unmounting storage for container %s after network create failure: %w", c.ID(), err)
130
		}
131
	}
132

133
	// It's OK to unconditionally trigger network cleanup. If the network
134
	// isn't ready it will do nothing.
135
	if createErr != nil {
136
		if err := c.cleanupNetwork(); err != nil {
137
			logrus.Errorf("Preparing container %s: %v", c.ID(), createErr)
138
			createErr = fmt.Errorf("cleaning up container %s network after setup failure: %w", c.ID(), err)
139
		}
140
	}
141

142
	if createErr != nil {
143
		return createErr
144
	}
145

146
	// Save changes to container state
147
	if err := c.save(); err != nil {
148
		return err
149
	}
150

151
	return nil
152
}
153

154
// cleanupNetwork unmounts and cleans up the container's network
155
func (c *Container) cleanupNetwork() error {
156
	if c.config.NetNsCtr != "" {
157
		return nil
158
	}
159
	netDisabled, err := c.NetworkDisabled()
160
	if err != nil {
161
		return err
162
	}
163
	if netDisabled {
164
		return nil
165
	}
166
	if c.state.NetNS == "" {
167
		logrus.Debugf("Network is already cleaned up, skipping...")
168
		return nil
169
	}
170

171
	// Stop the container's network namespace (if it has one)
172
	if err := c.runtime.teardownNetNS(c); err != nil {
173
		logrus.Errorf("Unable to clean up network for container %s: %q", c.ID(), err)
174
	}
175

176
	c.state.NetNS = ""
177
	c.state.NetworkStatus = nil
178

179
	if c.valid {
180
		return c.save()
181
	}
182

183
	return nil
184
}
185

186
// reloadNetwork reloads the network for the given container, recreating
187
// firewall rules.
188
func (c *Container) reloadNetwork() error {
189
	result, err := c.runtime.reloadContainerNetwork(c)
190
	if err != nil {
191
		return err
192
	}
193

194
	c.state.NetworkStatus = result
195

196
	return c.save()
197
}
198

199
// systemd expects to have /run, /run/lock and /tmp on tmpfs
200
// It also expects to be able to write to /sys/fs/cgroup/systemd and /var/log/journal
201
func (c *Container) setupSystemd(mounts []spec.Mount, g generate.Generator) error {
202
	var containerUUIDSet bool
203
	for _, s := range c.config.Spec.Process.Env {
204
		if strings.HasPrefix(s, "container_uuid=") {
205
			containerUUIDSet = true
206
			break
207
		}
208
	}
209
	if !containerUUIDSet {
210
		g.AddProcessEnv("container_uuid", c.ID()[:32])
211
	}
212
	// limit systemd-specific tmpfs mounts if specified
213
	// while creating a pod or ctr, if not, default back to 50%
214
	var shmSizeSystemdMntOpt string
215
	if c.config.ShmSizeSystemd != 0 {
216
		shmSizeSystemdMntOpt = fmt.Sprintf("size=%d", c.config.ShmSizeSystemd)
217
	}
218
	options := []string{"rw", "rprivate", "nosuid", "nodev"}
219
	for _, dest := range []string{"/run", "/run/lock"} {
220
		if MountExists(mounts, dest) {
221
			continue
222
		}
223
		tmpfsMnt := spec.Mount{
224
			Destination: dest,
225
			Type:        define.TypeTmpfs,
226
			Source:      define.TypeTmpfs,
227
			Options:     append(options, "tmpcopyup", shmSizeSystemdMntOpt),
228
		}
229
		g.AddMount(tmpfsMnt)
230
	}
231
	for _, dest := range []string{"/tmp", "/var/log/journal"} {
232
		if MountExists(mounts, dest) {
233
			continue
234
		}
235
		tmpfsMnt := spec.Mount{
236
			Destination: dest,
237
			Type:        define.TypeTmpfs,
238
			Source:      define.TypeTmpfs,
239
			Options:     append(options, "tmpcopyup", shmSizeSystemdMntOpt),
240
		}
241
		g.AddMount(tmpfsMnt)
242
	}
243

244
	unified, err := cgroups.IsCgroup2UnifiedMode()
245
	if err != nil {
246
		return err
247
	}
248

249
	hasCgroupNs := false
250
	for _, ns := range c.config.Spec.Linux.Namespaces {
251
		if ns.Type == spec.CgroupNamespace {
252
			hasCgroupNs = true
253
			break
254
		}
255
	}
256

257
	if unified {
258
		g.RemoveMount("/sys/fs/cgroup")
259

260
		var systemdMnt spec.Mount
261
		if hasCgroupNs {
262
			systemdMnt = spec.Mount{
263
				Destination: "/sys/fs/cgroup",
264
				Type:        "cgroup",
265
				Source:      "cgroup",
266
				Options:     []string{"private", "rw"},
267
			}
268
		} else {
269
			systemdMnt = spec.Mount{
270
				Destination: "/sys/fs/cgroup",
271
				Type:        define.TypeBind,
272
				Source:      "/sys/fs/cgroup",
273
				Options:     []string{define.TypeBind, "private", "rw"},
274
			}
275
		}
276
		g.AddMount(systemdMnt)
277
	} else {
278
		hasSystemdMount := MountExists(mounts, "/sys/fs/cgroup/systemd")
279
		if hasCgroupNs && !hasSystemdMount {
280
			return errors.New("cgroup namespace is not supported with cgroup v1 and systemd mode")
281
		}
282
		mountOptions := []string{define.TypeBind, "rprivate"}
283

284
		if !hasSystemdMount {
285
			skipMount := hasSystemdMount
286
			var statfs unix.Statfs_t
287
			if err := unix.Statfs("/sys/fs/cgroup/systemd", &statfs); err != nil {
288
				if errors.Is(err, os.ErrNotExist) {
289
					// If the mount is missing on the host, we cannot bind mount it so
290
					// just skip it.
291
					skipMount = true
292
				}
293
				mountOptions = append(mountOptions, "nodev", "noexec", "nosuid")
294
			} else {
295
				if statfs.Flags&unix.MS_NODEV == unix.MS_NODEV {
296
					mountOptions = append(mountOptions, "nodev")
297
				}
298
				if statfs.Flags&unix.MS_NOEXEC == unix.MS_NOEXEC {
299
					mountOptions = append(mountOptions, "noexec")
300
				}
301
				if statfs.Flags&unix.MS_NOSUID == unix.MS_NOSUID {
302
					mountOptions = append(mountOptions, "nosuid")
303
				}
304
				if statfs.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
305
					mountOptions = append(mountOptions, "ro")
306
				}
307
			}
308
			if !skipMount {
309
				systemdMnt := spec.Mount{
310
					Destination: "/sys/fs/cgroup/systemd",
311
					Type:        define.TypeBind,
312
					Source:      "/sys/fs/cgroup/systemd",
313
					Options:     mountOptions,
314
				}
315
				g.AddMount(systemdMnt)
316
				g.AddLinuxMaskedPaths("/sys/fs/cgroup/systemd/release_agent")
317
			}
318
		}
319
	}
320

321
	return nil
322
}
323

324
// Add an existing container's namespace to the spec
325
func (c *Container) addNamespaceContainer(g *generate.Generator, ns LinuxNS, ctr string, specNS spec.LinuxNamespaceType) error {
326
	nsCtr, err := c.runtime.state.Container(ctr)
327
	if err != nil {
328
		return fmt.Errorf("retrieving dependency %s of container %s from state: %w", ctr, c.ID(), err)
329
	}
330

331
	if specNS == spec.UTSNamespace {
332
		hostname := nsCtr.Hostname()
333
		// Joining an existing namespace, cannot set the hostname
334
		g.SetHostname("")
335
		g.AddProcessEnv("HOSTNAME", hostname)
336
	}
337

338
	nsPath, err := nsCtr.NamespacePath(ns)
339
	if err != nil {
340
		return err
341
	}
342

343
	if err := g.AddOrReplaceLinuxNamespace(string(specNS), nsPath); err != nil {
344
		return err
345
	}
346

347
	return nil
348
}
349

350
func isRootlessCgroupSet(cgroup string) bool {
351
	// old versions of podman were setting the CgroupParent to CgroupfsDefaultCgroupParent
352
	// by default.  Avoid breaking these versions and check whether the cgroup parent is
353
	// set to the default and in this case enable the old behavior.  It should not be a real
354
	// problem because the default CgroupParent is usually owned by root so rootless users
355
	// cannot access it.
356
	// This check might be lifted in a future version of Podman.
357
	// Check both that the cgroup or its parent is set to the default value (used by pods).
358
	return cgroup != CgroupfsDefaultCgroupParent && filepath.Dir(cgroup) != CgroupfsDefaultCgroupParent
359
}
360

361
func (c *Container) expectPodCgroup() (bool, error) {
362
	unified, err := cgroups.IsCgroup2UnifiedMode()
363
	if err != nil {
364
		return false, err
365
	}
366
	cgroupManager := c.CgroupManager()
367
	switch {
368
	case c.config.NoCgroups:
369
		return false, nil
370
	case cgroupManager == config.SystemdCgroupsManager:
371
		return !rootless.IsRootless() || unified, nil
372
	case cgroupManager == config.CgroupfsCgroupsManager:
373
		return !rootless.IsRootless(), nil
374
	default:
375
		return false, fmt.Errorf("invalid cgroup mode %s requested for pods: %w", cgroupManager, define.ErrInvalidArg)
376
	}
377
}
378

379
// Get cgroup path in a format suitable for the OCI spec
380
func (c *Container) getOCICgroupPath() (string, error) {
381
	unified, err := cgroups.IsCgroup2UnifiedMode()
382
	if err != nil {
383
		return "", err
384
	}
385
	cgroupManager := c.CgroupManager()
386
	switch {
387
	case c.config.NoCgroups:
388
		return "", nil
389
	case c.config.CgroupsMode == cgroupSplit:
390
		selfCgroup, err := cgroups.GetOwnCgroupDisallowRoot()
391
		if err != nil {
392
			return "", err
393
		}
394
		return filepath.Join(selfCgroup, fmt.Sprintf("libpod-payload-%s", c.ID())), nil
395
	case cgroupManager == config.SystemdCgroupsManager:
396
		// When the OCI runtime is set to use Systemd as a cgroup manager, it
397
		// expects cgroups to be passed as follows:
398
		// slice:prefix:name
399
		systemdCgroups := fmt.Sprintf("%s:libpod:%s", path.Base(c.config.CgroupParent), c.ID())
400
		logrus.Debugf("Setting Cgroups for container %s to %s", c.ID(), systemdCgroups)
401
		return systemdCgroups, nil
402
	case (rootless.IsRootless() && (cgroupManager == config.CgroupfsCgroupsManager || !unified)):
403
		if c.config.CgroupParent == "" || !isRootlessCgroupSet(c.config.CgroupParent) {
404
			return "", nil
405
		}
406
		fallthrough
407
	case cgroupManager == config.CgroupfsCgroupsManager:
408
		cgroupPath := filepath.Join(c.config.CgroupParent, fmt.Sprintf("libpod-%s", c.ID()))
409
		logrus.Debugf("Setting Cgroup path for container %s to %s", c.ID(), cgroupPath)
410
		return cgroupPath, nil
411
	default:
412
		return "", fmt.Errorf("invalid cgroup manager %s requested: %w", cgroupManager, define.ErrInvalidArg)
413
	}
414
}
415

416
func openDirectory(path string) (fd int, err error) {
417
	return unix.Open(path, unix.O_RDONLY|unix.O_PATH, 0)
418
}
419

420
// addNetworkNamespace adds the network namespace to the generated spec when
// the container is configured to create one. With PostConfigureNetNS the
// namespace is added with an empty path; otherwise the path recorded in the
// container state is used.
func (c *Container) addNetworkNamespace(g *generate.Generator) error {
	if !c.config.CreateNetNS {
		return nil
	}

	nsPath := c.state.NetNS
	if c.config.PostConfigureNetNS {
		nsPath = ""
	}
	return g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath)
}
434

435
// addSystemdMounts runs setupSystemd against the generator's current mounts
// when the container runs in systemd mode, and does nothing otherwise.
func (c *Container) addSystemdMounts(g *generate.Generator) error {
	if !c.Systemd() {
		return nil
	}
	return c.setupSystemd(g.Mounts(), *g)
}
443

444
// addSharedNamespaces configures the namespaces of the generated spec: it
// joins the namespaces of other containers named in the config, splits
// UID/GID mappings against the IDs available to the process, sets the
// hostname and HOSTNAME variable, and installs the ID mappings for automatic
// user namespaces.
func (c *Container) addSharedNamespaces(g *generate.Generator) error {
	type nsJoin struct {
		ctr    string
		ns     LinuxNS
		specNS spec.LinuxNamespaceType
	}
	// joinAll joins, in order, every listed namespace that names a container.
	joinAll := func(joins ...nsJoin) error {
		for _, j := range joins {
			if j.ctr == "" {
				continue
			}
			if err := c.addNamespaceContainer(g, j.ns, j.ctr, j.specNS); err != nil {
				return err
			}
		}
		return nil
	}

	if err := joinAll(
		nsJoin{c.config.IPCNsCtr, IPCNS, spec.IPCNamespace},
		nsJoin{c.config.MountNsCtr, MountNS, spec.MountNamespace},
		nsJoin{c.config.NetNsCtr, NetNS, spec.NetworkNamespace},
		nsJoin{c.config.PIDNsCtr, PIDNS, spec.PIDNamespace},
		nsJoin{c.config.UserNsCtr, UserNS, spec.UserNamespace},
	); err != nil {
		return err
	}
	if c.config.UserNsCtr != "" && len(g.Config.Linux.UIDMappings) == 0 {
		// runc complains if no mapping is specified, even if we join another ns.  So provide a dummy mapping
		g.AddLinuxUIDMapping(uint32(0), uint32(0), uint32(1))
		g.AddLinuxGIDMapping(uint32(0), uint32(0), uint32(1))
	}

	availableUIDs, availableGIDs, err := rootless.GetAvailableIDMaps()
	switch {
	case err == nil:
		g.Config.Linux.UIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.UIDMappings, availableUIDs)
		g.Config.Linux.GIDMappings = rootless.MaybeSplitMappings(g.Config.Linux.GIDMappings, availableGIDs)
	case os.IsNotExist(err):
		// The kernel-provided files only exist if user namespaces are supported
		logrus.Debugf("User or group ID mappings not available: %s", err)
	default:
		return err
	}

	// Hostname handling:
	// If the spec has a UTS namespace without a path, set Hostname in the
	// OCI spec. Otherwise use the host's hostname for the environment
	// variable. HOSTNAME is only added when the spec does not already
	// define it.
	hostname := c.Hostname()
	ownUTS := false
	for _, ns := range c.config.Spec.Linux.Namespaces {
		if ns.Type == spec.UTSNamespace && ns.Path == "" {
			ownUTS = true
			g.SetHostname(hostname)
			break
		}
	}
	if !ownUTS {
		hostHostname, err := os.Hostname()
		if err != nil {
			return err
		}
		hostname = hostHostname
	}

	hostnameEnvSet := false
	for _, env := range g.Config.Process.Env {
		if env == "HOSTNAME" || strings.HasPrefix(env, "HOSTNAME=") {
			hostnameEnvSet = true
			break
		}
	}
	if !hostnameEnvSet {
		g.AddProcessEnv("HOSTNAME", hostname)
	}

	if err := joinAll(
		nsJoin{c.config.UTSNsCtr, UTSNS, spec.UTSNamespace},
		nsJoin{c.config.CgroupNsCtr, CgroupNS, spec.CgroupNamespace},
	); err != nil {
		return err
	}

	if c.config.UserNsCtr != "" || !c.config.IDMappings.AutoUserNs {
		return nil
	}

	// Automatic user namespace: request a new namespace and replace the
	// spec's mappings with the ones from the container config.
	if err := g.AddOrReplaceLinuxNamespace(string(spec.UserNamespace), ""); err != nil {
		return err
	}
	g.ClearLinuxUIDMappings()
	for _, m := range c.config.IDMappings.UIDMap {
		g.AddLinuxUIDMapping(uint32(m.HostID), uint32(m.ContainerID), uint32(m.Size))
	}
	g.ClearLinuxGIDMappings()
	for _, m := range c.config.IDMappings.GIDMap {
		g.AddLinuxGIDMapping(uint32(m.HostID), uint32(m.ContainerID), uint32(m.Size))
	}
	return nil
}
548

549
func (c *Container) addRootPropagation(g *generate.Generator, mounts []spec.Mount) error {
550
	// Determine property of RootPropagation based on volume properties. If
551
	// a volume is shared, then keep root propagation shared. This should
552
	// work for slave and private volumes too.
553
	//
554
	// For slave volumes, it can be either [r]shared/[r]slave.
555
	//
556
	// For private volumes any root propagation value should work.
557
	rootPropagation := ""
558
	for _, m := range mounts {
559
		for _, opt := range m.Options {
560
			switch opt {
561
			case MountShared, MountRShared:
562
				if rootPropagation != MountShared && rootPropagation != MountRShared {
563
					rootPropagation = MountShared
564
				}
565
			case MountSlave, MountRSlave:
566
				if rootPropagation != MountShared && rootPropagation != MountRShared && rootPropagation != MountSlave && rootPropagation != MountRSlave {
567
					rootPropagation = MountRSlave
568
				}
569
			}
570
		}
571
	}
572
	if rootPropagation != "" {
573
		logrus.Debugf("Set root propagation to %q", rootPropagation)
574
		if err := g.SetLinuxRootPropagation(rootPropagation); err != nil {
575
			return err
576
		}
577
	}
578
	return nil
579
}
580

581
// setProcessLabel copies the container's process label into the spec as the
// process SELinux label.
func (c *Container) setProcessLabel(g *generate.Generator) {
	processLabel := c.ProcessLabel()
	g.SetProcessSelinuxLabel(processLabel)
}
584

585
// setMountLabel copies the container's mount label into the spec as the
// Linux mount label.
func (c *Container) setMountLabel(g *generate.Generator) {
	mountLabel := c.MountLabel()
	g.SetLinuxMountLabel(mountLabel)
}
588

589
// setCgroupsPath computes the container's OCI cgroup path and stores it in
// the spec. The spec is left untouched if the path cannot be determined.
func (c *Container) setCgroupsPath(g *generate.Generator) error {
	cgroupPath, err := c.getOCICgroupPath()
	if err == nil {
		g.SetLinuxCgroupsPath(cgroupPath)
	}
	return err
}
597

598
// addSpecialDNS adds special dns servers for slirp4netns and pasta
599
func (c *Container) addSpecialDNS(nameservers []string) []string {
600
	if c.pastaResult != nil {
601
		nameservers = append(nameservers, c.pastaResult.DNSForwardIPs...)
602
	}
603

604
	// slirp4netns has a built in DNS forwarder.
605
	if c.config.NetMode.IsSlirp4netns() {
606
		slirp4netnsDNS, err := slirp4netns.GetDNS(c.slirp4netnsSubnet)
607
		if err != nil {
608
			logrus.Warn("Failed to determine Slirp4netns DNS: ", err.Error())
609
		} else {
610
			nameservers = append(nameservers, slirp4netnsDNS.String())
611
		}
612
	}
613
	return nameservers
614
}
615

616
func (c *Container) isSlirp4netnsIPv6() bool {
617
	if c.config.NetMode.IsSlirp4netns() {
618
		extraOptions := c.config.NetworkOptions[slirp4netns.BinaryName]
619
		options := make([]string, 0, len(c.runtime.config.Engine.NetworkCmdOptions.Get())+len(extraOptions))
620
		options = append(options, c.runtime.config.Engine.NetworkCmdOptions.Get()...)
621
		options = append(options, extraOptions...)
622

623
		// loop backwards as the last argument wins and we can exit early
624
		// This should be kept in sync with c/common/libnetwork/slirp4netns.
625
		for i := len(options) - 1; i >= 0; i-- {
626
			switch options[i] {
627
			case "enable_ipv6=true":
628
				return true
629
			case "enable_ipv6=false":
630
				return false
631
			}
632
		}
633
		// default is true
634
		return true
635
	}
636

637
	return false
638
}
639

640
// check for net=none
641
func (c *Container) hasNetNone() bool {
642
	if !c.config.CreateNetNS {
643
		for _, ns := range c.config.Spec.Linux.Namespaces {
644
			if ns.Type == spec.NetworkNamespace {
645
				if ns.Path == "" {
646
					return true
647
				}
648
			}
649
		}
650
	}
651
	return false
652
}
653

654
// setVolumeAtime sets the access and modification times of mountPoint to the
// ones recorded in st. The access time is read from the underlying
// *syscall.Stat_t of st, the modification time from st.ModTime().
//
// An error is returned if st does not carry a *syscall.Stat_t (instead of
// panicking on the type assertion), or if changing the times fails.
func setVolumeAtime(mountPoint string, st os.FileInfo) error {
	stat, ok := st.Sys().(*syscall.Stat_t)
	if !ok || stat == nil {
		return fmt.Errorf("reading access time for %q: unexpected file info type %T", mountPoint, st.Sys())
	}
	atime := time.Unix(int64(stat.Atim.Sec), int64(stat.Atim.Nsec)) //nolint: unconvert
	return os.Chtimes(mountPoint, atime, st.ModTime())
}
662

663
// makePlatformBindMounts makes sure the container state has a bind mount for
// /etc/hostname. The file is written to the container's run directory with
// the container's hostname. It should never change, so an existing entry is
// left as is.
func (c *Container) makePlatformBindMounts() error {
	const hostnameDest = "/etc/hostname"

	if _, exists := c.state.BindMounts[hostnameDest]; exists {
		return nil
	}

	hostnameFile, err := c.writeStringToRundir("hostname", c.Hostname())
	if err != nil {
		return fmt.Errorf("creating hostname file for container %s: %w", c.ID(), err)
	}
	c.state.BindMounts[hostnameDest] = hostnameFile
	return nil
}
675

676
// getConmonPidFd returns a pidfd for the container's conmon process, or -1
// when no conmon PID is recorded or the pidfd cannot be opened.
//
// The pidfd allows tracking the lifetime of conmon precisely using
// pidfd_open + poll. There are many cases for this to fail, for instance
// conmon is dead or pidfd_open is not supported (pre linux 5.3), so callers
// fall back to the traditional loop with poll + sleep. ENOSYS and ESRCH are
// not logged; other failures are logged at debug level.
func (c *Container) getConmonPidFd() int {
	pid := c.state.ConmonPID
	if pid == 0 {
		return -1
	}

	fd, err := unix.PidfdOpen(pid, 0)
	switch err {
	case nil:
		return fd
	case unix.ENOSYS, unix.ESRCH:
		// Not logged: unsupported syscall or process already gone.
	default:
		logrus.Debugf("PidfdOpen(%d) failed: %v", pid, err)
	}
	return -1
}
690

691
// safeMountInfo describes a subpath mounted by safeMountSubPath.
type safeMountInfo struct {
	// file is the open handle on the subpath that was mounted.
	file *os.File

	// mountPoint is the temporary location where the subpath is mounted.
	mountPoint string
}
698

699
// Close releases the resources allocated with the safe mount info.
700
func (s *safeMountInfo) Close() {
701
	_ = unix.Unmount(s.mountPoint, unix.MNT_DETACH)
702
	_ = s.file.Close()
703
}
704

705
// safeMountSubPath securely mounts a subpath inside a volume to a new temporary location.
706
// The function checks that the subpath is a valid subpath within the volume and that it
707
// does not escape the boundaries of the mount point (volume).
708
//
709
// The caller is responsible for closing the file descriptor and unmounting the subpath
710
// when it's no longer needed.
711
func (c *Container) safeMountSubPath(mountPoint, subpath string) (s *safeMountInfo, err error) {
712
	joinedPath := filepath.Clean(filepath.Join(mountPoint, subpath))
713
	fd, err := unix.Open(joinedPath, unix.O_RDONLY|unix.O_PATH, 0)
714
	if err != nil {
715
		return nil, err
716
	}
717
	f := os.NewFile(uintptr(fd), joinedPath)
718
	defer func() {
719
		if err != nil {
720
			f.Close()
721
		}
722
	}()
723

724
	// Once we got the file descriptor, we need to check that the subpath is a valid.  We
725
	// refer to the open FD so there won't be other path lookups (and no risk to follow a symlink).
726
	fdPath := fmt.Sprintf("/proc/%d/fd/%d", os.Getpid(), f.Fd())
727
	p, err := os.Readlink(fdPath)
728
	if err != nil {
729
		return nil, err
730
	}
731
	relPath, err := filepath.Rel(mountPoint, p)
732
	if err != nil {
733
		return nil, err
734
	}
735
	if relPath == ".." || strings.HasPrefix(relPath, "../") {
736
		return nil, fmt.Errorf("subpath %q is outside of the volume %q", subpath, mountPoint)
737
	}
738

739
	fi, err := os.Stat(fdPath)
740
	if err != nil {
741
		return nil, err
742
	}
743
	var npath string
744
	switch {
745
	case fi.Mode()&fs.ModeSymlink != 0:
746
		return nil, fmt.Errorf("file %q is a symlink", joinedPath)
747
	case fi.IsDir():
748
		npath, err = os.MkdirTemp(c.state.RunDir, "subpath")
749
		if err != nil {
750
			return nil, err
751
		}
752
	default:
753
		tmp, err := os.CreateTemp(c.state.RunDir, "subpath")
754
		if err != nil {
755
			return nil, err
756
		}
757
		tmp.Close()
758
		npath = tmp.Name()
759
	}
760
	if err := unix.Mount(fdPath, npath, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
761
		return nil, err
762
	}
763
	return &safeMountInfo{
764
		file:       f,
765
		mountPoint: npath,
766
	}, nil
767
}
768

769
// makePlatformMtabLink creates the "mtab" symlink to /proc/mounts inside the
// directory referred to by etcInTheContainerFd, so that the mount command
// within the container works when the image has no /etc/mtab. An already
// existing entry is not an error. If the symlink was created and rootUID or
// rootGID is non-zero, the link itself is chowned to those IDs.
func (c *Container) makePlatformMtabLink(etcInTheContainerFd, rootUID, rootGID int) error {
	linkErr := unix.Symlinkat("/proc/mounts", etcInTheContainerFd, "mtab")
	switch {
	case linkErr == nil:
		// The symlink was created: chown it to root in the container,
		// unless that is already host root.
		if rootUID == 0 && rootGID == 0 {
			return nil
		}
		if err := unix.Fchownat(etcInTheContainerFd, "mtab", rootUID, rootGID, unix.AT_SYMLINK_NOFOLLOW); err != nil {
			return fmt.Errorf("chown /etc/mtab: %w", err)
		}
		return nil
	case os.IsExist(linkErr):
		return nil
	default:
		return fmt.Errorf("creating /etc/mtab symlink: %w", linkErr)
	}
}
785

786
// getPlatformRunPath returns the run path used on this platform, which is
// always "/run"; the error is always nil.
func (c *Container) getPlatformRunPath() (string, error) {
	const runPath = "/run"
	return runPath, nil
}
789

790
// addMaskedPaths adds /sys/devices/virtual/powercap to the spec's masked
// paths. Nothing is added for privileged containers, or when the spec has no
// Linux section or no masked paths yet.
func (c *Container) addMaskedPaths(g *generate.Generator) {
	if c.config.Privileged {
		return
	}
	if g.Config == nil || g.Config.Linux == nil {
		return
	}
	if len(g.Config.Linux.MaskedPaths) == 0 {
		return
	}
	g.AddLinuxMaskedPaths("/sys/devices/virtual/powercap")
}
795

796
// hasPrivateUTS reports whether the container's spec lists a UTS namespace.
// It returns false when the spec has no Linux section.
func (c *Container) hasPrivateUTS() bool {
	linuxSpec := c.config.Spec.Linux
	if linuxSpec == nil {
		return false
	}
	for _, ns := range linuxSpec.Namespaces {
		if ns.Type == spec.UTSNamespace {
			return true
		}
	}
	return false
}
808
podman

Использование cookies