//go:build linux
// +build linux

package buildah

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"syscall"

	"github.com/containers/buildah/bind"
	"github.com/containers/buildah/chroot"
	"github.com/containers/buildah/copier"
	"github.com/containers/buildah/define"
	"github.com/containers/buildah/internal"
	"github.com/containers/buildah/internal/tmpdir"
	"github.com/containers/buildah/internal/volumes"
	"github.com/containers/buildah/pkg/overlay"
	"github.com/containers/buildah/pkg/parse"
	butil "github.com/containers/buildah/pkg/util"
	"github.com/containers/buildah/util"
	"github.com/containers/common/libnetwork/etchosts"
	"github.com/containers/common/libnetwork/pasta"
	"github.com/containers/common/libnetwork/resolvconf"
	"github.com/containers/common/libnetwork/slirp4netns"
	nettypes "github.com/containers/common/libnetwork/types"
	netUtil "github.com/containers/common/libnetwork/util"
	"github.com/containers/common/pkg/capabilities"
	"github.com/containers/common/pkg/chown"
	"github.com/containers/common/pkg/config"
	"github.com/containers/common/pkg/hooks"
	hooksExec "github.com/containers/common/pkg/hooks/exec"
	"github.com/containers/storage/pkg/fileutils"
	"github.com/containers/storage/pkg/idtools"
	"github.com/containers/storage/pkg/ioutils"
	"github.com/containers/storage/pkg/lockfile"
	"github.com/containers/storage/pkg/stringid"
	"github.com/containers/storage/pkg/unshare"
	"github.com/docker/go-units"
	"github.com/opencontainers/runtime-spec/specs-go"
	"github.com/opencontainers/runtime-tools/generate"
	"github.com/sirupsen/logrus"
	"golang.org/x/exp/slices"
	"golang.org/x/sys/unix"
	"tags.cncf.io/container-device-interface/pkg/cdi"
)

var (
	// We don't want to remove destinations with /etc, /dev, /sys,
	// /proc as the rootfs already contains these files and unionfs
	// will create `whiteout` (i.e. `.wh`) files on removal of
	// overlapping files from these directories.  Everything other
	// than these will be cleaned up.
	nonCleanablePrefixes = []string{
		"/etc", "/dev", "/sys", "/proc",
	}
)

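// setChildProcess marks the current process as a child subreaper via
// prctl(PR_SET_CHILD_SUBREAPER), so that descendant processes are
// reparented to it, rather than to init, when their parents exit.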
func setChildProcess() error {
	if err := unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(1), 0, 0, 0); err != nil {
		fmt.Fprintf(os.Stderr, "prctl(PR_SET_CHILD_SUBREAPER, 1): %v\n", err)
		return err
	}
	return nil
}

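// cdiSetupDevicesInSpec resolves CDI (Container Device Interface) device
// names from deviceSpecs and injects them into the runtime spec, returning
// the device specs that CDI could not resolve so that they can be handled
// as ordinary device nodes.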
func (b *Builder) cdiSetupDevicesInSpec(deviceSpecs []string, configDir string, spec *specs.Spec) ([]string, error) {
	leftoverDevices := deviceSpecs
	registry, err := cdi.NewCache()
	if err != nil {
		return nil, fmt.Errorf("creating CDI registry: %w", err)
	}
	var configDirs []string
	if b.CDIConfigDir != "" {
		configDirs = append(configDirs, b.CDIConfigDir)
	}
	if configDir != "" {
		configDirs = append(configDirs, configDir)
	}
	// TODO: CdiSpecDirs will be in containers/common v0.59.0 or later?
	// defConfig, err := config.Default()
	// if err != nil {
	//	return nil, fmt.Errorf("failed to get container config: %w", err)
	// }
	// configDirs = append(configDirs, defConfig.Engine.CdiSpecDirs.Get()...)
	if len(configDirs) > 0 {
		if err := registry.Configure(cdi.WithSpecDirs(configDirs...)); err != nil {
			return nil, fmt.Errorf("CDI registry ignored configured directories %v: %w", configDirs, err)
		}
	}
	if err := registry.Refresh(); err != nil {
		logrus.Warnf("CDI registry refresh: %v", err)
	} else {
		leftoverDevices, err = registry.InjectDevices(spec, deviceSpecs...)
		if err != nil {
			logrus.Debugf("CDI device injection: %v, unresolved list %v", err, leftoverDevices)
		}
	}
	removed := slices.DeleteFunc(slices.Clone(deviceSpecs), func(t string) bool { return slices.Contains(leftoverDevices, t) })
	logrus.Debugf("CDI taking care of devices %v, leaving devices %v", removed, leftoverDevices)
	return leftoverDevices, nil
}

// Extract the device list so that we can still try to make it work if
// we're running rootless and can't just mknod() the device nodes.
func separateDevicesFromRuntimeSpec(g *generate.Generator) define.ContainerDevices {
	var result define.ContainerDevices
	if g.Config != nil && g.Config.Linux != nil {
		for _, device := range g.Config.Linux.Devices {
			var bDevice define.BuildahDevice
			bDevice.Path = device.Path
			switch device.Type {
			case "b":
				bDevice.Type = 'b'
			case "c":
				bDevice.Type = 'c'
			case "u":
				bDevice.Type = 'u'
			case "p":
				bDevice.Type = 'p'
			}
			bDevice.Major = device.Major
			bDevice.Minor = device.Minor
			if device.FileMode != nil {
				bDevice.FileMode = *device.FileMode
			}
			if device.UID != nil {
				bDevice.Uid = *device.UID
			}
			if device.GID != nil {
				bDevice.Gid = *device.GID
			}
			bDevice.Source = device.Path
			bDevice.Destination = device.Path
			result = append(result, bDevice)
		}
	}
	g.ClearLinuxDevices()
	return result
}

// Run runs the specified command in the container's root filesystem.
func (b *Builder) Run(command []string, options RunOptions) error {
	p, err := os.MkdirTemp(tmpdir.GetTempDir(), define.Package)
	if err != nil {
		return err
	}
	// On some hosts like AH, /tmp is a symlink and we need an
	// absolute path.
	path, err := filepath.EvalSymlinks(p)
	if err != nil {
		return err
	}
	logrus.Debugf("using %q to hold bundle data", path)
	defer func() {
		if err2 := os.RemoveAll(path); err2 != nil {
			options.Logger.Error(err2)
		}
	}()

	gp, err := generate.New("linux")
	if err != nil {
		return fmt.Errorf("generating new 'linux' runtime spec: %w", err)
	}
	g := &gp

	isolation := options.Isolation
	if isolation == define.IsolationDefault {
		isolation = b.Isolation
		if isolation == define.IsolationDefault {
			isolation, err = parse.IsolationOption("")
			if err != nil {
				logrus.Debugf("got %v while trying to determine default isolation, guessing OCI", err)
				isolation = IsolationOCI
			} else if isolation == IsolationDefault {
				isolation = IsolationOCI
			}
		}
	}
	if err := checkAndOverrideIsolationOptions(isolation, &options); err != nil {
		return err
	}

	// hardwire the environment to match docker build to avoid subtle and hard-to-debug differences due to containers.conf
	b.configureEnvironment(g, options, []string{"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"})

	if b.CommonBuildOpts == nil {
		return fmt.Errorf("invalid format on container you must recreate the container")
	}

	if err := addCommonOptsToSpec(b.CommonBuildOpts, g); err != nil {
		return err
	}

	workDir := b.WorkDir()
	if options.WorkingDir != "" {
		g.SetProcessCwd(options.WorkingDir)
		workDir = options.WorkingDir
	} else if b.WorkDir() != "" {
		g.SetProcessCwd(b.WorkDir())
	}
	setupSelinux(g, b.ProcessLabel, b.MountLabel)
	mountPoint, err := b.Mount(b.MountLabel)
	if err != nil {
		return fmt.Errorf("mounting container %q: %w", b.ContainerID, err)
	}
	defer func() {
		if err := b.Unmount(); err != nil {
			options.Logger.Errorf("error unmounting container: %v", err)
		}
	}()
	g.SetRootPath(mountPoint)
	if len(command) > 0 {
		command = runLookupPath(g, command)
		g.SetProcessArgs(command)
	} else {
		g.SetProcessArgs(nil)
	}

	// Combine the working container's set of devices with the ones for just this run.
	deviceSpecs := append(append([]string{}, options.DeviceSpecs...), b.DeviceSpecs...)
	deviceSpecs, err = b.cdiSetupDevicesInSpec(deviceSpecs, options.CDIConfigDir, g.Config) // makes changes to more than just the device list
	if err != nil {
		return err
	}
	devices := separateDevicesFromRuntimeSpec(g)
	for _, deviceSpec := range deviceSpecs {
		device, err := parse.DeviceFromPath(deviceSpec)
		if err != nil {
			return fmt.Errorf("setting up device %q: %w", deviceSpec, err)
		}
		devices = append(devices, device...)
	}
	devices = append(append(devices, options.Devices...), b.Devices...)

	// Mount devices, if any, and if we're rootless attempt to work around not
	// being able to create device nodes by bind-mounting them from the host, like podman does.
	if unshare.IsRootless() {
		// We are going to create bind mounts for devices
		// but we need to make sure that we don't override
		// anything which is already in OCI spec.
		mounts := make(map[string]interface{})
		for _, m := range g.Mounts() {
			mounts[m.Destination] = true
		}
		newMounts := []specs.Mount{}
		for _, d := range devices {
			// Default permission is read-only.
			perm := "ro"
			// Get the permission configured for this device, but only honor the `write`
			// permission in rootless mode, since `mknod` is not supported anyway.
			if strings.Contains(string(d.Rule.Permissions), "w") {
				perm = "rw"
			}
			devMnt := specs.Mount{
				Destination: d.Destination,
				Type:        parse.TypeBind,
				Source:      d.Source,
				Options:     []string{"slave", "nosuid", "noexec", perm, "rbind"},
			}
			// Podman parity: podman skips these two devices, hence we do the same.
			if d.Path == "/dev/ptmx" || strings.HasPrefix(d.Path, "/dev/tty") {
				continue
			}
			// Device is already in the OCI spec; do not re-mount it.
			if _, found := mounts[d.Path]; found {
				continue
			}
			newMounts = append(newMounts, devMnt)
		}
		g.Config.Mounts = append(newMounts, g.Config.Mounts...)
	} else {
		for _, d := range devices {
			sDev := specs.LinuxDevice{
				Type:     string(d.Type),
				Path:     d.Path,
				Major:    d.Major,
				Minor:    d.Minor,
				FileMode: &d.FileMode,
				UID:      &d.Uid,
				GID:      &d.Gid,
			}
			g.AddDevice(sDev)
			g.AddLinuxResourcesDevice(true, string(d.Type), &d.Major, &d.Minor, string(d.Permissions))
		}
	}

	setupMaskedPaths(g)
	setupReadOnlyPaths(g)

	setupTerminal(g, options.Terminal, options.TerminalSize)

	configureNetwork, networkString, err := b.configureNamespaces(g, &options)
	if err != nil {
		return err
	}

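	// Set up the UID/GID that the process will run as, and determine its home directory.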
	homeDir, err := b.configureUIDGID(g, mountPoint, options)
	if err != nil {
		return err
	}

	g.SetProcessNoNewPrivileges(b.CommonBuildOpts.NoNewPrivileges)

	g.SetProcessApparmorProfile(b.CommonBuildOpts.ApparmorProfile)

	// Now grab the spec from the generator.  Set the generator to nil so that future contributors
	// will quickly be able to tell that they're supposed to be modifying the spec directly from here.
	spec := g.Config
	g = nil

	// Set the seccomp configuration using the specified profile name.  Some syscalls are
	// allowed if certain capabilities are to be granted (example: CAP_SYS_CHROOT and chroot),
	// so we sorted out the capabilities lists first.
	if err = setupSeccomp(spec, b.CommonBuildOpts.SeccompProfilePath); err != nil {
		return err
	}

	uid, gid := spec.Process.User.UID, spec.Process.User.GID
	if spec.Linux != nil {
		uid, gid, err = util.GetHostIDs(spec.Linux.UIDMappings, spec.Linux.GIDMappings, uid, gid)
		if err != nil {
			return err
		}
	}

	idPair := &idtools.IDPair{UID: int(uid), GID: int(gid)}

	mode := os.FileMode(0755)
	coptions := copier.MkdirOptions{
		ChownNew: idPair,
		ChmodNew: &mode,
	}
	if err := copier.Mkdir(mountPoint, filepath.Join(mountPoint, spec.Process.Cwd), coptions); err != nil {
		return err
	}

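	// Prepare the files (/etc/hosts, /etc/hostname, /etc/resolv.conf, /run/.containerenv)
	// that will be bind-mounted into the container.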
	bindFiles := make(map[string]string)
	volumes := b.Volumes()

	// Figure out who owns files that will appear to be owned by UID/GID 0 in the container.
	rootUID, rootGID, err := util.GetHostRootIDs(spec)
	if err != nil {
		return err
	}
	rootIDPair := &idtools.IDPair{UID: int(rootUID), GID: int(rootGID)}

	hostsFile := ""
	if !options.NoHosts && !slices.Contains(volumes, config.DefaultHostsFile) && options.ConfigureNetwork != define.NetworkDisabled {
		hostsFile, err = b.createHostsFile(path, rootIDPair)
		if err != nil {
			return err
		}
		bindFiles[config.DefaultHostsFile] = hostsFile

		// Only add entries here if we do not have to set up the network;
		// if we do, it has to happen much later, after the network setup.
		if !configureNetwork {
			var entries etchosts.HostEntries
			isHost := true
			if spec.Linux != nil {
				for _, ns := range spec.Linux.Namespaces {
					if ns.Type == specs.NetworkNamespace {
						isHost = false
						break
					}
				}
			}
			// add a host entry for the local IP when running in the host network
			if spec.Hostname != "" && isHost {
				ip := netUtil.GetLocalIP()
				if ip != "" {
					entries = append(entries, etchosts.HostEntry{
						Names: []string{spec.Hostname},
						IP:    ip,
					})
				}
			}
			err = b.addHostsEntries(hostsFile, mountPoint, entries, nil)
			if err != nil {
				return err
			}
		}
	}

	if !options.NoHostname && !(slices.Contains(volumes, "/etc/hostname")) {
		hostnameFile, err := b.generateHostname(path, spec.Hostname, rootIDPair)
		if err != nil {
			return err
		}
		// Bind /etc/hostname
		bindFiles["/etc/hostname"] = hostnameFile
	}

	resolvFile := ""
	if !slices.Contains(volumes, resolvconf.DefaultResolvConf) && options.ConfigureNetwork != define.NetworkDisabled && !(len(b.CommonBuildOpts.DNSServers) == 1 && strings.ToLower(b.CommonBuildOpts.DNSServers[0]) == "none") {
		resolvFile, err = b.createResolvConf(path, rootIDPair)
		if err != nil {
			return err
		}
		bindFiles[resolvconf.DefaultResolvConf] = resolvFile

		// Only add entries here if we do not have to set up the network;
		// if we do, it has to happen much later, after the network setup.
		if !configureNetwork {
			err = b.addResolvConfEntries(resolvFile, nil, spec.Linux.Namespaces, false, true)
			if err != nil {
				return err
			}
		}
	}
	// Empty file, so no need to recreate if it exists
	if _, ok := bindFiles["/run/.containerenv"]; !ok {
		containerenvPath := filepath.Join(path, "/run/.containerenv")
		if err = os.MkdirAll(filepath.Dir(containerenvPath), 0755); err != nil {
			return err
		}

		rootless := 0
		if unshare.IsRootless() {
			rootless = 1
		}
		// Populate the .containerenv with container information
		containerenv := fmt.Sprintf(`
engine="buildah-%s"
name=%q
id=%q
image=%q
imageid=%q
rootless=%d
`, define.Version, b.Container, b.ContainerID, b.FromImage, b.FromImageID, rootless)

		if err = ioutils.AtomicWriteFile(containerenvPath, []byte(containerenv), 0755); err != nil {
			return err
		}
		if err := relabel(containerenvPath, b.MountLabel, false); err != nil {
			return err
		}

		bindFiles["/run/.containerenv"] = containerenvPath
	}

	// Setup OCI hooks
	_, err = b.setupOCIHooks(spec, (len(options.Mounts) > 0 || len(volumes) > 0))
	if err != nil {
		return fmt.Errorf("unable to setup OCI hooks: %w", err)
	}

	runMountInfo := runMountInfo{
		WorkDir:          workDir,
		ContextDir:       options.ContextDir,
		Secrets:          options.Secrets,
		SSHSources:       options.SSHSources,
		StageMountPoints: options.StageMountPoints,
		SystemContext:    options.SystemContext,
	}

	runArtifacts, err := b.setupMounts(mountPoint, spec, path, options.Mounts, bindFiles, volumes, b.CommonBuildOpts.Volumes, options.RunMounts, runMountInfo)
	if err != nil {
		return fmt.Errorf("resolving mountpoints for container %q: %w", b.ContainerID, err)
	}
	if runArtifacts.SSHAuthSock != "" {
		sshenv := "SSH_AUTH_SOCK=" + runArtifacts.SSHAuthSock
		spec.Process.Env = append(spec.Process.Env, sshenv)
	}

	// If this run was invoked from `buildah run` and some images were mounted
	// for it, add them to the cleanup artifacts.
	if len(options.ExternalImageMounts) > 0 {
		runArtifacts.MountedImages = append(runArtifacts.MountedImages, options.ExternalImageMounts...)
	}

	defer func() {
		if err := b.cleanupRunMounts(options.SystemContext, mountPoint, runArtifacts); err != nil {
			options.Logger.Errorf("unable to cleanup run mounts %v", err)
		}
	}()

	defer b.cleanupTempVolumes()

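	// Run the command using the isolation mechanism that was selected above.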
	switch isolation {
	case define.IsolationOCI:
		var moreCreateArgs []string
		if options.NoPivot {
			moreCreateArgs = append(moreCreateArgs, "--no-pivot")
		}
		err = b.runUsingRuntimeSubproc(isolation, options, configureNetwork, networkString, moreCreateArgs, spec,
			mountPoint, path, define.Package+"-"+filepath.Base(path), b.Container, hostsFile, resolvFile)
	case IsolationChroot:
		err = chroot.RunUsingChroot(spec, path, homeDir, options.Stdin, options.Stdout, options.Stderr)
	case IsolationOCIRootless:
		moreCreateArgs := []string{"--no-new-keyring"}
		if options.NoPivot {
			moreCreateArgs = append(moreCreateArgs, "--no-pivot")
		}
		err = b.runUsingRuntimeSubproc(isolation, options, configureNetwork, networkString, moreCreateArgs, spec,
			mountPoint, path, define.Package+"-"+filepath.Base(path), b.Container, hostsFile, resolvFile)
	default:
		err = errors.New("don't know how to run this command")
	}
	return err
}

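// setupOCIHooks collects the OCI hooks that match this container, reading hook
// definitions either from the explicitly-configured hook directories or from
// the system defaults, and runs any "precreate" hooks against the spec before
// returning the full set of matching hooks.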
func (b *Builder) setupOCIHooks(config *specs.Spec, hasVolumes bool) (map[string][]specs.Hook, error) {
	allHooks := make(map[string][]specs.Hook)
	if len(b.CommonBuildOpts.OCIHooksDir) == 0 {
		if unshare.IsRootless() {
			return nil, nil
		}
		for _, hDir := range []string{hooks.DefaultDir, hooks.OverrideDir} {
			manager, err := hooks.New(context.Background(), []string{hDir}, []string{})
			if err != nil {
				if errors.Is(err, os.ErrNotExist) {
					continue
				}
				return nil, err
			}
			ociHooks, err := manager.Hooks(config, b.ImageAnnotations, hasVolumes)
			if err != nil {
				return nil, err
			}
			if len(ociHooks) > 0 || config.Hooks != nil {
				logrus.Warnf("Implicit hook directories are deprecated; set --hooks-dir=%q explicitly to continue to load ociHooks from this directory", hDir)
			}
			for i, hook := range ociHooks {
				allHooks[i] = hook
			}
		}
	} else {
		manager, err := hooks.New(context.Background(), b.CommonBuildOpts.OCIHooksDir, []string{})
		if err != nil {
			return nil, err
		}

		allHooks, err = manager.Hooks(config, b.ImageAnnotations, hasVolumes)
		if err != nil {
			return nil, err
		}
	}

	hookErr, err := hooksExec.RuntimeConfigFilter(context.Background(), allHooks["precreate"], config, hooksExec.DefaultPostKillTimeout) //nolint:staticcheck
	if err != nil {
		logrus.Warnf("Container: precreate hook: %v", err)
		if hookErr != nil && hookErr != err {
			logrus.Debugf("container: precreate hook (hook error): %v", hookErr)
		}
		return nil, err
	}
	return allHooks, nil
}

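// addCommonOptsToSpec copies the CPU, memory, cgroup parent, and ulimit
// settings from the CommonBuildOptions into the generated runtime spec.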
func addCommonOptsToSpec(commonOpts *define.CommonBuildOptions, g *generate.Generator) error {
	// Resources - CPU
	if commonOpts.CPUPeriod != 0 {
		g.SetLinuxResourcesCPUPeriod(commonOpts.CPUPeriod)
	}
	if commonOpts.CPUQuota != 0 {
		g.SetLinuxResourcesCPUQuota(commonOpts.CPUQuota)
	}
	if commonOpts.CPUShares != 0 {
		g.SetLinuxResourcesCPUShares(commonOpts.CPUShares)
	}
	if commonOpts.CPUSetCPUs != "" {
		g.SetLinuxResourcesCPUCpus(commonOpts.CPUSetCPUs)
	}
	if commonOpts.CPUSetMems != "" {
		g.SetLinuxResourcesCPUMems(commonOpts.CPUSetMems)
	}

	// Resources - Memory
	if commonOpts.Memory != 0 {
		g.SetLinuxResourcesMemoryLimit(commonOpts.Memory)
	}
	if commonOpts.MemorySwap != 0 {
		g.SetLinuxResourcesMemorySwap(commonOpts.MemorySwap)
	}

	// cgroup membership
	if commonOpts.CgroupParent != "" {
		g.SetLinuxCgroupsPath(commonOpts.CgroupParent)
	}

	defaultContainerConfig, err := config.Default()
	if err != nil {
		return fmt.Errorf("failed to get container config: %w", err)
	}
	// Other process resource limits
	if err := addRlimits(commonOpts.Ulimit, g, defaultContainerConfig.Containers.DefaultUlimits.Get()); err != nil {
		return err
	}

	logrus.Debugf("Resources: %#v", commonOpts)
	return nil
}

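// setupSlirp4netnsNetwork starts slirp4netns for the given network namespace
// and returns a cleanup function that kills the slirp4netns process, along
// with the resulting /etc/hosts entries and DNS servers.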
func setupSlirp4netnsNetwork(config *config.Config, netns, cid string, options, hostnames []string) (func(), *netResult, error) {
	// we need the TmpDir for the slirp4netns code
	if err := os.MkdirAll(config.Engine.TmpDir, 0o751); err != nil {
		return nil, nil, fmt.Errorf("failed to create tempdir: %w", err)
	}
	res, err := slirp4netns.Setup(&slirp4netns.SetupOptions{
		Config:       config,
		ContainerID:  cid,
		Netns:        netns,
		ExtraOptions: options,
		Pdeathsig:    syscall.SIGKILL,
	})
	if err != nil {
		return nil, nil, err
	}

	ip, err := slirp4netns.GetIP(res.Subnet)
	if err != nil {
		return nil, nil, fmt.Errorf("get slirp4netns ip: %w", err)
	}

	dns, err := slirp4netns.GetDNS(res.Subnet)
	if err != nil {
		return nil, nil, fmt.Errorf("get slirp4netns dns ip: %w", err)
	}

	result := &netResult{
		entries:           etchosts.HostEntries{{IP: ip.String(), Names: hostnames}},
		dnsServers:        []string{dns.String()},
		ipv6:              res.IPv6,
		keepHostResolvers: true,
	}

	return func() {
		syscall.Kill(res.Pid, syscall.SIGKILL) // nolint:errcheck
		var status syscall.WaitStatus
		syscall.Wait4(res.Pid, &status, 0, nil) // nolint:errcheck
	}, result, nil
}

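// setupPasta sets up a pasta network for the given network namespace and
// returns the resulting host entries, DNS forwarders, and addresses to
// exclude; unlike the slirp4netns path, it returns no teardown function.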
func setupPasta(config *config.Config, netns string, options, hostnames []string) (func(), *netResult, error) {
	res, err := pasta.Setup2(&pasta.SetupOptions{
		Config:       config,
		Netns:        netns,
		ExtraOptions: options,
	})
	if err != nil {
		return nil, nil, err
	}

	var entries etchosts.HostEntries
	if len(res.IPAddresses) > 0 {
		entries = etchosts.HostEntries{{IP: res.IPAddresses[0].String(), Names: hostnames}}
	}

	result := &netResult{
		entries:           entries,
		dnsServers:        res.DNSForwardIPs,
		excludeIPs:        res.IPAddresses,
		ipv6:              res.IPv6,
		keepHostResolvers: true,
	}

	return nil, result, nil
}

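// runConfigureNetwork sets up networking for the process with the given PID:
// rootless runs are handed off to slirp4netns or pasta, while rootful runs are
// wired up through the configured network backend; it returns a teardown
// function and the resulting network information.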
func (b *Builder) runConfigureNetwork(pid int, isolation define.Isolation, options RunOptions, network, containerName string, hostnames []string) (func(), *netResult, error) {
	netns := fmt.Sprintf("/proc/%d/ns/net", pid)
	var configureNetworks []string
	defConfig, err := config.Default()
	if err != nil {
		return nil, nil, fmt.Errorf("failed to get container config: %w", err)
	}

	name, networkOpts, hasOpts := strings.Cut(network, ":")
	var netOpts []string
	if hasOpts {
		netOpts = strings.Split(networkOpts, ",")
	}
	if isolation == IsolationOCIRootless && name == "" {
		switch defConfig.Network.DefaultRootlessNetworkCmd {
		case slirp4netns.BinaryName, "":
			name = slirp4netns.BinaryName
		case pasta.BinaryName:
			name = pasta.BinaryName
		default:
			return nil, nil, fmt.Errorf("invalid default_rootless_network_cmd option %q",
				defConfig.Network.DefaultRootlessNetworkCmd)
		}
	}

	switch {
	case name == slirp4netns.BinaryName:
		return setupSlirp4netnsNetwork(defConfig, netns, containerName, netOpts, hostnames)
	case name == pasta.BinaryName:
		return setupPasta(defConfig, netns, netOpts, hostnames)

	// Basically default case except we make sure to not split an empty
	// name as this would return a slice with one empty string which is
	// not a valid network name.
	case len(network) > 0:
		// the old syntax allows comma-separated network names
		configureNetworks = strings.Split(network, ",")
	}

	if isolation == IsolationOCIRootless {
		return nil, nil, errors.New("cannot use networks as rootless")
	}

	if len(configureNetworks) == 0 {
		configureNetworks = []string{b.NetworkInterface.DefaultNetworkName()}
	}

	// Make sure we can access the container's network namespace,
	// even after it exits, to successfully tear down the
	// interfaces.  Ensure this by opening a handle to the network
	// namespace, and using our copy to both configure and
	// deconfigure it.
	netFD, err := unix.Open(netns, unix.O_RDONLY, 0)
	if err != nil {
		return nil, nil, fmt.Errorf("opening network namespace: %w", err)
	}
	mynetns := fmt.Sprintf("/proc/%d/fd/%d", unix.Getpid(), netFD)

	networks := make(map[string]nettypes.PerNetworkOptions, len(configureNetworks))
	for i, network := range configureNetworks {
		networks[network] = nettypes.PerNetworkOptions{
			InterfaceName: fmt.Sprintf("eth%d", i),
		}
	}

	opts := nettypes.NetworkOptions{
		ContainerID:   containerName,
		ContainerName: containerName,
		Networks:      networks,
	}
	netStatus, err := b.NetworkInterface.Setup(mynetns, nettypes.SetupOptions{NetworkOptions: opts})
	if err != nil {
		return nil, nil, err
	}

	teardown := func() {
		err := b.NetworkInterface.Teardown(mynetns, nettypes.TeardownOptions{NetworkOptions: opts})
		if err != nil {
			options.Logger.Errorf("failed to cleanup network: %v", err)
		}
	}

	return teardown, netStatusToNetResult(netStatus, hostnames), nil
}

// Create pipes to use for relaying stdio.
func runMakeStdioPipe(uid, gid int) ([][]int, error) {
	stdioPipe := make([][]int, 3)
	for i := range stdioPipe {
		stdioPipe[i] = make([]int, 2)
		if err := unix.Pipe(stdioPipe[i]); err != nil {
			return nil, fmt.Errorf("creating pipe for container FD %d: %w", i, err)
		}
	}
	if err := unix.Fchown(stdioPipe[unix.Stdin][0], uid, gid); err != nil {
		return nil, fmt.Errorf("setting owner of stdin pipe descriptor: %w", err)
	}
	if err := unix.Fchown(stdioPipe[unix.Stdout][1], uid, gid); err != nil {
		return nil, fmt.Errorf("setting owner of stdout pipe descriptor: %w", err)
	}
	if err := unix.Fchown(stdioPipe[unix.Stderr][1], uid, gid); err != nil {
		return nil, fmt.Errorf("setting owner of stderr pipe descriptor: %w", err)
	}
	return stdioPipe, nil
}

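// setupNamespaces applies the requested namespace and ID-mapping options to
// the generated spec and reports whether the network and UTS namespaces still
// need to be configured, along with any network name that was passed as a
// non-absolute namespace path.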
func setupNamespaces(logger *logrus.Logger, g *generate.Generator, namespaceOptions define.NamespaceOptions, idmapOptions define.IDMappingOptions, policy define.NetworkConfigurationPolicy) (configureNetwork bool, networkString string, configureUTS bool, err error) {
	defaultContainerConfig, err := config.Default()
	if err != nil {
		return false, "", false, fmt.Errorf("failed to get container config: %w", err)
	}

	addSysctl := func(prefixes []string) error {
		for _, sysctl := range defaultContainerConfig.Sysctls() {
			splitn := strings.SplitN(sysctl, "=", 2)
			if len(splitn) > 2 {
				return fmt.Errorf("sysctl %q defined in containers.conf must be formatted name=value", sysctl)
			}
			for _, prefix := range prefixes {
				if strings.HasPrefix(splitn[0], prefix) {
					g.AddLinuxSysctl(splitn[0], splitn[1])
				}
			}
		}
		return nil
	}

	// Set namespace options in the container configuration.
	configureUserns := false
	specifiedNetwork := false
	for _, namespaceOption := range namespaceOptions {
		switch namespaceOption.Name {
		case string(specs.IPCNamespace):
			if !namespaceOption.Host {
				if err := addSysctl([]string{"fs.mqueue"}); err != nil {
					return false, "", false, err
				}
			}
		case string(specs.UserNamespace):
			configureUserns = false
			if !namespaceOption.Host && namespaceOption.Path == "" {
				configureUserns = true
			}
		case string(specs.NetworkNamespace):
			specifiedNetwork = true
			configureNetwork = false
			if !namespaceOption.Host && (namespaceOption.Path == "" || !filepath.IsAbs(namespaceOption.Path)) {
				if namespaceOption.Path != "" && !filepath.IsAbs(namespaceOption.Path) {
					networkString = namespaceOption.Path
					namespaceOption.Path = ""
				}
				configureNetwork = (policy != define.NetworkDisabled)
			}
		case string(specs.UTSNamespace):
			configureUTS = false
			if !namespaceOption.Host {
				if namespaceOption.Path == "" {
					configureUTS = true
				}
				if err := addSysctl([]string{"kernel.hostname", "kernel.domainname"}); err != nil {
					return false, "", false, err
				}
			}
		}
		if namespaceOption.Host {
			if err := g.RemoveLinuxNamespace(namespaceOption.Name); err != nil {
				return false, "", false, fmt.Errorf("removing %q namespace for run: %w", namespaceOption.Name, err)
			}
		} else if err := g.AddOrReplaceLinuxNamespace(namespaceOption.Name, namespaceOption.Path); err != nil {
			if namespaceOption.Path == "" {
				return false, "", false, fmt.Errorf("adding new %q namespace for run: %w", namespaceOption.Name, err)
			}
			return false, "", false, fmt.Errorf("adding %q namespace %q for run: %w", namespaceOption.Name, namespaceOption.Path, err)
		}
	}

	// If we've got mappings, we're going to have to create a user namespace.
	if len(idmapOptions.UIDMap) > 0 || len(idmapOptions.GIDMap) > 0 || configureUserns {
		if err := g.AddOrReplaceLinuxNamespace(string(specs.UserNamespace), ""); err != nil {
			return false, "", false, fmt.Errorf("adding new %q namespace for run: %w", string(specs.UserNamespace), err)
		}
		hostUidmap, hostGidmap, err := unshare.GetHostIDMappings("")
		if err != nil {
			return false, "", false, err
		}
		for _, m := range idmapOptions.UIDMap {
			g.AddLinuxUIDMapping(m.HostID, m.ContainerID, m.Size)
		}
		if len(idmapOptions.UIDMap) == 0 {
			for _, m := range hostUidmap {
				g.AddLinuxUIDMapping(m.ContainerID, m.ContainerID, m.Size)
			}
		}
		for _, m := range idmapOptions.GIDMap {
			g.AddLinuxGIDMapping(m.HostID, m.ContainerID, m.Size)
		}
		if len(idmapOptions.GIDMap) == 0 {
			for _, m := range hostGidmap {
				g.AddLinuxGIDMapping(m.ContainerID, m.ContainerID, m.Size)
			}
		}
		if !specifiedNetwork {
			if err := g.AddOrReplaceLinuxNamespace(string(specs.NetworkNamespace), ""); err != nil {
				return false, "", false, fmt.Errorf("adding new %q namespace for run: %w", string(specs.NetworkNamespace), err)
			}
			configureNetwork = (policy != define.NetworkDisabled)
		}
	} else {
		if err := g.RemoveLinuxNamespace(string(specs.UserNamespace)); err != nil {
			return false, "", false, fmt.Errorf("removing %q namespace for run: %w", string(specs.UserNamespace), err)
		}
		if !specifiedNetwork {
			if err := g.RemoveLinuxNamespace(string(specs.NetworkNamespace)); err != nil {
				return false, "", false, fmt.Errorf("removing %q namespace for run: %w", string(specs.NetworkNamespace), err)
			}
		}
	}
	if configureNetwork {
		if err := addSysctl([]string{"net"}); err != nil {
			return false, "", false, err
		}
	}
	return configureNetwork, networkString, configureUTS, nil
}

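// configureNamespaces merges the builder's and the caller's namespace options,
// applies them to the spec, and sets the container's hostname and HOSTNAME
// environment variable; it reports whether the network still needs to be
// configured and which network was requested.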
func (b *Builder) configureNamespaces(g *generate.Generator, options *RunOptions) (bool, string, error) {
	defaultNamespaceOptions, err := DefaultNamespaceOptions()
	if err != nil {
		return false, "", err
	}

	namespaceOptions := defaultNamespaceOptions
	namespaceOptions.AddOrReplace(b.NamespaceOptions...)
	namespaceOptions.AddOrReplace(options.NamespaceOptions...)

	networkPolicy := options.ConfigureNetwork
	// Nothing was specified explicitly, so the network policy should be inherited from the builder.
	if networkPolicy == NetworkDefault {
		networkPolicy = b.ConfigureNetwork

		// If the builder policy was NetworkDisabled and we want to
		// disable the network for this run, reset options.ConfigureNetwork
		// to NetworkDisabled, since it will be treated as the source of
		// truth later.
		if networkPolicy == NetworkDisabled {
			options.ConfigureNetwork = networkPolicy
		}
	}
	if networkPolicy == NetworkDisabled {
		namespaceOptions.AddOrReplace(define.NamespaceOptions{{Name: string(specs.NetworkNamespace), Host: false}}...)
	}
	configureNetwork, networkString, configureUTS, err := setupNamespaces(options.Logger, g, namespaceOptions, b.IDMappingOptions, networkPolicy)
	if err != nil {
		return false, "", err
	}

	if configureUTS {
		if options.Hostname != "" {
			g.SetHostname(options.Hostname)
		} else if b.Hostname() != "" {
			g.SetHostname(b.Hostname())
		} else {
			g.SetHostname(stringid.TruncateID(b.ContainerID))
		}
	} else {
		g.SetHostname("")
	}

	found := false
	spec := g.Config
	for i := range spec.Process.Env {
		if strings.HasPrefix(spec.Process.Env[i], "HOSTNAME=") {
			found = true
			break
		}
	}
	if !found {
		spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("HOSTNAME=%s", spec.Hostname))
	}

	return configureNetwork, networkString, nil
}

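// runSetupBoundFiles converts the map of container path -> host path bind
// files into bind-mount entries for the spec.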
func runSetupBoundFiles(bundlePath string, bindFiles map[string]string) (mounts []specs.Mount) {
	for dest, src := range bindFiles {
		options := []string{"rbind"}
		if strings.HasPrefix(src, bundlePath) {
			options = append(options, bind.NoBindOption)
		}
		mounts = append(mounts, specs.Mount{
			Source:      src,
			Destination: dest,
			Type:        "bind",
			Options:     options,
		})
	}
	return mounts
}

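// addRlimits applies the default and user-specified ulimits to the spec,
// falling back to default RLIMIT_NOFILE and RLIMIT_NPROC values when they
// were not set explicitly.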
func addRlimits(ulimit []string, g *generate.Generator, defaultUlimits []string) error {
	var (
		ul  *units.Ulimit
		err error
		// setup rlimits
		nofileSet bool
		nprocSet  bool
	)

	ulimit = append(defaultUlimits, ulimit...)
	for _, u := range ulimit {
		if ul, err = butil.ParseUlimit(u); err != nil {
			return fmt.Errorf("ulimit option %q requires name=SOFT:HARD, failed to be parsed: %w", u, err)
		}

		if strings.ToUpper(ul.Name) == "NOFILE" {
			nofileSet = true
		}
		if strings.ToUpper(ul.Name) == "NPROC" {
			nprocSet = true
		}
		g.AddProcessRlimits("RLIMIT_"+strings.ToUpper(ul.Name), uint64(ul.Hard), uint64(ul.Soft))
	}
	if !nofileSet {
		max := define.RLimitDefaultValue
		var rlimit unix.Rlimit
		if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rlimit); err == nil {
			if max < rlimit.Max || unshare.IsRootless() {
				max = rlimit.Max
			}
		} else {
			logrus.Warnf("Failed to return RLIMIT_NOFILE ulimit %q", err)
		}
		g.AddProcessRlimits("RLIMIT_NOFILE", max, max)
	}
	if !nprocSet {
		max := define.RLimitDefaultValue
		var rlimit unix.Rlimit
		if err := unix.Getrlimit(unix.RLIMIT_NPROC, &rlimit); err == nil {
			if max < rlimit.Max || unshare.IsRootless() {
				max = rlimit.Max
			}
		} else {
			logrus.Warnf("Failed to return RLIMIT_NPROC ulimit %q", err)
		}
		g.AddProcessRlimits("RLIMIT_NPROC", max, max)
	}

	return nil
}

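// runSetupVolumeMounts turns the volume and mount specifications into mounts
// for the spec, handling the z/Z relabel, U chown, and O overlay options
// along the way.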
func (b *Builder) runSetupVolumeMounts(mountLabel string, volumeMounts []string, optionMounts []specs.Mount, idMaps IDMaps) (mounts []specs.Mount, Err error) {
	// Make sure the overlay directory is clean before running
	containerDir, err := b.store.ContainerDirectory(b.ContainerID)
	if err != nil {
		return nil, fmt.Errorf("looking up container directory for %s: %w", b.ContainerID, err)
	}
	if err := overlay.CleanupContent(containerDir); err != nil {
		return nil, fmt.Errorf("cleaning up overlay content for %s: %w", b.ContainerID, err)
	}

	parseMount := func(mountType, host, container string, options []string) (specs.Mount, error) {
		var foundrw, foundro, foundz, foundZ, foundO, foundU bool
		var rootProp, upperDir, workDir string
		for _, opt := range options {
			switch opt {
			case "rw":
				foundrw = true
			case "ro":
				foundro = true
			case "z":
				foundz = true
			case "Z":
				foundZ = true
			case "O":
				foundO = true
			case "U":
				foundU = true
			case "private", "rprivate", "slave", "rslave", "shared", "rshared":
				rootProp = opt
			}

			if strings.HasPrefix(opt, "upperdir") {
				splitOpt := strings.SplitN(opt, "=", 2)
				if len(splitOpt) > 1 {
					upperDir = splitOpt[1]
				}
			}
			if strings.HasPrefix(opt, "workdir") {
				splitOpt := strings.SplitN(opt, "=", 2)
				if len(splitOpt) > 1 {
					workDir = splitOpt[1]
				}
			}
		}
		if !foundrw && !foundro {
			options = append(options, "rw")
		}
		if foundz {
			if err := relabel(host, mountLabel, true); err != nil {
				return specs.Mount{}, err
			}
		}
		if foundZ {
			if err := relabel(host, mountLabel, false); err != nil {
				return specs.Mount{}, err
			}
		}
		if foundU {
			if err := chown.ChangeHostPathOwnership(host, true, idMaps.processUID, idMaps.processGID); err != nil {
				return specs.Mount{}, err
			}
		}
		if foundO {
			if (upperDir != "" && workDir == "") || (workDir != "" && upperDir == "") {
				return specs.Mount{}, errors.New("if specifying upperdir then workdir must be specified or vice versa")
			}

			containerDir, err := b.store.ContainerDirectory(b.ContainerID)
			if err != nil {
				return specs.Mount{}, err
			}

			contentDir, err := overlay.TempDir(containerDir, idMaps.rootUID, idMaps.rootGID)
			if err != nil {
				return specs.Mount{}, fmt.Errorf("failed to create TempDir in the %s directory: %w", containerDir, err)
			}

			overlayOpts := overlay.Options{
				RootUID:                idMaps.rootUID,
				RootGID:                idMaps.rootGID,
				UpperDirOptionFragment: upperDir,
				WorkDirOptionFragment:  workDir,
				GraphOpts:              b.store.GraphOptions(),
			}

			overlayMount, err := overlay.MountWithOptions(contentDir, host, container, &overlayOpts)
			if err == nil {
				b.TempVolumes[contentDir] = true
			}

			// If chown was requested (the U option), add correct ownership to the overlay temp directories.
			if foundU {
				if err := chown.ChangeHostPathOwnership(contentDir, true, idMaps.processUID, idMaps.processGID); err != nil {
					return specs.Mount{}, err
				}
			}

			return overlayMount, err
		}
		if rootProp == "" {
			options = append(options, "private")
		}
		if mountType != "tmpfs" {
			mountType = "bind"
			options = append(options, "rbind")
		}
		return specs.Mount{
			Destination: container,
			Type:        mountType,
			Source:      host,
			Options:     options,
		}, nil
	}

	// Bind mount volumes specified for this particular Run() invocation
	for _, i := range optionMounts {
		logrus.Debugf("setting up mounted volume at %q", i.Destination)
		mount, err := parseMount(i.Type, i.Source, i.Destination, i.Options)
		if err != nil {
			return nil, err
		}
		mounts = append(mounts, mount)
	}
	// Bind mount volumes given by the user when the container was created
	for _, i := range volumeMounts {
		var options []string
		spliti := parse.SplitStringWithColonEscape(i)
		if len(spliti) > 2 {
			options = strings.Split(spliti[2], ",")
		}
		options = append(options, "rbind")
		mount, err := parseMount("bind", spliti[0], spliti[1], options)
		if err != nil {
			return nil, err
		}
		mounts = append(mounts, mount)
	}
	return mounts, nil
}

func setupMaskedPaths(g *generate.Generator) {
	for _, mp := range config.DefaultMaskedPaths {
		g.AddLinuxMaskedPaths(mp)
	}
}

func setupReadOnlyPaths(g *generate.Generator) {
	for _, rp := range config.DefaultReadOnlyPaths {
		g.AddLinuxReadonlyPaths(rp)
	}
}

func setupCapAdd(g *generate.Generator, caps ...string) error {
	for _, cap := range caps {
		if err := g.AddProcessCapabilityBounding(cap); err != nil {
			return fmt.Errorf("adding %q to the bounding capability set: %w", cap, err)
		}
		if err := g.AddProcessCapabilityEffective(cap); err != nil {
			return fmt.Errorf("adding %q to the effective capability set: %w", cap, err)
		}
		if err := g.AddProcessCapabilityPermitted(cap); err != nil {
			return fmt.Errorf("adding %q to the permitted capability set: %w", cap, err)
		}
		if err := g.AddProcessCapabilityAmbient(cap); err != nil {
			return fmt.Errorf("adding %q to the ambient capability set: %w", cap, err)
		}
	}
	return nil
}

func setupCapDrop(g *generate.Generator, caps ...string) error {
	for _, cap := range caps {
		if err := g.DropProcessCapabilityBounding(cap); err != nil {
			return fmt.Errorf("removing %q from the bounding capability set: %w", cap, err)
		}
		if err := g.DropProcessCapabilityEffective(cap); err != nil {
			return fmt.Errorf("removing %q from the effective capability set: %w", cap, err)
		}
		if err := g.DropProcessCapabilityPermitted(cap); err != nil {
			return fmt.Errorf("removing %q from the permitted capability set: %w", cap, err)
		}
		if err := g.DropProcessCapabilityAmbient(cap); err != nil {
			return fmt.Errorf("removing %q from the ambient capability set: %w", cap, err)
		}
	}
	return nil
}

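// setupCapabilities resets the spec's capability sets to the defaults and
// then applies the requested additions and drops; adding "all" grants every
// known capability, and dropping "all" clears the sets entirely.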
func setupCapabilities(g *generate.Generator, defaultCapabilities, adds, drops []string) error {
	g.ClearProcessCapabilities()
	if err := setupCapAdd(g, defaultCapabilities...); err != nil {
		return err
	}
	for _, c := range adds {
		if strings.ToLower(c) == "all" {
			adds = capabilities.AllCapabilities()
			break
		}
	}
	for _, c := range drops {
		if strings.ToLower(c) == "all" {
			g.ClearProcessCapabilities()
			return nil
		}
	}
	if err := setupCapAdd(g, adds...); err != nil {
		return err
	}
	return setupCapDrop(g, drops...)
}

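// addOrReplaceMount adds the mount to the list, replacing any existing entry
// with the same destination.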
func addOrReplaceMount(mounts []specs.Mount, mount specs.Mount) []specs.Mount {
	for i := range mounts {
		if mounts[i].Destination == mount.Destination {
			mounts[i] = mount
			return mounts
		}
	}
	return append(mounts, mount)
}

// setupSpecialMountSpecChanges creates special mounts depending on the configured namespaces;
// logic taken from podman and adapted for buildah
// https://github.com/containers/podman/blob/4ba71f955a944790edda6e007e6d074009d437a7/pkg/specgen/generate/oci.go#L178
func setupSpecialMountSpecChanges(spec *specs.Spec, shmSize string) ([]specs.Mount, error) {
	mounts := spec.Mounts
	isRootless := unshare.IsRootless()
	isNewUserns := false
	isNetns := false
	isPidns := false
	isIpcns := false

	for _, namespace := range spec.Linux.Namespaces {
		switch namespace.Type {
		case specs.NetworkNamespace:
			isNetns = true
		case specs.UserNamespace:
			isNewUserns = true
		case specs.PIDNamespace:
			isPidns = true
		case specs.IPCNamespace:
			isIpcns = true
		}
	}

	addCgroup := true
	// mount sys when root and no userns or when a new netns is created
	canMountSys := (!isRootless && !isNewUserns) || isNetns
	if !canMountSys {
		addCgroup = false
		sys := "/sys"
		sysMnt := specs.Mount{
			Destination: sys,
			Type:        "bind",
			Source:      sys,
			Options:     []string{bind.NoBindOption, "rprivate", "nosuid", "noexec", "nodev", "ro", "rbind"},
		}
		mounts = addOrReplaceMount(mounts, sysMnt)
	}

	gid5Available := true
	if isRootless {
		_, gids, err := unshare.GetHostIDMappings("")
		if err != nil {
			return nil, err
		}
		gid5Available = checkIdsGreaterThan5(gids)
	}
	if gid5Available && len(spec.Linux.GIDMappings) > 0 {
		gid5Available = checkIdsGreaterThan5(spec.Linux.GIDMappings)
	}
	if !gid5Available {
		// If GID 5 is not mapped, the gid=5 default option would fail, so drop it.
		devPts := specs.Mount{
			Destination: "/dev/pts",
			Type:        "devpts",
			Source:      "devpts",
			Options:     []string{"rprivate", "nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620"},
		}
		mounts = addOrReplaceMount(mounts, devPts)
	}

	isUserns := isNewUserns || isRootless

	if isUserns && !isIpcns {
		devMqueue := "/dev/mqueue"
		devMqueueMnt := specs.Mount{
			Destination: devMqueue,
			Type:        "bind",
			Source:      devMqueue,
			Options:     []string{bind.NoBindOption, "bind", "nosuid", "noexec", "nodev"},
		}
		mounts = addOrReplaceMount(mounts, devMqueueMnt)
	}
	if isUserns && !isPidns {
		proc := "/proc"
		procMount := specs.Mount{
			Destination: proc,
			Type:        "bind",
			Source:      proc,
			Options:     []string{bind.NoBindOption, "rbind", "nosuid", "noexec", "nodev"},
		}
		mounts = addOrReplaceMount(mounts, procMount)
	}

	if addCgroup {
		cgroupMnt := specs.Mount{
			Destination: "/sys/fs/cgroup",
			Type:        "cgroup",
			Source:      "cgroup",
			Options:     []string{"rprivate", "nosuid", "noexec", "nodev", "relatime", "rw"},
		}
		mounts = addOrReplaceMount(mounts, cgroupMnt)
	}

	// if userns and host ipc bind mount shm
	if isUserns && !isIpcns {
		// bind mount /dev/shm when it exists
		if err := fileutils.Exists("/dev/shm"); err == nil {
			shmMount := specs.Mount{
				Source:      "/dev/shm",
				Type:        "bind",
				Destination: "/dev/shm",
				Options:     []string{bind.NoBindOption, "rbind", "nosuid", "noexec", "nodev"},
			}
			mounts = addOrReplaceMount(mounts, shmMount)
		}
	} else if shmSize != "" {
		shmMount := specs.Mount{
			Source:      "shm",
			Destination: "/dev/shm",
			Type:        "tmpfs",
			Options:     []string{"private", "nodev", "noexec", "nosuid", "mode=1777", "size=" + shmSize},
		}
		mounts = addOrReplaceMount(mounts, shmMount)
	}

	return mounts, nil
}

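// checkIdsGreaterThan5 reports whether one of the ID mappings maps container
// ID 5, which is needed for the devpts gid=5 option.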
func checkIdsGreaterThan5(ids []specs.LinuxIDMapping) bool {
	for _, r := range ids {
		if r.ContainerID <= 5 && 5 < r.ContainerID+r.Size {
			return true
		}
	}
	return false
}

// If this function succeeds and returns a non-nil *lockfile.LockFile, the caller must unlock it (when??).
func (b *Builder) getCacheMount(tokens []string, stageMountPoints map[string]internal.StageMountDetails, idMaps IDMaps, workDir string) (*specs.Mount, *lockfile.LockFile, error) {
	var optionMounts []specs.Mount
	mount, targetLock, err := volumes.GetCacheMount(tokens, b.store, b.MountLabel, stageMountPoints, workDir)
	if err != nil {
		return nil, nil, err
	}
	succeeded := false
	defer func() {
		if !succeeded && targetLock != nil {
			targetLock.Unlock()
		}
	}()
	optionMounts = append(optionMounts, mount)
	volumes, err := b.runSetupVolumeMounts(b.MountLabel, nil, optionMounts, idMaps)
	if err != nil {
		return nil, nil, err
	}
	succeeded = true
	return &volumes[0], targetLock, nil
}