container_internal.go 
1
//go:build !remote
2

3
package libpod
4

5
import (
6
	"bytes"
7
	"context"
8
	"errors"
9
	"fmt"
10
	"io"
11
	"io/fs"
12
	"os"
13
	"path/filepath"
14
	"strconv"
15
	"strings"
16
	"time"
17

18
	metadata "github.com/checkpoint-restore/checkpointctl/lib"
19
	"github.com/containers/buildah/copier"
20
	"github.com/containers/buildah/pkg/overlay"
21
	butil "github.com/containers/buildah/util"
22
	"github.com/containers/common/libnetwork/etchosts"
23
	"github.com/containers/common/pkg/cgroups"
24
	"github.com/containers/common/pkg/chown"
25
	"github.com/containers/common/pkg/config"
26
	"github.com/containers/common/pkg/hooks"
27
	"github.com/containers/common/pkg/hooks/exec"
28
	"github.com/containers/common/pkg/timezone"
29
	cutil "github.com/containers/common/pkg/util"
30
	"github.com/containers/podman/v5/libpod/define"
31
	"github.com/containers/podman/v5/libpod/events"
32
	"github.com/containers/podman/v5/libpod/shutdown"
33
	"github.com/containers/podman/v5/pkg/ctime"
34
	"github.com/containers/podman/v5/pkg/lookup"
35
	"github.com/containers/podman/v5/pkg/rootless"
36
	"github.com/containers/podman/v5/pkg/selinux"
37
	"github.com/containers/podman/v5/pkg/systemd/notifyproxy"
38
	"github.com/containers/podman/v5/pkg/util"
39
	"github.com/containers/storage"
40
	"github.com/containers/storage/pkg/chrootarchive"
41
	"github.com/containers/storage/pkg/fileutils"
42
	"github.com/containers/storage/pkg/idmap"
43
	"github.com/containers/storage/pkg/idtools"
44
	"github.com/containers/storage/pkg/lockfile"
45
	"github.com/containers/storage/pkg/mount"
46
	"github.com/coreos/go-systemd/v22/daemon"
47
	securejoin "github.com/cyphar/filepath-securejoin"
48
	spec "github.com/opencontainers/runtime-spec/specs-go"
49
	"github.com/opencontainers/runtime-tools/generate"
50
	"github.com/opencontainers/selinux/go-selinux/label"
51
	"github.com/sirupsen/logrus"
52
	"golang.org/x/exp/slices"
53
	"golang.org/x/sys/unix"
54
)
55

56
const (
57
	// name of the directory holding the artifacts
58
	artifactsDir      = "artifacts"
59
	execDirPermission = 0755
60
	preCheckpointDir  = "pre-checkpoint"
61
)
62

63
// rootFsSize gets the size of the container, which can be divided notionally
64
// into two parts.  The first is the part of its size that can be directly
65
// attributed to its base image, if it has one.  The second is the set of
66
// changes that the container has had made relative to that base image.  Both
67
// parts include some ancillary data, and we count that, too.
68
func (c *Container) rootFsSize() (int64, error) {
69
	if c.config.Rootfs != "" {
70
		return 0, nil
71
	}
72
	if c.runtime.store == nil {
73
		return 0, nil
74
	}
75

76
	container, err := c.runtime.store.Container(c.ID())
77
	if err != nil {
78
		return 0, err
79
	}
80

81
	size := int64(0)
82
	if container.ImageID != "" {
83
		size, err = c.runtime.store.ImageSize(container.ImageID)
84
		if err != nil {
85
			return 0, err
86
		}
87
	}
88

89
	layerSize, err := c.runtime.store.ContainerSize(c.ID())
90

91
	return size + layerSize, err
92
}
93

94
// rwSize gets the combined size of the writeable layer and any ancillary data
95
// for a given container.
96
func (c *Container) rwSize() (int64, error) {
97
	if c.config.Rootfs != "" {
98
		size, err := util.SizeOfPath(c.config.Rootfs)
99
		return int64(size), err
100
	}
101

102
	layerSize, err := c.runtime.store.ContainerSize(c.ID())
103
	if err != nil {
104
		return 0, err
105
	}
106

107
	return layerSize, nil
108
}
109

110
// bundlePath returns the path to the container's root filesystem - where the OCI spec will be
111
// placed, amongst other things
112
func (c *Container) bundlePath() string {
113
	if c.runtime.storageConfig.TransientStore {
114
		return c.state.RunDir
115
	}
116
	return c.config.StaticDir
117
}
118

119
// ControlSocketPath returns the path to the container's control socket for things like tty
120
// resizing
121
func (c *Container) ControlSocketPath() string {
122
	return filepath.Join(c.bundlePath(), "ctl")
123
}
124

125
// CheckpointVolumesPath returns the path to the directory containing the checkpointed volumes
126
func (c *Container) CheckpointVolumesPath() string {
127
	return filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory)
128
}
129

130
// CheckpointPath returns the path to the directory containing the checkpoint
131
func (c *Container) CheckpointPath() string {
132
	return filepath.Join(c.bundlePath(), metadata.CheckpointDirectory)
133
}
134

135
// PreCheckPointPath returns the path to the directory containing the pre-checkpoint images
136
func (c *Container) PreCheckPointPath() string {
137
	return filepath.Join(c.bundlePath(), preCheckpointDir)
138
}
139

140
// AttachSocketPath retrieves the path of the container's attach socket
141
func (c *Container) AttachSocketPath() (string, error) {
142
	return c.ociRuntime.AttachSocketPath(c)
143
}
144

145
// exitFilePath gets the path to the container's exit file
146
func (c *Container) exitFilePath() (string, error) {
147
	return c.ociRuntime.ExitFilePath(c)
148
}
149

150
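// oomFilePath gets the path to the container's OOM file, whose presence indicates the container was OOM killed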
func (c *Container) oomFilePath() (string, error) {
151
	return c.ociRuntime.OOMFilePath(c)
152
}
153

154
// Wait for the container's exit file to appear.
155
// When it does, update our state based on it.
156
func (c *Container) waitForExitFileAndSync() error {
157
	exitFile, err := c.exitFilePath()
158
	if err != nil {
159
		return err
160
	}
161

162
	chWait := make(chan error)
163
	defer close(chWait)
164

165
	_, err = cutil.WaitForFile(exitFile, chWait, time.Second*5)
166
	if err != nil {
167
		// Exit file did not appear
168
		// Reset our state
169
		c.state.ExitCode = -1
170
		c.state.FinishedTime = time.Now()
171
		c.state.State = define.ContainerStateStopped
172

173
		if err2 := c.save(); err2 != nil {
174
			logrus.Errorf("Saving container %s state: %v", c.ID(), err2)
175
		}
176

177
		return err
178
	}
179

180
	if err := c.checkExitFile(); err != nil {
181
		return err
182
	}
183

184
	return c.save()
185
}
186

187
// Handle the container exit file.
188
// The exit file is used to supply container exit time and exit code.
189
// This assumes the exit file already exists.
190
// Also check for an oom file to determine if the container was oom killed or not.
191
func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
192
	c.state.FinishedTime = ctime.Created(fi)
193
	statusCodeStr, err := os.ReadFile(exitFile)
194
	if err != nil {
195
		return fmt.Errorf("failed to read exit file for container %s: %w", c.ID(), err)
196
	}
197
	statusCode, err := strconv.Atoi(string(statusCodeStr))
198
	if err != nil {
199
		return fmt.Errorf("converting exit status code (%q, err) for container %s to int: %w",
200
			statusCodeStr, c.ID(), err)
201
	}
202
	c.state.ExitCode = int32(statusCode)
203

204
	oomFilePath, err := c.oomFilePath()
205
	if err != nil {
206
		return err
207
	}
208
	if err = fileutils.Exists(oomFilePath); err == nil {
209
		c.state.OOMKilled = true
210
	}
211

212
	c.state.Exited = true
213

214
	// Write an event for the container's death
215
	c.newContainerExitedEvent(c.state.ExitCode)
216

217
	return c.runtime.state.AddContainerExitCode(c.ID(), c.state.ExitCode)
218
}
219

220
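// shouldRestart returns true if the container should be restarted according to its
// restart policy, health-check on-failure action, exit code, and retry count, and
// was not explicitly stopped by the user.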
func (c *Container) shouldRestart() bool {
221
	if c.config.HealthCheckOnFailureAction == define.HealthCheckOnFailureActionRestart {
222
		isUnhealthy, err := c.isUnhealthy()
223
		if err != nil {
224
			logrus.Errorf("Checking if container is unhealthy: %v", err)
225
		} else if isUnhealthy {
226
			return true
227
		}
228
	}
229

230
	// Explicitly stopped by user, do not restart again.
231
	if c.state.StoppedByUser {
232
		return false
233
	}
234

235
	// If we did not get a restart policy match, return false
236
	// Do the same if we're not a policy that restarts.
237
	if !c.state.RestartPolicyMatch ||
238
		c.config.RestartPolicy == define.RestartPolicyNo ||
239
		c.config.RestartPolicy == define.RestartPolicyNone {
240
		return false
241
	}
242

243
	// If we're RestartPolicyOnFailure, we need to check retries and exit
244
	// code.
245
	if c.config.RestartPolicy == define.RestartPolicyOnFailure {
246
		if c.state.ExitCode == 0 {
247
			return false
248
		}
249

250
		// If we don't have a max retries set, continue
251
		if c.config.RestartRetries > 0 {
252
			if c.state.RestartCount >= c.config.RestartRetries {
253
				return false
254
			}
255
		}
256
	}
257
	return true
258
}
259

260
// Handle container restart policy.
261
// This is called when a container has exited, and was not explicitly stopped by
262
// an API call to stop the container or pod it is in.
263
func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr error) {
264
	if !c.shouldRestart() {
265
		return false, nil
266
	}
267
	logrus.Debugf("Restarting container %s due to restart policy %s", c.ID(), c.config.RestartPolicy)
268

269
	// Need to check if dependencies are alive.
270
	if err := c.checkDependenciesAndHandleError(); err != nil {
271
		return false, err
272
	}
273

274
	if c.config.HealthCheckConfig != nil {
275
		if err := c.removeTransientFiles(ctx, c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
276
			return false, err
277
		}
278
	}
279

280
	// Is the container running again?
281
	// If so, we don't have to do anything
282
	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
283
		return false, nil
284
	} else if c.state.State == define.ContainerStateUnknown {
285
		return false, fmt.Errorf("invalid container state encountered in restart attempt: %w", define.ErrInternal)
286
	}
287

288
	c.newContainerEvent(events.Restart)
289

290
	// Increment restart count
291
	c.state.RestartCount++
292
	logrus.Debugf("Container %s now on retry %d", c.ID(), c.state.RestartCount)
293
	if err := c.save(); err != nil {
294
		return false, err
295
	}
296

297
	defer func() {
298
		if retErr != nil {
299
			if err := c.cleanup(ctx); err != nil {
300
				logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
301
			}
302
		}
303
	}()
304

305
	// Always teardown the network, trying to reuse the netns has caused
306
	// a significant amount of bugs in this code here. It also never worked
307
	// for containers with user namespaces. So once and for all simplify this
308
	// by never reusing the netns. Originally this was done to have a faster
309
	// restart of containers but with netavark now we are much faster so it
310
	// shouldn't be that noticeable in practice. It also makes more sense to
311
	// reconfigure the netns as it is likely that the container exited due to
312
	// some broken network state in which case reusing would just cause more
313
	// harm than good.
314
	if err := c.cleanupNetwork(); err != nil {
315
		return false, err
316
	}
317

318
	if err := c.prepare(); err != nil {
319
		return false, err
320
	}
321

322
	if c.state.State == define.ContainerStateStopped {
323
		// Reinitialize the container if we need to
324
		if err := c.reinit(ctx, true); err != nil {
325
			return false, err
326
		}
327
	} else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
328
		// Initialize the container
329
		if err := c.init(ctx, true); err != nil {
330
			return false, err
331
		}
332
	}
333
	if err := c.start(ctx); err != nil {
334
		return false, err
335
	}
336
	return true, nil
337
}
338

339
// Ensure that the container is in a specific state or states.
340
// Returns true if the container is in one of the given states,
341
// or false otherwise.
342
func (c *Container) ensureState(states ...define.ContainerStatus) bool {
343
	for _, state := range states {
344
		if state == c.state.State {
345
			return true
346
		}
347
	}
348
	return false
349
}
350

351
// Sync this container with on-disk state and runtime status
352
// Should only be called with container lock held
353
// This function should suffice to ensure a container's state is accurate and
354
// it is valid for use.
355
func (c *Container) syncContainer() error {
356
	if err := c.runtime.state.UpdateContainer(c); err != nil {
357
		return err
358
	}
359
	// If runtime knows about the container, update its status in runtime
360
	// And then save back to disk
361
	if c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStatePaused) {
362
		oldState := c.state.State
363

364
		if err := c.checkExitFile(); err != nil {
365
			return err
366
		}
367

368
		// Only save back to DB if state changed
369
		if c.state.State != oldState {
370
			// Check for a restart policy match
371
			if c.config.RestartPolicy != define.RestartPolicyNone && c.config.RestartPolicy != define.RestartPolicyNo &&
372
				(oldState == define.ContainerStateRunning || oldState == define.ContainerStatePaused) &&
373
				(c.state.State == define.ContainerStateStopped || c.state.State == define.ContainerStateExited) &&
374
				!c.state.StoppedByUser {
375
				c.state.RestartPolicyMatch = true
376
			}
377

378
			if err := c.save(); err != nil {
379
				return err
380
			}
381
		}
382
	}
383

384
	if !c.valid {
385
		return fmt.Errorf("container %s is not valid: %w", c.ID(), define.ErrCtrRemoved)
386
	}
387

388
	return nil
389
}
390

391
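// setupStorageMapping copies the ID mapping options from the container's configuration
// into the storage options, disabling auto-userns for containers joining a pod and
// translating any explicit UID/GID mappings from the OCI spec.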
func (c *Container) setupStorageMapping(dest, from *storage.IDMappingOptions) {
392
	*dest = *from
393
	// If we are creating a container inside a pod, we always want to inherit the
394
	// userns settings from the infra container. So clear the auto userns settings
395
	// so that we don't request storage for a new uid/gid map.
396
	if c.PodID() != "" && !c.IsInfra() {
397
		dest.AutoUserNs = false
398
	}
399
	if dest.AutoUserNs {
400
		overrides := c.getUserOverrides()
401
		dest.AutoUserNsOpts.PasswdFile = overrides.ContainerEtcPasswdPath
402
		dest.AutoUserNsOpts.GroupFile = overrides.ContainerEtcGroupPath
403
		if c.config.User != "" {
404
			initialSize := uint32(0)
405
			parts := strings.Split(c.config.User, ":")
406
			for _, p := range parts {
407
				s, err := strconv.ParseUint(p, 10, 32)
408
				if err == nil && uint32(s) > initialSize {
409
					initialSize = uint32(s)
410
				}
411
			}
412
			dest.AutoUserNsOpts.InitialSize = initialSize + 1
413
		}
414
	} else if c.config.Spec.Linux != nil {
415
		dest.UIDMap = nil
416
		for _, r := range c.config.Spec.Linux.UIDMappings {
417
			u := idtools.IDMap{
418
				ContainerID: int(r.ContainerID),
419
				HostID:      int(r.HostID),
420
				Size:        int(r.Size),
421
			}
422
			dest.UIDMap = append(dest.UIDMap, u)
423
		}
424
		dest.GIDMap = nil
425
		for _, r := range c.config.Spec.Linux.GIDMappings {
426
			g := idtools.IDMap{
427
				ContainerID: int(r.ContainerID),
428
				HostID:      int(r.HostID),
429
				Size:        int(r.Size),
430
			}
431
			dest.GIDMap = append(dest.GIDMap, g)
432
		}
433
		dest.HostUIDMapping = false
434
		dest.HostGIDMapping = false
435
	}
436
}
437

438
// Create container root filesystem for use
439
func (c *Container) setupStorage(ctx context.Context) error {
440
	if !c.valid {
441
		return fmt.Errorf("container %s is not valid: %w", c.ID(), define.ErrCtrRemoved)
442
	}
443

444
	if c.state.State != define.ContainerStateConfigured {
445
		return fmt.Errorf("container %s must be in Configured state to have storage set up: %w", c.ID(), define.ErrCtrStateInvalid)
446
	}
447

448
	// Need both an image ID and image name, plus a bool telling us whether to use the image configuration
449
	if c.config.Rootfs == "" && (c.config.RootfsImageID == "" || c.config.RootfsImageName == "") {
450
		return fmt.Errorf("must provide image ID and image name to use an image: %w", define.ErrInvalidArg)
451
	}
452
	options := storage.ContainerOptions{
453
		IDMappingOptions: storage.IDMappingOptions{
454
			HostUIDMapping: true,
455
			HostGIDMapping: true,
456
		},
457
		LabelOpts: c.config.LabelOpts,
458
	}
459

460
	options.StorageOpt = c.config.StorageOpts
461

462
	if c.restoreFromCheckpoint && c.config.ProcessLabel != "" && c.config.MountLabel != "" {
463
		// If restoring from a checkpoint, the root file-system needs
464
		// to be mounted with the same SELinux labels as it was mounted
465
		// previously. But only if both labels have been set. For
466
		// privileged containers or '--ipc host' only ProcessLabel will
467
		// be set and so we will skip it for cases like that.
468
		if options.Flags == nil {
469
			options.Flags = make(map[string]interface{})
470
		}
471
		options.Flags["ProcessLabel"] = c.config.ProcessLabel
472
		options.Flags["MountLabel"] = c.config.MountLabel
473
	}
474
	if c.config.Privileged {
475
		privOpt := func(opt string) bool {
476
			for _, privopt := range []string{"nodev", "nosuid", "noexec"} {
477
				if opt == privopt {
478
					return true
479
				}
480
			}
481
			return false
482
		}
483

484
		defOptions, err := storage.GetMountOptions(c.runtime.store.GraphDriverName(), c.runtime.store.GraphOptions())
485
		if err != nil {
486
			return fmt.Errorf("getting default mount options: %w", err)
487
		}
488
		var newOptions []string
489
		for _, opt := range defOptions {
490
			if !privOpt(opt) {
491
				newOptions = append(newOptions, opt)
492
			}
493
		}
494
		options.MountOpts = newOptions
495
	}
496

497
	options.Volatile = c.config.Volatile
498

499
	c.setupStorageMapping(&options.IDMappingOptions, &c.config.IDMappings)
500

501
	// Unless the user has specified a name, use a randomly generated one.
502
	// Note that name conflicts may occur (see #11735), so we need to loop.
503
	generateName := c.config.Name == ""
504
	var containerInfo ContainerInfo
505
	var containerInfoErr error
506
	for {
507
		if generateName {
508
			name, err := c.runtime.generateName()
509
			if err != nil {
510
				return err
511
			}
512
			c.config.Name = name
513
		}
514
		containerInfo, containerInfoErr = c.runtime.storageService.CreateContainerStorage(ctx, c.runtime.imageContext, c.config.RootfsImageName, c.config.RootfsImageID, c.config.Name, c.config.ID, options)
515

516
		if !generateName || !errors.Is(containerInfoErr, storage.ErrDuplicateName) {
517
			break
518
		}
519
	}
520
	if containerInfoErr != nil {
521
		if errors.Is(containerInfoErr, storage.ErrDuplicateName) {
522
			if _, err := c.runtime.LookupContainer(c.config.Name); errors.Is(err, define.ErrNoSuchCtr) {
523
				return fmt.Errorf("creating container storage: %w by an external entity", containerInfoErr)
524
			}
525
		}
526
		return fmt.Errorf("creating container storage: %w", containerInfoErr)
527
	}
528

529
	// Only reconfigure IDMappings if the layer was mounted from storage.
530
	// If it's an external overlay do not reset IDMappings.
531
	if !c.config.RootfsOverlay {
532
		c.config.IDMappings.UIDMap = containerInfo.UIDMap
533
		c.config.IDMappings.GIDMap = containerInfo.GIDMap
534
	}
535

536
	processLabel, err := c.processLabel(containerInfo.ProcessLabel)
537
	if err != nil {
538
		return err
539
	}
540
	c.config.ProcessLabel = processLabel
541
	c.config.MountLabel = containerInfo.MountLabel
542
	c.config.StaticDir = containerInfo.Dir
543
	c.state.RunDir = containerInfo.RunDir
544

545
	if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 {
546
		if err := idtools.SafeChown(containerInfo.RunDir, c.RootUID(), c.RootGID()); err != nil {
547
			return err
548
		}
549

550
		if err := idtools.SafeChown(containerInfo.Dir, c.RootUID(), c.RootGID()); err != nil {
551
			return err
552
		}
553
	}
554

555
	// Set the default Entrypoint and Command
556
	if containerInfo.Config != nil {
557
		// Set CMD in the container to the default configuration only if ENTRYPOINT is not set by the user.
558
		if c.config.Entrypoint == nil && c.config.Command == nil {
559
			c.config.Command = containerInfo.Config.Config.Cmd
560
		}
561
		if c.config.Entrypoint == nil {
562
			c.config.Entrypoint = containerInfo.Config.Config.Entrypoint
563
		}
564
	}
565

566
	artifacts := filepath.Join(c.config.StaticDir, artifactsDir)
567
	if err := os.MkdirAll(artifacts, 0755); err != nil {
568
		return fmt.Errorf("creating artifacts directory: %w", err)
569
	}
570

571
	return nil
572
}
573

574
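// processLabel returns the SELinux process label to use for the container, switching to
// a KVM or systemd init label when appropriate unless the user supplied an explicit type
// via the label annotation.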
func (c *Container) processLabel(processLabel string) (string, error) {
575
	if !c.Systemd() && !c.ociRuntime.SupportsKVM() {
576
		return processLabel, nil
577
	}
578
	ctrSpec, err := c.specFromState()
579
	if err != nil {
580
		return "", err
581
	}
582
	label, ok := ctrSpec.Annotations[define.InspectAnnotationLabel]
583
	if !ok || !strings.Contains(label, "type:") {
584
		switch {
585
		case c.ociRuntime.SupportsKVM():
586
			return selinux.KVMLabel(processLabel)
587
		case c.Systemd():
588
			return selinux.InitLabel(processLabel)
589
		}
590
	}
591
	return processLabel, nil
592
}
593

594
// Tear down a container's storage prior to removal
595
func (c *Container) teardownStorage() error {
596
	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
597
		return fmt.Errorf("cannot remove storage for container %s as it is running or paused: %w", c.ID(), define.ErrCtrStateInvalid)
598
	}
599

600
	artifacts := filepath.Join(c.config.StaticDir, artifactsDir)
601
	if err := os.RemoveAll(artifacts); err != nil {
602
		return fmt.Errorf("removing container %s artifacts %q: %w", c.ID(), artifacts, err)
603
	}
604

605
	if err := c.cleanupStorage(); err != nil {
606
		return fmt.Errorf("failed to clean up container %s storage: %w", c.ID(), err)
607
	}
608

609
	if err := c.runtime.storageService.DeleteContainer(c.ID()); err != nil {
610
		// If the container has already been removed, warn but do not
611
		// error - we wanted it gone, it is already gone.
612
		// Potentially another tool using containers/storage already
613
		// removed it?
614
		if errors.Is(err, storage.ErrNotAContainer) || errors.Is(err, storage.ErrContainerUnknown) {
615
			logrus.Infof("Storage for container %s already removed", c.ID())
616
			return nil
617
		}
618

619
		return fmt.Errorf("removing container %s root filesystem: %w", c.ID(), err)
620
	}
621

622
	return nil
623
}
624

625
// Reset resets state fields to default values.
626
// It is performed before a refresh and clears the state after a reboot.
627
// It does not save the results - assumes the database will do that for us.
628
func resetContainerState(state *ContainerState) {
629
	state.PID = 0
630
	state.ConmonPID = 0
631
	state.Mountpoint = ""
632
	state.Mounted = false
633
	// Reset state.
634
	// Almost all states are reset to either Configured or Exited,
635
	// except ContainerStateRemoving which is preserved.
636
	switch state.State {
637
	case define.ContainerStateStopped, define.ContainerStateExited, define.ContainerStateStopping, define.ContainerStateRunning, define.ContainerStatePaused:
638
		// All containers that ran at any point during the last boot
639
		// must be placed in the Exited state.
640
		state.State = define.ContainerStateExited
641
	case define.ContainerStateConfigured, define.ContainerStateCreated:
642
		state.State = define.ContainerStateConfigured
643
	case define.ContainerStateUnknown:
644
		// Something really strange must have happened to get us here.
645
		// Reset to configured, maybe the reboot cleared things up?
646
		state.State = define.ContainerStateConfigured
647
	}
648
	state.ExecSessions = make(map[string]*ExecSession)
649
	state.LegacyExecSessions = nil
650
	state.BindMounts = make(map[string]string)
651
	state.StoppedByUser = false
652
	state.RestartPolicyMatch = false
653
	state.RestartCount = 0
654
	state.Checkpointed = false
655
	state.Restored = false
656
	state.CheckpointedTime = time.Time{}
657
	state.RestoredTime = time.Time{}
658
	state.CheckpointPath = ""
659
	state.CheckpointLog = ""
660
	state.RestoreLog = ""
661
	state.StartupHCPassed = false
662
	state.StartupHCSuccessCount = 0
663
	state.StartupHCFailureCount = 0
664
	state.NetNS = ""
665
	state.NetworkStatus = nil
666
}
667

668
// Refresh refreshes the container's state after a restart.
669
// Refresh cannot perform any operations that would lock another container.
670
// We cannot guarantee any other container has a valid lock at the time it is
671
// running.
672
func (c *Container) refresh() error {
673
	// Don't need a full sync, but we do need to update from the database to
674
	// pick up potentially-missing container state
675
	if err := c.runtime.state.UpdateContainer(c); err != nil {
676
		return err
677
	}
678

679
	if !c.valid {
680
		return fmt.Errorf("container %s is not valid - may have been removed: %w", c.ID(), define.ErrCtrRemoved)
681
	}
682

683
	// We need to get the container's temporary directory from c/storage
684
	// It was lost in the reboot and must be recreated
685
	dir, err := c.runtime.storageService.GetRunDir(c.ID())
686
	if err != nil {
687
		return fmt.Errorf("retrieving temporary directory for container %s: %w", c.ID(), err)
688
	}
689
	c.state.RunDir = dir
690

691
	if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 {
692
		info, err := os.Stat(c.runtime.config.Engine.TmpDir)
693
		if err != nil {
694
			return err
695
		}
696
		if err := os.Chmod(c.runtime.config.Engine.TmpDir, info.Mode()|0111); err != nil {
697
			return err
698
		}
699
		root := filepath.Join(c.runtime.config.Engine.TmpDir, "containers-root", c.ID())
700
		if err := os.MkdirAll(root, 0755); err != nil {
701
			return fmt.Errorf("creating userNS tmpdir for container %s: %w", c.ID(), err)
702
		}
703
		if err := idtools.SafeChown(root, c.RootUID(), c.RootGID()); err != nil {
704
			return err
705
		}
706
	}
707

708
	// We need to pick up a new lock
709
	lock, err := c.runtime.lockManager.AllocateAndRetrieveLock(c.config.LockID)
710
	if err != nil {
711
		return fmt.Errorf("acquiring lock %d for container %s: %w", c.config.LockID, c.ID(), err)
712
	}
713
	c.lock = lock
714

715
	c.state.NetworkStatus = nil
716

717
	// Rewrite the config if necessary.
718
	// Podman 4.0 uses a new port format in the config.
719
	// getContainerConfigFromDB() already converted the old ports to the new one
720
	// but it did not write the config back to the db for performance reasons.
721
	// If a rewrite must happen the config.rewrite field is set to true.
722
	if c.config.rewrite {
723
		// SafeRewriteContainerConfig must be used with care. Make sure to not change config fields by accident.
724
		if err := c.runtime.state.SafeRewriteContainerConfig(c, "", "", c.config); err != nil {
725
			return fmt.Errorf("failed to rewrite the config for container %s: %w", c.config.ID, err)
726
		}
727
		c.config.rewrite = false
728
	}
729

730
	if err := c.save(); err != nil {
731
		return fmt.Errorf("refreshing state for container %s: %w", c.ID(), err)
732
	}
733

734
	// Remove ctl and attach files, which may persist across reboot
735
	if err := c.removeConmonFiles(); err != nil {
736
		return err
737
	}
738

739
	return nil
740
}
741

742
// Remove conmon attach socket and terminal resize FIFO
743
// This is necessary for restarting containers
744
func (c *Container) removeConmonFiles() error {
745
	// Files are allowed to not exist, so ignore ENOENT
746
	attachFile, err := c.AttachSocketPath()
747
	if err != nil {
748
		return fmt.Errorf("failed to get attach socket path for container %s: %w", c.ID(), err)
749
	}
750

751
	if err := os.Remove(attachFile); err != nil && !os.IsNotExist(err) {
752
		return fmt.Errorf("removing container %s attach file: %w", c.ID(), err)
753
	}
754

755
	ctlFile := filepath.Join(c.bundlePath(), "ctl")
756
	if err := os.Remove(ctlFile); err != nil && !os.IsNotExist(err) {
757
		return fmt.Errorf("removing container %s ctl file: %w", c.ID(), err)
758
	}
759

760
	winszFile := filepath.Join(c.bundlePath(), "winsz")
761
	if err := os.Remove(winszFile); err != nil && !os.IsNotExist(err) {
762
		return fmt.Errorf("removing container %s winsz file: %w", c.ID(), err)
763
	}
764

765
	// Remove the exit file so we don't leak memory in tmpfs
766
	exitFile, err := c.exitFilePath()
767
	if err != nil {
768
		return err
769
	}
770
	if err := os.Remove(exitFile); err != nil && !os.IsNotExist(err) {
771
		return fmt.Errorf("removing container %s exit file: %w", c.ID(), err)
772
	}
773

774
	// Remove the oom file
775
	oomFile, err := c.oomFilePath()
776
	if err != nil {
777
		return err
778
	}
779
	if err := os.Remove(oomFile); err != nil && !errors.Is(err, fs.ErrNotExist) {
780
		return fmt.Errorf("removing container %s oom file: %w", c.ID(), err)
781
	}
782

783
	return nil
784
}
785

786
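// export writes a tarball of the container's root filesystem to the given writer,
// mounting and unmounting the container as needed.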
func (c *Container) export(out io.Writer) error {
787
	mountPoint := c.state.Mountpoint
788
	if !c.state.Mounted {
789
		containerMount, err := c.runtime.store.Mount(c.ID(), c.config.MountLabel)
790
		if err != nil {
791
			return fmt.Errorf("mounting container %q: %w", c.ID(), err)
792
		}
793
		mountPoint = containerMount
794
		defer func() {
795
			if _, err := c.runtime.store.Unmount(c.ID(), false); err != nil {
796
				logrus.Errorf("Unmounting container %q: %v", c.ID(), err)
797
			}
798
		}()
799
	}
800

801
	input, err := chrootarchive.Tar(mountPoint, nil, mountPoint)
802
	if err != nil {
803
		return fmt.Errorf("reading container directory %q: %w", c.ID(), err)
804
	}
805

806
	_, err = io.Copy(out, input)
807
	return err
808
}
809

810
// Get path of artifact with a given name for this container
811
func (c *Container) getArtifactPath(name string) string {
812
	return filepath.Join(c.config.StaticDir, artifactsDir, name)
813
}
814

815
// save container state to the database
816
func (c *Container) save() error {
817
	if err := c.runtime.state.SaveContainer(c); err != nil {
818
		return fmt.Errorf("saving container %s state: %w", c.ID(), err)
819
	}
820
	return nil
821
}
822

823
// Checks the container is in the right state, then initializes the container in preparation to start the container.
824
// If recursive is true, each of the container's dependencies will be started.
825
// Otherwise, this function will return an error if there are dependencies of this container that aren't running.
826
func (c *Container) prepareToStart(ctx context.Context, recursive bool) (retErr error) {
827
	// Container must be created or stopped to be started
828
	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
829
		return fmt.Errorf("container %s must be in Created or Stopped state to be started: %w", c.ID(), define.ErrCtrStateInvalid)
830
	}
831

832
	if !recursive {
833
		if err := c.checkDependenciesAndHandleError(); err != nil {
834
			return err
835
		}
836
	} else {
837
		if err := c.startDependencies(ctx); err != nil {
838
			return err
839
		}
840
	}
841

842
	defer func() {
843
		if retErr != nil {
844
			if err := c.cleanup(ctx); err != nil {
845
				logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
846
			}
847
		}
848
	}()
849

850
	if err := c.prepare(); err != nil {
851
		return err
852
	}
853

854
	if c.state.State == define.ContainerStateStopped {
855
		// Reinitialize the container if we need to
856
		if err := c.reinit(ctx, false); err != nil {
857
			return err
858
		}
859
	} else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
860
		// Or initialize it if necessary
861
		if err := c.init(ctx, false); err != nil {
862
			return err
863
		}
864
	}
865
	return nil
866
}
867

868
// checks that dependencies are running and returns a helpful error if any are not
869
func (c *Container) checkDependenciesAndHandleError() error {
870
	notRunning, err := c.checkDependenciesRunning()
871
	if err != nil {
872
		return fmt.Errorf("checking dependencies for container %s: %w", c.ID(), err)
873
	}
874
	if len(notRunning) > 0 {
875
		depString := strings.Join(notRunning, ",")
876
		return fmt.Errorf("some dependencies of container %s are not started: %s: %w", c.ID(), depString, define.ErrCtrStateInvalid)
877
	}
878

879
	return nil
880
}
881

882
// Recursively start all dependencies of a container so the container can be started.
883
func (c *Container) startDependencies(ctx context.Context) error {
884
	depCtrIDs := c.Dependencies()
885
	if len(depCtrIDs) == 0 {
886
		return nil
887
	}
888

889
	depVisitedCtrs := make(map[string]*Container)
890
	if err := c.getAllDependencies(depVisitedCtrs); err != nil {
891
		return fmt.Errorf("starting dependency for container %s: %w", c.ID(), err)
892
	}
893

894
	// Because of how Go handles passing slices through functions, a slice cannot grow between function calls
895
// without clunky syntax. Circumvent this by translating the map to a slice for BuildContainerGraph
896
	depCtrs := make([]*Container, 0)
897
	for _, ctr := range depVisitedCtrs {
898
		depCtrs = append(depCtrs, ctr)
899
	}
900

901
	// Build a dependency graph of containers
902
	graph, err := BuildContainerGraph(depCtrs)
903
	if err != nil {
904
		return fmt.Errorf("generating dependency graph for container %s: %w", c.ID(), err)
905
	}
906

907
	// If there are no containers without dependencies, we can't start
908
	// Error out
909
	if len(graph.noDepNodes) == 0 {
910
		// we have no dependencies that need starting, go ahead and return
911
		if len(graph.nodes) == 0 {
912
			return nil
913
		}
914
		return fmt.Errorf("all dependencies have dependencies of %s: %w", c.ID(), define.ErrNoSuchCtr)
915
	}
916

917
	ctrErrors := make(map[string]error)
918
	ctrsVisited := make(map[string]bool)
919

920
	// Traverse the graph beginning at nodes with no dependencies
921
	for _, node := range graph.noDepNodes {
922
		startNode(ctx, node, false, ctrErrors, ctrsVisited, true)
923
	}
924

925
	if len(ctrErrors) > 0 {
926
		logrus.Errorf("Starting some container dependencies")
927
		for _, e := range ctrErrors {
928
			logrus.Errorf("%q", e)
929
		}
930
		return fmt.Errorf("starting some containers: %w", define.ErrInternal)
931
	}
932
	return nil
933
}
934

935
// getAllDependencies is a precursor to starting dependencies.
936
// To start a container with all of its dependencies, we need to recursively find all dependencies
937
// a container has, as well as each of those containers' dependencies, and so on
938
// To do so, keep track of containers already visited (so there aren't redundant state lookups),
939
// and recursively search until we have reached the leaves of every dependency node.
940
// Since we need to start all dependencies for our original container to successfully start, we propagate any errors
941
// in looking up dependencies.
942
// Note: this function is currently meant as a robust solution to a narrow problem: start an infra-container when
943
// a container in the pod is run. It has not been tested for performance past one level, so expansion of recursive start
944
// must be tested first.
945
func (c *Container) getAllDependencies(visited map[string]*Container) error {
946
	depIDs := c.Dependencies()
947
	if len(depIDs) == 0 {
948
		return nil
949
	}
950
	for _, depID := range depIDs {
951
		if _, ok := visited[depID]; !ok {
952
			dep, err := c.runtime.state.Container(depID)
953
			if err != nil {
954
				return err
955
			}
956
			status, err := dep.State()
957
			if err != nil {
958
				return err
959
			}
960
			// if the dependency is already running, we can assume its dependencies are also running
961
			// so no need to add them to those we need to start
962
			if status != define.ContainerStateRunning {
963
				visited[depID] = dep
964
				if err := dep.getAllDependencies(visited); err != nil {
965
					return err
966
				}
967
			}
968
		}
969
	}
970
	return nil
971
}
972

973
// Check if a container's dependencies are running
974
// Returns a []string containing the IDs of dependencies that are not running
975
func (c *Container) checkDependenciesRunning() ([]string, error) {
976
	deps := c.Dependencies()
977
	notRunning := []string{}
978

979
	// We were not passed a set of dependency containers
980
	// Make it ourselves
981
	depCtrs := make(map[string]*Container, len(deps))
982
	for _, dep := range deps {
983
		// Get the dependency container
984
		depCtr, err := c.runtime.state.Container(dep)
985
		if err != nil {
986
			return nil, fmt.Errorf("retrieving dependency %s of container %s from state: %w", dep, c.ID(), err)
987
		}
988

989
		// Check the status
990
		state, err := depCtr.State()
991
		if err != nil {
992
			return nil, fmt.Errorf("retrieving state of dependency %s of container %s: %w", dep, c.ID(), err)
993
		}
994
		if state != define.ContainerStateRunning && !depCtr.config.IsInfra {
995
			notRunning = append(notRunning, dep)
996
		}
997
		depCtrs[dep] = depCtr
998
	}
999

1000
	return notRunning, nil
1001
}
1002

1003
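// completeNetworkSetup finishes network configuration after the container is created in
// the runtime: it sets up the network namespace if it was deferred (PostConfigureNetNS)
// and writes the /etc/hosts and resolv.conf entries.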
func (c *Container) completeNetworkSetup() error {
1004
	netDisabled, err := c.NetworkDisabled()
1005
	if err != nil {
1006
		return err
1007
	}
1008
	if netDisabled {
1009
		// with net=none we still want to set up /etc/hosts
1010
		return c.addHosts()
1011
	}
1012
	if c.config.NetNsCtr != "" {
1013
		return nil
1014
	}
1015
	if c.config.PostConfigureNetNS {
1016
		if err := c.syncContainer(); err != nil {
1017
			return err
1018
		}
1019
		if err := c.runtime.setupNetNS(c); err != nil {
1020
			return err
1021
		}
1022
		if err := c.save(); err != nil {
1023
			return err
1024
		}
1025
	}
1026
	// add /etc/hosts entries
1027
	if err := c.addHosts(); err != nil {
1028
		return err
1029
	}
1030

1031
	return c.addResolvConf()
1032
}
1033

1034
// Initialize a container, creating it in the runtime
1035
func (c *Container) init(ctx context.Context, retainRetries bool) error {
1036
	// Unconditionally remove conmon temporary files.
1037
	// We've been running into far too many issues where they block startup.
1038
	if err := c.removeConmonFiles(); err != nil {
1039
		return err
1040
	}
1041

1042
	// Generate the OCI newSpec
1043
	newSpec, cleanupFunc, err := c.generateSpec(ctx)
1044
	if err != nil {
1045
		return err
1046
	}
1047
	defer cleanupFunc()
1048

1049
	// Make sure the workdir exists while initializing container
1050
	if err := c.resolveWorkDir(); err != nil {
1051
		return err
1052
	}
1053

1054
	// Save the OCI newSpec to disk
1055
	if err := c.saveSpec(newSpec); err != nil {
1056
		return err
1057
	}
1058

1059
	for _, v := range c.config.NamedVolumes {
1060
		if err := c.fixVolumePermissions(v); err != nil {
1061
			return err
1062
		}
1063
	}
1064

1065
	// To ensure that we don't lose track of Conmon if hit by a SIGTERM
1066
	// in the middle of setting up the container, inhibit shutdown signals
1067
	// until after we save Conmon's PID to the state.
1068
	// TODO: This can likely be removed once conmon-rs support merges.
1069
	shutdown.Inhibit()
1070
	defer shutdown.Uninhibit()
1071

1072
	// If the container is part of a pod, make sure the pod cgroup is created before the container
1073
	// so the limits can be applied.
1074
	if c.PodID() != "" {
1075
		pod, err := c.runtime.LookupPod(c.PodID())
1076
		if err != nil {
1077
			return err
1078
		}
1079

1080
		if _, err := c.runtime.platformMakePod(pod, &pod.config.ResourceLimits); err != nil {
1081
			return err
1082
		}
1083
	}
1084

1085
	// With the spec complete, do an OCI create
1086
	if _, err = c.ociRuntime.CreateContainer(c, nil); err != nil {
1087
		return err
1088
	}
1089

1090
	logrus.Debugf("Created container %s in OCI runtime", c.ID())
1091

1092
	// Remove any exec sessions leftover from a potential prior run.
1093
	if len(c.state.ExecSessions) > 0 {
1094
		if err := c.runtime.state.RemoveContainerExecSessions(c); err != nil {
1095
			logrus.Errorf("Removing container %s exec sessions from DB: %v", c.ID(), err)
1096
		}
1097
		c.state.ExecSessions = make(map[string]*ExecSession)
1098
	}
1099

1100
	c.state.Checkpointed = false
1101
	c.state.Restored = false
1102
	c.state.CheckpointedTime = time.Time{}
1103
	c.state.RestoredTime = time.Time{}
1104
	c.state.CheckpointPath = ""
1105
	c.state.CheckpointLog = ""
1106
	c.state.RestoreLog = ""
1107
	c.state.ExitCode = 0
1108
	c.state.Exited = false
1109
	c.state.State = define.ContainerStateCreated
1110
	c.state.StoppedByUser = false
1111
	c.state.RestartPolicyMatch = false
1112
	c.state.StartupHCFailureCount = 0
1113
	c.state.StartupHCSuccessCount = 0
1114
	c.state.StartupHCPassed = false
1115

1116
	if !retainRetries {
1117
		c.state.RestartCount = 0
1118
	}
1119

1120
	// bugzilla.redhat.com/show_bug.cgi?id=2144754:
1121
	// In case of a restart, make sure to remove the healthcheck log to
1122
	// have a clean state.
1123
	if path := c.healthCheckLogPath(); path != "" {
1124
		if err := os.Remove(path); err != nil && !errors.Is(err, os.ErrNotExist) {
1125
			logrus.Error(err)
1126
		}
1127
	}
1128

1129
	if err := c.save(); err != nil {
1130
		return err
1131
	}
1132

1133
	if c.config.HealthCheckConfig != nil {
1134
		timer := c.config.HealthCheckConfig.Interval.String()
1135
		if c.config.StartupHealthCheckConfig != nil {
1136
			timer = c.config.StartupHealthCheckConfig.Interval.String()
1137
		}
1138
		if err := c.createTimer(timer, c.config.StartupHealthCheckConfig != nil); err != nil {
1139
			logrus.Error(err)
1140
		}
1141
	}
1142

1143
	defer c.newContainerEvent(events.Init)
1144
	return c.completeNetworkSetup()
1145
}
1146

1147
// Clean up a container in the OCI runtime.
1148
// Deletes the container in the runtime, and resets its state to Exited.
1149
// The container can be restarted cleanly after this.
1150
func (c *Container) cleanupRuntime(ctx context.Context) error {
1151
	// If the container is not ContainerStateStopped or
1152
	// ContainerStateCreated, do nothing.
1153
	if !c.ensureState(define.ContainerStateStopped, define.ContainerStateCreated) {
1154
		return nil
1155
	}
1156

1157
	// We may be doing this redundantly for some call paths but we need to
1158
	// make sure the exit code is being read at this point.
1159
	if err := c.checkExitFile(); err != nil {
1160
		return err
1161
	}
1162

1163
	// If necessary, delete attach and ctl files
1164
	if err := c.removeConmonFiles(); err != nil {
1165
		return err
1166
	}
1167

1168
	if err := c.delete(ctx); err != nil {
1169
		return err
1170
	}
1171

1172
	// If we were Stopped, we are now Exited, as we've removed ourselves
1173
	// from the runtime.
1174
	// If we were Created, we are now Configured.
1175
	if c.state.State == define.ContainerStateStopped {
1176
		c.state.State = define.ContainerStateExited
1177
	} else if c.state.State == define.ContainerStateCreated {
1178
		c.state.State = define.ContainerStateConfigured
1179
	}
1180

1181
	if c.valid {
1182
		if err := c.save(); err != nil {
1183
			return err
1184
		}
1185
	}
1186

1187
	logrus.Debugf("Successfully cleaned up container %s", c.ID())
1188

1189
	return nil
1190
}
1191

1192
// Reinitialize a container.
1193
// Deletes and recreates a container in the runtime.
1194
// Should only be done on ContainerStateStopped containers.
1195
// Not necessary for ContainerStateExited - the container has already been
1196
// removed from the runtime, so init() can proceed freely.
1197
func (c *Container) reinit(ctx context.Context, retainRetries bool) error {
1198
	logrus.Debugf("Recreating container %s in OCI runtime", c.ID())
1199

1200
	if err := c.cleanupRuntime(ctx); err != nil {
1201
		return err
1202
	}
1203

1204
	// Initialize the container again
1205
	return c.init(ctx, retainRetries)
1206
}
1207

1208
// Initialize (if necessary) and start a container
1209
// Performs all necessary steps to start a container that is not running
1210
// Does not lock or check validity
1211
func (c *Container) initAndStart(ctx context.Context) (retErr error) {
1212
	// If we are ContainerStateUnknown, throw an error
1213
	if c.state.State == define.ContainerStateUnknown {
1214
		return fmt.Errorf("container %s is in an unknown state: %w", c.ID(), define.ErrCtrStateInvalid)
1215
	} else if c.state.State == define.ContainerStateRemoving {
1216
		return fmt.Errorf("cannot start container %s as it is being removed: %w", c.ID(), define.ErrCtrStateInvalid)
1217
	}
1218

1219
	// If we are running, do nothing
1220
	if c.state.State == define.ContainerStateRunning {
1221
		return nil
1222
	}
1223
	// If we are paused, throw an error
1224
	if c.state.State == define.ContainerStatePaused {
1225
		return fmt.Errorf("cannot start paused container %s: %w", c.ID(), define.ErrCtrStateInvalid)
1226
	}
1227

1228
	defer func() {
1229
		if retErr != nil {
1230
			if err := c.cleanup(ctx); err != nil {
1231
				logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
1232
			}
1233
		}
1234
	}()
1235

1236
	if err := c.prepare(); err != nil {
1237
		return err
1238
	}
1239

1240
	// If we are ContainerStateStopped we need to remove from runtime
1241
	// And reset to ContainerStateConfigured
1242
	if c.state.State == define.ContainerStateStopped {
1243
		logrus.Debugf("Recreating container %s in OCI runtime", c.ID())
1244

1245
		if err := c.reinit(ctx, false); err != nil {
1246
			return err
1247
		}
1248
	} else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
1249
		if err := c.init(ctx, false); err != nil {
1250
			return err
1251
		}
1252
	}
1253

1254
	// Now start the container
1255
	return c.start(ctx)
1256
}
1257

1258
// Internal, non-locking function to start a container
1259
func (c *Container) start(ctx context.Context) error {
1260
	if c.config.Spec.Process != nil {
1261
		logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
1262
	}
1263

1264
	if err := c.ociRuntime.StartContainer(c); err != nil {
1265
		return err
1266
	}
1267
	logrus.Debugf("Started container %s", c.ID())
1268

1269
	c.state.State = define.ContainerStateRunning
1270

1271
	// Unless being ignored, set the MAINPID to conmon.
1272
	if c.config.SdNotifyMode != define.SdNotifyModeIgnore {
1273
		payload := fmt.Sprintf("MAINPID=%d", c.state.ConmonPID)
1274
		if c.config.SdNotifyMode == define.SdNotifyModeConmon {
1275
			// Also send the READY message for the "conmon" policy.
1276
			payload += "\n"
1277
			payload += daemon.SdNotifyReady
1278
		}
1279
		if err := notifyproxy.SendMessage(c.config.SdNotifySocket, payload); err != nil {
1280
			logrus.Errorf("Notifying systemd of Conmon PID: %s", err.Error())
1281
		} else {
1282
			logrus.Debugf("Notify sent successfully")
1283
		}
1284
	}
1285

1286
	// Check if healthcheck is not nil and --no-healthcheck option is not set.
1287
	// If --no-healthcheck is set Test will be always set to `[NONE]` so no need
1288
	// to update status in such case.
1289
	if c.config.HealthCheckConfig != nil && !(len(c.config.HealthCheckConfig.Test) == 1 && c.config.HealthCheckConfig.Test[0] == "NONE") {
1290
		if err := c.updateHealthStatus(define.HealthCheckStarting); err != nil {
1291
			logrus.Error(err)
1292
		}
1293
		if err := c.startTimer(c.config.StartupHealthCheckConfig != nil); err != nil {
1294
			logrus.Error(err)
1295
		}
1296
	}
1297

1298
	c.newContainerEvent(events.Start)
1299

1300
	if err := c.save(); err != nil {
1301
		return err
1302
	}
1303

1304
	if c.config.SdNotifyMode != define.SdNotifyModeHealthy {
1305
		return nil
1306
	}
1307

1308
	// Wait for the container to turn healthy before sending the READY
1309
	// message.  This implies that we need to unlock and re-lock the
1310
	// container.
1311
	if !c.batched {
1312
		c.lock.Unlock()
1313
		defer c.lock.Lock()
1314
	}
1315

1316
	if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil {
1317
		return err
1318
	}
1319

1320
	if err := notifyproxy.SendMessage(c.config.SdNotifySocket, daemon.SdNotifyReady); err != nil {
1321
		logrus.Errorf("Sending READY message after turning healthy: %s", err.Error())
1322
	} else {
1323
		logrus.Debugf("Notify sent successfully")
1324
	}
1325
	return nil
1326
}
1327

1328
// Internal, non-locking function to stop container
1329
func (c *Container) stop(timeout uint) error {
1330
	logrus.Debugf("Stopping ctr %s (timeout %d)", c.ID(), timeout)
1331

1332
	// If the container is running in a PID Namespace, then killing the
1333
	// primary pid is enough to kill the container.  If it is not running in
1334
	// a pid namespace then the OCI Runtime needs to kill ALL processes in
1335
	// the container's cgroup in order to make sure the container is stopped.
1336
	all := !c.hasNamespace(spec.PIDNamespace)
1337
	// We can't use --all if Cgroups aren't present.
1338
	// Rootless containers with Cgroups v1 and NoCgroups are both cases
1339
	// where this can happen.
1340
	if all {
1341
		if c.config.NoCgroups {
1342
			all = false
1343
		} else if rootless.IsRootless() {
1344
			// Only do this check if we need to
1345
			unified, err := cgroups.IsCgroup2UnifiedMode()
1346
			if err != nil {
1347
				return err
1348
			}
1349
			if !unified {
1350
				all = false
1351
			}
1352
		}
1353
	}
1354

1355
	// OK, the following code looks a bit weird but we have to make sure we can stop
1356
	// containers with the restart policy "always"; to do this we have to set
1357
	// StoppedByUser even when there is nothing to stop right now. This is due to the
1358
	// cleanup process waiting on the container lock and then restarting it afterwards.
1359
	// shouldRestart() then checks for StoppedByUser and does not restart it.
1360
	// https://github.com/containers/podman/issues/18259
1361
	var cannotStopErr error
1362
	if c.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
1363
		cannotStopErr = define.ErrCtrStopped
1364
	} else if !c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopping) {
1365
		cannotStopErr = fmt.Errorf("can only stop created or running containers. %s is in state %s: %w", c.ID(), c.state.State.String(), define.ErrCtrStateInvalid)
1366
	}
1367

1368
	c.state.StoppedByUser = true
1369
	if cannotStopErr == nil {
1370
		// Set the container state to "stopping" and unlock the container
1371
		// before handing it over to conmon to unblock other commands.  #8501
1372
		// demonstrates nicely that a high stop timeout will block even simple
1373
		// commands such as `podman ps` from progressing if the container lock
1374
		// is held when busy-waiting for the container to be stopped.
1375
		c.state.State = define.ContainerStateStopping
1376
	}
1377
	if err := c.save(); err != nil {
1378
		rErr := fmt.Errorf("saving container %s state before stopping: %w", c.ID(), err)
1379
		if cannotStopErr == nil {
1380
			return rErr
1381
		}
1382
		// we return below with cannotStopErr
1383
		logrus.Error(rErr)
1384
	}
1385
	if cannotStopErr != nil {
1386
		return cannotStopErr
1387
	}
1388
	if !c.batched {
1389
		c.lock.Unlock()
1390
	}
1391

1392
	stopErr := c.ociRuntime.StopContainer(c, timeout, all)
1393

1394
	if !c.batched {
1395
		c.lock.Lock()
1396
		if err := c.syncContainer(); err != nil {
1397
			if errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved) {
1398
				// If the container has already been removed (e.g., via
1399
				// the cleanup process), set the container state to "stopped".
1400
				c.state.State = define.ContainerStateStopped
1401
				return stopErr
1402
			}
1403

1404
			if stopErr != nil {
1405
				logrus.Errorf("Syncing container %s status: %v", c.ID(), err)
1406
				return stopErr
1407
			}
1408
			return err
1409
		}
1410
	}
1411

1412
	// We have to check stopErr *after* we lock again - otherwise, we have a
1413
	// chance of panicking on a double-unlock. Ref: GH Issue 9615
1414
	if stopErr != nil {
1415
		return stopErr
1416
	}
1417

1418
	// Since we're now subject to a race condition with other processes who
1419
	// may have altered the state (and other data), let's check if the
1420
	// state has changed.  If so, we should return immediately and leave
1421
	// breadcrumbs for debugging if needed.
1422
	if c.state.State != define.ContainerStateStopping {
1423
		logrus.Debugf(
1424
			"Container %q state changed from %q to %q while waiting for it to be stopped: discontinuing stop procedure as another process interfered",
1425
			c.ID(), define.ContainerStateStopping, c.state.State,
1426
		)
1427
		return nil
1428
	}
1429

1430
	c.newContainerEvent(events.Stop)
1431
	return c.waitForConmonToExitAndSave()
1432
}
1433

1434
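// waitForConmonToExitAndSave syncs the container's exit state from the exit file once
// conmon has exited and saves the result to the database.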
func (c *Container) waitForConmonToExitAndSave() error {
1435
	conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
1436
	if err != nil {
1437
		return err
1438
	}
1439
	if !conmonAlive {
1440
		if err := c.checkExitFile(); err != nil {
1441
			return err
1442
		}
1443

1444
		return c.save()
1445
	}
1446

1447
	if err := c.save(); err != nil {
1448
		return fmt.Errorf("saving container %s state after stopping: %w", c.ID(), err)
1449
	}
1450

1451
	// Wait until we have an exit file, and sync once we do
1452
	if err := c.waitForExitFileAndSync(); err != nil {
1453
		return err
1454
	}
1455

1456
	return nil
1457
}
1458

1459
// Internal, non-locking function to pause a container
1460
func (c *Container) pause() error {
1461
	if c.config.NoCgroups {
1462
		return fmt.Errorf("cannot pause without using Cgroups: %w", define.ErrNoCgroups)
1463
	}
1464

1465
	if rootless.IsRootless() {
1466
		cgroupv2, err := cgroups.IsCgroup2UnifiedMode()
1467
		if err != nil {
1468
			return fmt.Errorf("failed to determine cgroupversion: %w", err)
1469
		}
1470
		if !cgroupv2 {
1471
			return fmt.Errorf("can not pause containers on rootless containers with cgroup V1: %w", define.ErrNoCgroups)
1472
		}
1473
	}
1474

1475
	if err := c.ociRuntime.PauseContainer(c); err != nil {
1476
		// TODO when using docker-py there is some sort of race/incompatibility here
1477
		return err
1478
	}
1479

1480
	logrus.Debugf("Paused container %s", c.ID())
1481

1482
	c.state.State = define.ContainerStatePaused
1483

1484
	return c.save()
1485
}
1486

1487
// Internal, non-locking function to unpause a container
1488
func (c *Container) unpause() error {
1489
	if c.config.NoCgroups {
1490
		return fmt.Errorf("cannot unpause without using Cgroups: %w", define.ErrNoCgroups)
1491
	}
1492

1493
	if err := c.ociRuntime.UnpauseContainer(c); err != nil {
1494
		// TODO when using docker-py there is some sort of race/incompatibility here
1495
		return err
1496
	}
1497

1498
	logrus.Debugf("Unpaused container %s", c.ID())
1499

1500
	c.state.State = define.ContainerStateRunning
1501

1502
	return c.save()
1503
}
1504

1505
// Internal, non-locking function to restart a container
1506
func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retErr error) {
1507
	if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateExited) {
1508
		return fmt.Errorf("unable to restart a container in a paused or unknown state: %w", define.ErrCtrStateInvalid)
1509
	}
1510

1511
	c.newContainerEvent(events.Restart)
1512

1513
	if c.state.State == define.ContainerStateRunning {
1514
		conmonPID := c.state.ConmonPID
1515
		if err := c.stop(timeout); err != nil {
1516
			return err
1517
		}
1518

1519
		if c.config.HealthCheckConfig != nil {
1520
			if err := c.removeTransientFiles(context.Background(), c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
1521
				logrus.Error(err.Error())
1522
			}
1523
		}
1524
		// Old versions of conmon have a bug where they create the exit file before
1525
		// closing open file descriptors causing a race condition when restarting
1526
		// containers with open ports since we cannot bind the ports as they're not
1527
		// yet closed by conmon.
1528
		//
1529
		// Killing the old conmon PID is ~okay since it forces the FDs of old conmons
1530
		// to be closed, while it's a NOP for newer versions which should have
1531
		// exited already.
1532
		if conmonPID != 0 {
1533
			// Ignore errors from FindProcess() as conmon could already have exited.
1534
			p, err := os.FindProcess(conmonPID)
1535
			if p != nil && err == nil {
1536
				if err = p.Kill(); err != nil {
1537
					logrus.Debugf("error killing conmon process: %v", err)
1538
				}
1539
			}
1540
		}
1541
		// Ensure we tear down the container network so it will be
1542
		// recreated - otherwise, behavior of restart differs from stop
1543
		// and start
1544
		if err := c.cleanupNetwork(); err != nil {
1545
			return err
1546
		}
1547
	}
1548
	defer func() {
1549
		if retErr != nil {
1550
			if err := c.cleanup(ctx); err != nil {
1551
				logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
1552
			}
1553
		}
1554
	}()
1555
	if err := c.prepare(); err != nil {
1556
		return err
1557
	}
1558

1559
	if c.state.State == define.ContainerStateStopped {
1560
		// Reinitialize the container if we need to
1561
		if err := c.reinit(ctx, false); err != nil {
1562
			return err
1563
		}
1564
	} else if c.state.State == define.ContainerStateConfigured ||
1565
		c.state.State == define.ContainerStateExited {
1566
		// Initialize the container
1567
		if err := c.init(ctx, false); err != nil {
1568
			return err
1569
		}
1570
	}
1571
	return c.start(ctx)
1572
}
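// Overall restart flow implemented above: stop the running container, kill any
// stale conmon so its inherited FDs (e.g. bound ports) are released, tear down
// the network so it is recreated, then prepare(), reinit()/init() depending on
// the current state, and finally start().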
1573

1574
// mountStorage sets up the container's root filesystem
1575
// It mounts the image and any other requested mounts
1576
// TODO: Add ability to override mount label so we can use this for Mount() too
1577
// TODO: Can we use this for export? Copying SHM into the export might not be
1578
// good
1579
func (c *Container) mountStorage() (_ string, deferredErr error) {
1580
	var err error
1581
	// Container already mounted, nothing to do
1582
	if c.state.Mounted {
1583
		mounted := true
1584
		if c.ensureState(define.ContainerStateExited) {
1585
			mounted, _ = mount.Mounted(c.state.Mountpoint)
1586
		}
1587
		if mounted {
1588
			return c.state.Mountpoint, nil
1589
		}
1590
	}
1591

1592
	if !c.config.NoShm {
1593
		mounted, err := mount.Mounted(c.config.ShmDir)
1594
		if err != nil {
1595
			return "", fmt.Errorf("unable to determine if %q is mounted: %w", c.config.ShmDir, err)
1596
		}
1597

1598
		if !mounted && !MountExists(c.config.Spec.Mounts, "/dev/shm") {
1599
			shmOptions := fmt.Sprintf("mode=1777,size=%d", c.config.ShmSize)
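			// For example, with a 64 MiB ShmSize this evaluates to
			// "mode=1777,size=67108864" (size shown is illustrative; the real
			// value comes from the container config).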
1600
			if err := c.mountSHM(shmOptions); err != nil {
1601
				return "", err
1602
			}
1603
			if err := idtools.SafeChown(c.config.ShmDir, c.RootUID(), c.RootGID()); err != nil {
1604
				return "", fmt.Errorf("failed to chown %s: %w", c.config.ShmDir, err)
1605
			}
1606
			defer func() {
1607
				if deferredErr != nil {
1608
					if err := c.unmountSHM(c.config.ShmDir); err != nil {
1609
						logrus.Errorf("Unmounting SHM for container %s after mount error: %v", c.ID(), err)
1610
					}
1611
				}
1612
			}()
1613
		}
1614
	}
1615

1616
	// We need to mount the container before volumes - to ensure the copyup
1617
	// works properly.
1618
	mountPoint := c.config.Rootfs
1619

1620
	if c.config.RootfsMapping != nil {
1621
		uidMappings, gidMappings, err := parseIDMapMountOption(c.config.IDMappings, *c.config.RootfsMapping)
1622
		if err != nil {
1623
			return "", err
1624
		}
1625

1626
		pid, cleanupFunc, err := idmap.CreateUsernsProcess(util.RuntimeSpecToIDtools(uidMappings), util.RuntimeSpecToIDtools(gidMappings))
1627
		if err != nil {
1628
			return "", err
1629
		}
1630
		defer cleanupFunc()
1631

1632
		if err := idmap.CreateIDMappedMount(c.config.Rootfs, c.config.Rootfs, pid); err != nil {
1633
			return "", fmt.Errorf("failed to create idmapped mount: %w", err)
1634
		}
1635
		defer func() {
1636
			if deferredErr != nil {
1637
				if err := unix.Unmount(c.config.Rootfs, 0); err != nil {
1638
					logrus.Errorf("Unmounting idmapped rootfs for container %s after mount error: %v", c.ID(), err)
1639
				}
1640
			}
1641
		}()
1642
	}
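	// Background on the idmapped-rootfs branch above: CreateUsernsProcess
	// spawns a helper process in a user namespace built from the parsed
	// UID/GID mappings, and CreateIDMappedMount then mounts the rootfs over
	// itself with that namespace applied, so ownership is shifted on the
	// mount (e.g. a host owner of 100000 could appear as 0 inside; the
	// example IDs are illustrative only).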
1643

1644
	// Check if overlay has to be created on top of Rootfs
1645
	if c.config.RootfsOverlay {
1646
		overlayDest := c.runtime.GraphRoot()
1647
		contentDir, err := overlay.GenerateStructure(overlayDest, c.ID(), "rootfs", c.RootUID(), c.RootGID())
1648
		if err != nil {
1649
			return "", fmt.Errorf("rootfs-overlay: failed to create TempDir in the %s directory: %w", overlayDest, err)
1650
		}
1651
		overlayMount, err := overlay.Mount(contentDir, c.config.Rootfs, overlayDest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
1652
		if err != nil {
1653
			return "", fmt.Errorf("rootfs-overlay: creating overlay failed %q: %w", c.config.Rootfs, err)
1654
		}
1655

1656
		// fuse-overlayfs does not appear to be in use;
		// fall back to the kernel's native overlay driver.
1658
		if overlayMount.Type == "overlay" {
1659
			overlayMount.Options = append(overlayMount.Options, "nodev")
1660
			mountOpts := label.FormatMountLabel(strings.Join(overlayMount.Options, ","), c.MountLabel())
1661
			err = mount.Mount("overlay", overlayMount.Source, overlayMount.Type, mountOpts)
1662
			if err != nil {
1663
				return "", fmt.Errorf("rootfs-overlay: creating overlay failed %q from native overlay: %w", c.config.Rootfs, err)
1664
			}
1665
		}
1666

1667
		mountPoint = overlayMount.Source
1668
		execUser, err := lookup.GetUserGroupInfo(mountPoint, c.config.User, nil)
1669
		if err != nil {
1670
			return "", err
1671
		}
1672
		hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
1673
		if err != nil {
1674
			return "", fmt.Errorf("unable to get host UID and host GID: %w", err)
1675
		}
1676

1677
		// Note: this is intentionally not recursive; when using an external
		// rootfs, the user is responsible for configuring ownership.
1678
		if err := chown.ChangeHostPathOwnership(mountPoint, false, int(hostUID), int(hostGID)); err != nil {
1679
			return "", err
1680
		}
1681
	}
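	// For reference, the native-overlay fallback above amounts to a mount
	// roughly like the following (paths illustrative, directories created by
	// overlay.GenerateStructure under the graph root):
	//
	//	mount -t overlay overlay \
	//	  -o lowerdir=<rootfs>,upperdir=<contentDir>/upper,workdir=<contentDir>/work,nodev <merged>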
1682

1683
	if mountPoint == "" {
1684
		mountPoint, err = c.mount()
1685
		if err != nil {
1686
			return "", err
1687
		}
1688
		defer func() {
1689
			if deferredErr != nil {
1690
				if err := c.unmount(false); err != nil {
1691
					logrus.Errorf("Unmounting container %s after mount error: %v", c.ID(), err)
1692
				}
1693
			}
1694
		}()
1695
	}
1696

1697
	rootUID, rootGID := c.RootUID(), c.RootGID()
1698

1699
	dirfd, err := openDirectory(mountPoint)
1700
	if err != nil {
1701
		return "", fmt.Errorf("open mount point: %w", err)
1702
	}
1703
	defer unix.Close(dirfd)
1704

1705
	err = unix.Mkdirat(dirfd, "etc", 0755)
1706
	if err != nil && !os.IsExist(err) {
1707
		return "", fmt.Errorf("create /etc: %w", err)
1708
	}
1709
	// If the etc directory was created, chown it to root in the container
1710
	if err == nil && (rootUID != 0 || rootGID != 0) {
1711
		err = unix.Fchownat(dirfd, "etc", rootUID, rootGID, unix.AT_SYMLINK_NOFOLLOW)
1712
		if err != nil {
1713
			return "", fmt.Errorf("chown /etc: %w", err)
1714
		}
1715
	}
1716

1717
	etcInTheContainerPath, err := securejoin.SecureJoin(mountPoint, "etc")
1718
	if err != nil {
1719
		return "", fmt.Errorf("resolve /etc in the container: %w", err)
1720
	}
1721

1722
	etcInTheContainerFd, err := openDirectory(etcInTheContainerPath)
1723
	if err != nil {
1724
		return "", fmt.Errorf("open /etc in the container: %w", err)
1725
	}
1726
	defer unix.Close(etcInTheContainerFd)
1727

1728
	if err := c.makePlatformMtabLink(etcInTheContainerFd, rootUID, rootGID); err != nil {
1729
		return "", err
1730
	}
1731

1732
	tz := c.Timezone()
1733
	localTimePath, err := timezone.ConfigureContainerTimeZone(tz, c.state.RunDir, mountPoint, etcInTheContainerPath, c.ID())
1734
	if err != nil {
1735
		return "", fmt.Errorf("configuring timezone for container %s: %w", c.ID(), err)
1736
	}
1737
	if localTimePath != "" {
1738
		if err := c.relabel(localTimePath, c.config.MountLabel, false); err != nil {
1739
			return "", err
1740
		}
1741
		if c.state.BindMounts == nil {
1742
			c.state.BindMounts = make(map[string]string)
1743
		}
1744
		c.state.BindMounts["/etc/localtime"] = localTimePath
1745
	}
1746

1747
	// Request a mount of all named volumes
1748
	for _, v := range c.config.NamedVolumes {
1749
		vol, err := c.mountNamedVolume(v, mountPoint)
1750
		if err != nil {
1751
			return "", err
1752
		}
1753
		defer func() {
1754
			if deferredErr == nil {
1755
				return
1756
			}
1757
			vol.lock.Lock()
1758
			if err := vol.unmount(false); err != nil {
1759
				logrus.Errorf("Unmounting volume %s after error mounting container %s: %v", vol.Name(), c.ID(), err)
1760
			}
1761
			vol.lock.Unlock()
1762
		}()
1763
	}
1764

1765
	return mountPoint, nil
1766
}
1767

1768
// Mount a single named volume into the container.
1769
// If necessary, copy up image contents into the volume.
1770
// Does not verify that the named volume given is actually present in the
// container's config.
1772
// Returns the volume that was mounted.
1773
func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string) (*Volume, error) {
1774
	logrus.Debugf("Going to mount named volume %s", v.Name)
1775
	vol, err := c.runtime.state.Volume(v.Name)
1776
	if err != nil {
1777
		return nil, fmt.Errorf("retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
1778
	}
1779

1780
	if vol.config.LockID == c.config.LockID {
1781
		return nil, fmt.Errorf("container %s and volume %s share lock ID %d: %w", c.ID(), vol.Name(), c.config.LockID, define.ErrWillDeadlock)
1782
	}
1783
	vol.lock.Lock()
1784
	defer vol.lock.Unlock()
1785
	if vol.needsMount() {
1786
		if err := vol.mount(); err != nil {
1787
			return nil, fmt.Errorf("mounting volume %s for container %s: %w", vol.Name(), c.ID(), err)
1788
		}
1789
	}
1790
	// The volume may need a copy-up. Check the state.
1791
	if err := vol.update(); err != nil {
1792
		return nil, err
1793
	}
1794
	_, hasNoCopy := vol.config.Options["nocopy"]
1795
	if vol.state.NeedsCopyUp && !slices.Contains(v.Options, "nocopy") && !hasNoCopy {
1796
		logrus.Debugf("Copying up contents from container %s to volume %s", c.ID(), vol.Name())
1797

1798
		srcDir, err := securejoin.SecureJoin(mountpoint, v.Dest)
1799
		if err != nil {
1800
			return nil, fmt.Errorf("calculating destination path to copy up container %s volume %s: %w", c.ID(), vol.Name(), err)
1801
		}
1802
		// Do a manual stat on the source directory to verify it exists;
		// skip the copy-up if it does not.
1804
		srcStat, err := os.Lstat(srcDir)
1805
		if err != nil {
1806
			if os.IsNotExist(err) {
1807
				// Source does not exist, don't bother copying
1808
				// up.
1809
				return vol, nil
1810
			}
1811
			return nil, fmt.Errorf("identifying source directory for copy up into volume %s: %w", vol.Name(), err)
1812
		}
1813
		// If the source is not a directory, the mount simply covers it and no
		// copy-up is needed.
1814
		if !srcStat.IsDir() {
1815
			return vol, nil
1816
		}
1817
		// Read the directory contents and do not bother continuing if it is
		// empty. Works around a bizarre issue where copier.Get sometimes
		// returns ENOENT on empty directories and sometimes does not.
		// RHBZ#1928643
1821
		srcContents, err := os.ReadDir(srcDir)
1822
		if err != nil {
1823
			return nil, fmt.Errorf("reading contents of source directory for copy up into volume %s: %w", vol.Name(), err)
1824
		}
1825
		if len(srcContents) == 0 {
1826
			return vol, nil
1827
		}
1828

1829
		// If the volume is not empty, we should not copy up.
1830
		volMount := vol.mountPoint()
1831
		contents, err := os.ReadDir(volMount)
1832
		if err != nil {
1833
			return nil, fmt.Errorf("listing contents of volume %s mountpoint when copying up from container %s: %w", vol.Name(), c.ID(), err)
1834
		}
1835
		if len(contents) > 0 {
1836
			// The volume is not empty. It was likely modified
1837
			// outside of Podman. For safety, let's not copy up into
1838
			// it. Fixes CVE-2020-1726.
1839
			return vol, nil
1840
		}
1841

1842
		// Set NeedsCopyUp to false since we are about to do the first
		// copy-up; it must not be repeated on later mounts.
1844
		vol.state.NeedsCopyUp = false
1845
		if err := vol.save(); err != nil {
1846
			return nil, err
1847
		}
1848

1849
		// Buildah Copier accepts a reader, so we'll need a pipe.
1850
		reader, writer := io.Pipe()
1851
		defer reader.Close()
1852

1853
		errChan := make(chan error, 1)
1854

1855
		logrus.Infof("About to copy up into volume %s", vol.Name())
1856

1857
		// Copy, container side: get a tar archive of what needs to be
1858
		// streamed into the volume.
1859
		go func() {
1860
			defer writer.Close()
1861
			getOptions := copier.GetOptions{
1862
				KeepDirectoryNames: false,
1863
			}
1864
			errChan <- copier.Get(srcDir, "", getOptions, []string{"/."}, writer)
1865
		}()
1866

1867
		// Copy, volume side: stream what we've written to the pipe, into
1868
		// the volume.
1869
		copyOpts := copier.PutOptions{}
1870
		if err := copier.Put(volMount, "", copyOpts, reader); err != nil {
1871
			err2 := <-errChan
1872
			if err2 != nil {
1873
				logrus.Errorf("Streaming contents of container %s directory for volume copy-up: %v", c.ID(), err2)
1874
			}
1875
			return nil, fmt.Errorf("copying up to volume %s: %w", vol.Name(), err)
1876
		}
1877

1878
		if err := <-errChan; err != nil {
1879
			return nil, fmt.Errorf("streaming container content for copy up into volume %s: %w", vol.Name(), err)
1880
		}
1881
	}
1882
	return vol, nil
1883
}
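// Copy-up can be suppressed per volume with the "nocopy" option checked above,
// e.g. (illustrative CLI usage):
//
//	podman run -v myvol:/data:nocopy alpine ls /data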
1884

1885
// cleanupStorage unmounts and cleans up the container's root filesystem
1886
func (c *Container) cleanupStorage() error {
1887
	if !c.state.Mounted {
1888
		// Already unmounted, do nothing
1889
		logrus.Debugf("Container %s storage is already unmounted, skipping...", c.ID())
1890
		return nil
1891
	}
1892

1893
	var cleanupErr error
1894
	reportErrorf := func(msg string, args ...any) {
1895
		err := fmt.Errorf(msg, args...) // Always use fmt.Errorf instead of just logrus.Errorf(…) because the format string probably contains %w
1896
		if cleanupErr == nil {
1897
			cleanupErr = err
1898
		} else {
1899
			logrus.Errorf("%s", err.Error())
1900
		}
1901
	}
1902

1903
	markUnmounted := func() {
1904
		c.state.Mountpoint = ""
1905
		c.state.Mounted = false
1906

1907
		if c.valid {
1908
			if err := c.save(); err != nil {
1909
				reportErrorf("unmounting container %s: %w", c.ID(), err)
1910
			}
1911
		}
1912
	}
1913

1914
	// umount rootfs overlay if it was created
1915
	if c.config.RootfsOverlay {
1916
		overlayBasePath := filepath.Dir(c.state.Mountpoint)
1917
		if err := overlay.Unmount(overlayBasePath); err != nil {
1918
			reportErrorf("failed to clean up overlay mounts for %s: %w", c.ID(), err)
1919
		}
1920
	}
1921
	if c.config.RootfsMapping != nil {
1922
		if err := unix.Unmount(c.config.Rootfs, 0); err != nil && err != unix.EINVAL {
1923
			reportErrorf("unmounting idmapped rootfs for container %s after mount error: %w", c.ID(), err)
1924
		}
1925
	}
1926

1927
	for _, containerMount := range c.config.Mounts {
1928
		if err := c.unmountSHM(containerMount); err != nil {
1929
			reportErrorf("unmounting container %s: %w", c.ID(), err)
1930
		}
1931
	}
1932

1933
	if err := c.cleanupOverlayMounts(); err != nil {
1934
		// If the container can't remove content report the error
1935
		reportErrorf("failed to clean up overlay mounts for %s: %w", c.ID(), err)
1936
	}
1937

1938
	if c.config.Rootfs != "" {
1939
		markUnmounted()
1940
		return cleanupErr
1941
	}
1942

1943
	if err := c.unmount(false); err != nil {
1944
		// If the container has already been removed, warn but don't
1945
		// error
1946
		// We still want to be able to kick the container out of the
1947
		// state
1948
		switch {
1949
		case errors.Is(err, storage.ErrLayerNotMounted):
1950
			logrus.Infof("Storage for container %s is not mounted: %v", c.ID(), err)
1951
		case errors.Is(err, storage.ErrNotAContainer) || errors.Is(err, storage.ErrContainerUnknown):
1952
			logrus.Warnf("Storage for container %s has been removed: %v", c.ID(), err)
1953
		default:
1954
			reportErrorf("cleaning up container %s storage: %w", c.ID(), err)
1955
		}
1956
	}
1957

1958
	// Request an unmount of all named volumes
1959
	for _, v := range c.config.NamedVolumes {
1960
		vol, err := c.runtime.state.Volume(v.Name)
1961
		if err != nil {
1962
			reportErrorf("retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
1963

1964
			// We need to try and unmount every volume, so continue
1965
			// if they fail.
1966
			continue
1967
		}
1968

1969
		if vol.needsMount() {
1970
			vol.lock.Lock()
1971
			if err := vol.unmount(false); err != nil {
1972
				reportErrorf("unmounting volume %s for container %s: %w", vol.Name(), c.ID(), err)
1973
			}
1974
			vol.lock.Unlock()
1975
		}
1976
	}
1977

1978
	markUnmounted()
1979
	return cleanupErr
1980
}
1981

1982
// Unmount the container and free its resources
1983
func (c *Container) cleanup(ctx context.Context) error {
1984
	var lastError error
1985

1986
	logrus.Debugf("Cleaning up container %s", c.ID())
1987

1988
	// Remove the healthcheck unit/timer file if it exists
1989
	if c.config.HealthCheckConfig != nil {
1990
		if err := c.removeTransientFiles(ctx, c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
1991
			logrus.Errorf("Removing timer for container %s healthcheck: %v", c.ID(), err)
1992
		}
1993
	}
1994

1995
	// Clean up network namespace, if present
1996
	if err := c.cleanupNetwork(); err != nil {
1997
		lastError = fmt.Errorf("removing container %s network: %w", c.ID(), err)
1998
	}
1999

2000
	// cleanup host entry if it is shared
2001
	if c.config.NetNsCtr != "" {
2002
		if hoststFile, ok := c.state.BindMounts[config.DefaultHostsFile]; ok {
2003
			if err := fileutils.Exists(hoststFile); err == nil {
2004
				// We cannot take the dependency container's lock here due to possible ABBA deadlocks.
2005
				if lock, err := lockfile.GetLockFile(hoststFile); err == nil {
2006
					lock.Lock()
2007
					// make sure to ignore ENOENT error in case the netns container was cleaned up before this one
2008
					if err := etchosts.Remove(hoststFile, getLocalhostHostEntry(c)); err != nil && !errors.Is(err, os.ErrNotExist) {
2009
						// this error is not fatal we still want to do proper cleanup
2010
						logrus.Errorf("failed to remove hosts entry from the netns containers /etc/hosts: %v", err)
2011
					}
2012
					lock.Unlock()
2013
				}
2014
			}
2015
		}
2016
	}
2017

2018
	// Remove the container from the runtime, if necessary.
2019
	// Do this *before* unmounting storage - some runtimes (e.g. Kata)
2020
	// apparently object to having storage removed while the container still
2021
	// exists.
2022
	if err := c.cleanupRuntime(ctx); err != nil {
2023
		if lastError != nil {
2024
			logrus.Errorf("Removing container %s from OCI runtime: %v", c.ID(), err)
2025
		} else {
2026
			lastError = err
2027
		}
2028
	}
2029

2030
	// Unmount storage
2031
	if err := c.cleanupStorage(); err != nil {
2032
		if lastError != nil {
2033
			logrus.Errorf("Unmounting container %s storage: %v", c.ID(), err)
2034
		} else {
2035
			lastError = fmt.Errorf("unmounting container %s storage: %w", c.ID(), err)
2036
		}
2037
	}
2038

2039
	// Unmount image volumes
2040
	for _, v := range c.config.ImageVolumes {
2041
		img, _, err := c.runtime.LibimageRuntime().LookupImage(v.Source, nil)
2042
		if err != nil {
2043
			if lastError == nil {
2044
				lastError = err
2045
				continue
2046
			}
2047
			logrus.Errorf("Unmounting image volume %q:%q :%v", v.Source, v.Dest, err)
2048
		}
2049
		if err := img.Unmount(false); err != nil {
2050
			if lastError == nil {
2051
				lastError = err
2052
				continue
2053
			}
2054
			logrus.Errorf("Unmounting image volume %q:%q :%v", v.Source, v.Dest, err)
2055
		}
2056
	}
2057

2058
	if err := c.stopPodIfNeeded(context.Background()); err != nil {
2059
		if lastError == nil {
2060
			lastError = err
2061
		} else {
2062
			logrus.Errorf("Stopping pod of container %s: %v", c.ID(), err)
2063
		}
2064
	}
2065

2066
	// Prune the exit codes of other containers during cleanup.
	// Since Podman is not a daemon, we have to clean them up somewhere, and
	// cleanup is a reasonable place as it is not performance critical.
2070
	if err := c.runtime.state.PruneContainerExitCodes(); err != nil {
2071
		if lastError == nil {
2072
			lastError = err
2073
		} else {
2074
			logrus.Errorf("Pruning container exit codes: %v", err)
2075
		}
2076
	}
2077

2078
	return lastError
2079
}
2080

2081
// If the container is part of a pod where only the infra container remains
2082
// running, attempt to stop the pod.
2083
func (c *Container) stopPodIfNeeded(ctx context.Context) error {
2084
	if c.config.Pod == "" {
2085
		return nil
2086
	}
2087

2088
	pod, err := c.runtime.state.Pod(c.config.Pod)
2089
	if err != nil {
2090
		return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
2091
	}
2092

2093
	switch pod.config.ExitPolicy {
2094
	case config.PodExitPolicyContinue:
2095
		return nil
2096

2097
	case config.PodExitPolicyStop:
2098
		// Use the runtime's work queue to stop the pod. This resolves
2099
		// a number of scenarios where we'd otherwise run into
2100
		// deadlocks.  For instance, during `pod stop`, the pod has
2101
		// already been locked.
2102
		// The work queue is a simple means without having to worry about
2103
		// future changes that may introduce more deadlock scenarios.
2104
		c.runtime.queueWork(func() {
2105
			if err := pod.stopIfOnlyInfraRemains(ctx, c.ID()); err != nil {
2106
				if !errors.Is(err, define.ErrNoSuchPod) {
2107
					logrus.Errorf("Checking if infra needs to be stopped: %v", err)
2108
				}
2109
			}
2110
		})
2111
	}
2112
	return nil
2113
}
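// The "stop" exit policy handled above is typically requested at pod creation
// time, e.g. (illustrative CLI usage):
//
//	podman pod create --exit-policy=stop mypod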
2114

2115
// delete deletes the container and runs any configured poststop
2116
// hooks.
2117
func (c *Container) delete(ctx context.Context) error {
2118
	if err := c.ociRuntime.DeleteContainer(c); err != nil {
2119
		return fmt.Errorf("removing container %s from runtime: %w", c.ID(), err)
2120
	}
2121

2122
	if err := c.postDeleteHooks(ctx); err != nil {
2123
		return fmt.Errorf("container %s poststop hooks: %w", c.ID(), err)
2124
	}
2125

2126
	return nil
2127
}
2128

2129
// postDeleteHooks runs the poststop hooks (if any) as specified by
2130
// the OCI Runtime Specification (which requires them to run
2131
// post-delete, despite the stage name).
2132
func (c *Container) postDeleteHooks(ctx context.Context) error {
2133
	if c.state.ExtensionStageHooks != nil {
2134
		extensionHooks, ok := c.state.ExtensionStageHooks["poststop"]
2135
		if ok {
2136
			state, err := json.Marshal(spec.State{
2137
				Version:     spec.Version,
2138
				ID:          c.ID(),
2139
				Status:      "stopped",
2140
				Bundle:      c.bundlePath(),
2141
				Annotations: c.config.Spec.Annotations,
2142
			})
2143
			if err != nil {
2144
				return err
2145
			}
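			// The marshalled state handed to each poststop hook on stdin is
			// the OCI runtime-spec state document, roughly (values
			// illustrative):
			//
			//	{"ociVersion": "...", "id": "<ctr id>", "status": "stopped",
			//	 "bundle": "<bundle path>", "annotations": {...}}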
2146
			for i, hook := range extensionHooks {
2147
				hook := hook
2148
				logrus.Debugf("container %s: invoke poststop hook %d, path %s", c.ID(), i, hook.Path)
2149
				var stderr, stdout bytes.Buffer
2150
				hookErr, err := exec.RunWithOptions(
2151
					ctx,
2152
					exec.RunOptions{
2153
						Hook:            &hook,
2154
						Dir:             c.bundlePath(),
2155
						State:           state,
2156
						Stdout:          &stdout,
2157
						Stderr:          &stderr,
2158
						PostKillTimeout: exec.DefaultPostKillTimeout,
2159
					},
2160
				)
2161
				if err != nil {
2162
					logrus.Warnf("Container %s: poststop hook %d: %v", c.ID(), i, err)
2163
					if hookErr != err {
2164
						logrus.Debugf("container %s: poststop hook %d (hook error): %v", c.ID(), i, hookErr)
2165
					}
2166
					stdoutString := stdout.String()
2167
					if stdoutString != "" {
2168
						logrus.Debugf("container %s: poststop hook %d: stdout:\n%s", c.ID(), i, stdoutString)
2169
					}
2170
					stderrString := stderr.String()
2171
					if stderrString != "" {
2172
						logrus.Debugf("container %s: poststop hook %d: stderr:\n%s", c.ID(), i, stderrString)
2173
					}
2174
				}
2175
			}
2176
		}
2177
	}
2178

2179
	return nil
2180
}
2181

2182
// writeStringToRundir writes the given string to a file with the given name in
2183
// the container's temporary files directory. The file will be chown'd to the
2184
// container's root user and have an appropriate SELinux label set.
2185
// If a file with the same name already exists, it will be deleted and recreated
2186
// with the new contents.
2187
// Returns the full path to the new file.
2188
func (c *Container) writeStringToRundir(destFile, contents string) (string, error) {
2189
	destFileName := filepath.Join(c.state.RunDir, destFile)
2190

2191
	if err := os.Remove(destFileName); err != nil && !os.IsNotExist(err) {
2192
		return "", fmt.Errorf("removing %s for container %s: %w", destFile, c.ID(), err)
2193
	}
2194

2195
	if err := writeStringToPath(destFileName, contents, c.config.MountLabel, c.RootUID(), c.RootGID()); err != nil {
2196
		return "", err
2197
	}
2198

2199
	return destFileName, nil
2200
}
2201

2202
// writeStringToStaticDir writes the given string to a file with the given name
2203
// in the container's permanent files directory. The file will be chown'd to the
2204
// container's root user and have an appropriate SELinux label set.
2205
// Unlike writeStringToRundir, will *not* delete and re-create if the file
2206
// already exists (will instead error).
2207
// Returns the full path to the new file.
2208
func (c *Container) writeStringToStaticDir(filename, contents string) (string, error) {
2209
	destFileName := filepath.Join(c.config.StaticDir, filename)
2210

2211
	if err := writeStringToPath(destFileName, contents, c.config.MountLabel, c.RootUID(), c.RootGID()); err != nil {
2212
		return "", err
2213
	}
2214

2215
	return destFileName, nil
2216
}
2217

2218
// saveSpec saves the OCI spec to disk, replacing any existing specs for the container
2219
func (c *Container) saveSpec(spec *spec.Spec) error {
2220
	// If the OCI spec already exists, we need to replace it
2221
	// Cannot guarantee some things, e.g. network namespaces, have the same
2222
	// paths
2223
	jsonPath := filepath.Join(c.bundlePath(), "config.json")
2224
	if err := fileutils.Exists(jsonPath); err != nil {
2225
		if !os.IsNotExist(err) {
2226
			return fmt.Errorf("doing stat on container %s spec: %w", c.ID(), err)
2227
		}
2228
		// The spec does not exist, we're fine
2229
	} else {
2230
		// The spec exists, need to remove it
2231
		if err := os.Remove(jsonPath); err != nil {
2232
			return fmt.Errorf("replacing runtime spec for container %s: %w", c.ID(), err)
2233
		}
2234
	}
2235

2236
	fileJSON, err := json.Marshal(spec)
2237
	if err != nil {
2238
		return fmt.Errorf("exporting runtime spec for container %s to JSON: %w", c.ID(), err)
2239
	}
2240
	if err := os.WriteFile(jsonPath, fileJSON, 0644); err != nil {
2241
		return fmt.Errorf("writing runtime spec JSON for container %s to disk: %w", c.ID(), err)
2242
	}
2243

2244
	logrus.Debugf("Created OCI spec for container %s at %s", c.ID(), jsonPath)
2245

2246
	c.state.ConfigPath = jsonPath
2247

2248
	return nil
2249
}
2250

2251
// Warning: precreate hooks may alter 'config' in place.
2252
func (c *Container) setupOCIHooks(ctx context.Context, config *spec.Spec) (map[string][]spec.Hook, error) {
2253
	allHooks := make(map[string][]spec.Hook)
2254
	if len(c.runtime.config.Engine.HooksDir.Get()) == 0 {
2255
		if rootless.IsRootless() {
2256
			return nil, nil
2257
		}
2258
		for _, hDir := range []string{hooks.DefaultDir, hooks.OverrideDir} {
2259
			manager, err := hooks.New(ctx, []string{hDir}, []string{"precreate", "poststop"})
2260
			if err != nil {
2261
				if os.IsNotExist(err) {
2262
					continue
2263
				}
2264
				return nil, err
2265
			}
2266
			ociHooks, err := manager.Hooks(config, c.config.Spec.Annotations, len(c.config.UserVolumes) > 0)
2267
			if err != nil {
2268
				return nil, err
2269
			}
2270
			if len(ociHooks) > 0 || config.Hooks != nil {
2271
				logrus.Warnf("Implicit hook directories are deprecated; set --ociHooks-dir=%q explicitly to continue to load ociHooks from this directory", hDir)
2272
			}
2273
			for i, hook := range ociHooks {
2274
				allHooks[i] = hook
2275
			}
2276
		}
2277
	} else {
2278
		manager, err := hooks.New(ctx, c.runtime.config.Engine.HooksDir.Get(), []string{"precreate", "poststop"})
2279
		if err != nil {
2280
			return nil, err
2281
		}
2282

2283
		allHooks, err = manager.Hooks(config, c.config.Spec.Annotations, len(c.config.UserVolumes) > 0)
2284
		if err != nil {
2285
			return nil, err
2286
		}
2287
	}
2288

2289
	hookErr, err := exec.RuntimeConfigFilterWithOptions(
2290
		ctx,
2291
		exec.RuntimeConfigFilterOptions{
2292
			Hooks:           allHooks["precreate"],
2293
			Dir:             c.bundlePath(),
2294
			Config:          config,
2295
			PostKillTimeout: exec.DefaultPostKillTimeout,
2296
		},
2297
	)
2298
	if err != nil {
2299
		logrus.Warnf("Container %s: precreate hook: %v", c.ID(), err)
2300
		if hookErr != nil && hookErr != err {
2301
			logrus.Debugf("container %s: precreate hook (hook error): %v", c.ID(), hookErr)
2302
		}
2303
		return nil, err
2304
	}
2305

2306
	return allHooks, nil
2307
}
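// For reference, each hooks directory consulted above holds JSON files in the
// OCI hooks 1.0.0 format, roughly (illustrative example; see oci-hooks(5)):
//
//	{
//	  "version": "1.0.0",
//	  "hook": {"path": "/usr/local/bin/my-hook", "args": ["my-hook"]},
//	  "when": {"always": true},
//	  "stages": ["precreate", "poststop"]
//	}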
2308

2309
// mount mounts the container's root filesystem
2310
func (c *Container) mount() (string, error) {
2311
	if c.state.State == define.ContainerStateRemoving {
2312
		return "", fmt.Errorf("cannot mount container %s as it is being removed: %w", c.ID(), define.ErrCtrStateInvalid)
2313
	}
2314

2315
	mountPoint, err := c.runtime.storageService.MountContainerImage(c.ID())
2316
	if err != nil {
2317
		return "", fmt.Errorf("mounting storage for container %s: %w", c.ID(), err)
2318
	}
2319
	mountPoint, err = filepath.EvalSymlinks(mountPoint)
2320
	if err != nil {
2321
		return "", fmt.Errorf("resolving storage path for container %s: %w", c.ID(), err)
2322
	}
2323
	if err := idtools.SafeChown(mountPoint, c.RootUID(), c.RootGID()); err != nil {
2324
		return "", fmt.Errorf("cannot chown %s to %d:%d: %w", mountPoint, c.RootUID(), c.RootGID(), err)
2325
	}
2326
	return mountPoint, nil
2327
}
2328

2329
// unmount unmounts the container's root filesystem
2330
func (c *Container) unmount(force bool) error {
2331
	// Also unmount storage
2332
	if _, err := c.runtime.storageService.UnmountContainerImage(c.ID(), force); err != nil {
2333
		return fmt.Errorf("unmounting container %s root filesystem: %w", c.ID(), err)
2334
	}
2335

2336
	return nil
2337
}
2338

2339
// checkReadyForRemoval checks whether the given container is ready to be
2340
// removed.
2341
// These checks are only used if force-remove is not specified.
2342
// If it is, we'll remove the container anyways.
2343
// Returns nil if safe to remove, or an error describing why it's unsafe if not.
2344
func (c *Container) checkReadyForRemoval() error {
2345
	if c.state.State == define.ContainerStateUnknown {
2346
		return fmt.Errorf("container %s is in invalid state: %w", c.ID(), define.ErrCtrStateInvalid)
2347
	}
2348

2349
	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping) && !c.IsInfra() {
2350
		return fmt.Errorf("cannot remove container %s as it is %s - running or paused containers cannot be removed without force: %w", c.ID(), c.state.State.String(), define.ErrCtrStateInvalid)
2351
	}
2352

2353
	// Check exec sessions
2354
	sessions, err := c.getActiveExecSessions()
2355
	if err != nil {
2356
		return err
2357
	}
2358
	if len(sessions) != 0 {
2359
		return fmt.Errorf("cannot remove container %s as it has active exec sessions: %w", c.ID(), define.ErrCtrStateInvalid)
2360
	}
2361

2362
	return nil
2363
}
2364

2365
// canWithPrevious checks whether the pre-checkpoint directory exists.
2366
func (c *Container) canWithPrevious() error {
2367
	return fileutils.Exists(c.PreCheckPointPath())
2368
}
2369

2370
// prepareCheckpointExport writes the config and spec to
2371
// JSON files for later export
2372
func (c *Container) prepareCheckpointExport() error {
2373
	networks, err := c.networks()
2374
	if err != nil {
2375
		return err
2376
	}
2377
	// make sure to exclude the short ID alias since the container gets a new ID on restore
2378
	for net, opts := range networks {
2379
		newAliases := make([]string, 0, len(opts.Aliases))
2380
		for _, alias := range opts.Aliases {
2381
			if alias != c.config.ID[:12] {
2382
				newAliases = append(newAliases, alias)
2383
			}
2384
		}
2385
		opts.Aliases = newAliases
2386
		networks[net] = opts
2387
	}
2388

2389
	// add the networks from the db to the config so that the exported checkpoint still stores all current networks
2390
	c.config.Networks = networks
2391
	// save live config
2392
	if _, err := metadata.WriteJSONFile(c.config, c.bundlePath(), metadata.ConfigDumpFile); err != nil {
2393
		return err
2394
	}
2395

2396
	// save spec
2397
	jsonPath := filepath.Join(c.bundlePath(), "config.json")
2398
	g, err := generate.NewFromFile(jsonPath)
2399
	if err != nil {
2400
		logrus.Debugf("generating spec for container %q failed with %v", c.ID(), err)
2401
		return err
2402
	}
2403
	if _, err := metadata.WriteJSONFile(g.Config, c.bundlePath(), metadata.SpecDumpFile); err != nil {
2404
		return err
2405
	}
2406

2407
	return nil
2408
}
2409

2410
// SortUserVolumes sorts the volumes specified for a container
2411
// between named and normal volumes
2412
func (c *Container) SortUserVolumes(ctrSpec *spec.Spec) ([]*ContainerNamedVolume, []spec.Mount) {
2413
	namedUserVolumes := []*ContainerNamedVolume{}
2414
	userMounts := []spec.Mount{}
2415

2416
	// We need to parse all named volumes and mounts into maps, so we don't
2417
	// end up with repeated lookups for each user volume.
2418
	// Map destination to struct, as destination is what is stored in
2419
	// UserVolumes.
2420
	namedVolumes := make(map[string]*ContainerNamedVolume)
2421
	mounts := make(map[string]spec.Mount)
2422
	for _, namedVol := range c.config.NamedVolumes {
2423
		namedVolumes[namedVol.Dest] = namedVol
2424
	}
2425
	for _, mount := range ctrSpec.Mounts {
2426
		mounts[mount.Destination] = mount
2427
	}
2428

2429
	for _, vol := range c.config.UserVolumes {
2430
		if volume, ok := namedVolumes[vol]; ok {
2431
			namedUserVolumes = append(namedUserVolumes, volume)
2432
		} else if mount, ok := mounts[vol]; ok {
2433
			userMounts = append(userMounts, mount)
2434
		} else {
2435
			logrus.Warnf("Could not find mount at destination %q when parsing user volumes for container %s", vol, c.ID())
2436
		}
2437
	}
2438
	return namedUserVolumes, userMounts
2439
}
2440

2441
// Check for an exit file, and handle one if present
2442
func (c *Container) checkExitFile() error {
2443
	// If the container's not running, nothing to do.
2444
	if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping) {
2445
		return nil
2446
	}
2447

2448
	exitFile, err := c.exitFilePath()
2449
	if err != nil {
2450
		return err
2451
	}
2452

2453
	// Check for the exit file
2454
	info, err := os.Stat(exitFile)
2455
	if err != nil {
2456
		if os.IsNotExist(err) {
2457
			// Container is still running, no error
2458
			return nil
2459
		}
2460

2461
		return fmt.Errorf("running stat on container %s exit file: %w", c.ID(), err)
2462
	}
2463

2464
	// Alright, it exists. Transition to Stopped state.
2465
	c.state.State = define.ContainerStateStopped
2466
	c.state.PID = 0
2467
	c.state.ConmonPID = 0
2468

2469
	// Read the exit file to get our stopped time and exit code.
2470
	return c.handleExitFile(exitFile, info)
2471
}
2472

2473
func (c *Container) hasNamespace(namespace spec.LinuxNamespaceType) bool {
2474
	if c.config.Spec == nil || c.config.Spec.Linux == nil {
2475
		return false
2476
	}
2477
	for _, n := range c.config.Spec.Linux.Namespaces {
2478
		if n.Type == namespace {
2479
			return true
2480
		}
2481
	}
2482
	return false
2483
}
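// Example use of hasNamespace (illustrative), checking whether the container
// was created with its own network namespace:
//
//	if c.hasNamespace(spec.NetworkNamespace) { ... }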
2484

2485
// extractSecretToCtrStorage copies a secret's data from the secrets manager to the container's static dir
2486
func (c *Container) extractSecretToCtrStorage(secr *ContainerSecret) error {
2487
	manager, err := c.runtime.SecretsManager()
2488
	if err != nil {
2489
		return err
2490
	}
2491
	_, data, err := manager.LookupSecretData(secr.Name)
2492
	if err != nil {
2493
		return err
2494
	}
2495
	secretFile := filepath.Join(c.config.SecretsPath, secr.Name)
2496

2497
	hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), secr.UID, secr.GID)
2498
	if err != nil {
2499
		return fmt.Errorf("unable to extract secret: %w", err)
2500
	}
2501
	err = os.WriteFile(secretFile, data, 0644)
2502
	if err != nil {
2503
		return fmt.Errorf("unable to create %s: %w", secretFile, err)
2504
	}
2505
	if err := idtools.SafeLchown(secretFile, int(hostUID), int(hostGID)); err != nil {
2506
		return err
2507
	}
2508
	if err := os.Chmod(secretFile, os.FileMode(secr.Mode)); err != nil {
2509
		return err
2510
	}
2511
	if err := c.relabel(secretFile, c.config.MountLabel, false); err != nil {
2512
		return err
2513
	}
2514
	return nil
2515
}
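// The secret file written above lives under c.config.SecretsPath/<name> on the
// host; at runtime it is typically bind-mounted into the container (by default
// under /run/secrets/<name>, subject to configuration).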
2516

2517
// Update a container's resources or restart policy after creation.
2518
// At least one of resources or restartPolicy must not be nil.
2519
func (c *Container) update(resources *spec.LinuxResources, restartPolicy *string, restartRetries *uint) error {
2520
	if resources == nil && restartPolicy == nil {
2521
		return fmt.Errorf("must provide at least one of resources and restartPolicy to update a container: %w", define.ErrInvalidArg)
2522
	}
2523
	if restartRetries != nil && restartPolicy == nil {
2524
		return fmt.Errorf("must provide restart policy if updating restart retries: %w", define.ErrInvalidArg)
2525
	}
2526

2527
	oldResources := c.config.Spec.Linux.Resources
2528
	oldRestart := c.config.RestartPolicy
2529
	oldRetries := c.config.RestartRetries
2530

2531
	if restartPolicy != nil {
2532
		if err := define.ValidateRestartPolicy(*restartPolicy); err != nil {
2533
			return err
2534
		}
2535

2536
		if restartRetries != nil {
2537
			if *restartPolicy != define.RestartPolicyOnFailure {
2538
				return fmt.Errorf("cannot set restart policy retries unless policy is on-failure: %w", define.ErrInvalidArg)
2539
			}
2540
		}
2541

2542
		c.config.RestartPolicy = *restartPolicy
2543
		if restartRetries != nil {
2544
			c.config.RestartRetries = *restartRetries
2545
		} else {
2546
			c.config.RestartRetries = 0
2547
		}
2548
	}
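	// Accepted restart policies mirror `podman run --restart`: "no" (or ""),
	// "always", "on-failure" (optionally with retries), and "unless-stopped";
	// validation itself is delegated to define.ValidateRestartPolicy above.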
2549

2550
	if resources != nil {
2551
		if c.config.Spec.Linux == nil {
2552
			c.config.Spec.Linux = new(spec.Linux)
2553
		}
2554
		c.config.Spec.Linux.Resources = resources
2555
	}
2556

2557
	if err := c.runtime.state.SafeRewriteContainerConfig(c, "", "", c.config); err != nil {
2558
		// Assume DB write failed, revert to old resources block
2559
		c.config.Spec.Linux.Resources = oldResources
2560
		c.config.RestartPolicy = oldRestart
2561
		c.config.RestartRetries = oldRetries
2562
		return err
2563
	}
2564

2565
	if c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStatePaused) && resources != nil {
2566
		// `podman inspect` on running containers sources its OCI spec from
		// disk, so to keep inspect accurate we need to update the on-disk OCI
		// spec as well.
2568
		onDiskSpec, err := c.specFromState()
2569
		if err != nil {
2570
			return fmt.Errorf("retrieving on-disk OCI spec to update: %w", err)
2571
		}
2572
		if onDiskSpec.Linux == nil {
2573
			onDiskSpec.Linux = new(spec.Linux)
2574
		}
2575
		onDiskSpec.Linux.Resources = resources
2576
		if err := c.saveSpec(onDiskSpec); err != nil {
2577
			logrus.Errorf("Unable to update container %s OCI spec - `podman inspect` may not be accurate until container is restarted: %v", c.ID(), err)
2578
		}
2579

2580
		if err := c.ociRuntime.UpdateContainer(c, resources); err != nil {
2581
			return err
2582
		}
2583
	}
2584

2585
	logrus.Debugf("updated container %s", c.ID())
2586

2587
	c.newContainerEvent(events.Update)
2588

2589
	return nil
2590
}