metadata "github.com/checkpoint-restore/checkpointctl/lib"
"github.com/containers/buildah/copier"
"github.com/containers/buildah/pkg/overlay"
butil "github.com/containers/buildah/util"
"github.com/containers/common/libnetwork/etchosts"
"github.com/containers/common/pkg/cgroups"
"github.com/containers/common/pkg/chown"
"github.com/containers/common/pkg/config"
"github.com/containers/common/pkg/hooks"
"github.com/containers/common/pkg/hooks/exec"
"github.com/containers/common/pkg/timezone"
cutil "github.com/containers/common/pkg/util"
"github.com/containers/podman/v5/libpod/define"
"github.com/containers/podman/v5/libpod/events"
"github.com/containers/podman/v5/libpod/shutdown"
"github.com/containers/podman/v5/pkg/ctime"
"github.com/containers/podman/v5/pkg/lookup"
"github.com/containers/podman/v5/pkg/rootless"
"github.com/containers/podman/v5/pkg/selinux"
"github.com/containers/podman/v5/pkg/systemd/notifyproxy"
"github.com/containers/podman/v5/pkg/util"
"github.com/containers/storage"
"github.com/containers/storage/pkg/chrootarchive"
"github.com/containers/storage/pkg/fileutils"
"github.com/containers/storage/pkg/idmap"
"github.com/containers/storage/pkg/idtools"
"github.com/containers/storage/pkg/lockfile"
"github.com/containers/storage/pkg/mount"
"github.com/coreos/go-systemd/v22/daemon"
securejoin "github.com/cyphar/filepath-securejoin"
spec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/sirupsen/logrus"
"golang.org/x/exp/slices"
"golang.org/x/sys/unix"
// name of the directory holding the artifacts
artifactsDir = "artifacts"
execDirPermission = 0755
preCheckpointDir = "pre-checkpoint"
// rootFsSize gets the size of the container, which can be divided notionally
// into two parts. The first is the part of its size that can be directly
// attributed to its base image, if it has one. The second is the set of
// changes that the container has had made relative to that base image. Both
// parts include some ancillary data, and we count that, too.
func (c *Container) rootFsSize() (int64, error) {
if c.config.Rootfs != "" {
if c.runtime.store == nil {
container, err := c.runtime.store.Container(c.ID())
if container.ImageID != "" {
size, err = c.runtime.store.ImageSize(container.ImageID)
layerSize, err := c.runtime.store.ContainerSize(c.ID())
return size + layerSize, err
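// Illustration (hypothetical numbers, not from this code): for a container
// based on a 120 MB image with 5 MB of copy-on-write changes, the result is
// roughly 125 MB; both terms also include the ancillary data that
// containers/storage counts for each part.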
// rwSize gets the combined size of the writeable layer and any ancillary data
// for a given container.
func (c *Container) rwSize() (int64, error) {
if c.config.Rootfs != "" {
size, err := util.SizeOfPath(c.config.Rootfs)
return int64(size), err
layerSize, err := c.runtime.store.ContainerSize(c.ID())
return layerSize, nil
// bundlePath returns the path to the container's root filesystem - where the OCI spec will be
// placed, amongst other things
func (c *Container) bundlePath() string {
if c.runtime.storageConfig.TransientStore {
return c.state.RunDir
return c.config.StaticDir
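// Note: with a transient store the bundle lives in the per-boot RunDir and is
// lost on reboot; otherwise it lives in the container's persistent StaticDir.
// (That RunDir is per-boot is an assumption about typical setups where the
// run directory sits on tmpfs.)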
// ControlSocketPath returns the path to the container's control socket for things like tty
func (c *Container) ControlSocketPath() string {
return filepath.Join(c.bundlePath(), "ctl")
// CheckpointVolumesPath returns the path to the directory containing the checkpointed volumes
func (c *Container) CheckpointVolumesPath() string {
return filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory)
// CheckpointPath returns the path to the directory containing the checkpoint
func (c *Container) CheckpointPath() string {
return filepath.Join(c.bundlePath(), metadata.CheckpointDirectory)
// PreCheckpointPath returns the path to the directory containing the pre-checkpoint-images
func (c *Container) PreCheckPointPath() string {
return filepath.Join(c.bundlePath(), preCheckpointDir)
// AttachSocketPath retrieves the path of the container's attach socket
func (c *Container) AttachSocketPath() (string, error) {
return c.ociRuntime.AttachSocketPath(c)
// exitFilePath gets the path to the container's exit file
func (c *Container) exitFilePath() (string, error) {
return c.ociRuntime.ExitFilePath(c)
func (c *Container) oomFilePath() (string, error) {
return c.ociRuntime.OOMFilePath(c)
// Wait for the container's exit file to appear.
// When it does, update our state based on it.
func (c *Container) waitForExitFileAndSync() error {
exitFile, err := c.exitFilePath()
chWait := make(chan error)
_, err = cutil.WaitForFile(exitFile, chWait, time.Second*5)
// Exit file did not appear
c.state.ExitCode = -1
c.state.FinishedTime = time.Now()
c.state.State = define.ContainerStateStopped
if err2 := c.save(); err2 != nil {
logrus.Errorf("Saving container %s state: %v", c.ID(), err2)
if err := c.checkExitFile(); err != nil {
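// If the exit file never shows up within the five-second window above, the
// container is marked Stopped with the sentinel exit code -1, meaning the
// real exit code could not be determined.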
// Handle the container exit file.
// The exit file is used to supply container exit time and exit code.
// This assumes the exit file already exists.
// Also check for an oom file to determine if the container was oom killed or not.
func (c *Container) handleExitFile(exitFile string, fi os.FileInfo) error {
c.state.FinishedTime = ctime.Created(fi)
statusCodeStr, err := os.ReadFile(exitFile)
return fmt.Errorf("failed to read exit file for container %s: %w", c.ID(), err)
statusCode, err := strconv.Atoi(string(statusCodeStr))
return fmt.Errorf("converting exit status code (%q) for container %s to int: %w",
statusCodeStr, c.ID(), err)
c.state.ExitCode = int32(statusCode)
oomFilePath, err := c.oomFilePath()
if err = fileutils.Exists(oomFilePath); err == nil {
c.state.OOMKilled = true
c.state.Exited = true
// Write an event for the container's death
c.newContainerExitedEvent(c.state.ExitCode)
return c.runtime.state.AddContainerExitCode(c.ID(), c.state.ExitCode)
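// OOM detection above is purely file-based: the runtime side is expected to
// drop an oom file next to the exit file, and its mere existence flags the
// container as OOM-killed; the file's contents are not inspected here.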
func (c *Container) shouldRestart() bool {
if c.config.HealthCheckOnFailureAction == define.HealthCheckOnFailureActionRestart {
isUnhealthy, err := c.isUnhealthy()
logrus.Errorf("Checking if container is unhealthy: %v", err)
} else if isUnhealthy {
// Explicitly stopped by user, do not restart again.
if c.state.StoppedByUser {
// If we did not get a restart policy match, return false.
// Do the same if our restart policy does not restart containers.
if !c.state.RestartPolicyMatch ||
c.config.RestartPolicy == define.RestartPolicyNo ||
c.config.RestartPolicy == define.RestartPolicyNone {
// If we're RestartPolicyOnFailure, we need to check retries and exit code.
if c.config.RestartPolicy == define.RestartPolicyOnFailure {
if c.state.ExitCode == 0 {
// If we don't have a max retries set, continue.
if c.config.RestartRetries > 0 {
if c.state.RestartCount >= c.config.RestartRetries {
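// Summary of the decision above, as implemented: never restart if the user
// explicitly stopped the container or the policy is "no"/"none"; for
// "on-failure", restart only on a non-zero exit code and only while
// RestartCount is below RestartRetries (when a retry limit is set).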
// Handle container restart policy.
// This is called when a container has exited, and was not explicitly stopped by
// an API call to stop the container or pod it is in.
func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr error) {
if !c.shouldRestart() {
logrus.Debugf("Restarting container %s due to restart policy %s", c.ID(), c.config.RestartPolicy)
// Need to check if dependencies are alive.
if err := c.checkDependenciesAndHandleError(); err != nil {
if c.config.HealthCheckConfig != nil {
if err := c.removeTransientFiles(ctx, c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
// Is the container running again?
// If so, we don't have to do anything
if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
} else if c.state.State == define.ContainerStateUnknown {
return false, fmt.Errorf("invalid container state encountered in restart attempt: %w", define.ErrInternal)
c.newContainerEvent(events.Restart)
// Increment restart count
c.state.RestartCount++
logrus.Debugf("Container %s now on retry %d", c.ID(), c.state.RestartCount)
if err := c.save(); err != nil {
if err := c.cleanup(ctx); err != nil {
logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
// Always tear down the network; trying to reuse the netns has caused
// a significant amount of bugs in this code here. It also never worked
// for containers with user namespaces. So once and for all simplify this
// by never reusing the netns. Originally this was done to have a faster
// restart of containers, but with netavark we are now much faster, so it
// shouldn't be that noticeable in practice. It also makes more sense to
// reconfigure the netns, as it is likely that the container exited due
// to some broken network state, in which case reusing it would just cause
// more harm than good.
if err := c.cleanupNetwork(); err != nil {
if err := c.prepare(); err != nil {
if c.state.State == define.ContainerStateStopped {
// Reinitialize the container if we need to
if err := c.reinit(ctx, true); err != nil {
} else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
// Initialize the container
if err := c.init(ctx, true); err != nil {
if err := c.start(ctx); err != nil {
// Ensure that the container is in one of a given set of states.
// Returns true if the container is in one of the given states,
// or false otherwise.
func (c *Container) ensureState(states ...define.ContainerStatus) bool {
for _, state := range states {
if state == c.state.State {
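// Typical usage, mirroring handleRestartPolicy above:
//
//	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
//		// already running or paused; no restart needed
//	}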
// Sync this container with on-disk state and runtime status.
// Should only be called with the container lock held.
// This function should suffice to ensure a container's state is accurate and
// it is valid for use.
func (c *Container) syncContainer() error {
if err := c.runtime.state.UpdateContainer(c); err != nil {
// If the runtime knows about the container, update its status in the runtime
// and then save back to disk.
if c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateStopping, define.ContainerStatePaused) {
oldState := c.state.State
if err := c.checkExitFile(); err != nil {
// Only save back to DB if state changed
if c.state.State != oldState {
// Check for a restart policy match
if c.config.RestartPolicy != define.RestartPolicyNone && c.config.RestartPolicy != define.RestartPolicyNo &&
(oldState == define.ContainerStateRunning || oldState == define.ContainerStatePaused) &&
(c.state.State == define.ContainerStateStopped || c.state.State == define.ContainerStateExited) &&
!c.state.StoppedByUser {
c.state.RestartPolicyMatch = true
if err := c.save(); err != nil {
return fmt.Errorf("container %s is not valid: %w", c.ID(), define.ErrCtrRemoved)
func (c *Container) setupStorageMapping(dest, from *storage.IDMappingOptions) {
// If we are creating a container inside a pod, we always want to inherit the
// userns settings from the infra container. So clear the auto userns settings
// so that we don't request storage for a new uid/gid map.
if c.PodID() != "" && !c.IsInfra() {
dest.AutoUserNs = false
overrides := c.getUserOverrides()
dest.AutoUserNsOpts.PasswdFile = overrides.ContainerEtcPasswdPath
dest.AutoUserNsOpts.GroupFile = overrides.ContainerEtcGroupPath
if c.config.User != "" {
initialSize := uint32(0)
parts := strings.Split(c.config.User, ":")
for _, p := range parts {
s, err := strconv.ParseUint(p, 10, 32)
if err == nil && uint32(s) > initialSize {
initialSize = uint32(s)
dest.AutoUserNsOpts.InitialSize = initialSize + 1
} else if c.config.Spec.Linux != nil {
for _, r := range c.config.Spec.Linux.UIDMappings {
ContainerID: int(r.ContainerID),
HostID: int(r.HostID),
dest.UIDMap = append(dest.UIDMap, u)
for _, r := range c.config.Spec.Linux.GIDMappings {
ContainerID: int(r.ContainerID),
HostID: int(r.HostID),
dest.GIDMap = append(dest.GIDMap, g)
dest.HostUIDMapping = false
dest.HostGIDMapping = false
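// Worked example for the InitialSize logic above: with c.config.User set to
// "1000:1000", both parts parse to 1000, so InitialSize becomes 1001, just
// large enough that UID/GID 1000 fits inside the auto user namespace.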
// Create container root filesystem for use
func (c *Container) setupStorage(ctx context.Context) error {
return fmt.Errorf("container %s is not valid: %w", c.ID(), define.ErrCtrRemoved)
if c.state.State != define.ContainerStateConfigured {
return fmt.Errorf("container %s must be in Configured state to have storage set up: %w", c.ID(), define.ErrCtrStateInvalid)
// Need both an image ID and image name, plus a bool telling us whether to use the image configuration
if c.config.Rootfs == "" && (c.config.RootfsImageID == "" || c.config.RootfsImageName == "") {
return fmt.Errorf("must provide image ID and image name to use an image: %w", define.ErrInvalidArg)
options := storage.ContainerOptions{
IDMappingOptions: storage.IDMappingOptions{
HostUIDMapping: true,
HostGIDMapping: true,
LabelOpts: c.config.LabelOpts,
options.StorageOpt = c.config.StorageOpts
if c.restoreFromCheckpoint && c.config.ProcessLabel != "" && c.config.MountLabel != "" {
// If restoring from a checkpoint, the root file-system needs
// to be mounted with the same SELinux labels as it was mounted
// previously. But only if both labels have been set. For
// privileged containers or '--ipc host' only ProcessLabel will
// be set and so we will skip it for cases like that.
if options.Flags == nil {
options.Flags = make(map[string]interface{})
options.Flags["ProcessLabel"] = c.config.ProcessLabel
options.Flags["MountLabel"] = c.config.MountLabel
if c.config.Privileged {
privOpt := func(opt string) bool {
for _, privopt := range []string{"nodev", "nosuid", "noexec"} {
defOptions, err := storage.GetMountOptions(c.runtime.store.GraphDriverName(), c.runtime.store.GraphOptions())
return fmt.Errorf("getting default mount options: %w", err)
var newOptions []string
for _, opt := range defOptions {
newOptions = append(newOptions, opt)
options.MountOpts = newOptions
options.Volatile = c.config.Volatile
c.setupStorageMapping(&options.IDMappingOptions, &c.config.IDMappings)
// Unless the user has specified a name, use a randomly generated one.
// Note that name conflicts may occur (see #11735), so we need to loop.
generateName := c.config.Name == ""
var containerInfo ContainerInfo
var containerInfoErr error
name, err := c.runtime.generateName()
containerInfo, containerInfoErr = c.runtime.storageService.CreateContainerStorage(ctx, c.runtime.imageContext, c.config.RootfsImageName, c.config.RootfsImageID, c.config.Name, c.config.ID, options)
if !generateName || !errors.Is(containerInfoErr, storage.ErrDuplicateName) {
if containerInfoErr != nil {
if errors.Is(containerInfoErr, storage.ErrDuplicateName) {
if _, err := c.runtime.LookupContainer(c.config.Name); errors.Is(err, define.ErrNoSuchCtr) {
return fmt.Errorf("creating container storage: %w by an external entity", containerInfoErr)
return fmt.Errorf("creating container storage: %w", containerInfoErr)
// Only reconfigure IDMappings if the layer was mounted from storage.
// If it's an external overlay, do not reset IDMappings.
if !c.config.RootfsOverlay {
c.config.IDMappings.UIDMap = containerInfo.UIDMap
c.config.IDMappings.GIDMap = containerInfo.GIDMap
processLabel, err := c.processLabel(containerInfo.ProcessLabel)
c.config.ProcessLabel = processLabel
c.config.MountLabel = containerInfo.MountLabel
c.config.StaticDir = containerInfo.Dir
c.state.RunDir = containerInfo.RunDir
if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 {
if err := idtools.SafeChown(containerInfo.RunDir, c.RootUID(), c.RootGID()); err != nil {
if err := idtools.SafeChown(containerInfo.Dir, c.RootUID(), c.RootGID()); err != nil {
// Set the default Entrypoint and Command
if containerInfo.Config != nil {
// Set CMD in the container to the default configuration only if ENTRYPOINT is not set by the user.
if c.config.Entrypoint == nil && c.config.Command == nil {
c.config.Command = containerInfo.Config.Config.Cmd
if c.config.Entrypoint == nil {
c.config.Entrypoint = containerInfo.Config.Config.Entrypoint
artifacts := filepath.Join(c.config.StaticDir, artifactsDir)
if err := os.MkdirAll(artifacts, 0755); err != nil {
return fmt.Errorf("creating artifacts directory: %w", err)
func (c *Container) processLabel(processLabel string) (string, error) {
if !c.Systemd() && !c.ociRuntime.SupportsKVM() {
return processLabel, nil
ctrSpec, err := c.specFromState()
label, ok := ctrSpec.Annotations[define.InspectAnnotationLabel]
if !ok || !strings.Contains(label, "type:") {
case c.ociRuntime.SupportsKVM():
return selinux.KVMLabel(processLabel)
return selinux.InitLabel(processLabel)
return processLabel, nil
// Tear down a container's storage prior to removal
func (c *Container) teardownStorage() error {
if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused) {
return fmt.Errorf("cannot remove storage for container %s as it is running or paused: %w", c.ID(), define.ErrCtrStateInvalid)
artifacts := filepath.Join(c.config.StaticDir, artifactsDir)
if err := os.RemoveAll(artifacts); err != nil {
return fmt.Errorf("removing container %s artifacts %q: %w", c.ID(), artifacts, err)
if err := c.cleanupStorage(); err != nil {
return fmt.Errorf("failed to clean up container %s storage: %w", c.ID(), err)
if err := c.runtime.storageService.DeleteContainer(c.ID()); err != nil {
// If the container has already been removed, warn but do not
// error - we wanted it gone, it is already gone.
// Potentially another tool using containers/storage already
// removed it?
if errors.Is(err, storage.ErrNotAContainer) || errors.Is(err, storage.ErrContainerUnknown) {
logrus.Infof("Storage for container %s already removed", c.ID())
return fmt.Errorf("removing container %s root filesystem: %w", c.ID(), err)
// Reset resets state fields to default values.
// It is performed before a refresh and clears the state after a reboot.
// It does not save the results - assumes the database will do that for us.
func resetContainerState(state *ContainerState) {
state.Mountpoint = ""
state.Mounted = false
// Almost all states are reset to either Configured or Exited,
// except ContainerStateRemoving which is preserved.
case define.ContainerStateStopped, define.ContainerStateExited, define.ContainerStateStopping, define.ContainerStateRunning, define.ContainerStatePaused:
// All containers that ran at any point during the last boot
// must be placed in the Exited state.
state.State = define.ContainerStateExited
case define.ContainerStateConfigured, define.ContainerStateCreated:
state.State = define.ContainerStateConfigured
case define.ContainerStateUnknown:
// Something really strange must have happened to get us here.
// Reset to configured, maybe the reboot cleared things up?
state.State = define.ContainerStateConfigured
state.ExecSessions = make(map[string]*ExecSession)
state.LegacyExecSessions = nil
state.BindMounts = make(map[string]string)
state.StoppedByUser = false
state.RestartPolicyMatch = false
state.RestartCount = 0
state.Checkpointed = false
state.Restored = false
state.CheckpointedTime = time.Time{}
state.RestoredTime = time.Time{}
state.CheckpointPath = ""
state.CheckpointLog = ""
state.RestoreLog = ""
state.StartupHCPassed = false
state.StartupHCSuccessCount = 0
state.StartupHCFailureCount = 0
state.NetworkStatus = nil
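// Everything reset above is runtime state: mounts, exec sessions, exit and
// checkpoint bookkeeping, restart and health-check counters. The container's
// config is deliberately left untouched, so the container can be rebuilt
// from the database after a reboot.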
// Refresh refreshes the container's state after a restart.
// Refresh cannot perform any operations that would lock another container.
// We cannot guarantee any other container has a valid lock at the time it is
// running.
func (c *Container) refresh() error {
// Don't need a full sync, but we do need to update from the database to
// pick up potentially-missing container state
if err := c.runtime.state.UpdateContainer(c); err != nil {
return fmt.Errorf("container %s is not valid - may have been removed: %w", c.ID(), define.ErrCtrRemoved)
// We need to get the container's temporary directory from c/storage.
// It was lost in the reboot and must be recreated.
dir, err := c.runtime.storageService.GetRunDir(c.ID())
return fmt.Errorf("retrieving temporary directory for container %s: %w", c.ID(), err)
if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 {
info, err := os.Stat(c.runtime.config.Engine.TmpDir)
if err := os.Chmod(c.runtime.config.Engine.TmpDir, info.Mode()|0111); err != nil {
root := filepath.Join(c.runtime.config.Engine.TmpDir, "containers-root", c.ID())
if err := os.MkdirAll(root, 0755); err != nil {
return fmt.Errorf("creating userNS tmpdir for container %s: %w", c.ID(), err)
if err := idtools.SafeChown(root, c.RootUID(), c.RootGID()); err != nil {
// We need to pick up a new lock
lock, err := c.runtime.lockManager.AllocateAndRetrieveLock(c.config.LockID)
return fmt.Errorf("acquiring lock %d for container %s: %w", c.config.LockID, c.ID(), err)
c.state.NetworkStatus = nil
// Rewrite the config if necessary.
// Podman 4.0 uses a new port format in the config.
// getContainerConfigFromDB() already converted the old ports to the new one
// but it did not write the config back to the db for performance reasons.
// If a rewrite must happen, the config.rewrite field is set to true.
if c.config.rewrite {
// SafeRewriteContainerConfig must be used with care. Make sure to not change config fields by accident.
if err := c.runtime.state.SafeRewriteContainerConfig(c, "", "", c.config); err != nil {
return fmt.Errorf("failed to rewrite the config for container %s: %w", c.config.ID, err)
c.config.rewrite = false
if err := c.save(); err != nil {
return fmt.Errorf("refreshing state for container %s: %w", c.ID(), err)
// Remove ctl and attach files, which may persist across reboot
if err := c.removeConmonFiles(); err != nil {
// Remove conmon attach socket and terminal resize FIFO
// This is necessary for restarting containers
func (c *Container) removeConmonFiles() error {
// Files are allowed to not exist, so ignore ENOENT
attachFile, err := c.AttachSocketPath()
return fmt.Errorf("failed to get attach socket path for container %s: %w", c.ID(), err)
if err := os.Remove(attachFile); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("removing container %s attach file: %w", c.ID(), err)
ctlFile := filepath.Join(c.bundlePath(), "ctl")
if err := os.Remove(ctlFile); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("removing container %s ctl file: %w", c.ID(), err)
winszFile := filepath.Join(c.bundlePath(), "winsz")
if err := os.Remove(winszFile); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("removing container %s winsz file: %w", c.ID(), err)
// Remove the exit file so we don't leak memory in tmpfs
exitFile, err := c.exitFilePath()
if err := os.Remove(exitFile); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("removing container %s exit file: %w", c.ID(), err)
// Remove the oom file
oomFile, err := c.oomFilePath()
if err := os.Remove(oomFile); err != nil && !errors.Is(err, fs.ErrNotExist) {
return fmt.Errorf("removing container %s oom file: %w", c.ID(), err)
func (c *Container) export(out io.Writer) error {
mountPoint := c.state.Mountpoint
if !c.state.Mounted {
containerMount, err := c.runtime.store.Mount(c.ID(), c.config.MountLabel)
return fmt.Errorf("mounting container %q: %w", c.ID(), err)
mountPoint = containerMount
if _, err := c.runtime.store.Unmount(c.ID(), false); err != nil {
logrus.Errorf("Unmounting container %q: %v", c.ID(), err)
input, err := chrootarchive.Tar(mountPoint, nil, mountPoint)
return fmt.Errorf("reading container directory %q: %w", c.ID(), err)
_, err = io.Copy(out, input)
// Get path of artifact with a given name for this container
func (c *Container) getArtifactPath(name string) string {
return filepath.Join(c.config.StaticDir, artifactsDir, name)
// save container state to the database
func (c *Container) save() error {
if err := c.runtime.state.SaveContainer(c); err != nil {
return fmt.Errorf("saving container %s state: %w", c.ID(), err)
// Checks that the container is in the right state, then initializes it in preparation for starting.
// If recursive is true, each of the container's dependencies will be started.
// Otherwise, this function will return with error if there are dependencies of this container that aren't running.
func (c *Container) prepareToStart(ctx context.Context, recursive bool) (retErr error) {
// Container must be created or stopped to be started
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateStopped, define.ContainerStateExited) {
return fmt.Errorf("container %s must be in Created or Stopped state to be started: %w", c.ID(), define.ErrCtrStateInvalid)
if err := c.checkDependenciesAndHandleError(); err != nil {
if err := c.startDependencies(ctx); err != nil {
if err := c.cleanup(ctx); err != nil {
logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
if err := c.prepare(); err != nil {
if c.state.State == define.ContainerStateStopped {
// Reinitialize the container if we need to
if err := c.reinit(ctx, false); err != nil {
} else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
// Or initialize it if necessary
if err := c.init(ctx, false); err != nil {
// checks dependencies are running and prints a helpful message
func (c *Container) checkDependenciesAndHandleError() error {
notRunning, err := c.checkDependenciesRunning()
return fmt.Errorf("checking dependencies for container %s: %w", c.ID(), err)
if len(notRunning) > 0 {
depString := strings.Join(notRunning, ",")
return fmt.Errorf("some dependencies of container %s are not started: %s: %w", c.ID(), depString, define.ErrCtrStateInvalid)
// Recursively start all dependencies of a container so the container can be started.
func (c *Container) startDependencies(ctx context.Context) error {
depCtrIDs := c.Dependencies()
if len(depCtrIDs) == 0 {
depVisitedCtrs := make(map[string]*Container)
if err := c.getAllDependencies(depVisitedCtrs); err != nil {
return fmt.Errorf("starting dependency for container %s: %w", c.ID(), err)
// Because of how Go handles passing slices through functions, a slice cannot grow between function calls
// without clunky syntax. Circumvent this by translating the map to a slice for buildContainerGraph.
depCtrs := make([]*Container, 0)
for _, ctr := range depVisitedCtrs {
depCtrs = append(depCtrs, ctr)
// Build a dependency graph of containers
graph, err := BuildContainerGraph(depCtrs)
return fmt.Errorf("generating dependency graph for container %s: %w", c.ID(), err)
// If there are no containers without dependencies, we can't start
if len(graph.noDepNodes) == 0 {
// we have no dependencies that need starting, go ahead and return
if len(graph.nodes) == 0 {
return fmt.Errorf("all dependencies have dependencies of %s: %w", c.ID(), define.ErrNoSuchCtr)
ctrErrors := make(map[string]error)
ctrsVisited := make(map[string]bool)
// Traverse the graph beginning at nodes with no dependencies
for _, node := range graph.noDepNodes {
startNode(ctx, node, false, ctrErrors, ctrsVisited, true)
if len(ctrErrors) > 0 {
logrus.Errorf("Starting some container dependencies")
for _, e := range ctrErrors {
logrus.Errorf("%q", e)
return fmt.Errorf("starting some containers: %w", define.ErrInternal)
// getAllDependencies is a precursor to starting dependencies.
// To start a container with all of its dependencies, we need to recursively find all dependencies
// a container has, as well as each of those containers' dependencies, and so on.
// To do so, keep track of containers already visited (so there aren't redundant state lookups),
// and recursively search until we have reached the leaves of every dependency node.
// Since we need to start all dependencies for our original container to successfully start, we propagate any errors
// in looking up dependencies.
// Note: this function is currently meant as a robust solution to a narrow problem: start an infra-container when
// a container in the pod is run. It has not been tested for performance past one level, so expansion of recursive start
// must be tested first.
func (c *Container) getAllDependencies(visited map[string]*Container) error {
depIDs := c.Dependencies()
if len(depIDs) == 0 {
for _, depID := range depIDs {
if _, ok := visited[depID]; !ok {
dep, err := c.runtime.state.Container(depID)
status, err := dep.State()
// if the dependency is already running, we can assume its dependencies are also running
// so no need to add them to those we need to start
if status != define.ContainerStateRunning {
if err := dep.getAllDependencies(visited); err != nil {
// Check if a container's dependencies are running
// Returns a []string containing the IDs of dependencies that are not running
func (c *Container) checkDependenciesRunning() ([]string, error) {
deps := c.Dependencies()
notRunning := []string{}
// We were not passed a set of dependency containers
depCtrs := make(map[string]*Container, len(deps))
for _, dep := range deps {
// Get the dependency container
depCtr, err := c.runtime.state.Container(dep)
return nil, fmt.Errorf("retrieving dependency %s of container %s from state: %w", dep, c.ID(), err)
state, err := depCtr.State()
return nil, fmt.Errorf("retrieving state of dependency %s of container %s: %w", dep, c.ID(), err)
if state != define.ContainerStateRunning && !depCtr.config.IsInfra {
notRunning = append(notRunning, dep)
depCtrs[dep] = depCtr
return notRunning, nil
func (c *Container) completeNetworkSetup() error {
netDisabled, err := c.NetworkDisabled()
// with net=none we still want to set up /etc/hosts
if c.config.NetNsCtr != "" {
if c.config.PostConfigureNetNS {
if err := c.syncContainer(); err != nil {
if err := c.runtime.setupNetNS(c); err != nil {
if err := c.save(); err != nil {
// add /etc/hosts entries
if err := c.addHosts(); err != nil {
return c.addResolvConf()
// Initialize a container, creating it in the runtime
func (c *Container) init(ctx context.Context, retainRetries bool) error {
// Unconditionally remove conmon temporary files.
// We've been running into far too many issues where they block startup.
if err := c.removeConmonFiles(); err != nil {
// Generate the OCI newSpec
newSpec, cleanupFunc, err := c.generateSpec(ctx)
// Make sure the workdir exists while initializing container
if err := c.resolveWorkDir(); err != nil {
// Save the OCI newSpec to disk
if err := c.saveSpec(newSpec); err != nil {
for _, v := range c.config.NamedVolumes {
if err := c.fixVolumePermissions(v); err != nil {
// To ensure that we don't lose track of Conmon if hit by a SIGTERM
// in the middle of setting up the container, inhibit shutdown signals
// until after we save Conmon's PID to the state.
// TODO: This can likely be removed once conmon-rs support merges.
defer shutdown.Uninhibit()
// If the container is part of a pod, make sure the pod cgroup is created before the container
// so the limits can be applied.
if c.PodID() != "" {
pod, err := c.runtime.LookupPod(c.PodID())
if _, err := c.runtime.platformMakePod(pod, &pod.config.ResourceLimits); err != nil {
// With the spec complete, do an OCI create
if _, err = c.ociRuntime.CreateContainer(c, nil); err != nil {
logrus.Debugf("Created container %s in OCI runtime", c.ID())
// Remove any exec sessions leftover from a potential prior run.
if len(c.state.ExecSessions) > 0 {
if err := c.runtime.state.RemoveContainerExecSessions(c); err != nil {
logrus.Errorf("Removing container %s exec sessions from DB: %v", c.ID(), err)
c.state.ExecSessions = make(map[string]*ExecSession)
c.state.Checkpointed = false
c.state.Restored = false
c.state.CheckpointedTime = time.Time{}
c.state.RestoredTime = time.Time{}
c.state.CheckpointPath = ""
c.state.CheckpointLog = ""
c.state.RestoreLog = ""
c.state.ExitCode = 0
c.state.Exited = false
c.state.State = define.ContainerStateCreated
c.state.StoppedByUser = false
c.state.RestartPolicyMatch = false
c.state.StartupHCFailureCount = 0
c.state.StartupHCSuccessCount = 0
c.state.StartupHCPassed = false
c.state.RestartCount = 0
// bugzilla.redhat.com/show_bug.cgi?id=2144754:
// In case of a restart, make sure to remove the healthcheck log to
// have a clean state.
if path := c.healthCheckLogPath(); path != "" {
if err := os.Remove(path); err != nil && !errors.Is(err, os.ErrNotExist) {
if err := c.save(); err != nil {
if c.config.HealthCheckConfig != nil {
timer := c.config.HealthCheckConfig.Interval.String()
if c.config.StartupHealthCheckConfig != nil {
timer = c.config.StartupHealthCheckConfig.Interval.String()
if err := c.createTimer(timer, c.config.StartupHealthCheckConfig != nil); err != nil {
defer c.newContainerEvent(events.Init)
return c.completeNetworkSetup()
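// Net effect of init: the container now exists in the OCI runtime in the
// Created state, with stale exec sessions, checkpoint/restore bookkeeping,
// and exit data cleared, and (if health checks are configured) a systemd
// timer armed at the configured interval.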
// Clean up a container in the OCI runtime.
// Deletes the container in the runtime, and resets its state to Exited.
// The container can be restarted cleanly after this.
func (c *Container) cleanupRuntime(ctx context.Context) error {
// If the container is not ContainerStateStopped or
// ContainerStateCreated, do nothing.
if !c.ensureState(define.ContainerStateStopped, define.ContainerStateCreated) {
// We may be doing this redundantly for some call paths but we need to
// make sure the exit code is being read at this point.
if err := c.checkExitFile(); err != nil {
// If necessary, delete attach and ctl files
if err := c.removeConmonFiles(); err != nil {
if err := c.delete(ctx); err != nil {
// If we were Stopped, we are now Exited, as we've removed ourselves
// from the runtime.
// If we were Created, we are now Configured.
if c.state.State == define.ContainerStateStopped {
c.state.State = define.ContainerStateExited
} else if c.state.State == define.ContainerStateCreated {
c.state.State = define.ContainerStateConfigured
if err := c.save(); err != nil {
logrus.Debugf("Successfully cleaned up container %s", c.ID())
// Reinitialize a container.
// Deletes and recreates a container in the runtime.
// Should only be done on ContainerStateStopped containers.
// Not necessary for ContainerStateExited - the container has already been
// removed from the runtime, so init() can proceed freely.
func (c *Container) reinit(ctx context.Context, retainRetries bool) error {
logrus.Debugf("Recreating container %s in OCI runtime", c.ID())
if err := c.cleanupRuntime(ctx); err != nil {
// Initialize the container again
return c.init(ctx, retainRetries)
// Initialize (if necessary) and start a container.
// Performs all necessary steps to start a container that is not running.
// Does not lock or check validity.
func (c *Container) initAndStart(ctx context.Context) (retErr error) {
// If we are ContainerStateUnknown, throw an error
if c.state.State == define.ContainerStateUnknown {
return fmt.Errorf("container %s is in an unknown state: %w", c.ID(), define.ErrCtrStateInvalid)
} else if c.state.State == define.ContainerStateRemoving {
return fmt.Errorf("cannot start container %s as it is being removed: %w", c.ID(), define.ErrCtrStateInvalid)
// If we are running, do nothing
if c.state.State == define.ContainerStateRunning {
// If we are paused, throw an error
if c.state.State == define.ContainerStatePaused {
return fmt.Errorf("cannot start paused container %s: %w", c.ID(), define.ErrCtrStateInvalid)
if err := c.cleanup(ctx); err != nil {
logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
if err := c.prepare(); err != nil {
// If we are ContainerStateStopped, we need to remove the container from
// the runtime and reset it to ContainerStateConfigured.
if c.state.State == define.ContainerStateStopped {
logrus.Debugf("Recreating container %s in OCI runtime", c.ID())
if err := c.reinit(ctx, false); err != nil {
} else if c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
if err := c.init(ctx, false); err != nil {
// Now start the container
// Internal, non-locking function to start a container
func (c *Container) start(ctx context.Context) error {
if c.config.Spec.Process != nil {
logrus.Debugf("Starting container %s with command %v", c.ID(), c.config.Spec.Process.Args)
if err := c.ociRuntime.StartContainer(c); err != nil {
logrus.Debugf("Started container %s", c.ID())
c.state.State = define.ContainerStateRunning
// Unless being ignored, set the MAINPID to conmon.
if c.config.SdNotifyMode != define.SdNotifyModeIgnore {
payload := fmt.Sprintf("MAINPID=%d", c.state.ConmonPID)
if c.config.SdNotifyMode == define.SdNotifyModeConmon {
// Also send the READY message for the "conmon" policy.
payload += daemon.SdNotifyReady
if err := notifyproxy.SendMessage(c.config.SdNotifySocket, payload); err != nil {
logrus.Errorf("Notifying systemd of Conmon PID: %s", err.Error())
logrus.Debugf("Notify sent successfully")
// Check that the healthcheck is not nil and the --no-healthcheck option is not set.
// If --no-healthcheck is set, Test will always be `[NONE]`, so there is no
// need to update the status in that case.
if c.config.HealthCheckConfig != nil && !(len(c.config.HealthCheckConfig.Test) == 1 && c.config.HealthCheckConfig.Test[0] == "NONE") {
if err := c.updateHealthStatus(define.HealthCheckStarting); err != nil {
if err := c.startTimer(c.config.StartupHealthCheckConfig != nil); err != nil {
c.newContainerEvent(events.Start)
if err := c.save(); err != nil {
if c.config.SdNotifyMode != define.SdNotifyModeHealthy {
// Wait for the container to turn healthy before sending the READY
// message. This implies that we need to unlock and re-lock the
// container.
if _, err := c.WaitForConditionWithInterval(ctx, DefaultWaitInterval, define.HealthCheckHealthy); err != nil {
if err := notifyproxy.SendMessage(c.config.SdNotifySocket, daemon.SdNotifyReady); err != nil {
logrus.Errorf("Sending READY message after turning healthy: %s", err.Error())
logrus.Debugf("Notify sent successfully")
// Internal, non-locking function to stop a container
func (c *Container) stop(timeout uint) error {
logrus.Debugf("Stopping ctr %s (timeout %d)", c.ID(), timeout)
// If the container is running in a PID namespace, then killing the
// primary pid is enough to kill the container. If it is not running in
// a pid namespace, then the OCI runtime needs to kill ALL processes in
// the container's cgroup in order to make sure the container is stopped.
all := !c.hasNamespace(spec.PIDNamespace)
// We can't use --all if Cgroups aren't present.
// Rootless containers with Cgroups v1 and NoCgroups are both cases
// where this can happen.
if c.config.NoCgroups {
} else if rootless.IsRootless() {
// Only do this check if we need to
unified, err := cgroups.IsCgroup2UnifiedMode()
// OK, the following code looks a bit weird, but we have to make sure we can
// stop containers with the restart policy "always": to do that, we have to
// set StoppedByUser even when there is nothing to stop right now. This is
// because the cleanup process waits on the container lock and afterwards
// restarts the container; shouldRestart() then checks for StoppedByUser and
// does not restart it.
// https://github.com/containers/podman/issues/18259
var cannotStopErr error
if c.ensureState(define.ContainerStateStopped, define.ContainerStateExited) {
cannotStopErr = define.ErrCtrStopped
} else if !c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopping) {
cannotStopErr = fmt.Errorf("can only stop created or running containers. %s is in state %s: %w", c.ID(), c.state.State.String(), define.ErrCtrStateInvalid)
c.state.StoppedByUser = true
if cannotStopErr == nil {
// Set the container state to "stopping" and unlock the container
// before handing it over to conmon to unblock other commands. #8501
// demonstrates nicely that a high stop timeout will block even simple
// commands such as `podman ps` from progressing if the container lock
// is held when busy-waiting for the container to be stopped.
c.state.State = define.ContainerStateStopping
if err := c.save(); err != nil {
rErr := fmt.Errorf("saving container %s state before stopping: %w", c.ID(), err)
if cannotStopErr == nil {
// we return below with cannotStopErr
if cannotStopErr != nil {
return cannotStopErr
stopErr := c.ociRuntime.StopContainer(c, timeout, all)
if err := c.syncContainer(); err != nil {
if errors.Is(err, define.ErrNoSuchCtr) || errors.Is(err, define.ErrCtrRemoved) {
// If the container has already been removed (e.g., via
// the cleanup process), set the container state to "stopped".
c.state.State = define.ContainerStateStopped
logrus.Errorf("Syncing container %s status: %v", c.ID(), err)
// We have to check stopErr *after* we lock again - otherwise, we have a
// chance of panicking on a double-unlock. Ref: GH Issue 9615
// Since we're now subject to a race condition with other processes who
// may have altered the state (and other data), let's check if the
// state has changed. If so, we should return immediately and leave
// breadcrumbs for debugging if needed.
if c.state.State != define.ContainerStateStopping {
"Container %q state changed from %q to %q while waiting for it to be stopped: discontinuing stop procedure as another process interfered",
c.ID(), define.ContainerStateStopping, c.state.State,
c.newContainerEvent(events.Stop)
return c.waitForConmonToExitAndSave()
func (c *Container) waitForConmonToExitAndSave() error {
conmonAlive, err := c.ociRuntime.CheckConmonRunning(c)
if err := c.checkExitFile(); err != nil {
if err := c.save(); err != nil {
return fmt.Errorf("saving container %s state after stopping: %w", c.ID(), err)
// Wait until we have an exit file, and sync once we do
if err := c.waitForExitFileAndSync(); err != nil {
// Internal, non-locking function to pause a container
func (c *Container) pause() error {
if c.config.NoCgroups {
return fmt.Errorf("cannot pause without using Cgroups: %w", define.ErrNoCgroups)
if rootless.IsRootless() {
cgroupv2, err := cgroups.IsCgroup2UnifiedMode()
return fmt.Errorf("failed to determine cgroup version: %w", err)
return fmt.Errorf("cannot pause rootless containers on cgroups v1: %w", define.ErrNoCgroups)
if err := c.ociRuntime.PauseContainer(c); err != nil {
// TODO when using docker-py there is some sort of race/incompatibility here
logrus.Debugf("Paused container %s", c.ID())
c.state.State = define.ContainerStatePaused
// Internal, non-locking function to unpause a container
func (c *Container) unpause() error {
if c.config.NoCgroups {
return fmt.Errorf("cannot unpause without using Cgroups: %w", define.ErrNoCgroups)
if err := c.ociRuntime.UnpauseContainer(c); err != nil {
// TODO when using docker-py there is some sort of race/incompatibility here
logrus.Debugf("Unpaused container %s", c.ID())
c.state.State = define.ContainerStateRunning
// Internal, non-locking function to restart a container
func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retErr error) {
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStateStopped, define.ContainerStateExited) {
return fmt.Errorf("unable to restart a container in a paused or unknown state: %w", define.ErrCtrStateInvalid)
c.newContainerEvent(events.Restart)
if c.state.State == define.ContainerStateRunning {
conmonPID := c.state.ConmonPID
if err := c.stop(timeout); err != nil {
if c.config.HealthCheckConfig != nil {
if err := c.removeTransientFiles(context.Background(), c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
logrus.Error(err.Error())
// Old versions of conmon have a bug where they create the exit file before
// closing open file descriptors, causing a race condition when restarting
// containers with open ports, since we cannot bind the ports as they're not
// yet closed by conmon.
// Killing the old conmon PID is ~okay since it forces the FDs of old conmons
// to be closed, while it's a NOP for newer versions which should have
// exited already.
// Ignore errors from FindProcess() as conmon could already have exited.
p, err := os.FindProcess(conmonPID)
if p != nil && err == nil {
if err = p.Kill(); err != nil {
logrus.Debugf("error killing conmon process: %v", err)
// Ensure we tear down the container network so it will be
// recreated - otherwise, behavior of restart differs from stop
if err := c.cleanupNetwork(); err != nil {
if err := c.cleanup(ctx); err != nil {
logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
if err := c.prepare(); err != nil {
if c.state.State == define.ContainerStateStopped {
// Reinitialize the container if we need to
if err := c.reinit(ctx, false); err != nil {
} else if c.state.State == define.ContainerStateConfigured ||
c.state.State == define.ContainerStateExited {
// Initialize the container
if err := c.init(ctx, false); err != nil {
// mountStorage sets up the container's root filesystem.
// It mounts the image and any other requested mounts.
// TODO: Add ability to override mount label so we can use this for Mount() too.
// TODO: Can we use this for export? Copying SHM into the export might not be
// good.
func (c *Container) mountStorage() (_ string, deferredErr error) {
// Container already mounted, nothing to do
if c.state.Mounted {
if c.ensureState(define.ContainerStateExited) {
mounted, _ = mount.Mounted(c.state.Mountpoint)
return c.state.Mountpoint, nil
if !c.config.NoShm {
mounted, err := mount.Mounted(c.config.ShmDir)
return "", fmt.Errorf("unable to determine if %q is mounted: %w", c.config.ShmDir, err)
if !mounted && !MountExists(c.config.Spec.Mounts, "/dev/shm") {
shmOptions := fmt.Sprintf("mode=1777,size=%d", c.config.ShmSize)
if err := c.mountSHM(shmOptions); err != nil {
if err := idtools.SafeChown(c.config.ShmDir, c.RootUID(), c.RootGID()); err != nil {
return "", fmt.Errorf("failed to chown %s: %w", c.config.ShmDir, err)
if deferredErr != nil {
if err := c.unmountSHM(c.config.ShmDir); err != nil {
logrus.Errorf("Unmounting SHM for container %s after mount error: %v", c.ID(), err)
// We need to mount the container before volumes - to ensure the copyup
// works properly.
mountPoint := c.config.Rootfs
if c.config.RootfsMapping != nil {
uidMappings, gidMappings, err := parseIDMapMountOption(c.config.IDMappings, *c.config.RootfsMapping)
pid, cleanupFunc, err := idmap.CreateUsernsProcess(util.RuntimeSpecToIDtools(uidMappings), util.RuntimeSpecToIDtools(gidMappings))
if err := idmap.CreateIDMappedMount(c.config.Rootfs, c.config.Rootfs, pid); err != nil {
return "", fmt.Errorf("failed to create idmapped mount: %w", err)
if deferredErr != nil {
if err := unix.Unmount(c.config.Rootfs, 0); err != nil {
logrus.Errorf("Unmounting idmapped rootfs for container %s after mount error: %v", c.ID(), err)
// Check if an overlay has to be created on top of the rootfs
if c.config.RootfsOverlay {
overlayDest := c.runtime.GraphRoot()
contentDir, err := overlay.GenerateStructure(overlayDest, c.ID(), "rootfs", c.RootUID(), c.RootGID())
return "", fmt.Errorf("rootfs-overlay: failed to create TempDir in the %s directory: %w", overlayDest, err)
overlayMount, err := overlay.Mount(contentDir, c.config.Rootfs, overlayDest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
return "", fmt.Errorf("rootfs-overlay: creating overlay failed %q: %w", c.config.Rootfs, err)
// Seems fuse-overlayfs is not present;
// fall back to native overlay.
if overlayMount.Type == "overlay" {
overlayMount.Options = append(overlayMount.Options, "nodev")
mountOpts := label.FormatMountLabel(strings.Join(overlayMount.Options, ","), c.MountLabel())
err = mount.Mount("overlay", overlayMount.Source, overlayMount.Type, mountOpts)
return "", fmt.Errorf("rootfs-overlay: creating overlay failed %q from native overlay: %w", c.config.Rootfs, err)
mountPoint = overlayMount.Source
execUser, err := lookup.GetUserGroupInfo(mountPoint, c.config.User, nil)
hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
return "", fmt.Errorf("unable to get host UID and host GID: %w", err)
// Note: this should not be recursive; if using an external rootfs, users should be responsible for configuring ownership.
if err := chown.ChangeHostPathOwnership(mountPoint, false, int(hostUID), int(hostGID)); err != nil {
if mountPoint == "" {
mountPoint, err = c.mount()
if deferredErr != nil {
if err := c.unmount(false); err != nil {
logrus.Errorf("Unmounting container %s after mount error: %v", c.ID(), err)
rootUID, rootGID := c.RootUID(), c.RootGID()
dirfd, err := openDirectory(mountPoint)
return "", fmt.Errorf("open mount point: %w", err)
defer unix.Close(dirfd)
err = unix.Mkdirat(dirfd, "etc", 0755)
if err != nil && !os.IsExist(err) {
return "", fmt.Errorf("create /etc: %w", err)
// If the etc directory was created, chown it to root in the container
if err == nil && (rootUID != 0 || rootGID != 0) {
err = unix.Fchownat(dirfd, "etc", rootUID, rootGID, unix.AT_SYMLINK_NOFOLLOW)
return "", fmt.Errorf("chown /etc: %w", err)
etcInTheContainerPath, err := securejoin.SecureJoin(mountPoint, "etc")
return "", fmt.Errorf("resolve /etc in the container: %w", err)
etcInTheContainerFd, err := openDirectory(etcInTheContainerPath)
return "", fmt.Errorf("open /etc in the container: %w", err)
defer unix.Close(etcInTheContainerFd)
if err := c.makePlatformMtabLink(etcInTheContainerFd, rootUID, rootGID); err != nil {
localTimePath, err := timezone.ConfigureContainerTimeZone(tz, c.state.RunDir, mountPoint, etcInTheContainerPath, c.ID())
return "", fmt.Errorf("configuring timezone for container %s: %w", c.ID(), err)
if localTimePath != "" {
if err := c.relabel(localTimePath, c.config.MountLabel, false); err != nil {
if c.state.BindMounts == nil {
c.state.BindMounts = make(map[string]string)
c.state.BindMounts["/etc/localtime"] = localTimePath
// Request a mount of all named volumes
for _, v := range c.config.NamedVolumes {
vol, err := c.mountNamedVolume(v, mountPoint)
if deferredErr == nil {
if err := vol.unmount(false); err != nil {
logrus.Errorf("Unmounting volume %s after error mounting container %s: %v", vol.Name(), c.ID(), err)
return mountPoint, nil
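// Rough mount order implemented above: /dev/shm (unless disabled), then an
// idmapped or overlay rootfs when requested, the c/storage mount otherwise,
// then /etc fixups (mtab link, timezone) and finally the named volumes, with
// deferred unmounts unwinding each step should a later one fail.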
// Mount a single named volume into the container.
// If necessary, copy up image contents into the volume.
// Does not verify that the named volume given is actually present in the
// container's config.
// Returns the volume that was mounted.
func (c *Container) mountNamedVolume(v *ContainerNamedVolume, mountpoint string) (*Volume, error) {
logrus.Debugf("Going to mount named volume %s", v.Name)
vol, err := c.runtime.state.Volume(v.Name)
return nil, fmt.Errorf("retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
if vol.config.LockID == c.config.LockID {
return nil, fmt.Errorf("container %s and volume %s share lock ID %d: %w", c.ID(), vol.Name(), c.config.LockID, define.ErrWillDeadlock)
defer vol.lock.Unlock()
if vol.needsMount() {
if err := vol.mount(); err != nil {
return nil, fmt.Errorf("mounting volume %s for container %s: %w", vol.Name(), c.ID(), err)
// The volume may need a copy-up. Check the state.
if err := vol.update(); err != nil {
_, hasNoCopy := vol.config.Options["nocopy"]
if vol.state.NeedsCopyUp && !slices.Contains(v.Options, "nocopy") && !hasNoCopy {
logrus.Debugf("Copying up contents from container %s to volume %s", c.ID(), vol.Name())
srcDir, err := securejoin.SecureJoin(mountpoint, v.Dest)
return nil, fmt.Errorf("calculating destination path to copy up container %s volume %s: %w", c.ID(), vol.Name(), err)
// Do a manual stat on the source directory to verify existence.
// Skip the rest if it does not exist.
srcStat, err := os.Lstat(srcDir)
if os.IsNotExist(err) {
// Source does not exist, don't bother copying
return nil, fmt.Errorf("identifying source directory for copy up into volume %s: %w", vol.Name(), err)
// If it's not a directory we're mounting over it.
if !srcStat.IsDir() {
// Read the contents; do not bother continuing if empty. Works around
// a bizarre issue where copier.Get will sometimes ENOENT on empty
// directories and sometimes will not.
srcContents, err := os.ReadDir(srcDir)
return nil, fmt.Errorf("reading contents of source directory for copy up into volume %s: %w", vol.Name(), err)
if len(srcContents) == 0 {
// If the volume is not empty, we should not copy up.
volMount := vol.mountPoint()
contents, err := os.ReadDir(volMount)
return nil, fmt.Errorf("listing contents of volume %s mountpoint when copying up from container %s: %w", vol.Name(), c.ID(), err)
if len(contents) > 0 {
// The volume is not empty. It was likely modified
// outside of Podman. For safety, let's not copy up into
// it. Fixes CVE-2020-1726.
// Set NeedsCopyUp to false since we are about to do the first copy.
// Do not copy a second time.
vol.state.NeedsCopyUp = false
if err := vol.save(); err != nil {
// Buildah Copier accepts a reader, so we'll need a pipe.
reader, writer := io.Pipe()
defer reader.Close()
errChan := make(chan error, 1)
logrus.Infof("About to copy up into volume %s", vol.Name())
// Copy, container side: get a tar archive of what needs to be
// streamed into the volume.
defer writer.Close()
getOptions := copier.GetOptions{
KeepDirectoryNames: false,
errChan <- copier.Get(srcDir, "", getOptions, []string{"/."}, writer)
// Copy, volume side: stream what we've written to the pipe, into
// the volume.
copyOpts := copier.PutOptions{}
if err := copier.Put(volMount, "", copyOpts, reader); err != nil {
logrus.Errorf("Streaming contents of container %s directory for volume copy-up: %v", c.ID(), err2)
return nil, fmt.Errorf("copying up to volume %s: %w", vol.Name(), err)
if err := <-errChan; err != nil {
return nil, fmt.Errorf("streaming container content for copy up into volume %s: %w", vol.Name(), err)
1885
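// The copy-up above is a producer/consumer pair over an io.Pipe: copier.Get
// tars up the container-side directory on one goroutine while copier.Put
// untars it into the volume on the other, so nothing is buffered in full.
// A minimal sketch of the same pattern in isolation (paths are hypothetical,
// error handling elided):
//
//	reader, writer := io.Pipe()
//	errChan := make(chan error, 1)
//	go func() {
//		defer writer.Close()
//		errChan <- copier.Get("/src", "", copier.GetOptions{}, []string{"/."}, writer)
//	}()
//	if err := copier.Put("/dst", "", copier.PutOptions{}, reader); err != nil {
//		// drain errChan, then report err
//	}
//	if err := <-errChan; err != nil {
//		// report the Get-side error
//	}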
// cleanupStorage unmounts and cleans up the container's root filesystem
func (c *Container) cleanupStorage() error {
	if !c.state.Mounted {
		// Already unmounted, do nothing
		logrus.Debugf("Container %s storage is already unmounted, skipping...", c.ID())
		return nil
	}

	var cleanupErr error
	reportErrorf := func(msg string, args ...any) {
		err := fmt.Errorf(msg, args...) // Always use fmt.Errorf instead of just logrus.Errorf(…) because the format string probably contains %w
		if cleanupErr == nil {
			cleanupErr = err
		} else {
			logrus.Errorf("%s", err.Error())
		}
	}

	markUnmounted := func() {
		c.state.Mountpoint = ""
		c.state.Mounted = false
		if err := c.save(); err != nil {
			reportErrorf("unmounting container %s: %w", c.ID(), err)
		}
	}

	// Unmount the rootfs overlay if one was created
	if c.config.RootfsOverlay {
		overlayBasePath := filepath.Dir(c.state.Mountpoint)
		if err := overlay.Unmount(overlayBasePath); err != nil {
			reportErrorf("failed to clean up overlay mounts for %s: %w", c.ID(), err)
		}
	}
	if c.config.RootfsMapping != nil {
		if err := unix.Unmount(c.config.Rootfs, 0); err != nil && err != unix.EINVAL {
			reportErrorf("unmounting idmapped rootfs for container %s after mount error: %w", c.ID(), err)
		}
	}

	for _, containerMount := range c.config.Mounts {
		if err := c.unmountSHM(containerMount); err != nil {
			reportErrorf("unmounting container %s: %w", c.ID(), err)
		}
	}

	if err := c.cleanupOverlayMounts(); err != nil {
		// If the container can't remove content, report the error.
		reportErrorf("failed to clean up overlay mounts for %s: %w", c.ID(), err)
	}

	if c.config.Rootfs != "" {
		markUnmounted()
		return cleanupErr
	}

	if err := c.unmount(false); err != nil {
		// If the container has already been removed, warn but don't
		// error.
		// We still want to be able to kick the container out of the
		// state.
		switch {
		case errors.Is(err, storage.ErrLayerNotMounted):
			logrus.Infof("Storage for container %s is not mounted: %v", c.ID(), err)
		case errors.Is(err, storage.ErrNotAContainer) || errors.Is(err, storage.ErrContainerUnknown):
			logrus.Warnf("Storage for container %s has been removed: %v", c.ID(), err)
		default:
			reportErrorf("cleaning up container %s storage: %w", c.ID(), err)
		}
	}

	// Request an unmount of all named volumes
	for _, v := range c.config.NamedVolumes {
		vol, err := c.runtime.state.Volume(v.Name)
		if err != nil {
			reportErrorf("retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
			// We need to try and unmount every volume, so continue
			// on errors.
			continue
		}

		if vol.needsMount() {
			vol.lock.Lock()
			if err := vol.unmount(false); err != nil {
				reportErrorf("unmounting volume %s for container %s: %w", vol.Name(), c.ID(), err)
			}
			vol.lock.Unlock()
		}
	}

	markUnmounted()
	return cleanupErr
}
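// Note the error-aggregation pattern above: the first failure is captured in
// cleanupErr and becomes the function's return value, while every subsequent
// failure is only logged. Cleanup deliberately keeps going after an error so
// that a single failed unmount does not leave the remaining mounts behind.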
// Unmount the container and free its resources
func (c *Container) cleanup(ctx context.Context) error {
	var lastError error

	logrus.Debugf("Cleaning up container %s", c.ID())

	// Remove the healthcheck unit/timer file if it exists
	if c.config.HealthCheckConfig != nil {
		if err := c.removeTransientFiles(ctx, c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
			logrus.Errorf("Removing timer for container %s healthcheck: %v", c.ID(), err)
		}
	}

	// Clean up network namespace, if present
	if err := c.cleanupNetwork(); err != nil {
		lastError = fmt.Errorf("removing container %s network: %w", c.ID(), err)
	}

	// Clean up the hosts entry if it is shared
	if c.config.NetNsCtr != "" {
		if hostsFile, ok := c.state.BindMounts[config.DefaultHostsFile]; ok {
			if err := fileutils.Exists(hostsFile); err == nil {
				// We cannot use the dependency container's lock due to ABBA deadlocks
				if lock, err := lockfile.GetLockFile(hostsFile); err == nil {
					lock.Lock()
					// Make sure to ignore ENOENT in case the netns container was cleaned up before this one
					if err := etchosts.Remove(hostsFile, getLocalhostHostEntry(c)); err != nil && !errors.Is(err, os.ErrNotExist) {
						// This error is not fatal; we still want to do proper cleanup
						logrus.Errorf("failed to remove hosts entry from the netns container's /etc/hosts: %v", err)
					}
					lock.Unlock()
				}
			}
		}
	}

	// Remove the container from the runtime, if necessary.
	// Do this *before* unmounting storage - some runtimes (e.g. Kata)
	// apparently object to having storage removed while the container still
	// exists.
	if err := c.cleanupRuntime(ctx); err != nil {
		if lastError != nil {
			logrus.Errorf("Removing container %s from OCI runtime: %v", c.ID(), err)
		} else {
			lastError = fmt.Errorf("removing container %s from OCI runtime: %w", c.ID(), err)
		}
	}

	// Unmount storage
	if err := c.cleanupStorage(); err != nil {
		if lastError != nil {
			logrus.Errorf("Unmounting container %s storage: %v", c.ID(), err)
		} else {
			lastError = fmt.Errorf("unmounting container %s storage: %w", c.ID(), err)
		}
	}

	// Unmount image volumes
	for _, v := range c.config.ImageVolumes {
		img, _, err := c.runtime.LibimageRuntime().LookupImage(v.Source, nil)
		if err != nil {
			if lastError == nil {
				lastError = err
				continue
			}
			logrus.Errorf("Unmounting image volume %q:%q: %v", v.Source, v.Dest, err)
			continue
		}
		if err := img.Unmount(false); err != nil {
			if lastError == nil {
				lastError = err
				continue
			}
			logrus.Errorf("Unmounting image volume %q:%q: %v", v.Source, v.Dest, err)
		}
	}

	if err := c.stopPodIfNeeded(context.Background()); err != nil {
		if lastError == nil {
			lastError = err
		} else {
			logrus.Errorf("Stopping pod of container %s: %v", c.ID(), err)
		}
	}

	// Prune the exit codes of other containers during cleanup.
	// Since Podman is not a daemon, we have to clean them up somewhere.
	// Cleanup seems like a good place as it's not performance
	// critical.
	if err := c.runtime.state.PruneContainerExitCodes(); err != nil {
		if lastError == nil {
			lastError = err
		} else {
			logrus.Errorf("Pruning container exit codes: %v", err)
		}
	}

	return lastError
}
// If the container is part of a pod where only the infra container remains
// running, attempt to stop the pod.
func (c *Container) stopPodIfNeeded(ctx context.Context) error {
	if c.config.Pod == "" {
		return nil
	}

	pod, err := c.runtime.state.Pod(c.config.Pod)
	if err != nil {
		return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
	}

	switch pod.config.ExitPolicy {
	case config.PodExitPolicyContinue:
		// Nothing to do.
	case config.PodExitPolicyStop:
		// Use the runtime's work queue to stop the pod. This resolves
		// a number of scenarios where we'd otherwise run into
		// deadlocks. For instance, during `pod stop`, the pod has
		// already been locked.
		// The work queue is a simple way to avoid such deadlocks
		// without having to worry about future changes that may
		// introduce more deadlock scenarios.
		c.runtime.queueWork(func() {
			if err := pod.stopIfOnlyInfraRemains(ctx, c.ID()); err != nil {
				if !errors.Is(err, define.ErrNoSuchPod) {
					logrus.Errorf("Checking if infra needs to be stopped: %v", err)
				}
			}
		})
	}
	return nil
}
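// queueWork runs the callback asynchronously on the runtime's worker queue,
// so the pod stop executes outside the current lock scope; this is what
// breaks the lock-ordering cycle described in the comment above.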
// delete deletes the container and runs any configured poststop
// hooks.
func (c *Container) delete(ctx context.Context) error {
	if err := c.ociRuntime.DeleteContainer(c); err != nil {
		return fmt.Errorf("removing container %s from runtime: %w", c.ID(), err)
	}

	if err := c.postDeleteHooks(ctx); err != nil {
		return fmt.Errorf("container %s poststop hooks: %w", c.ID(), err)
	}

	return nil
}
// postDeleteHooks runs the poststop hooks (if any) as specified by
// the OCI Runtime Specification (which requires them to run
// post-delete, despite the stage name).
func (c *Container) postDeleteHooks(ctx context.Context) error {
	if c.state.ExtensionStageHooks != nil {
		extensionHooks, ok := c.state.ExtensionStageHooks["poststop"]
		if ok {
			state, err := json.Marshal(spec.State{
				Version:     spec.Version,
				ID:          c.ID(),
				Status:      "stopped",
				Bundle:      c.bundlePath(),
				Annotations: c.config.Spec.Annotations,
			})
			if err != nil {
				return err
			}
			for i, hook := range extensionHooks {
				hook := hook // copy the loop variable, since we take its address below
				logrus.Debugf("container %s: invoke poststop hook %d, path %s", c.ID(), i, hook.Path)
				var stderr, stdout bytes.Buffer
				hookErr, err := exec.RunWithOptions(
					ctx,
					exec.RunOptions{
						Hook:            &hook,
						Dir:             c.bundlePath(),
						State:           state,
						Stdout:          &stdout,
						Stderr:          &stderr,
						PostKillTimeout: exec.DefaultPostKillTimeout,
					},
				)
				if err != nil {
					logrus.Warnf("Container %s: poststop hook %d: %v", c.ID(), i, err)
					if hookErr != err {
						logrus.Debugf("container %s: poststop hook %d (hook error): %v", c.ID(), i, hookErr)
					}
				}
				stdoutString := stdout.String()
				if stdoutString != "" {
					logrus.Debugf("container %s: poststop hook %d: stdout:\n%s", c.ID(), i, stdoutString)
				}
				stderrString := stderr.String()
				if stderrString != "" {
					logrus.Debugf("container %s: poststop hook %d: stderr:\n%s", c.ID(), i, stderrString)
				}
			}
		}
	}

	return nil
}
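// The `state` payload handed to each poststop hook on stdin is the OCI
// runtime state document built above. Illustratively (all field values here
// are hypothetical), it looks roughly like:
//
//	{
//	  "ociVersion": "1.1.0",
//	  "id": "<container id>",
//	  "status": "stopped",
//	  "bundle": "/path/to/bundle",
//	  "annotations": {"io.container.manager": "libpod"}
//	}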
// writeStringToRundir writes the given string to a file with the given name in
// the container's temporary files directory. The file will be chown'd to the
// container's root user and have an appropriate SELinux label set.
// If a file with the same name already exists, it will be deleted and recreated
// with the new contents.
// Returns the full path to the new file.
func (c *Container) writeStringToRundir(destFile, contents string) (string, error) {
	destFileName := filepath.Join(c.state.RunDir, destFile)

	if err := os.Remove(destFileName); err != nil && !os.IsNotExist(err) {
		return "", fmt.Errorf("removing %s for container %s: %w", destFile, c.ID(), err)
	}

	if err := writeStringToPath(destFileName, contents, c.config.MountLabel, c.RootUID(), c.RootGID()); err != nil {
		return "", err
	}

	return destFileName, nil
}
// writeStringToStaticDir writes the given string to a file with the given name
// in the container's permanent files directory. The file will be chown'd to the
// container's root user and have an appropriate SELinux label set.
// Unlike writeStringToRundir, it will *not* delete and re-create the file if it
// already exists (it will instead error).
// Returns the full path to the new file.
func (c *Container) writeStringToStaticDir(filename, contents string) (string, error) {
	destFileName := filepath.Join(c.config.StaticDir, filename)

	if err := writeStringToPath(destFileName, contents, c.config.MountLabel, c.RootUID(), c.RootGID()); err != nil {
		return "", err
	}

	return destFileName, nil
}
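// Rule of thumb for the two helpers above: writeStringToRundir is for files
// regenerated on every start (the run directory is typically tmpfs-backed and
// does not survive a reboot), while writeStringToStaticDir is for files that
// must persist for the container's whole lifetime, which is why the latter
// refuses to overwrite.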
// saveSpec saves the OCI spec to disk, replacing any existing specs for the container
func (c *Container) saveSpec(spec *spec.Spec) error {
	// If the OCI spec already exists, we need to replace it.
	// Cannot guarantee some things, e.g. network namespaces, have the same
	// paths.
	jsonPath := filepath.Join(c.bundlePath(), "config.json")
	if err := fileutils.Exists(jsonPath); err != nil {
		if !os.IsNotExist(err) {
			return fmt.Errorf("doing stat on container %s spec: %w", c.ID(), err)
		}
		// The spec does not exist, we're fine
	} else {
		// The spec exists, need to remove it
		if err := os.Remove(jsonPath); err != nil {
			return fmt.Errorf("replacing runtime spec for container %s: %w", c.ID(), err)
		}
	}

	fileJSON, err := json.Marshal(spec)
	if err != nil {
		return fmt.Errorf("exporting runtime spec for container %s to JSON: %w", c.ID(), err)
	}
	if err := os.WriteFile(jsonPath, fileJSON, 0644); err != nil {
		return fmt.Errorf("writing runtime spec JSON for container %s to disk: %w", c.ID(), err)
	}

	logrus.Debugf("Created OCI spec for container %s at %s", c.ID(), jsonPath)

	c.state.ConfigPath = jsonPath

	return nil
}
// Warning: precreate hooks may alter 'config' in place.
func (c *Container) setupOCIHooks(ctx context.Context, config *spec.Spec) (map[string][]spec.Hook, error) {
	allHooks := make(map[string][]spec.Hook)
	if len(c.runtime.config.Engine.HooksDir.Get()) == 0 {
		if rootless.IsRootless() {
			return nil, nil
		}
		for _, hDir := range []string{hooks.DefaultDir, hooks.OverrideDir} {
			manager, err := hooks.New(ctx, []string{hDir}, []string{"precreate", "poststop"})
			if err != nil {
				if os.IsNotExist(err) {
					continue
				}
				return nil, err
			}
			ociHooks, err := manager.Hooks(config, c.config.Spec.Annotations, len(c.config.UserVolumes) > 0)
			if err != nil {
				return nil, err
			}
			if len(ociHooks) > 0 || config.Hooks != nil {
				logrus.Warnf("Implicit hook directories are deprecated; set --hooks-dir=%q explicitly to continue to load hooks from this directory", hDir)
			}
			for i, hook := range ociHooks {
				allHooks[i] = hook
			}
		}
	} else {
		manager, err := hooks.New(ctx, c.runtime.config.Engine.HooksDir.Get(), []string{"precreate", "poststop"})
		if err != nil {
			return nil, err
		}

		allHooks, err = manager.Hooks(config, c.config.Spec.Annotations, len(c.config.UserVolumes) > 0)
		if err != nil {
			return nil, err
		}
	}

	hookErr, err := exec.RuntimeConfigFilterWithOptions(
		ctx,
		exec.RuntimeConfigFilterOptions{
			Hooks:           allHooks["precreate"],
			Dir:             c.bundlePath(),
			Config:          config,
			PostKillTimeout: exec.DefaultPostKillTimeout,
		},
	)
	if err != nil {
		logrus.Warnf("Container %s: precreate hook: %v", c.ID(), err)
		if hookErr != nil && hookErr != err {
			logrus.Debugf("container %s: precreate hook (hook error): %v", c.ID(), hookErr)
		}
		return nil, err
	}

	return allHooks, nil
}
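// Hook directories contain one JSON file per hook, following the 1.0.0 OCI
// hooks schema from containers/common. A sketch of such a file (the path and
// annotation here are hypothetical):
//
//	{
//	  "version": "1.0.0",
//	  "hook": {"path": "/usr/libexec/oci/hooks.d/my-hook"},
//	  "when": {"annotations": {"com.example.enable-hook": "true"}},
//	  "stages": ["precreate", "poststop"]
//	}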
// mount mounts the container's root filesystem
func (c *Container) mount() (string, error) {
	if c.state.State == define.ContainerStateRemoving {
		return "", fmt.Errorf("cannot mount container %s as it is being removed: %w", c.ID(), define.ErrCtrStateInvalid)
	}

	mountPoint, err := c.runtime.storageService.MountContainerImage(c.ID())
	if err != nil {
		return "", fmt.Errorf("mounting storage for container %s: %w", c.ID(), err)
	}
	mountPoint, err = filepath.EvalSymlinks(mountPoint)
	if err != nil {
		return "", fmt.Errorf("resolving storage path for container %s: %w", c.ID(), err)
	}
	if err := idtools.SafeChown(mountPoint, c.RootUID(), c.RootGID()); err != nil {
		return "", fmt.Errorf("cannot chown %s to %d:%d: %w", mountPoint, c.RootUID(), c.RootGID(), err)
	}
	return mountPoint, nil
}
// unmount unmounts the container's root filesystem
func (c *Container) unmount(force bool) error {
	// Also unmount storage
	if _, err := c.runtime.storageService.UnmountContainerImage(c.ID(), force); err != nil {
		return fmt.Errorf("unmounting container %s root filesystem: %w", c.ID(), err)
	}

	return nil
}
// checkReadyForRemoval checks whether the given container is ready to be
// removed.
// These checks are only used if force-remove is not specified.
// If it is, we'll remove the container anyway.
// Returns nil if safe to remove, or an error describing why it's unsafe if not.
func (c *Container) checkReadyForRemoval() error {
	if c.state.State == define.ContainerStateUnknown {
		return fmt.Errorf("container %s is in invalid state: %w", c.ID(), define.ErrCtrStateInvalid)
	}

	if c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping) && !c.IsInfra() {
		return fmt.Errorf("cannot remove container %s as it is %s - running or paused containers cannot be removed without force: %w", c.ID(), c.state.State.String(), define.ErrCtrStateInvalid)
	}

	// Check exec sessions
	sessions, err := c.getActiveExecSessions()
	if err != nil {
		return err
	}
	if len(sessions) != 0 {
		return fmt.Errorf("cannot remove container %s as it has active exec sessions: %w", c.ID(), define.ErrCtrStateInvalid)
	}

	return nil
}
// canWithPrevious returns the result of a stat on the pre-checkpoint dir
func (c *Container) canWithPrevious() error {
	return fileutils.Exists(c.PreCheckPointPath())
}
// prepareCheckpointExport writes the config and spec to
// JSON files for later export
func (c *Container) prepareCheckpointExport() error {
	networks, err := c.networks()
	if err != nil {
		return err
	}
	// Make sure to exclude the short ID alias, since the container gets a new ID on restore.
	for net, opts := range networks {
		newAliases := make([]string, 0, len(opts.Aliases))
		for _, alias := range opts.Aliases {
			if alias != c.config.ID[:12] {
				newAliases = append(newAliases, alias)
			}
		}
		opts.Aliases = newAliases
		networks[net] = opts
	}

	// Add the networks from the db to the config, so that the exported checkpoint still stores all current networks.
	c.config.Networks = networks

	// Save the container config to the bundle dir.
	if _, err := metadata.WriteJSONFile(c.config, c.bundlePath(), metadata.ConfigDumpFile); err != nil {
		return err
	}

	// Save the OCI spec to the bundle dir.
	jsonPath := filepath.Join(c.bundlePath(), "config.json")
	g, err := generate.NewFromFile(jsonPath)
	if err != nil {
		logrus.Debugf("generating spec for container %q failed with %v", c.ID(), err)
		return err
	}
	if _, err := metadata.WriteJSONFile(g.Config, c.bundlePath(), metadata.SpecDumpFile); err != nil {
		return err
	}

	return nil
}
// SortUserVolumes sorts the volumes specified for a container
// between named and normal volumes
func (c *Container) SortUserVolumes(ctrSpec *spec.Spec) ([]*ContainerNamedVolume, []spec.Mount) {
	namedUserVolumes := []*ContainerNamedVolume{}
	userMounts := []spec.Mount{}

	// We need to parse all named volumes and mounts into maps, so we don't
	// end up with repeated lookups for each user volume.
	// Map destination to struct, as destination is what is stored in
	// UserVolumes.
	namedVolumes := make(map[string]*ContainerNamedVolume)
	mounts := make(map[string]spec.Mount)
	for _, namedVol := range c.config.NamedVolumes {
		namedVolumes[namedVol.Dest] = namedVol
	}
	for _, mount := range ctrSpec.Mounts {
		mounts[mount.Destination] = mount
	}

	for _, vol := range c.config.UserVolumes {
		if volume, ok := namedVolumes[vol]; ok {
			namedUserVolumes = append(namedUserVolumes, volume)
		} else if mount, ok := mounts[vol]; ok {
			userMounts = append(userMounts, mount)
		} else {
			logrus.Warnf("Could not find mount at destination %q when parsing user volumes for container %s", vol, c.ID())
		}
	}
	return namedUserVolumes, userMounts
}
// Check for an exit file, and handle one if present
func (c *Container) checkExitFile() error {
	// If the container's not running, nothing to do.
	if !c.ensureState(define.ContainerStateRunning, define.ContainerStatePaused, define.ContainerStateStopping) {
		return nil
	}

	exitFile, err := c.exitFilePath()
	if err != nil {
		return err
	}

	// Check for the exit file
	info, err := os.Stat(exitFile)
	if err != nil {
		if os.IsNotExist(err) {
			// Container is still running, no error
			return nil
		}
		return fmt.Errorf("running stat on container %s exit file: %w", c.ID(), err)
	}

	// Alright, it exists. Transition to Stopped state.
	c.state.State = define.ContainerStateStopped
	c.state.PID = 0
	c.state.ConmonPID = 0

	// Read the exit file to get our stopped time and exit code.
	return c.handleExitFile(exitFile, info)
}
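// The exit file is written by the container's conmon process when the main
// process exits: its contents carry the exit code, and handleExitFile derives
// the stopped time from the file's timestamp (see the pkg/ctime import).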
func (c *Container) hasNamespace(namespace spec.LinuxNamespaceType) bool {
	if c.config.Spec == nil || c.config.Spec.Linux == nil {
		return false
	}
	for _, n := range c.config.Spec.Linux.Namespaces {
		if n.Type == namespace {
			return true
		}
	}
	return false
}
// extractSecretToCtrStorage copies a secret's data from the secrets manager to the container's static dir
func (c *Container) extractSecretToCtrStorage(secr *ContainerSecret) error {
	manager, err := c.runtime.SecretsManager()
	if err != nil {
		return err
	}
	_, data, err := manager.LookupSecretData(secr.Name)
	if err != nil {
		return err
	}
	secretFile := filepath.Join(c.config.SecretsPath, secr.Name)

	hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), secr.UID, secr.GID)
	if err != nil {
		return fmt.Errorf("unable to extract secret: %w", err)
	}
	err = os.WriteFile(secretFile, data, 0644)
	if err != nil {
		return fmt.Errorf("unable to create %s: %w", secretFile, err)
	}
	if err := idtools.SafeLchown(secretFile, int(hostUID), int(hostGID)); err != nil {
		return err
	}
	if err := os.Chmod(secretFile, os.FileMode(secr.Mode)); err != nil {
		return err
	}
	if err := c.relabel(secretFile, c.config.MountLabel, false); err != nil {
		return err
	}
	return nil
}
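// GetHostIDs above translates the secret's in-container UID/GID through the
// container's user-namespace mappings, so the chown that follows is performed
// with IDs that are valid on the host side of the mapping.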
// Update a container's resources or restart policy after creation.
// At least one of resources or restartPolicy must not be nil.
func (c *Container) update(resources *spec.LinuxResources, restartPolicy *string, restartRetries *uint) error {
	if resources == nil && restartPolicy == nil {
		return fmt.Errorf("must provide at least one of resources and restartPolicy to update a container: %w", define.ErrInvalidArg)
	}
	if restartRetries != nil && restartPolicy == nil {
		return fmt.Errorf("must provide restart policy if updating restart retries: %w", define.ErrInvalidArg)
	}

	oldResources := c.config.Spec.Linux.Resources
	oldRestart := c.config.RestartPolicy
	oldRetries := c.config.RestartRetries

	if restartPolicy != nil {
		if err := define.ValidateRestartPolicy(*restartPolicy); err != nil {
			return err
		}
		if restartRetries != nil {
			if *restartPolicy != define.RestartPolicyOnFailure {
				return fmt.Errorf("cannot set restart policy retries unless policy is on-failure: %w", define.ErrInvalidArg)
			}
		}
		c.config.RestartPolicy = *restartPolicy
		if restartRetries != nil {
			c.config.RestartRetries = *restartRetries
		} else {
			c.config.RestartRetries = 0
		}
	}

	if resources != nil {
		if c.config.Spec.Linux == nil {
			c.config.Spec.Linux = new(spec.Linux)
		}
		c.config.Spec.Linux.Resources = resources
	}

	if err := c.runtime.state.SafeRewriteContainerConfig(c, "", "", c.config); err != nil {
		// Assume the DB write failed; revert to the old resources block
		// and restart policy.
		c.config.Spec.Linux.Resources = oldResources
		c.config.RestartPolicy = oldRestart
		c.config.RestartRetries = oldRetries
		return err
	}

	if c.ensureState(define.ContainerStateCreated, define.ContainerStateRunning, define.ContainerStatePaused) && resources != nil {
		// `podman inspect` on running containers sources the OCI spec from
		// disk, so to keep inspect accurate we need to update the on-disk
		// OCI spec.
		onDiskSpec, err := c.specFromState()
		if err != nil {
			return fmt.Errorf("retrieving on-disk OCI spec to update: %w", err)
		}
		if onDiskSpec.Linux == nil {
			onDiskSpec.Linux = new(spec.Linux)
		}
		onDiskSpec.Linux.Resources = resources
		if err := c.saveSpec(onDiskSpec); err != nil {
			logrus.Errorf("Unable to update container %s OCI spec - `podman inspect` may not be accurate until container is restarted: %v", c.ID(), err)
		}

		if err := c.ociRuntime.UpdateContainer(c, resources); err != nil {
			return err
		}
	}

	logrus.Debugf("updated container %s", c.ID())

	c.newContainerEvent(events.Update)
	return nil
}
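// As an illustrative sketch (ctr is a hypothetical *Container; this is not a
// call site that exists in libpod), an in-place memory-limit update via the
// helper above could be driven with:
//
//	limit := int64(512 * 1024 * 1024) // 512 MiB
//	res := &spec.LinuxResources{
//		Memory: &spec.LinuxMemory{Limit: &limit},
//	}
//	if err := ctr.update(res, nil, nil); err != nil {
//		// handle error
//	}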