1
//go:build !remote && (linux || freebsd)
23
metadata "github.com/checkpoint-restore/checkpointctl/lib"
24
"github.com/checkpoint-restore/go-criu/v7/stats"
25
"github.com/containers/buildah"
26
"github.com/containers/buildah/pkg/chrootuser"
27
"github.com/containers/buildah/pkg/overlay"
28
butil "github.com/containers/buildah/util"
29
"github.com/containers/common/libnetwork/etchosts"
30
"github.com/containers/common/libnetwork/resolvconf"
31
"github.com/containers/common/libnetwork/types"
32
"github.com/containers/common/pkg/apparmor"
33
"github.com/containers/common/pkg/chown"
34
"github.com/containers/common/pkg/config"
35
"github.com/containers/common/pkg/subscriptions"
36
"github.com/containers/common/pkg/umask"
37
is "github.com/containers/image/v5/storage"
38
"github.com/containers/podman/v5/libpod/define"
39
"github.com/containers/podman/v5/libpod/events"
40
"github.com/containers/podman/v5/pkg/annotations"
41
"github.com/containers/podman/v5/pkg/checkpoint/crutils"
42
"github.com/containers/podman/v5/pkg/criu"
43
"github.com/containers/podman/v5/pkg/lookup"
44
"github.com/containers/podman/v5/pkg/rootless"
45
"github.com/containers/podman/v5/pkg/util"
46
"github.com/containers/podman/v5/version"
47
"github.com/containers/storage/pkg/archive"
48
"github.com/containers/storage/pkg/fileutils"
49
"github.com/containers/storage/pkg/idtools"
50
"github.com/containers/storage/pkg/lockfile"
51
"github.com/containers/storage/pkg/unshare"
52
stypes "github.com/containers/storage/types"
53
securejoin "github.com/cyphar/filepath-securejoin"
54
runcuser "github.com/moby/sys/user"
55
spec "github.com/opencontainers/runtime-spec/specs-go"
56
"github.com/opencontainers/runtime-tools/generate"
57
"github.com/opencontainers/selinux/go-selinux"
58
"github.com/opencontainers/selinux/go-selinux/label"
59
"github.com/sirupsen/logrus"
60
"golang.org/x/exp/slices"
61
"golang.org/x/sys/unix"
62
cdi "tags.cncf.io/container-device-interface/pkg/cdi"
65
// parseOptionIDs parses the value of an ID-mapping option into a list of
// idtools.IDMap entries.
//
// option is split on "#" into ranges and each range is scanned with the
// "%d-%d-%d" format into a container ID, a host ID and a size. An empty
// range, a negative ID or a size below 1 is rejected with an error.
//
// A host ID that lies inside one of ctrMappings is shifted by that mapping's
// HostID-ContainerID offset. If no entry covers it, the "could not find a
// user namespace mapping" error is returned.
// NOTE(review): presumably this translation is only applied to relative
// mappings, as the error text suggests — confirm.
func parseOptionIDs(ctrMappings []idtools.IDMap, option string) ([]idtools.IDMap, error) {
66
ranges := strings.Split(option, "#")
67
ret := make([]idtools.IDMap, len(ranges))
68
for i, m := range ranges {
72
return nil, fmt.Errorf("invalid empty range for %q", option)
80
_, err := fmt.Sscanf(m, "%d-%d-%d", &v.ContainerID, &v.HostID, &v.Size)
84
if v.ContainerID < 0 || v.HostID < 0 || v.Size < 1 {
85
return nil, fmt.Errorf("invalid value for %q", option)
90
for _, m := range ctrMappings {
91
// The host ID falls inside this container mapping: rebase it.
if v.HostID >= m.ContainerID && v.HostID < m.ContainerID+m.Size {
92
v.HostID += m.HostID - m.ContainerID
98
return nil, fmt.Errorf("could not find a user namespace mapping for the relative mapping %q", option)
106
// parseIDMapMountOption computes the UID and GID mappings to use for an
// idmapped mount.
//
// The mappings default to idMappings.UIDMap and idMappings.GIDMap. When
// option starts with "idmap=", its value is split on ";" and every "uids="
// or "gids=" entry replaces the corresponding map with the result of
// parseOptionIDs. Any other entry produces an "unknown option" error.
//
// The maps are returned converted to spec.LinuxIDMapping, UID mappings
// first and GID mappings second.
func parseIDMapMountOption(idMappings stypes.IDMappingOptions, option string) ([]spec.LinuxIDMapping, []spec.LinuxIDMapping, error) {
107
uidMap := idMappings.UIDMap
108
gidMap := idMappings.GIDMap
109
if strings.HasPrefix(option, "idmap=") {
111
options := strings.Split(strings.SplitN(option, "=", 2)[1], ";")
112
for _, i := range options {
114
case strings.HasPrefix(i, "uids="):
115
uidMap, err = parseOptionIDs(idMappings.UIDMap, strings.Replace(i, "uids=", "", 1))
119
case strings.HasPrefix(i, "gids="):
120
gidMap, err = parseOptionIDs(idMappings.GIDMap, strings.Replace(i, "gids=", "", 1))
125
return nil, nil, fmt.Errorf("unknown option %q", i)
130
// Convert the idtools maps into the runtime-spec representation.
uidMappings := make([]spec.LinuxIDMapping, len(uidMap))
131
gidMappings := make([]spec.LinuxIDMapping, len(gidMap))
132
for i, uidmap := range uidMap {
133
uidMappings[i] = spec.LinuxIDMapping{
134
HostID: uint32(uidmap.HostID),
135
ContainerID: uint32(uidmap.ContainerID),
136
Size: uint32(uidmap.Size),
139
for i, gidmap := range gidMap {
140
gidMappings[i] = spec.LinuxIDMapping{
141
HostID: uint32(gidmap.HostID),
142
ContainerID: uint32(gidmap.ContainerID),
143
Size: uint32(gidmap.Size),
146
return uidMappings, gidMappings, nil
149
// getOverlayUpperAndWorkDir is an internal-only function which returns the
// upper and work dir from a list of overlay mount options.
//
// For an option prefixed with "upperdir" or "workdir", the text after the
// first "=" is taken as the value. An error is returned when such a value is
// empty, or when only one of upperdir and workdir ends up set.
151
func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
154
for _, o := range options {
155
if strings.HasPrefix(o, "upperdir") {
156
splitOpt := strings.SplitN(o, "=", 2)
157
if len(splitOpt) > 1 {
158
upperDir = splitOpt[1]
160
return "", "", errors.New("cannot accept empty value for upperdir")
164
if strings.HasPrefix(o, "workdir") {
165
splitOpt := strings.SplitN(o, "=", 2)
166
if len(splitOpt) > 1 {
167
workDir = splitOpt[1]
169
return "", "", errors.New("cannot accept empty value for workdir")
174
// upperdir and workdir are only meaningful as a pair.
if (upperDir != "" && workDir == "") || (upperDir == "" && workDir != "") {
175
return "", "", errors.New("must specify both upperdir and workdir")
177
return upperDir, workDir, nil
180
// generateSpec generates the OCI runtime spec for the container.
181
// It returns the spec (g.Config) together with a cleanup function. The OS
// thread is locked on entry; the cleanup function unlocks it and walks the
// subpath mounts collected in safeMounts.
// NOTE(review): presumably the caller must invoke the cleanup function once
// the mounts are no longer needed — confirm against callers.
182
func (c *Container) generateSpec(ctx context.Context) (s *spec.Spec, cleanupFuncRet func(), err error) {
183
var safeMounts []*safeMountInfo
184
// lock the thread so that the current thread will be kept alive until the mounts are used
185
runtime.LockOSThread()
186
cleanupFunc := func() {
187
runtime.UnlockOSThread()
188
for _, s := range safeMounts {
197
overrides := c.getUserOverrides()
198
execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
200
// Users listed in HostUsers are looked up on the host instead.
if slices.Contains(c.config.HostUsers, c.config.User) {
201
execUser, err = lookupHostUser(c.config.User)
208
// NewFromSpec() is deprecated according to its comment
209
// however the recommended replacement just causes a nil map panic
210
g := generate.NewFromSpec(c.config.Spec)
212
// If the flag to mount all devices is set for a privileged container, add
213
// all the devices from the host's machine into the container
214
if c.config.MountAllDevices {
216
if c.config.Systemd != nil {
217
systemdMode = *c.config.Systemd
219
if err := util.AddPrivilegedDevices(&g, systemdMode); err != nil {
224
// If network namespace was requested, add it now
225
if err := c.addNetworkNamespace(&g); err != nil {
229
// Apply AppArmor checks and load the default profile if needed.
230
if len(c.config.Spec.Process.ApparmorProfile) > 0 {
231
updatedProfile, err := apparmor.CheckProfileAndLoadDefault(c.config.Spec.Process.ApparmorProfile)
235
g.SetProcessApparmorProfile(updatedProfile)
238
if err := c.makeBindMounts(); err != nil {
242
if err := c.mountNotifySocket(g); err != nil {
246
// Get host UID and GID based on the container process UID and GID.
247
hostUID, hostGID, err := butil.GetHostIDs(util.IDtoolsToRuntimeSpec(c.config.IDMappings.UIDMap), util.IDtoolsToRuntimeSpec(c.config.IDMappings.GIDMap), uint32(execUser.Uid), uint32(execUser.Gid))
253
// Add named volumes, either as overlay mounts or as bind mounts.
for _, namedVol := range c.config.NamedVolumes {
254
volume, err := c.runtime.GetVolume(namedVol.Name)
256
return nil, nil, fmt.Errorf("retrieving volume %s to add to container %s: %w", namedVol.Name, c.ID(), err)
258
mountPoint, err := volume.MountPoint()
263
if len(namedVol.SubPath) > 0 {
264
safeMount, err := c.safeMountSubPath(mountPoint, namedVol.SubPath)
268
safeMounts = append(safeMounts, safeMount)
270
mountPoint = safeMount.mountPoint
276
for _, o := range namedVol.Options {
279
upperDir, workDir, err = getOverlayUpperAndWorkDir(namedVol.Options)
287
var overlayMount spec.Mount
288
var overlayOpts *overlay.Options
289
contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
294
overlayOpts = &overlay.Options{RootUID: c.RootUID(),
295
RootGID: c.RootGID(),
296
UpperDirOptionFragment: upperDir,
297
WorkDirOptionFragment: workDir,
298
GraphOpts: c.runtime.store.GraphOptions(),
301
overlayMount, err = overlay.MountWithOptions(contentDir, mountPoint, namedVol.Dest, overlayOpts)
303
return nil, nil, fmt.Errorf("mounting overlay failed %q: %w", mountPoint, err)
306
for _, o := range namedVol.Options {
308
if err := c.ChangeHostPathOwnership(mountPoint, true, int(hostUID), int(hostGID)); err != nil {
312
if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
317
g.AddMount(overlayMount)
319
volMount := spec.Mount{
320
Type: define.TypeBind,
322
Destination: namedVol.Dest,
323
Options: namedVol.Options,
329
// Check if the spec file mounts contain the options z, Z, U or idmap.
330
// If they have z or Z, relabel the source directory and then remove the option.
331
// If they have U, chown the source directory and then remove the option.
332
// If they have idmap, then calculate the mappings to use in the OCI config file.
333
for i := range g.Config.Mounts {
334
m := &g.Config.Mounts[i]
336
for _, o := range m.Options {
337
if strings.HasPrefix(o, "subpath=") {
338
subpath := strings.Split(o, "=")[1]
339
safeMount, err := c.safeMountSubPath(m.Source, subpath)
343
safeMounts = append(safeMounts, safeMount)
344
m.Source = safeMount.mountPoint
347
if o == "idmap" || strings.HasPrefix(o, "idmap=") {
349
m.UIDMappings, m.GIDMappings, err = parseIDMapMountOption(c.config.IDMappings, o)
357
if m.Type == define.TypeTmpfs {
358
options = append(options, []string{fmt.Sprintf("uid=%d", execUser.Uid), fmt.Sprintf("gid=%d", execUser.Gid)}...)
360
// only chown on initial creation of container
361
if err := c.ChangeHostPathOwnership(m.Source, true, int(hostUID), int(hostGID)); err != nil {
368
if err := c.relabel(m.Source, c.MountLabel(), label.IsShared(o)); err != nil {
371
case "no-dereference":
372
// crun calls the option `copy-symlink`.
373
// Podman decided for --no-dereference as many
374
// bin-utils tools (e.g., touch, chown, cp) do.
375
options = append(options, "copy-symlink")
377
options = append(options, o)
383
c.setProcessLabel(&g)
386
// Add bind mounts to container
387
for dstPath, srcPath := range c.state.BindMounts {
388
newMount := spec.Mount{
389
Type: define.TypeBind,
391
Destination: dstPath,
392
Options: bindOptions,
394
if c.IsReadOnly() && (dstPath != "/dev/shm" || !c.config.ReadWriteTmpfs) {
395
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
397
if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
398
newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
400
if !MountExists(g.Mounts(), dstPath) {
403
logrus.Infof("User mount overriding libpod mount at %q", dstPath)
407
// Add overlay volumes
408
for _, overlayVol := range c.config.OverlayVolumes {
409
upperDir, workDir, err := getOverlayUpperAndWorkDir(overlayVol.Options)
413
contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
417
overlayOpts := &overlay.Options{RootUID: c.RootUID(),
418
RootGID: c.RootGID(),
419
UpperDirOptionFragment: upperDir,
420
WorkDirOptionFragment: workDir,
421
GraphOpts: c.runtime.store.GraphOptions(),
424
overlayMount, err := overlay.MountWithOptions(contentDir, overlayVol.Source, overlayVol.Dest, overlayOpts)
426
return nil, nil, fmt.Errorf("mounting overlay failed %q: %w", overlayVol.Source, err)
429
// Check overlay volume options
430
for _, o := range overlayVol.Options {
432
if err := c.ChangeHostPathOwnership(overlayVol.Source, true, int(hostUID), int(hostGID)); err != nil {
436
if err := c.ChangeHostPathOwnership(contentDir, true, int(hostUID), int(hostGID)); err != nil {
442
g.AddMount(overlayMount)
445
// Add image volumes as overlay mounts
446
for _, volume := range c.config.ImageVolumes {
447
// Mount the specified image.
448
img, _, err := c.runtime.LibimageRuntime().LookupImage(volume.Source, nil)
450
return nil, nil, fmt.Errorf("creating image volume %q:%q: %w", volume.Source, volume.Dest, err)
452
mountPoint, err := img.Mount(ctx, nil, "")
454
return nil, nil, fmt.Errorf("mounting image volume %q:%q: %w", volume.Source, volume.Dest, err)
457
contentDir, err := overlay.TempDir(c.config.StaticDir, c.RootUID(), c.RootGID())
459
return nil, nil, fmt.Errorf("failed to create TempDir in the %s directory: %w", c.config.StaticDir, err)
462
var overlayMount spec.Mount
463
if volume.ReadWrite {
464
overlayMount, err = overlay.Mount(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
466
overlayMount, err = overlay.MountReadOnly(contentDir, mountPoint, volume.Dest, c.RootUID(), c.RootGID(), c.runtime.store.GraphOptions())
469
return nil, nil, fmt.Errorf("creating overlay mount for image %q failed: %w", volume.Source, err)
471
g.AddMount(overlayMount)
474
err = c.setHomeEnvIfNeeded()
479
if c.config.User != "" {
480
// User and Group must go together
481
g.SetProcessUID(uint32(execUser.Uid))
482
g.SetProcessGID(uint32(execUser.Gid))
483
g.AddProcessAdditionalGid(uint32(execUser.Gid))
486
if c.config.Umask != "" {
487
umask, err := c.umask()
491
g.Config.Process.User.Umask = &umask
494
// Add additional groups if c.config.Groups is not empty
495
if len(c.config.Groups) > 0 {
496
gids, err := lookup.GetContainerGroups(c.config.Groups, c.state.Mountpoint, overrides)
498
return nil, nil, fmt.Errorf("looking up supplemental groups for container %s: %w", c.ID(), err)
500
for _, gid := range gids {
501
g.AddProcessAdditionalGid(gid)
505
if err := c.addSystemdMounts(&g); err != nil {
509
// Look up and add groups the user belongs to, if a group wasn't directly specified
510
if !strings.Contains(c.config.User, ":") {
511
// the gidMappings that are present inside the container user namespace
512
var gidMappings []idtools.IDMap
515
case len(c.config.IDMappings.GIDMap) > 0:
516
gidMappings = c.config.IDMappings.GIDMap
517
case rootless.IsRootless():
518
// Check whether the current user namespace has enough gids available.
519
availableGids, err := rootless.GetAvailableGids()
521
return nil, nil, fmt.Errorf("cannot read number of available GIDs: %w", err)
523
gidMappings = []idtools.IDMap{{
526
Size: int(availableGids),
529
gidMappings = []idtools.IDMap{{
535
// Only add supplementary gids that are covered by one of the gid mappings.
for _, gid := range execUser.Sgids {
536
isGIDAvailable := false
537
for _, m := range gidMappings {
538
if gid >= m.ContainerID && gid < m.ContainerID+m.Size {
539
isGIDAvailable = true
544
g.AddProcessAdditionalGid(uint32(gid))
546
logrus.Warnf("Additional gid=%d is not present in the user namespace, skip setting it", gid)
551
// Add shared namespaces from other containers
552
if err := c.addSharedNamespaces(&g); err != nil {
556
g.SetRootPath(c.state.Mountpoint)
557
g.AddAnnotation("org.opencontainers.image.stopSignal", strconv.FormatUint(uint64(c.config.StopSignal), 10))
559
// Keep a container-manager annotation that is already present in the spec.
if _, exists := g.Config.Annotations[annotations.ContainerManager]; !exists {
560
g.AddAnnotation(annotations.ContainerManager, annotations.ContainerManagerLibpod)
563
if err := c.setCgroupsPath(&g); err != nil {
567
// Warning: CDI may alter g.Config in place.
568
if len(c.config.CDIDevices) > 0 {
569
registry, err := cdi.NewCache(
570
cdi.WithAutoRefresh(false),
573
return nil, nil, fmt.Errorf("creating CDI registry: %w", err)
575
// A refresh failure is only logged at debug level.
if err := registry.Refresh(); err != nil {
576
logrus.Debugf("The following error was triggered when refreshing the CDI registry: %v", err)
578
if _, err := registry.InjectDevices(g.Config, c.config.CDIDevices...); err != nil {
579
return nil, nil, fmt.Errorf("setting up CDI devices: %w", err)
583
// Mounts need to be sorted so paths will not cover other paths
584
mounts := sortMounts(g.Mounts())
587
for _, m := range mounts {
588
// We need to remove all symlinks from tmpfs mounts.
589
// Runc and other runtimes may choke on them.
590
// Easy solution: use securejoin to do a scoped evaluation of
591
// the links, then trim off the mount prefix.
592
if m.Type == define.TypeTmpfs {
593
finalPath, err := securejoin.SecureJoin(c.state.Mountpoint, m.Destination)
595
return nil, nil, fmt.Errorf("resolving symlinks for mount destination %s: %w", m.Destination, err)
597
trimmedPath := strings.TrimPrefix(finalPath, strings.TrimSuffix(c.state.Mountpoint, "/"))
598
m.Destination = trimmedPath
603
if err := c.addRootPropagation(&g, mounts); err != nil {
607
// Warning: precreate hooks may alter g.Config in place.
608
if c.state.ExtensionStageHooks, err = c.setupOCIHooks(ctx, g.Config); err != nil {
609
return nil, nil, fmt.Errorf("setting up OCI Hooks: %w", err)
611
// Expose secrets configured as environment variables to the process.
if len(c.config.EnvSecrets) > 0 {
612
manager, err := c.runtime.SecretsManager()
616
for name, secr := range c.config.EnvSecrets {
617
_, data, err := manager.LookupSecretData(secr.Name)
621
g.AddProcessEnv(name, string(data))
625
// Pass down the LISTEN_* environment (see #10443).
626
for _, key := range []string{"LISTEN_PID", "LISTEN_FDS", "LISTEN_FDNAMES"} {
627
if val, ok := os.LookupEnv(key); ok {
628
// Force the PID to `1` since we cannot rely on (all
629
// versions of) all runtimes to do it for us.
630
if key == "LISTEN_PID" {
633
g.AddProcessEnv(key, val)
640
isRootless := rootless.IsRootless()
641
isRunningInUserNs := unshare.IsRootless()
642
if isRunningInUserNs && g.Config.Process != nil && g.Config.Process.OOMScoreAdj != nil {
644
*g.Config.Process.OOMScoreAdj, err = maybeClampOOMScoreAdj(*g.Config.Process.OOMScoreAdj)
650
// Detect which rlimits were already configured in the container's spec.
for _, rlimit := range c.config.Spec.Process.Rlimits {
651
if rlimit.Type == "RLIMIT_NOFILE" {
654
if rlimit.Type == "RLIMIT_NPROC" {
659
// RLIMIT_NOFILE: start from the default and lower it to the current
// process limits when those are smaller.
max := rlimT(define.RLimitDefaultValue)
660
current := rlimT(define.RLimitDefaultValue)
661
var rlimit unix.Rlimit
662
if err := unix.Getrlimit(unix.RLIMIT_NOFILE, &rlimit); err != nil {
663
logrus.Warnf("Failed to return RLIMIT_NOFILE ulimit %q", err)
665
if rlimT(rlimit.Cur) < current {
666
current = rlimT(rlimit.Cur)
668
if rlimT(rlimit.Max) < max {
669
max = rlimT(rlimit.Max)
671
g.AddProcessRlimits("RLIMIT_NOFILE", uint64(max), uint64(current))
674
// RLIMIT_NPROC: same clamping as for RLIMIT_NOFILE above.
max := rlimT(define.RLimitDefaultValue)
675
current := rlimT(define.RLimitDefaultValue)
676
var rlimit unix.Rlimit
677
if err := unix.Getrlimit(unix.RLIMIT_NPROC, &rlimit); err != nil {
678
logrus.Warnf("Failed to return RLIMIT_NPROC ulimit %q", err)
680
if rlimT(rlimit.Cur) < current {
681
current = rlimT(rlimit.Cur)
683
if rlimT(rlimit.Max) < max {
684
max = rlimT(rlimit.Max)
686
g.AddProcessRlimits("RLIMIT_NPROC", uint64(max), uint64(current))
692
return g.Config, cleanupFunc, nil
695
// isWorkDirSymlink returns true if resolved workdir is symlink or a chain of symlinks,
695
// and final resolved target is present either on volume, mount or inside of container
696
// otherwise it returns false. Following function is meant for internal use only and
697
// can change at any point of time.
698
func (c *Container) isWorkDirSymlink(resolvedPath string) bool {
699
// We cannot create workdir since explicit --workdir is
700
// set in config but workdir could also be a symlink.
701
// If it's a symlink, check if the resolved target is present in the container.
702
// If so, that's a valid use case: return true.
706
// Linux only supports a chain of 40 links.
707
// Reference: https://github.com/torvalds/linux/blob/master/include/linux/namei.h#L13
708
if maxSymLinks > 40 {
711
resolvedSymlink, err := os.Readlink(resolvedPath)
713
// End sym-link resolution loop.
716
if resolvedSymlink != "" {
717
// Resolve the link target relative to the container's mountpoint.
_, resolvedSymlinkWorkdir, err := c.resolvePath(c.state.Mountpoint, resolvedSymlink)
718
if isPathOnVolume(c, resolvedSymlinkWorkdir) || isPathOnMount(c, resolvedSymlinkWorkdir) {
719
// Resolved symlink exists on external volume or mount
723
// Could not resolve path so end sym-link resolution loop.
726
if resolvedSymlinkWorkdir != "" {
727
resolvedPath = resolvedSymlinkWorkdir
728
err := fileutils.Exists(resolvedSymlinkWorkdir)
730
// Symlink resolved successfully and resolved path exists on container,
731
// this is a valid use-case so return true.
732
logrus.Debugf("Workdir is a symlink with target to %q and resolved symlink exists on container", resolvedSymlink)
743
// resolveWorkDir resolves the container's workdir and, depending on the
743
// configuration, will create it, or error out if it does not exist.
744
// Note that the container must be mounted before.
745
func (c *Container) resolveWorkDir() error {
746
workdir := c.WorkingDir()
748
// If the specified workdir is a subdir of a volume or mount,
749
// we don't need to do anything. The runtime is taking care of that.
751
if isPathOnVolume(c, workdir) || isPathOnMount(c, workdir) {
752
logrus.Debugf("Workdir %q resolved to a volume or mount", workdir)
756
// Translate the in-container workdir to a path below the mountpoint.
_, resolvedWorkdir, err := c.resolvePath(c.state.Mountpoint, workdir)
760
logrus.Debugf("Workdir %q resolved to host path %q", workdir, resolvedWorkdir)
762
st, err := os.Stat(resolvedWorkdir)
765
return fmt.Errorf("workdir %q exists on container %s, but is not a directory", workdir, c.ID())
769
if !c.config.CreateWorkingDir {
770
// No need to create it (e.g., `--workdir=/foo`), so let's make sure
771
// the path exists on the container.
773
if os.IsNotExist(err) {
774
// If resolved Workdir path gets marked as a valid symlink,
775
// return nil because this is a valid use-case.
776
if c.isWorkDirSymlink(resolvedWorkdir) {
779
return fmt.Errorf("workdir %q does not exist on container %s", workdir, c.ID())
781
// This might be a serious error (e.g., permission), so
782
// we need to return the full error.
783
return fmt.Errorf("detecting workdir %q on container %s: %w", workdir, c.ID(), err)
787
if err := os.MkdirAll(resolvedWorkdir, 0755); err != nil {
791
return fmt.Errorf("creating container %s workdir: %w", c.ID(), err)
794
// Look up the container user so the new workdir can be chowned to it.
795
uid, gid, _, err := chrootuser.GetUser(c.state.Mountpoint, c.User())
797
return fmt.Errorf("looking up %s inside of the container %s: %w", c.User(), c.ID(), err)
799
if err := idtools.SafeChown(resolvedWorkdir, int(uid), int(gid)); err != nil {
800
return fmt.Errorf("chowning container %s workdir to container root: %w", c.ID(), err)
807
// getUserOverrides builds the lookup.Overrides that point user and group
// lookups at passwd/group files supplied from outside the container image.
//
// It scans the mounts of the container's spec: a mount on "/etc/passwd" or
// "/etc/group" supplies the respective file, and a mount on "/etc" supplies
// "passwd" and "group" below its source directory. A "/etc/passwd" entry in
// c.state.BindMounts is applied last to the passwd path.
// NOTE(review): hasPasswdFile/hasGroupFile presumably guard the "/etc"
// fallback so explicit file mounts win — confirm.
func (c *Container) getUserOverrides() *lookup.Overrides {
808
var hasPasswdFile, hasGroupFile bool
809
overrides := lookup.Overrides{}
810
for _, m := range c.config.Spec.Mounts {
811
if m.Destination == "/etc/passwd" {
812
overrides.ContainerEtcPasswdPath = m.Source
815
if m.Destination == "/etc/group" {
816
overrides.ContainerEtcGroupPath = m.Source
819
if m.Destination == "/etc" {
821
overrides.ContainerEtcPasswdPath = filepath.Join(m.Source, "passwd")
824
overrides.ContainerEtcGroupPath = filepath.Join(m.Source, "group")
828
if path, ok := c.state.BindMounts["/etc/passwd"]; ok {
829
overrides.ContainerEtcPasswdPath = path
834
// lookupHostUser looks up the user name on the host via util.LookupUser and
// returns it as a runcuser.ExecUser with Uid, Gid and Home filled in.
//
// The Uid and Gid strings of the looked-up user are parsed as base-10
// unsigned 32-bit values. On any failure the error is returned together with
// a pointer to the ExecUser as populated so far.
func lookupHostUser(name string) (*runcuser.ExecUser, error) {
835
var execUser runcuser.ExecUser
836
// Look up User on host
837
u, err := util.LookupUser(name)
839
return &execUser, err
841
uid, err := strconv.ParseUint(u.Uid, 10, 32)
843
return &execUser, err
846
gid, err := strconv.ParseUint(u.Gid, 10, 32)
848
return &execUser, err
850
execUser.Uid = int(uid)
851
execUser.Gid = int(gid)
852
execUser.Home = u.HomeDir
853
return &execUser, nil
856
// mountNotifySocket mounts the NOTIFY_SOCKET into the container if it's set
857
// and if the sdnotify mode is set to container. It creates and relabels a
// "notify" directory below the bundle path, registers it as the "/run/notify"
// bind mount unless one already exists, and points the NOTIFY_SOCKET
// environment variable of the process at "/run/notify/notify.sock".
858
// NOTE(review): an older version of this comment said the function also sets
// c.notifySocket; the code checks c.config.SdNotifySocket instead — confirm.
859
func (c *Container) mountNotifySocket(g generate.Generator) error {
860
if c.config.SdNotifySocket == "" {
863
if c.config.SdNotifyMode != define.SdNotifyModeContainer {
867
notifyDir := filepath.Join(c.bundlePath(), "notify")
868
logrus.Debugf("Checking notify %q dir", notifyDir)
869
if err := os.MkdirAll(notifyDir, 0755); err != nil {
870
// An already existing directory is not treated as an error.
if !os.IsExist(err) {
871
return fmt.Errorf("unable to create notify %q dir: %w", notifyDir, err)
874
if err := c.relabel(notifyDir, c.MountLabel(), true); err != nil {
875
return fmt.Errorf("relabel failed %q: %w", notifyDir, err)
877
logrus.Debugf("Add bindmount notify %q dir", notifyDir)
878
if _, ok := c.state.BindMounts["/run/notify"]; !ok {
879
c.state.BindMounts["/run/notify"] = notifyDir
882
// Set the container's notify socket to the proxy socket created by conmon
883
g.AddProcessEnv("NOTIFY_SOCKET", "/run/notify/notify.sock")
888
// addCheckpointImageMetadata annotates the checkpoint image being built by
// importBuilder with information about the container and the host: container
// and image names, Podman, CRIU, OCI runtime and conmon versions, host
// architecture, kernel, cgroup version and distribution.
//
// It returns an error when the host info or the CRIU version cannot be
// obtained.
// NOTE(review): both errors are formatted with %v rather than %w, so the
// underlying error is not wrapped — confirm whether that is intended.
func (c *Container) addCheckpointImageMetadata(importBuilder *buildah.Builder) error {
889
// Get information about host environment
890
hostInfo, err := c.Runtime().hostInfo()
892
return fmt.Errorf("getting host info: %v", err)
895
criuVersion, err := criu.GetCriuVersion()
897
return fmt.Errorf("getting criu version: %v", err)
900
rootfsImageID, rootfsImageName := c.Image()
902
// Add image annotations with information about the container and the host.
903
// This information is useful to check compatibility before restoring the checkpoint
905
checkpointImageAnnotations := map[string]string{
906
define.CheckpointAnnotationName: c.config.Name,
907
define.CheckpointAnnotationRawImageName: c.config.RawImageName,
908
define.CheckpointAnnotationRootfsImageID: rootfsImageID,
909
define.CheckpointAnnotationRootfsImageName: rootfsImageName,
910
define.CheckpointAnnotationPodmanVersion: version.Version.String(),
911
define.CheckpointAnnotationCriuVersion: strconv.Itoa(criuVersion),
912
define.CheckpointAnnotationRuntimeName: hostInfo.OCIRuntime.Name,
913
define.CheckpointAnnotationRuntimeVersion: hostInfo.OCIRuntime.Version,
914
define.CheckpointAnnotationConmonVersion: hostInfo.Conmon.Version,
915
define.CheckpointAnnotationHostArch: hostInfo.Arch,
916
define.CheckpointAnnotationHostKernel: hostInfo.Kernel,
917
define.CheckpointAnnotationCgroupVersion: hostInfo.CgroupsVersion,
918
define.CheckpointAnnotationDistributionVersion: hostInfo.Distribution.Version,
919
define.CheckpointAnnotationDistributionName: hostInfo.Distribution.Distribution,
922
for key, value := range checkpointImageAnnotations {
923
importBuilder.SetAnnotation(key, value)
929
// resolveCheckpointImageName resolves options.CreateImage through the
// libimage runtime's ResolveName and stores the resolved name back into
// options.CreateImage. An empty CreateImage is handled as a separate case
// before any resolution takes place.
func (c *Container) resolveCheckpointImageName(options *ContainerCheckpointOptions) error {
930
if options.CreateImage == "" {
934
// Resolve image name
935
resolvedImageName, err := c.runtime.LibimageRuntime().ResolveName(options.CreateImage)
940
options.CreateImage = resolvedImageName
944
// createCheckpointImage stores the container's checkpoint as an image named
// options.CreateImage.
//
// It builds a "scratch" based buildah working container, exports the
// checkpoint into a tar file inside a temporary directory, adds that archive
// and the checkpoint metadata annotations to the builder, and commits the
// result to the image reference parsed from options.CreateImage. The
// temporary directory is removed via defer.
func (c *Container) createCheckpointImage(ctx context.Context, options ContainerCheckpointOptions) error {
945
if options.CreateImage == "" {
948
logrus.Debugf("Create checkpoint image %s", options.CreateImage)
950
// Create storage reference
951
imageRef, err := is.Transport.ParseStoreReference(c.runtime.store, options.CreateImage)
953
// NOTE(review): the error returned by ParseStoreReference is not included
// in the message below — confirm whether it should be wrapped.
return errors.New("failed to parse image name")
956
// Build an image scratch
957
builderOptions := buildah.BuilderOptions{
958
FromImage: "scratch",
960
importBuilder, err := buildah.NewBuilder(ctx, c.runtime.store, builderOptions)
964
// Clean up buildah working container
966
if err := importBuilder.Delete(); err != nil {
967
logrus.Errorf("Image builder delete failed: %v", err)
971
if err := c.prepareCheckpointExport(); err != nil {
975
// Export checkpoint into temporary tar file
976
tmpDir, err := os.MkdirTemp("", "checkpoint_image_")
980
defer os.RemoveAll(tmpDir)
982
// options is passed by value, so this TargetFile change stays local.
options.TargetFile = path.Join(tmpDir, "checkpoint.tar")
984
if err := c.exportCheckpoint(options); err != nil {
988
// Copy checkpoint from temporary tar file in the image
989
addAndCopyOptions := buildah.AddAndCopyOptions{}
990
if err := importBuilder.Add("", true, addAndCopyOptions, options.TargetFile); err != nil {
994
if err := c.addCheckpointImageMetadata(importBuilder); err != nil {
998
commitOptions := buildah.CommitOptions{
1000
SystemContext: c.runtime.imageContext,
1003
// Create checkpoint image
1004
id, _, _, err := importBuilder.Commit(ctx, imageRef, commitOptions)
1008
logrus.Debugf("Created checkpoint image: %s", id)
1012
// exportCheckpoint writes the container's checkpoint as a tar archive to
// options.TargetFile.
//
// Containers with dependencies are rejected, except for a single dependency
// that is the infra container of the container's pod. The archive is built
// from the bundle path and includes the checkpoint metadata files, the log
// file for the k8s-file/json-file style log drivers, the (pre-)checkpoint
// directory, the root file-system diff unless options.IgnoreRootfs is set,
// and one tar per named volume unless options.IgnoreVolumes is set. The
// target file is chmod'ed to 0600 and temporary files created in the bundle
// path are removed afterwards.
func (c *Container) exportCheckpoint(options ContainerCheckpointOptions) error {
1013
if len(c.Dependencies()) == 1 {
1014
// Check if the dependency is an infra container. If it is we can checkpoint
1015
// the container out of the Pod.
1016
if c.config.Pod == "" {
1017
return errors.New("cannot export checkpoints of containers with dependencies")
1020
pod, err := c.runtime.state.Pod(c.config.Pod)
1022
return fmt.Errorf("container %s is in pod %s, but pod cannot be retrieved: %w", c.ID(), c.config.Pod, err)
1024
infraID, err := pod.InfraContainerID()
1026
return fmt.Errorf("cannot retrieve infra container ID for pod %s: %w", c.config.Pod, err)
1028
if c.Dependencies()[0] != infraID {
1029
return errors.New("cannot export checkpoints of containers with dependencies")
1032
if len(c.Dependencies()) > 1 {
1033
return errors.New("cannot export checkpoints of containers with dependencies")
1035
logrus.Debugf("Exporting checkpoint image of container %q to %q", c.ID(), options.TargetFile)
1037
// Files, relative to the bundle path, that go into the archive.
includeFiles := []string{
1039
metadata.DevShmCheckpointTar,
1040
metadata.ConfigDumpFile,
1041
metadata.SpecDumpFile,
1042
metadata.NetworkStatusFile,
1046
if c.LogDriver() == define.KubernetesLogging ||
1047
c.LogDriver() == define.JSONLogging {
1048
includeFiles = append(includeFiles, "ctr.log")
1050
if options.PreCheckPoint {
1051
includeFiles = append(includeFiles, preCheckpointDir)
1053
includeFiles = append(includeFiles, metadata.CheckpointDirectory)
1055
// Get root file-system changes included in the checkpoint archive
1056
var addToTarFiles []string
1057
if !options.IgnoreRootfs {
1058
// To correctly track deleted files, let's go through the output of 'podman diff'
1059
rootFsChanges, err := c.runtime.GetDiff("", c.ID(), define.DiffContainer)
1061
return fmt.Errorf("exporting root file-system diff for %q: %w", c.ID(), err)
1064
// NOTE(review): ":=" here appears to declare a new addToTarFiles that
// shadows the variable declared before the if, so the cleanup loop at the
// end of the function would iterate the outer, still-empty slice and leave
// these files behind — confirm and assign with "=" if so.
addToTarFiles, err := crutils.CRCreateRootFsDiffTar(&rootFsChanges, c.state.Mountpoint, c.bundlePath())
1069
includeFiles = append(includeFiles, addToTarFiles...)
1072
// Folder containing archived volumes that will be included in the export
1073
expVolDir := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory)
1075
// Create an archive for each volume associated with the container
1076
if !options.IgnoreVolumes {
1077
if err := os.MkdirAll(expVolDir, 0700); err != nil {
1078
return fmt.Errorf("creating volumes export directory %q: %w", expVolDir, err)
1081
for _, v := range c.config.NamedVolumes {
1082
volumeTarFilePath := filepath.Join(metadata.CheckpointVolumesDirectory, v.Name+".tar")
1083
volumeTarFileFullPath := filepath.Join(c.bundlePath(), volumeTarFilePath)
1085
volumeTarFile, err := os.Create(volumeTarFileFullPath)
1087
return fmt.Errorf("creating %q: %w", volumeTarFileFullPath, err)
1090
volume, err := c.runtime.GetVolume(v.Name)
1095
mp, err := volume.MountPoint()
1100
return fmt.Errorf("volume %s is not mounted, cannot export: %w", volume.Name(), define.ErrInternal)
1103
input, err := archive.TarWithOptions(mp, &archive.TarOptions{
1104
Compression: archive.Uncompressed,
1105
IncludeSourceDir: true,
1108
return fmt.Errorf("reading volume directory %q: %w", v.Dest, err)
1111
_, err = io.Copy(volumeTarFile, input)
1115
// NOTE(review): the error returns between os.Create and this Close do not
// appear to close volumeTarFile — confirm.
volumeTarFile.Close()
1117
includeFiles = append(includeFiles, volumeTarFilePath)
1121
// Archive the selected files from the bundle path.
input, err := archive.TarWithOptions(c.bundlePath(), &archive.TarOptions{
1122
Compression: options.Compression,
1123
IncludeSourceDir: true,
1124
IncludeFiles: includeFiles,
1128
return fmt.Errorf("reading checkpoint directory %q: %w", c.ID(), err)
1131
outFile, err := os.Create(options.TargetFile)
1133
return fmt.Errorf("creating checkpoint export file %q: %w", options.TargetFile, err)
1135
defer outFile.Close()
1137
// Restrict the export file to its owner.
if err := os.Chmod(options.TargetFile, 0600); err != nil {
1141
_, err = io.Copy(outFile, input)
1146
// Best-effort cleanup of temporary files; removal errors are ignored.
for _, file := range addToTarFiles {
1147
os.Remove(filepath.Join(c.bundlePath(), file))
1150
if !options.IgnoreVolumes {
1151
os.RemoveAll(expVolDir)
1157
// checkpointRestoreSupported verifies that checkpoint/restore can be used:
// CRIU must satisfy criu.CheckForCriu for the given version, and the
// container's configured OCI runtime must report checkpoint support.
// Otherwise an error is returned.
func (c *Container) checkpointRestoreSupported(version int) error {
1158
if err := criu.CheckForCriu(version); err != nil {
1161
if !c.ociRuntime.SupportsCheckpoint() {
1162
return errors.New("configured runtime does not support checkpoint/restore")
1167
// checkpoint checkpoints the container through the OCI runtime. It returns
// the CRIU dump statistics (produced by the statistics closure below, which
// consults options.PrintStats), the duration reported by the runtime's
// CheckpointContainer call, and the result of saving the container state.
//
// NOTE(review): this listing carries bare numeric lines between statements
// and has lost lines (error guards, closing braces, list entries); confirm
// any change against the authoritative file.
func (c *Container) checkpoint(ctx context.Context, options ContainerCheckpointOptions) (*define.CRIUCheckpointRestoreStatistics, int64, error) {
1168
if err := c.checkpointRestoreSupported(criu.MinCriuVersion); err != nil {
1172
// Only a running container can be checkpointed.
if c.state.State != define.ContainerStateRunning {
1173
return nil, 0, fmt.Errorf("%q is not running, cannot checkpoint: %w", c.state.State, define.ErrCtrStateInvalid)
1176
// An auto-remove container is only accepted when the checkpoint is exported
// to options.TargetFile.
if c.AutoRemove() && options.TargetFile == "" {
1177
return nil, 0, errors.New("cannot checkpoint containers that have been started with '--rm' unless '--export' is used")
1180
if err := c.resolveCheckpointImageName(&options); err != nil {
1184
// Create dump.log in the bundle with the container's mount label.
if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "dump.log", c.MountLabel()); err != nil {
1188
// Setting CheckpointLog early in case there is a failure.
1189
c.state.CheckpointLog = path.Join(c.bundlePath(), "dump.log")
1190
c.state.CheckpointPath = c.CheckpointPath()
1192
runtimeCheckpointDuration, err := c.ociRuntime.CheckpointContainer(c, options)
1197
// Keep the content of /dev/shm directory
1198
// Only done when /dev/shm is bind-mounted from the container's own ShmDir.
if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
1199
shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
1201
shmDirTarFile, err := os.Create(shmDirTarFileFullPath)
1205
defer shmDirTarFile.Close()
1207
input, err := archive.TarWithOptions(c.config.ShmDir, &archive.TarOptions{
1208
Compression: archive.Uncompressed,
1209
IncludeSourceDir: true,
1215
if _, err = io.Copy(shmDirTarFile, input); err != nil {
1220
// Save network.status. This is needed to restore the container with
1221
// the same IP. Currently limited to one IP address in a container
1222
// with one interface.
1223
// FIXME: will this break something?
1224
if _, err := metadata.WriteJSONFile(c.getNetworkStatus(), c.bundlePath(), metadata.NetworkStatusFile); err != nil {
1228
defer c.newContainerEvent(events.Checkpoint)
1230
// There is a bug from criu: https://github.com/checkpoint-restore/criu/issues/116
1231
// We have to change the symbolic link from absolute path to relative path
1232
if options.WithPrevious {
1233
os.Remove(path.Join(c.CheckpointPath(), "parent"))
1234
if err := os.Symlink("../pre-checkpoint", path.Join(c.CheckpointPath(), "parent")); err != nil {
1239
// Export the checkpoint to a file when a target file was given.
if options.TargetFile != "" {
1240
if err := c.exportCheckpoint(options); err != nil {
1244
if err := c.createCheckpointImage(ctx, options); err != nil {
1249
logrus.Debugf("Checkpointed container %s", c.ID())
1251
// Unless the container keeps running or this is a pre-checkpoint, record
// it as stopped and checkpointed and clear the restore markers.
if !options.KeepRunning && !options.PreCheckPoint {
1252
c.state.State = define.ContainerStateStopped
1253
c.state.Checkpointed = true
1254
c.state.CheckpointedTime = time.Now()
1255
c.state.Restored = false
1256
c.state.RestoredTime = time.Time{}
1258
// Clean up Storage and Network
1259
if err := c.cleanup(ctx); err != nil {
1264
// Read the CRIU dump statistics from the bundle directory; the closure
// checks options.PrintStats first.
criuStatistics, err := func() (*define.CRIUCheckpointRestoreStatistics, error) {
1265
if !options.PrintStats {
1268
statsDirectory, err := os.Open(c.bundlePath())
1270
return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
1273
dumpStatistics, err := stats.CriuGetDumpStats(statsDirectory)
1275
return nil, fmt.Errorf("displaying checkpointing statistics not possible: %w", err)
1278
return &define.CRIUCheckpointRestoreStatistics{
1279
FreezingTime: dumpStatistics.GetFreezingTime(),
1280
FrozenTime: dumpStatistics.GetFrozenTime(),
1281
MemdumpTime: dumpStatistics.GetMemdumpTime(),
1282
MemwriteTime: dumpStatistics.GetMemwriteTime(),
1283
PagesScanned: dumpStatistics.GetPagesScanned(),
1284
PagesWritten: dumpStatistics.GetPagesWritten(),
1291
// Without Keep (and when not pre-checkpointing) delete the listed files from
// the bundle; a failed removal is only logged at debug level.
if !options.Keep && !options.PreCheckPoint {
1292
cleanup := []string{
1295
metadata.ConfigDumpFile,
1296
metadata.SpecDumpFile,
1298
for _, del := range cleanup {
1299
file := filepath.Join(c.bundlePath(), del)
1300
if err := os.Remove(file); err != nil {
1301
logrus.Debugf("Unable to remove file %s", file)
1304
// The file has been deleted. Do not mention it.
1305
c.state.CheckpointLog = ""
1308
c.state.FinishedTime = time.Now()
1309
return criuStatistics, runtimeCheckpointDuration, c.save()
1312
func (c *Container) generateContainerSpec() error {
1313
// Make sure the newly created config.json exists on disk
1315
// NewFromSpec() is deprecated according to its comment
1316
// however the recommended replace just causes a nil map panic
1317
g := generate.NewFromSpec(c.config.Spec)
1319
if err := c.saveSpec(g.Config); err != nil {
1320
return fmt.Errorf("saving imported container specification for restore failed: %w", err)
1326
// importCheckpointImage looks up the checkpoint image identified by imageID,
// mounts it, copies the checkpoint files listed below from the mount point
// into the container's bundle directory and then regenerates the container
// spec via generateContainerSpec.
//
// NOTE(review): this listing carries bare numeric lines between statements
// and has lost lines (error guards, closing braces, possibly list entries);
// confirm any change against the authoritative file.
func (c *Container) importCheckpointImage(ctx context.Context, imageID string) error {
1327
img, _, err := c.Runtime().LibimageRuntime().LookupImage(imageID, nil)
1332
mountPoint, err := img.Mount(ctx, nil, "")
1334
// An unmount failure is only logged, not returned.
if err := c.unmount(true); err != nil {
1335
logrus.Errorf("Failed to unmount container: %v", err)
1342
// Import all checkpoint files except ConfigDumpFile and SpecDumpFile. We
1343
// generate new container config files to enable specifying a new
1345
checkpoint := []string{
1347
metadata.CheckpointDirectory,
1348
metadata.CheckpointVolumesDirectory,
1349
metadata.DevShmCheckpointTar,
1350
metadata.RootFsDiffTar,
1351
metadata.DeletedFilesFile,
1352
metadata.PodOptionsFile,
1353
metadata.PodDumpFile,
1356
// Copy each entry into the bundle; a failed copy is only logged at debug
// level.
for _, name := range checkpoint {
1357
src := filepath.Join(mountPoint, name)
1358
dst := filepath.Join(c.bundlePath(), name)
1359
if err := archive.NewDefaultArchiver().CopyWithTar(src, dst); err != nil {
1360
logrus.Debugf("Can't import '%s' from checkpoint image", name)
1364
return c.generateContainerSpec()
1367
func (c *Container) importCheckpointTar(input string) error {
1368
if err := crutils.CRImportCheckpointWithoutConfig(c.bundlePath(), input); err != nil {
1372
return c.generateContainerSpec()
1375
func (c *Container) importPreCheckpoint(input string) error {
1376
archiveFile, err := os.Open(input)
1378
return fmt.Errorf("failed to open pre-checkpoint archive for import: %w", err)
1381
defer archiveFile.Close()
1383
err = archive.Untar(archiveFile, c.bundlePath(), nil)
1385
return fmt.Errorf("unpacking of pre-checkpoint archive %s failed: %w", input, err)
1390
// restore restores the container from a checkpoint. The checkpoint is taken
// from options.TargetFile (tar archive), from options.CheckpointImageID
// (checkpoint image) or, if neither is set, from the files already present
// under the container's checkpoint path. It returns the CRIU restore
// statistics (produced by the statistics closure below, which consults
// options.PrintStats), the duration reported by the runtime's
// CreateContainer call, and the result of saving the container state.
//
// NOTE(review): this listing carries bare numeric lines between statements
// and has lost lines (error guards, closing braces, list entries); confirm
// any change against the authoritative file.
func (c *Container) restore(ctx context.Context, options ContainerCheckpointOptions) (criuStatistics *define.CRIUCheckpointRestoreStatistics, runtimeRestoreDuration int64, retErr error) {
1391
// Restoring into a Pod uses criu.PodCriuVersion as the minimum CRIU
// version, otherwise criu.MinCriuVersion.
minCriuVersion := func() int {
1392
if options.Pod == "" {
1393
return criu.MinCriuVersion
1395
return criu.PodCriuVersion
1397
if err := c.checkpointRestoreSupported(minCriuVersion); err != nil {
1401
if options.Pod != "" && !crutils.CRRuntimeSupportsPodCheckpointRestore(c.ociRuntime.Path()) {
1402
return nil, 0, fmt.Errorf("runtime %s does not support pod restore", c.ociRuntime.Path())
1405
// Only configured or exited containers may be restored.
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateExited) {
1406
return nil, 0, fmt.Errorf("container %s is running or paused, cannot restore: %w", c.ID(), define.ErrCtrStateInvalid)
1409
if options.ImportPrevious != "" {
1410
if err := c.importPreCheckpoint(options.ImportPrevious); err != nil {
1415
// Import the checkpoint from a tar archive or from a checkpoint image.
if options.TargetFile != "" {
1416
if err := c.importCheckpointTar(options.TargetFile); err != nil {
1419
} else if options.CheckpointImageID != "" {
1420
if err := c.importCheckpointImage(ctx, options.CheckpointImageID); err != nil {
1425
// Let's try to stat() CRIU's inventory file. If it does not exist, it makes
1426
// no sense to try a restore. This is a minimal check if a checkpoint exists.
1427
if err := fileutils.Exists(filepath.Join(c.CheckpointPath(), "inventory.img")); errors.Is(err, fs.ErrNotExist) {
1428
return nil, 0, fmt.Errorf("a complete checkpoint for this container cannot be found, cannot restore: %w", err)
1431
if err := crutils.CRCreateFileWithLabel(c.bundlePath(), "restore.log", c.MountLabel()); err != nil {
1435
// Setting RestoreLog early in case there is a failure.
1436
c.state.RestoreLog = path.Join(c.bundlePath(), "restore.log")
1437
c.state.CheckpointPath = c.CheckpointPath()
1439
// Drop the configured static IPs and/or MAC from every network when the
// caller asked to ignore them, and persist the change via NetworkModify.
if options.IgnoreStaticIP || options.IgnoreStaticMAC {
1440
networks, err := c.networks()
1445
for net, opts := range networks {
1446
if options.IgnoreStaticIP {
1447
opts.StaticIPs = nil
1449
if options.IgnoreStaticMAC {
1450
opts.StaticMAC = nil
1452
if err := c.runtime.state.NetworkModify(c, net, opts); err != nil {
1453
return nil, 0, fmt.Errorf("failed to rewrite network config: %w", err)
1458
// Read network configuration from checkpoint
1459
var netStatus map[string]types.StatusBlock
1460
_, err := metadata.ReadJSONFile(&netStatus, c.bundlePath(), metadata.NetworkStatusFile)
1462
logrus.Infof("Failed to unmarshal network status, cannot restore the same ip/mac: %v", err)
1464
// If the restored container should get a new name, the IP address of
1465
// the container will not be restored. This assumes that if a new name is
1466
// specified, the container is restored multiple times.
1467
// TODO: This implicit restoring with or without IP depending on an
1468
// unrelated restore parameter (--name) does not seem like the
1470
if err == nil && options.Name == "" && (!options.IgnoreStaticIP || !options.IgnoreStaticMAC) {
1471
// The file with the network.status does exist. Let's restore the
1472
// container with the same networks settings as during checkpointing.
1473
networkOpts, err := c.networks()
1478
netOpts := make(map[string]types.PerNetworkOptions, len(netStatus))
1479
for network, perNetOpts := range networkOpts {
1480
// unset mac and ips before we start adding the ones from the status
1481
perNetOpts.StaticMAC = nil
1482
perNetOpts.StaticIPs = nil
1483
for name, netInt := range netStatus[network].Interfaces {
1484
perNetOpts.InterfaceName = name
1485
if !options.IgnoreStaticMAC {
1486
perNetOpts.StaticMAC = netInt.MacAddress
1488
if !options.IgnoreStaticIP {
1489
for _, netAddress := range netInt.Subnets {
1490
perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
1493
// Normally interfaces have a length of 1, only for some special cni configs we could get more.
1494
// For now just use the first interface to get the ips this should be good enough for most cases.
1497
netOpts[network] = perNetOpts
1499
c.perNetworkOpts = netOpts
1504
if err := c.cleanup(ctx); err != nil {
1505
logrus.Errorf("Cleaning up container %s: %v", c.ID(), err)
1510
if err := c.prepare(); err != nil {
1515
// Load the OCI spec from the bundle's config.json.
jsonPath := filepath.Join(c.bundlePath(), "config.json")
1516
logrus.Debugf("generate.NewFromFile at %v", jsonPath)
1517
g, err := generate.NewFromFile(jsonPath)
1519
logrus.Debugf("generate.NewFromFile failed with %v", err)
1523
// Restoring from an import means that we are doing migration
1524
if options.TargetFile != "" || options.CheckpointImageID != "" {
1525
g.SetRootPath(c.state.Mountpoint)
1528
// We want to have the same network namespace as before.
1529
if err := c.addNetworkNamespace(&g); err != nil {
1533
if options.Pod != "" {
1534
// Running in a Pod means that we have to change all namespace settings to
1535
// the ones from the infrastructure container.
1536
pod, err := c.runtime.LookupPod(options.Pod)
1538
return nil, 0, fmt.Errorf("pod %q cannot be retrieved: %w", options.Pod, err)
1541
infraContainer, err := pod.InfraContainer()
1543
return nil, 0, fmt.Errorf("cannot retrieved infra container from pod %q: %w", options.Pod, err)
1546
// The infra container's lock is held while syncing and, if it is not
// running, starting it; it is released on each visible return path.
infraContainer.lock.Lock()
1547
if err := infraContainer.syncContainer(); err != nil {
1548
infraContainer.lock.Unlock()
1549
return nil, 0, fmt.Errorf("syncing infrastructure container %s status: %w", infraContainer.ID(), err)
1551
if infraContainer.state.State != define.ContainerStateRunning {
1552
if err := infraContainer.initAndStart(ctx); err != nil {
1553
infraContainer.lock.Unlock()
1554
return nil, 0, fmt.Errorf("starting infrastructure container %s status: %w", infraContainer.ID(), err)
1557
infraContainer.lock.Unlock()
1559
// For every namespace this container shares with another container,
// point the spec at the infra container's namespace path.
if c.config.IPCNsCtr != "" {
1560
nsPath, err := infraContainer.namespacePath(IPCNS)
1562
return nil, 0, fmt.Errorf("cannot retrieve IPC namespace path for Pod %q: %w", options.Pod, err)
1564
if err := g.AddOrReplaceLinuxNamespace(string(spec.IPCNamespace), nsPath); err != nil {
1569
if c.config.NetNsCtr != "" {
1570
nsPath, err := infraContainer.namespacePath(NetNS)
1572
return nil, 0, fmt.Errorf("cannot retrieve network namespace path for Pod %q: %w", options.Pod, err)
1574
if err := g.AddOrReplaceLinuxNamespace(string(spec.NetworkNamespace), nsPath); err != nil {
1579
if c.config.PIDNsCtr != "" {
1580
nsPath, err := infraContainer.namespacePath(PIDNS)
1582
return nil, 0, fmt.Errorf("cannot retrieve PID namespace path for Pod %q: %w", options.Pod, err)
1584
if err := g.AddOrReplaceLinuxNamespace(string(spec.PIDNamespace), nsPath); err != nil {
1589
if c.config.UTSNsCtr != "" {
1590
nsPath, err := infraContainer.namespacePath(UTSNS)
1592
return nil, 0, fmt.Errorf("cannot retrieve UTS namespace path for Pod %q: %w", options.Pod, err)
1594
if err := g.AddOrReplaceLinuxNamespace(string(spec.UTSNamespace), nsPath); err != nil {
1599
if c.config.CgroupNsCtr != "" {
1600
nsPath, err := infraContainer.namespacePath(CgroupNS)
1602
return nil, 0, fmt.Errorf("cannot retrieve Cgroup namespace path for Pod %q: %w", options.Pod, err)
1604
if err := g.AddOrReplaceLinuxNamespace(string(spec.CgroupNamespace), nsPath); err != nil {
1610
if err := c.makeBindMounts(); err != nil {
1614
// For imported checkpoints, add every bind mount from the container state
// to the spec unless a mount for that destination already exists.
if options.TargetFile != "" || options.CheckpointImageID != "" {
1615
for dstPath, srcPath := range c.state.BindMounts {
1616
newMount := spec.Mount{
1617
Type: define.TypeBind,
1619
Destination: dstPath,
1620
Options: []string{define.TypeBind, "private"},
1622
if c.IsReadOnly() && (dstPath != "/dev/shm" || !c.config.ReadWriteTmpfs) {
1623
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
1625
if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
1626
newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
1628
if !MountExists(g.Mounts(), dstPath) {
1629
g.AddMount(newMount)
1634
// Restore /dev/shm content
1635
if c.config.ShmDir != "" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
1636
shmDirTarFileFullPath := filepath.Join(c.bundlePath(), metadata.DevShmCheckpointTar)
1637
if err := fileutils.Exists(shmDirTarFileFullPath); err != nil {
1638
logrus.Debug("Container checkpoint doesn't contain dev/shm: ", err.Error())
1640
shmDirTarFile, err := os.Open(shmDirTarFileFullPath)
1644
defer shmDirTarFile.Close()
1646
if err := archive.UntarUncompressed(shmDirTarFile, c.config.ShmDir, nil); err != nil {
1652
// Cleanup for a working restore.
1653
if err := c.removeConmonFiles(); err != nil {
1657
// Save the OCI spec to disk
1658
if err := c.saveSpec(g.Config); err != nil {
1662
// When restoring from an imported archive, allow restoring the content of volumes.
1663
// Volumes are created in setupContainer()
1664
if !options.IgnoreVolumes && (options.TargetFile != "" || options.CheckpointImageID != "") {
1665
for _, v := range c.config.NamedVolumes {
1666
volumeFilePath := filepath.Join(c.bundlePath(), metadata.CheckpointVolumesDirectory, v.Name+".tar")
1668
volumeFile, err := os.Open(volumeFilePath)
1670
return nil, 0, fmt.Errorf("failed to open volume file %s: %w", volumeFilePath, err)
1672
// NOTE(review): defer inside the volume loop; these closes run when
// restore returns, not at the end of each iteration.
defer volumeFile.Close()
1674
volume, err := c.runtime.GetVolume(v.Name)
1676
return nil, 0, fmt.Errorf("failed to retrieve volume %s: %w", v.Name, err)
1679
mountPoint, err := volume.MountPoint()
1683
// NOTE(review): if the MountPoint error is handled before this check,
// err is nil here and %w would wrap a nil error - confirm.
if mountPoint == "" {
1684
return nil, 0, fmt.Errorf("unable to import volume %s as it is not mounted: %w", volume.Name(), err)
1686
if err := archive.UntarUncompressed(volumeFile, mountPoint, nil); err != nil {
1687
return nil, 0, fmt.Errorf("failed to extract volume %s to %s: %w", volumeFilePath, mountPoint, err)
1692
// Before actually restarting the container, apply the root file-system changes
1693
if !options.IgnoreRootfs {
1694
if err := crutils.CRApplyRootFsDiffTar(c.bundlePath(), c.state.Mountpoint); err != nil {
1698
if err := crutils.CRRemoveDeletedFiles(c.ID(), c.bundlePath(), c.state.Mountpoint); err != nil {
1703
// Hand over to the OCI runtime, passing the checkpoint options.
runtimeRestoreDuration, err = c.ociRuntime.CreateContainer(c, &options)
1708
// Read the CRIU restore statistics from the bundle directory; the closure
// checks options.PrintStats first.
criuStatistics, err = func() (*define.CRIUCheckpointRestoreStatistics, error) {
1709
if !options.PrintStats {
1712
statsDirectory, err := os.Open(c.bundlePath())
1714
return nil, fmt.Errorf("not able to open %q: %w", c.bundlePath(), err)
1717
restoreStatistics, err := stats.CriuGetRestoreStats(statsDirectory)
1719
return nil, fmt.Errorf("displaying restore statistics not possible: %w", err)
1722
return &define.CRIUCheckpointRestoreStatistics{
1723
PagesCompared: restoreStatistics.GetPagesCompared(),
1724
PagesSkippedCow: restoreStatistics.GetPagesSkippedCow(),
1725
ForkingTime: restoreStatistics.GetForkingTime(),
1726
RestoreTime: restoreStatistics.GetRestoreTime(),
1727
PagesRestored: restoreStatistics.GetPagesRestored(),
1734
logrus.Debugf("Restored container %s", c.ID())
1736
// Record the container as running and restored, clearing the checkpoint
// markers.
c.state.State = define.ContainerStateRunning
1737
c.state.Checkpointed = false
1738
c.state.Restored = true
1739
c.state.CheckpointedTime = time.Time{}
1740
c.state.RestoredTime = time.Now()
1743
// Delete all checkpoint related files. At this point, in theory, all files
1744
// should exist. Still ignoring errors for now as the container should be
1745
// restored and running. Not erroring out just because some cleanup operation
1746
// failed. Starting with the checkpoint directory
1747
err = os.RemoveAll(c.CheckpointPath())
1749
logrus.Debugf("Non-fatal: removal of checkpoint directory (%s) failed: %v", c.CheckpointPath(), err)
1751
c.state.CheckpointPath = ""
1752
err = os.RemoveAll(c.PreCheckPointPath())
1754
logrus.Debugf("Non-fatal: removal of pre-checkpoint directory (%s) failed: %v", c.PreCheckPointPath(), err)
1756
err = os.RemoveAll(c.CheckpointVolumesPath())
1758
logrus.Debugf("Non-fatal: removal of checkpoint volumes directory (%s) failed: %v", c.CheckpointVolumesPath(), err)
1760
// Remaining single files in the bundle that belong to the checkpoint.
cleanup := [...]string{
1765
metadata.DevShmCheckpointTar,
1766
metadata.NetworkStatusFile,
1767
metadata.RootFsDiffTar,
1768
metadata.DeletedFilesFile,
1770
for _, del := range cleanup {
1771
file := filepath.Join(c.bundlePath(), del)
1772
err = os.Remove(file)
1774
logrus.Debugf("Non-fatal: removal of checkpoint file (%s) failed: %v", file, err)
1777
c.state.CheckpointLog = ""
1778
c.state.RestoreLog = ""
1781
return criuStatistics, runtimeRestoreDuration, c.save()
1784
// Retrieves a container's "root" net namespace container dependency.
1785
func (c *Container) getRootNetNsDepCtr() (depCtr *Container, err error) {
1786
containersVisited := map[string]int{c.config.ID: 1}
1787
nextCtr := c.config.NetNsCtr
1789
// Make sure we aren't in a loop
1790
if _, visited := containersVisited[nextCtr]; visited {
1791
return nil, errors.New("loop encountered while determining net namespace container")
1793
containersVisited[nextCtr] = 1
1795
depCtr, err = c.runtime.state.Container(nextCtr)
1797
return nil, fmt.Errorf("fetching dependency %s of container %s: %w", c.config.NetNsCtr, c.ID(), err)
1799
// This should never happen without an error
1803
nextCtr = depCtr.config.NetNsCtr
1807
return nil, errors.New("unexpected error depCtr is nil without reported error from runtime state")
1812
// Ensure standard bind mounts are mounted into all root directories (including chroot directories)
1813
func (c *Container) mountIntoRootDirs(mountName string, mountPath string) error {
1814
c.state.BindMounts[mountName] = mountPath
1816
for _, chrootDir := range c.config.ChrootDirs {
1817
c.state.BindMounts[filepath.Join(chrootDir, mountName)] = mountPath
1823
// Make standard bind mounts to include in the container
//
// makeBindMounts fills c.state.BindMounts with the files that are bind
// mounted into the container: resolv.conf and hosts (own copies or the ones
// of the network namespace dependency container), /dev/shm, passwd and
// group, .containerenv, subscription mounts and secrets. It finishes by
// delegating to makePlatformBindMounts.
//
// NOTE(review): this listing carries bare numeric lines between statements
// and has lost lines (error guards, closing braces, switch/else headers);
// confirm any change against the authoritative file.
1824
func (c *Container) makeBindMounts() error {
1825
if err := idtools.SafeChown(c.state.RunDir, c.RootUID(), c.RootGID()); err != nil {
1826
return fmt.Errorf("cannot chown run directory: %w", err)
1829
// Writing to a nil map panics, so make sure the map exists first.
if c.state.BindMounts == nil {
1830
c.state.BindMounts = make(map[string]string)
1832
netDisabled, err := c.NetworkDisabled()
1838
// If /etc/resolv.conf and /etc/hosts exist, delete them so we
1839
// will recreate. Only do this if we aren't sharing them with
1840
// another container.
1841
if c.config.NetNsCtr == "" {
1842
if resolvePath, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]; ok {
1843
if err := os.Remove(resolvePath); err != nil && !os.IsNotExist(err) {
1844
return fmt.Errorf("container %s: %w", c.ID(), err)
1846
delete(c.state.BindMounts, resolvconf.DefaultResolvConf)
1848
if hostsPath, ok := c.state.BindMounts[config.DefaultHostsFile]; ok {
1849
if err := os.Remove(hostsPath); err != nil && !os.IsNotExist(err) {
1850
return fmt.Errorf("container %s: %w", c.ID(), err)
1852
delete(c.state.BindMounts, config.DefaultHostsFile)
1856
if c.config.NetNsCtr != "" && (!c.config.UseImageResolvConf || !c.config.UseImageHosts) {
1857
// We share a net namespace.
1858
// We want /etc/resolv.conf and /etc/hosts from the
1859
// other container. Unless we're not creating both of
1861
depCtr, err := c.getRootNetNsDepCtr()
1863
return fmt.Errorf("fetching network namespace dependency container for container %s: %w", c.ID(), err)
1866
// We need that container's bind mounts
1867
bindMounts, err := depCtr.BindMounts()
1869
return fmt.Errorf("fetching bind mounts from dependency %s of container %s: %w", depCtr.ID(), c.ID(), err)
1872
// The other container may not have a resolv.conf or /etc/hosts
1873
// If it doesn't, don't copy them
1874
resolvPath, exists := bindMounts[resolvconf.DefaultResolvConf]
1875
if !c.config.UseImageResolvConf && exists {
1876
err := c.mountIntoRootDirs(resolvconf.DefaultResolvConf, resolvPath)
1879
return fmt.Errorf("assigning mounts to container %s: %w", c.ID(), err)
1883
// check if dependency container has an /etc/hosts file.
1884
// It may not have one, so only use it if it does.
1885
hostsPath, exists := bindMounts[config.DefaultHostsFile]
1886
if !c.config.UseImageHosts && exists {
1887
// we cannot use the dependency container lock due ABBA deadlocks in cleanup()
1888
lock, err := lockfile.GetLockFile(hostsPath)
1890
return fmt.Errorf("failed to lock hosts file: %w", err)
1894
// add the newly added container to the hosts file
1895
// we always use 127.0.0.1 as ip since they have the same netns
1896
err = etchosts.Add(hostsPath, getLocalhostHostEntry(c))
1899
return fmt.Errorf("creating hosts file for container %s which depends on container %s: %w", c.ID(), depCtr.ID(), err)
1902
// finally, save it in the new container
1903
err = c.mountIntoRootDirs(config.DefaultHostsFile, hostsPath)
1905
return fmt.Errorf("assigning mounts to container %s: %w", c.ID(), err)
1909
// The shared files live with the dependency container; they are passed
// to makeAccessible with this container's root UID/GID when the current
// user is not mapped in the container.
if !hasCurrentUserMapped(c) {
1910
if err := makeAccessible(resolvPath, c.RootUID(), c.RootGID()); err != nil {
1913
if err := makeAccessible(hostsPath, c.RootUID(), c.RootGID()); err != nil {
1918
if !c.config.UseImageResolvConf {
1919
if err := c.createResolvConf(); err != nil {
1920
return fmt.Errorf("creating resolv.conf for container %s: %w", c.ID(), err)
1924
if !c.config.UseImageHosts {
1925
if err := c.createHostsFile(); err != nil {
1926
return fmt.Errorf("creating hosts file for container %s: %w", c.ID(), err)
1931
// Relabel the hosts and resolv.conf files with the container's mount label.
if c.state.BindMounts[config.DefaultHostsFile] != "" {
1932
if err := c.relabel(c.state.BindMounts[config.DefaultHostsFile], c.config.MountLabel, true); err != nil {
1937
if c.state.BindMounts[resolvconf.DefaultResolvConf] != "" {
1938
if err := c.relabel(c.state.BindMounts[resolvconf.DefaultResolvConf], c.config.MountLabel, true); err != nil {
1942
} else if !c.config.UseImageHosts && c.state.BindMounts[config.DefaultHostsFile] == "" {
1943
if err := c.createHostsFile(); err != nil {
1944
return fmt.Errorf("creating hosts file for container %s: %w", c.ID(), err)
1948
if c.config.ShmDir != "" {
1949
// If ShmDir has a value SHM is always added when we mount the container
1950
c.state.BindMounts["/dev/shm"] = c.config.ShmDir
1953
// Passwd generation is on when unset (nil) or explicitly true.
if c.config.Passwd == nil || *c.config.Passwd {
1954
newPasswd, newGroup, err := c.generatePasswdAndGroup()
1956
return fmt.Errorf("creating temporary passwd file for container %s: %w", c.ID(), err)
1958
if newPasswd != "" {
1960
// If it already exists, delete so we can recreate
1961
delete(c.state.BindMounts, "/etc/passwd")
1962
c.state.BindMounts["/etc/passwd"] = newPasswd
1966
// If it already exists, delete so we can recreate
1967
delete(c.state.BindMounts, "/etc/group")
1968
c.state.BindMounts["/etc/group"] = newGroup
1972
runPath, err := c.getPlatformRunPath()
1974
return fmt.Errorf("cannot determine run directory for container: %w", err)
1976
containerenvPath := filepath.Join(runPath, ".containerenv")
1978
_, hasRunContainerenv := c.state.BindMounts[containerenvPath]
1979
if !hasRunContainerenv {
1981
// check in the spec mounts
1982
for _, m := range c.config.Spec.Mounts {
1984
case m.Destination == containerenvPath:
1985
hasRunContainerenv = true
1987
case m.Destination == runPath && m.Type != define.TypeTmpfs:
1988
hasRunContainerenv = true
1994
// Make .containerenv if it does not exist
1995
if !hasRunContainerenv {
1996
containerenv := c.runtime.graphRootMountedFlag(c.config.Spec.Mounts)
1998
if rootless.IsRootless() {
2001
imageID, imageName := c.Image()
2004
// Populate the .containerenv with container information
2005
containerenv = fmt.Sprintf(`engine="podman-%s"
2011
%s`, version.Version.String(), c.Name(), c.ID(), imageName, imageID, isRootless, containerenv)
2013
containerenvHostPath, err := c.writeStringToRundir(".containerenv", containerenv)
2015
return fmt.Errorf("creating containerenv file for container %s: %w", c.ID(), err)
2017
c.state.BindMounts[containerenvPath] = containerenvHostPath
2020
// Add Subscription Mounts
2021
subscriptionMounts := subscriptions.MountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.Containers.DefaultMountsFile, c.state.Mountpoint, c.RootUID(), c.RootGID(), rootless.IsRootless(), false)
2022
// Existing bind mounts win over subscription mounts for the same destination.
for _, mount := range subscriptionMounts {
2023
if _, ok := c.state.BindMounts[mount.Destination]; !ok {
2024
c.state.BindMounts[mount.Destination] = mount.Source
2028
// Secrets are mounted by getting the secret data from the secrets manager,
2029
// copying the data into the container's static dir,
2030
// then mounting the copied dir into /run/secrets.
2031
// The secrets mounting must come after subscription mounts, since subscription mounts
2032
// creates the /run/secrets dir in the container where we mount as well.
2033
if len(c.Secrets()) > 0 {
2034
// create /run/secrets if subscriptions did not create
2035
if err := c.createSecretMountDir(runPath); err != nil {
2036
return fmt.Errorf("creating secrets mount: %w", err)
2038
for _, secret := range c.Secrets() {
2039
secretFileName := secret.Name
2040
base := filepath.Join(runPath, "secrets")
2041
if secret.Target != "" {
2042
secretFileName = secret.Target
2043
// If absolute path for target given remove base.
2044
if filepath.IsAbs(secretFileName) {
2048
src := filepath.Join(c.config.SecretsPath, secret.Name)
2049
dest := filepath.Join(base, secretFileName)
2050
c.state.BindMounts[dest] = src
2054
return c.makePlatformBindMounts()
2057
// createResolvConf create the resolv.conf file and bind mount it
2058
func (c *Container) createResolvConf() error {
2059
destPath := filepath.Join(c.state.RunDir, "resolv.conf")
2060
f, err := os.Create(destPath)
2065
return c.bindMountRootFile(destPath, resolvconf.DefaultResolvConf)
2068
// addResolvConf add resolv.conf entries
//
// It looks up the resolv.conf bind mount of the container and builds its
// content through resolvconf.New from nameservers, search domains and
// options taken from the network status, the container configuration and
// the containers.conf settings of the runtime.
//
// NOTE(review): this listing carries bare numeric lines between statements
// and has lost lines (error guards, closing braces, var/else headers, some
// resolvconf.Params fields); confirm any change against the authoritative
// file.
2069
func (c *Container) addResolvConf() error {
2070
destPath, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]
2072
// no resolv.conf mount, do nothing
2077
networkNameServers []string
2078
networkSearchDomains []string
2081
// Collect DNS servers and search domains reported by each network.
netStatus := c.getNetworkStatus()
2082
for _, status := range netStatus {
2083
if status.DNSServerIPs != nil {
2084
for _, nsIP := range status.DNSServerIPs {
2085
networkNameServers = append(networkNameServers, nsIP.String())
2087
logrus.Debugf("Adding nameserver(s) from network status of '%q'", status.DNSServerIPs)
2089
if status.DNSSearchDomains != nil {
2090
networkSearchDomains = append(networkSearchDomains, status.DNSSearchDomains...)
2091
logrus.Debugf("Adding search domain(s) from network status of '%q'", status.DNSSearchDomains)
2095
ipv6 := c.checkForIPv6(netStatus)
2097
networkBackend := c.runtime.config.Network.NetworkBackend
2098
nameservers := make([]string, 0, len(c.runtime.config.Containers.DNSServers.Get())+len(c.config.DNSServer))
2100
// If NetworkBackend is `netavark` do not populate `/etc/resolv.conf`
2101
// with custom dns server since after https://github.com/containers/netavark/pull/452
2102
// netavark will always set required `nameservers` in StatusBlock and libpod
2103
// will correctly populate `networkNameServers`. Also see https://github.com/containers/podman/issues/16172
2105
// Exception: Populate `/etc/resolv.conf` if container is not connected to any network
2106
// with dns enabled then we do not get any nameservers back.
2107
if networkBackend != string(types.Netavark) || len(networkNameServers) == 0 {
2108
nameservers = append(nameservers, c.runtime.config.Containers.DNSServers.Get()...)
2109
for _, ip := range c.config.DNSServer {
2110
nameservers = append(nameservers, ip.String())
2113
// If the user provided dns, it trumps all; then dns masq; then resolv.conf
2114
keepHostServers := false
2115
if len(nameservers) == 0 {
2116
// when no network name servers or not netavark use host servers
2117
// for aardvark dns we only want our single server in there
2118
if len(networkNameServers) == 0 || networkBackend != string(types.Netavark) {
2119
keepHostServers = true
2121
// first add the nameservers from the networks status
2122
nameservers = networkNameServers
2124
// pasta and slirp4netns have a built in DNS forwarder.
2125
nameservers = c.addSpecialDNS(nameservers)
2128
// Set DNS search domains
2129
search := networkSearchDomains
2131
// Custom search domains (containers.conf first, then the container config)
// replace the ones from the network status.
if len(c.config.DNSSearch) > 0 || len(c.runtime.config.Containers.DNSSearches.Get()) > 0 {
2132
customSearch := make([]string, 0, len(c.config.DNSSearch)+len(c.runtime.config.Containers.DNSSearches.Get()))
2133
customSearch = append(customSearch, c.runtime.config.Containers.DNSSearches.Get()...)
2134
customSearch = append(customSearch, c.config.DNSSearch...)
2135
search = customSearch
2138
// DNS options: containers.conf entries first, then the container's own.
options := make([]string, 0, len(c.config.DNSOption)+len(c.runtime.config.Containers.DNSOptions.Get()))
2139
options = append(options, c.runtime.config.Containers.DNSOptions.Get()...)
2140
options = append(options, c.config.DNSOption...)
2142
// The spec's Linux section may be nil; namespaces then stays nil.
var namespaces []spec.LinuxNamespace
2143
if c.config.Spec.Linux != nil {
2144
namespaces = c.config.Spec.Linux.Namespaces
2147
if err := resolvconf.New(&resolvconf.Params{
2149
KeepHostServers: keepHostServers,
2150
Nameservers: nameservers,
2151
Namespaces: namespaces,
2156
return fmt.Errorf("building resolv.conf for container %s: %w", c.ID(), err)
2162
// Check if a container uses IPv6.
2163
func (c *Container) checkForIPv6(netStatus map[string]types.StatusBlock) bool {
2164
for _, status := range netStatus {
2165
for _, netInt := range status.Interfaces {
2166
for _, netAddress := range netInt.Subnets {
2167
// Note: only using To16() does not work since it also returns a valid ip for ipv4
2168
if netAddress.IPNet.IP.To4() == nil && netAddress.IPNet.IP.To16() != nil {
2175
if c.pastaResult != nil {
2176
return c.pastaResult.IPv6
2179
return c.isSlirp4netnsIPv6()
2182
// Add a new nameserver to the container's resolv.conf, ensuring that it is the
2183
// first nameserver present.
2184
// Usable only with running containers.
2185
func (c *Container) addNameserver(ips []string) error {
2186
// Take no action if container is not running.
2187
if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
2191
// Do we have a resolv.conf at all?
2192
path, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]
2197
if err := resolvconf.Add(path, ips); err != nil {
2198
return fmt.Errorf("adding new nameserver to container %s resolv.conf: %w", c.ID(), err)
2204
// Remove an entry from the existing resolv.conf of the container.
2205
// Usable only with running containers.
2206
func (c *Container) removeNameserver(ips []string) error {
2207
// Take no action if container is not running.
2208
if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
2212
// Do we have a resolv.conf at all?
2213
path, ok := c.state.BindMounts[resolvconf.DefaultResolvConf]
2218
if err := resolvconf.Remove(path, ips); err != nil {
2219
return fmt.Errorf("removing nameservers from container %s resolv.conf: %w", c.ID(), err)
2225
func getLocalhostHostEntry(c *Container) etchosts.HostEntries {
2226
return etchosts.HostEntries{{IP: "127.0.0.1", Names: []string{c.Hostname(), c.config.Name}}}
2229
// getHostsEntries returns the container ip host entries for the correct netmode
2230
// Every entry is created for the container's hostname and configured name. The
// IP comes from the network status (bridge), the first pasta address (pasta) or
// getSlirp4netnsIP (slirp4netns).
func (c *Container) getHostsEntries() (etchosts.HostEntries, error) {
2231
var entries etchosts.HostEntries
2232
names := []string{c.Hostname(), c.config.Name}
2234
case c.config.NetMode.IsBridge():
2235
entries = etchosts.GetNetworkHostEntries(c.state.NetworkStatus, names...)
2236
case c.config.NetMode.IsPasta():
2237
// The pasta address list should never be empty here, but check anyway so that
// indexing the first element below cannot panic.
2238
if len(c.pastaResult.IPAddresses) > 0 {
2239
entries = etchosts.HostEntries{{IP: c.pastaResult.IPAddresses[0].String(), Names: names}}
2241
case c.config.NetMode.IsSlirp4netns():
2242
ip, err := getSlirp4netnsIP(c.slirp4netnsSubnet)
2246
entries = etchosts.HostEntries{{IP: ip.String(), Names: names}}
2249
// NOTE(review): presumably the fallback for the remaining network modes - confirm.
entries = etchosts.HostEntries{{IP: "127.0.0.1", Names: names}}
2255
// createHostsFile creates a file named "hosts" in the container's run directory
// and hands it to bindMountRootFile so that it is mounted into the container at
// config.DefaultHostsFile.
func (c *Container) createHostsFile() error {
2256
targetFile := filepath.Join(c.state.RunDir, "hosts")
2257
f, err := os.Create(targetFile)
2262
return c.bindMountRootFile(targetFile, config.DefaultHostsFile)
2265
// addHosts fills the hosts file registered in the bind mount map under
// config.DefaultHostsFile. The content is assembled by etchosts.New from the
// base hosts file, the configured extra hosts (HostAdd), the container's own IP
// entries and the HostContainersInternalIP value.
func (c *Container) addHosts() error {
2266
targetFile, ok := c.state.BindMounts[config.DefaultHostsFile]
2268
// no host file nothing to do
2271
containerIPsEntries, err := c.getHostsEntries()
2273
return fmt.Errorf("failed to get container ip host entries: %w", err)
2276
// Consider container level BaseHostsFile configuration first.
2277
// If it is empty, fallback to containers.conf level configuration.
2278
baseHostsFileConf := c.config.BaseHostsFile
2279
if baseHostsFileConf == "" {
2280
baseHostsFileConf = c.runtime.config.Containers.BaseHostsFile
2282
baseHostFile, err := etchosts.GetBaseHostFile(baseHostsFileConf, c.state.Mountpoint)
2287
// When a pasta result is present, its addresses are passed as the exclusion
// list to GetHostContainersInternalIPExcluding below.
var exclude []net.IP
2288
if c.pastaResult != nil {
2289
exclude = c.pastaResult.IPAddresses
2292
return etchosts.New(&etchosts.Params{
2293
BaseFile: baseHostFile,
2294
ExtraHosts: c.config.HostAdd,
2295
ContainerIPs: containerIPsEntries,
2296
HostContainersInternalIP: etchosts.GetHostContainersInternalIPExcluding(
2297
c.runtime.config, c.state.NetworkStatus, c.runtime.network, exclude),
2298
TargetFile: targetFile,
2302
// bindMountRootFile will chown and relabel the source file to make it usable in the container.
2303
// It will also add the path to the container bind mount map.
2304
// source is the path on the host, dest is the path in the container.
2305
func (c *Container) bindMountRootFile(source, dest string) error {
2306
// Hand the file to the container's root user and group.
if err := idtools.SafeChown(source, c.RootUID(), c.RootGID()); err != nil {
2309
// Apply the container's mount label; the final argument is relabel's "shared" flag.
if err := c.relabel(source, c.MountLabel(), false); err != nil {
2313
return c.mountIntoRootDirs(dest, source)
2316
// generateGroupEntry generates an entry or entries into /etc/group as
2317
// required by container configuration.
2318
// Generally speaking, we will make an entry under two circumstances:
2319
// 1. The container is started as a specific user:group, and that group is both
2320
// numeric, and does not already exist in /etc/group.
2321
// 2. It is requested that Libpod add the group that launched Podman to
2322
// /etc/group via AddCurrentUserPasswdEntry (though this does not trigger if
2323
// the group in question already exists in /etc/group).
2325
// Returns group entry (as a string that can be appended to /etc/group) and any
2326
// error that occurred.
2327
func (c *Container) generateGroupEntry() (string, error) {
2330
// Things we *can't* handle: adding the user we added in
2331
// generatePasswdEntry to any *existing* groups.
2333
if c.config.AddCurrentUserPasswdEntry {
2334
entry, gid, err := c.generateCurrentUserGroupEntry()
2338
groupString += entry
2341
// A configured GroupEntry template also triggers this path, even without a User.
if c.config.User != "" || c.config.GroupEntry != "" {
2342
// addedGID is compared against the parsed GID inside generateUserGroupEntry.
entry, err := c.generateUserGroupEntry(addedGID)
2346
groupString += entry
2349
return groupString, nil
2352
// Make an entry in /etc/group for the group of the user running podman iff we
2354
// generateCurrentUserGroupEntry returns the /etc/group line together with the
// GID it was built for. The line has the form "name:x:gid:username" followed by
// a newline.
func (c *Container) generateCurrentUserGroupEntry() (string, int, error) {
2355
gid := rootless.GetRootlessGID()
2360
// Resolve the GID to a group via the host's os/user lookup.
g, err := user.LookupGroupId(strconv.Itoa(gid))
2362
return "", 0, fmt.Errorf("failed to get current group: %w", err)
2365
// Look up group name to see if it exists in the image.
2366
_, err = lookup.GetGroup(c.state.Mountpoint, g.Name)
2367
if err != runcuser.ErrNoGroupEntries {
2371
// Look up GID to see if it exists in the image.
2372
_, err = lookup.GetGroup(c.state.Mountpoint, g.Gid)
2373
if err != runcuser.ErrNoGroupEntries {
2377
// We need to get the username of the rootless user so we can add it to
2380
uid := rootless.GetRootlessUID()
2382
u, err := user.LookupId(strconv.Itoa(uid))
2384
return "", 0, fmt.Errorf("failed to get current user to make group entry: %w", err)
2386
username = u.Username
2390
// The username fills the last (member list) field of the group line.
return fmt.Sprintf("%s:x:%s:%s\n", g.Name, g.Gid, username), gid, nil
2393
// Make an entry in /etc/group for the group the container was specified to run
2395
// generateUserGroupEntry takes addedGID, a GID that is compared with the GID
// parsed from the configured user spec. It returns the group line (either
// rendered from the GroupEntry template or in the form "gid:x:gid:user") and
// any error.
func (c *Container) generateUserGroupEntry(addedGID int) (string, error) {
2396
if c.config.User == "" && c.config.GroupEntry == "" {
2400
// The user spec is "user[:group]"; without a group part the user part is
// used as the group.
splitUser := strings.SplitN(c.config.User, ":", 2)
2401
group := splitUser[0]
2402
if len(splitUser) > 1 {
2403
group = splitUser[1]
2406
// The group must parse as an unsigned 32-bit decimal number.
gid, err := strconv.ParseUint(group, 10, 32)
2408
return "", nil //nolint: nilerr
2411
if addedGID != 0 && addedGID == int(gid) {
2415
// Check if the group already exists
2416
g, err := lookup.GetGroup(c.state.Mountpoint, group)
2417
if err != runcuser.ErrNoGroupEntries {
2421
// A configured GroupEntry template is rendered from the looked-up group.
if c.config.GroupEntry != "" {
2422
return c.groupEntry(g.Name, strconv.Itoa(g.Gid), g.List), nil
2425
// Default line: the numeric GID is the group name, the user part of the
// spec is the only member.
return fmt.Sprintf("%d:x:%d:%s\n", gid, gid, splitUser[0]), nil
2428
// groupEntry expands the configured GroupEntry template: $GROUPNAME is replaced
// by groupname, $GID by gid and $USERLIST by the comma-joined list.
func (c *Container) groupEntry(groupname, gid string, list []string) string {
2429
s := c.config.GroupEntry
2430
s = strings.ReplaceAll(s, "$GROUPNAME", groupname)
2431
s = strings.ReplaceAll(s, "$GID", gid)
2432
s = strings.ReplaceAll(s, "$USERLIST", strings.Join(list, ","))
2436
// generatePasswdEntry generates an entry or entries into /etc/passwd as
2437
// required by container configuration.
2438
// Generally speaking, we will make an entry under two circumstances:
2439
// 1. The container is started as a specific user who is not in /etc/passwd.
2440
// This only triggers if the user is given as a *numeric* ID.
2441
// 2. It is requested that Libpod add the user that launched Podman to
2442
// /etc/passwd via AddCurrentUserPasswdEntry (though this does not trigger if
2443
// the user in question already exists in /etc/passwd) or the UID to be added
2445
// 3. The user specified additional host user accounts to add to the /etc/passwd file
2447
// Returns password entry (as a string that can be appended to /etc/passwd) and
2448
// any error that occurred.
2449
func (c *Container) generatePasswdEntry() (string, error) {
2453
// Case 3: one entry per configured host user.
for _, userid := range c.config.HostUsers {
2454
// Look up User on host
2455
u, err := util.LookupUser(userid)
2459
entry, err := c.userPasswdEntry(u)
2463
passwdString += entry
2465
// Case 2: the user that launched the container engine.
if c.config.AddCurrentUserPasswdEntry {
2466
entry, uid, _, err := c.generateCurrentUserPasswdEntry()
2470
passwdString += entry
2473
// Case 1: the user the container was configured to run as.
if c.config.User != "" {
2474
entry, err := c.generateUserPasswdEntry(addedUID)
2478
passwdString += entry
2481
return passwdString, nil
2484
// generateCurrentUserPasswdEntry generates an /etc/passwd entry for the user
2485
// running the container engine.
2486
// Returns a passwd entry for the user, and the UID and GID of the added entry.
2487
func (c *Container) generateCurrentUserPasswdEntry() (string, int, int, error) {
2488
uid := rootless.GetRootlessUID()
2490
return "", 0, 0, nil
2493
// Resolve the UID to a host user record via os/user.
u, err := user.LookupId(strconv.Itoa(uid))
2495
return "", 0, 0, fmt.Errorf("failed to get current user: %w", err)
2497
pwd, err := c.userPasswdEntry(u)
2499
return "", 0, 0, err
2502
// The returned GID is the rootless GID, not a value parsed from the entry.
return pwd, uid, rootless.GetRootlessGID(), nil
2505
// Sets the HOME env. variable with precedence: existing home env. variable, execUser home
2506
// setHomeEnvIfNeeded appends "HOME=<home>" to the process environment in the
// container spec, using the home directory obtained from getExecUserHome.
func (c *Container) setHomeEnvIfNeeded() error {
2507
// getExecUserHome resolves the home directory of the configured user.
getExecUserHome := func() (string, error) {
2508
overrides := c.getUserOverrides()
2509
execUser, err := lookup.GetUserGroupInfo(c.state.Mountpoint, c.config.User, overrides)
2511
// A user listed in HostUsers is looked up on the host instead.
if slices.Contains(c.config.HostUsers, c.config.User) {
2512
execUser, err = lookupHostUser(c.config.User)
2520
return execUser.Home, nil
2523
// Ensure HOME is not already set in Env
2524
for _, s := range c.config.Spec.Process.Env {
2525
if strings.HasPrefix(s, "HOME=") {
2530
home, err := getExecUserHome()
2535
c.config.Spec.Process.Env = append(c.config.Spec.Process.Env, fmt.Sprintf("HOME=%s", home))
2539
// userPasswdEntry builds an /etc/passwd line for the host user u. The line is
// rendered from the PasswdEntry template when one is configured; otherwise it
// has the form "username:*:uid:gid:name:home:/bin/sh" followed by a newline.
func (c *Container) userPasswdEntry(u *user.User) (string, error) {
2540
// Look up the user to see if it exists in the container image.
2541
_, err := lookup.GetUser(c.state.Mountpoint, u.Username)
2542
if err != runcuser.ErrNoPasswdEntries {
2546
// Look up the UID to see if it exists in the container image.
2547
_, err = lookup.GetUser(c.state.Mountpoint, u.Uid)
2548
if err != runcuser.ErrNoPasswdEntries {
2552
// If the user's actual home directory exists, or was mounted in - use
2554
// The home directory starts out as the container's working directory.
homeDir := c.WorkingDir()
2557
if MountExists(c.config.Spec.Mounts, hDir) {
2561
hDir = filepath.Dir(hDir)
2563
// Also compare the user's home directory against the user volumes.
if homeDir != u.HomeDir {
2564
for _, hDir := range c.UserVolumes() {
2565
if hDir == u.HomeDir {
2572
if c.config.PasswdEntry != "" {
2573
return c.passwdEntry(u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
2576
return fmt.Sprintf("%s:*:%s:%s:%s:%s:/bin/sh\n", u.Username, u.Uid, u.Gid, u.Name, homeDir), nil
2579
// generateUserPasswdEntry generates an /etc/passwd entry for the container user
2580
// to run in the container.
2581
// Returns the passwd entry (as a string) and any error that occurred.
2582
// Accepts one argument, that being any UID that has already been added to the
2583
// passwd file by other functions; if it matches the UID we were given, we don't
2584
// need to do anything.
2585
func (c *Container) generateUserPasswdEntry(addedUID int) (string, error) {
2590
if c.config.User == "" {
2593
// The user spec is "user[:group]".
splitSpec := strings.SplitN(c.config.User, ":", 2)
2594
userspec := splitSpec[0]
2595
if len(splitSpec) > 1 {
2596
groupspec = splitSpec[1]
2598
// If a non numeric User, then don't generate passwd
2599
uid, err := strconv.ParseUint(userspec, 10, 32)
2601
return "", nil //nolint: nilerr
2604
if addedUID != 0 && int(uid) == addedUID {
2608
// Look up the user to see if it exists in the container image
2609
_, err = lookup.GetUser(c.state.Mountpoint, userspec)
2610
if err != runcuser.ErrNoPasswdEntries {
2614
if groupspec != "" {
2615
// The group part is tried as a number; it is also looked up in the
// container's group file.
ugid, err := strconv.ParseUint(groupspec, 10, 32)
2619
group, err := lookup.GetGroup(c.state.Mountpoint, groupspec)
2621
return "", fmt.Errorf("unable to get gid %s from group file: %w", groupspec, err)
2627
// With a PasswdEntry template the numeric UID doubles as the user name and
// the working directory is used as the home directory.
if c.config.PasswdEntry != "" {
2628
entry := c.passwdEntry(strconv.FormatUint(uid, 10), strconv.FormatUint(uid, 10), strconv.FormatInt(int64(gid), 10), "container user", c.WorkingDir())
2632
// Try to resolve the UID to a host user so its name can be used in the entry.
u, err := user.LookupId(strconv.FormatUint(uid, 10))
2634
return fmt.Sprintf("%s:*:%d:%d:%s:%s:/bin/sh\n", u.Username, uid, gid, u.Name, c.WorkingDir()), nil
2636
return fmt.Sprintf("%d:*:%d:%d:container user:%s:/bin/sh\n", uid, uid, gid, c.WorkingDir()), nil
2639
// passwdEntry expands the configured PasswdEntry template: $USERNAME, $UID,
// $GID, $NAME and $HOME are replaced, in that order, by the corresponding
// arguments.
func (c *Container) passwdEntry(username, uid, gid, name, homeDir string) string {
2640
s := c.config.PasswdEntry
2641
s = strings.ReplaceAll(s, "$USERNAME", username)
2642
s = strings.ReplaceAll(s, "$UID", uid)
2643
s = strings.ReplaceAll(s, "$GID", gid)
2644
s = strings.ReplaceAll(s, "$NAME", name)
2645
s = strings.ReplaceAll(s, "$HOME", homeDir)
2649
// generatePasswdAndGroup generates container-specific passwd and group files
2650
// iff c.config.User is a number or we are configured to make a passwd entry for
2651
// the current user, or the user specified HostUsers or a GroupEntry.
2652
// Returns path to file to mount at /etc/passwd, path to file to mount at
2653
// /etc/group, and any error that occurred. If no passwd/group file were
2654
// required, the empty string will be returned for those paths (this may occur
2655
// even if no error happened).
2656
// This may modify the mounted container's /etc/passwd and /etc/group instead of
2657
// making copies to bind-mount in, so we don't break useradd (it wants to make a
2658
// copy of /etc/passwd and rename the copy to /etc/passwd, which is impossible
2659
// with a bind mount). This is done in cases where the container is *not*
2660
// read-only. In this case, the function will return nothing ("", "", nil).
2661
func (c *Container) generatePasswdAndGroup() (string, string, error) {
2662
// None of the settings that can require a passwd or group entry is set.
if !c.config.AddCurrentUserPasswdEntry && c.config.User == "" &&
2663
len(c.config.HostUsers) == 0 && c.config.GroupEntry == "" {
2670
// First, check if there's a mount at /etc/passwd or group, we don't
2671
// want to interfere with user mounts.
2672
if MountExists(c.config.Spec.Mounts, "/etc/passwd") {
2675
if MountExists(c.config.Spec.Mounts, "/etc/group") {
2679
// Next, check if we already made the files. If we didn't, don't need to
2680
// do anything more.
2682
passwdPath := filepath.Join(c.config.StaticDir, "passwd")
2683
if err := fileutils.Exists(passwdPath); err == nil {
2688
groupPath := filepath.Join(c.config.StaticDir, "group")
2689
if err := fileutils.Exists(groupPath); err == nil {
2694
// If we don't need a /etc/passwd or /etc/group at this point we can
2696
if !needPasswd && !needGroup {
2703
ro := c.IsReadOnly()
2706
passwdEntry, err := c.generatePasswdEntry()
2711
// Only an empty entry means there is nothing to write.
needsWrite := passwdEntry != ""
2713
// Read-only container: write a copy (original content plus the new entry)
// into the static dir.
case ro && needsWrite:
2714
logrus.Debugf("Making /etc/passwd for container %s", c.ID())
2715
originPasswdFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
2717
return "", "", fmt.Errorf("creating path to container %s /etc/passwd: %w", c.ID(), err)
2719
orig, err := os.ReadFile(originPasswdFile)
2720
// A missing original file is tolerated; any other read error is not.
if err != nil && !os.IsNotExist(err) {
2723
passwdFile, err := c.writeStringToStaticDir("passwd", string(orig)+passwdEntry)
2725
return "", "", fmt.Errorf("failed to create temporary passwd file: %w", err)
2727
if err := os.Chmod(passwdFile, 0644); err != nil {
2730
passwdPath = passwdFile
2731
// Writable container: append the entry to the container's own file.
case !ro && needsWrite:
2732
logrus.Debugf("Modifying container %s /etc/passwd", c.ID())
2733
containerPasswd, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/passwd")
2735
return "", "", fmt.Errorf("looking up location of container %s /etc/passwd: %w", c.ID(), err)
2738
// Opened append-only; the file is created with mode 0600 if it is missing.
f, err := os.OpenFile(containerPasswd, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
2740
return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
2744
if _, err := f.WriteString(passwdEntry); err != nil {
2745
return "", "", fmt.Errorf("unable to append to container %s /etc/passwd: %w", c.ID(), err)
2748
logrus.Debugf("Not modifying container %s /etc/passwd", c.ID())
2752
groupEntry, err := c.generateGroupEntry()
2757
// Only an empty entry means there is nothing to write.
needsWrite := groupEntry != ""
2759
// Read-only container: write a copy (original content plus the new entry)
// into the static dir.
case ro && needsWrite:
2760
logrus.Debugf("Making /etc/group for container %s", c.ID())
2761
originGroupFile, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
2763
return "", "", fmt.Errorf("creating path to container %s /etc/group: %w", c.ID(), err)
2765
orig, err := os.ReadFile(originGroupFile)
2766
// A missing original file is tolerated; any other read error is not.
if err != nil && !os.IsNotExist(err) {
2769
groupFile, err := c.writeStringToStaticDir("group", string(orig)+groupEntry)
2771
return "", "", fmt.Errorf("failed to create temporary group file: %w", err)
2773
if err := os.Chmod(groupFile, 0644); err != nil {
2776
groupPath = groupFile
2777
// Writable container: append the entry to the container's own file.
case !ro && needsWrite:
2778
logrus.Debugf("Modifying container %s /etc/group", c.ID())
2779
containerGroup, err := securejoin.SecureJoin(c.state.Mountpoint, "/etc/group")
2781
return "", "", fmt.Errorf("looking up location of container %s /etc/group: %w", c.ID(), err)
2784
// Opened append-only; the file is created with mode 0600 if it is missing.
f, err := os.OpenFile(containerGroup, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
2786
return "", "", fmt.Errorf("container %s: %w", c.ID(), err)
2790
if _, err := f.WriteString(groupEntry); err != nil {
2791
return "", "", fmt.Errorf("unable to append to container %s /etc/group: %w", c.ID(), err)
2794
logrus.Debugf("Not modifying container %s /etc/group", c.ID())
2798
return passwdPath, groupPath, nil
2801
// cleanupOverlayMounts calls overlay.CleanupContent on the container's static
// directory and returns its error.
func (c *Container) cleanupOverlayMounts() error {
2802
return overlay.CleanupContent(c.config.StaticDir)
2805
// Creates and mounts an empty dir to mount secrets into, if it does not already exist
2806
// runPath is the directory inside the container under which the "secrets"
// bind mount is registered; the host-side directory lives below the
// container's run directory.
func (c *Container) createSecretMountDir(runPath string) error {
2807
src := filepath.Join(c.state.RunDir, "/run/secrets")
2808
err := fileutils.Exists(src)
2809
if os.IsNotExist(err) {
2810
// Create the directory with mode 0755 regardless of the process umask.
if err := umask.MkdirAllIgnoreUmask(src, os.FileMode(0o755)); err != nil {
2813
if err := c.relabel(src, c.config.MountLabel, false); err != nil {
2816
if err := idtools.SafeChown(src, c.RootUID(), c.RootGID()); err != nil {
2819
// Record the directory in the bind mount map at <runPath>/secrets.
c.state.BindMounts[filepath.Join(runPath, "secrets")] = src
2826
// hasIdmapOption checks the given mount options for one that is exactly "idmap"
// or that starts with "idmap=".
func hasIdmapOption(options []string) bool {
2827
for _, o := range options {
2828
if o == "idmap" || strings.HasPrefix(o, "idmap=") {
2835
// Fix ownership and permissions of the specified volume if necessary.
2836
// v names the volume and carries its mount options and destination; the
// volume itself is loaded from the runtime state.
func (c *Container) fixVolumePermissions(v *ContainerNamedVolume) error {
2837
vol, err := c.runtime.state.Volume(v.Name)
2839
return fmt.Errorf("retrieving named volume %s for container %s: %w", v.Name, c.ID(), err)
2843
defer vol.lock.Unlock()
2845
// The volume may need a copy-up. Check the state.
2846
if err := vol.update(); err != nil {
2850
// Volumes owned by a volume driver are not chowned - we don't want to
2851
// mess with a mount not managed by us.
2852
if vol.state.NeedsChown && (!vol.UsesVolumeDriver() && vol.config.Driver != "image") {
2853
vol.state.NeedsChown = false
2855
// Start from the UID/GID of the container process in the spec.
uid := int(c.config.Spec.Process.User.UID)
2856
gid := int(c.config.Spec.Process.User.GID)
2858
// If the volume is mounted with "idmap", leave the IDs as they are; otherwise
// translate them to host IDs through the container's ID mappings.
2859
if c.config.IDMappings.UIDMap != nil && !hasIdmapOption(v.Options) {
2860
p := idtools.IDPair{
2864
mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap)
2865
newPair, err := mappings.ToHost(p)
2867
return fmt.Errorf("mapping user %d:%d: %w", uid, gid, err)
2873
// Record the IDs in the volume state and persist it.
vol.state.UIDChowned = uid
2874
vol.state.GIDChowned = gid
2876
if err := vol.save(); err != nil {
2880
mountPoint, err := vol.MountPoint()
2885
if err := idtools.SafeLchown(mountPoint, uid, gid); err != nil {
2889
// Make sure the new volume matches the permissions of the target directory unless 'U' is
2890
// provided (since the volume was already chowned in this case).
2891
// https://github.com/containers/podman/issues/10188
2892
if slices.Contains(v.Options, "U") {
2896
// Inspect the destination path inside the container's mounted root
// filesystem (without following a symlink).
st, err := os.Lstat(filepath.Join(c.state.Mountpoint, v.Dest))
2898
if stat, ok := st.Sys().(*syscall.Stat_t); ok {
2899
uid, gid := int(stat.Uid), int(stat.Gid)
2901
if c.config.IDMappings.UIDMap != nil {
2902
p := idtools.IDPair{
2906
mappings := idtools.NewIDMappingsFromMaps(c.config.IDMappings.UIDMap, c.config.IDMappings.GIDMap)
2907
newUID, newGID, err := mappings.ToContainer(p)
2909
return fmt.Errorf("mapping user %d:%d: %w", uid, gid, err)
2911
uid, gid = newUID, newGID
2914
if err := idtools.SafeLchown(mountPoint, uid, gid); err != nil {
2918
// Copy the destination's mode and access time onto the volume mount point.
if err := os.Chmod(mountPoint, st.Mode()); err != nil {
2921
if err := setVolumeAtime(mountPoint, st); err != nil {
2924
} else if !os.IsNotExist(err) {
2931
// relabel applies mountLabel to src through label.Relabel, passing shared on
// unchanged. It first checks that SELinux is enabled and that mountLabel is
// non-empty. An ENOTSUP error from Relabel is logged at debug level.
func (c *Container) relabel(src, mountLabel string, shared bool) error {
2932
if !selinux.GetEnabled() || mountLabel == "" {
2935
// only relabel on initial creation of container
2936
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
2937
// Past initial creation: read the label src currently carries.
label, err := label.FileLabel(src)
2941
// If labels are different, might be on a tmpfs
2942
if label == mountLabel {
2946
err := label.Relabel(src, mountLabel, shared)
2947
if errors.Is(err, unix.ENOTSUP) {
2948
logrus.Debugf("Labeling not supported on %q", src)
2954
// ChangeHostPathOwnership changes the ownership of the host path src to uid:gid
// by delegating to chown.ChangeHostPathOwnership; recurse is passed through
// unchanged.
func (c *Container) ChangeHostPathOwnership(src string, recurse bool, uid, gid int) error {
2955
// only chown on initial creation of container
2956
if !c.ensureState(define.ContainerStateConfigured, define.ContainerStateUnknown) {
2957
st, err := os.Stat(src)
2962
// Check whether src is already owned by the requested uid and gid.
2963
if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid {
2967
return chown.ChangeHostPathOwnership(src, recurse, uid, gid)
2970
// umask parses the configured Umask string and returns it as a uint32. A value
// that does not parse yields 0 and a wrapped error.
func (c *Container) umask() (uint32, error) {
2971
// Despite the variable name, the string is parsed as an octal (base 8) number
// that must fit in 32 bits.
decVal, err := strconv.ParseUint(c.config.Umask, 8, 32)
2973
return 0, fmt.Errorf("invalid Umask Value: %w", err)
2975
return uint32(decVal), nil
2978
// maybeClampOOMScoreAdj compares the requested oomScoreValue with the current
// process's own oom_score_adj. When the current value is higher than the
// requested one, a warning is logged and the current value is returned.
// On a read or parse failure the requested value is returned with the error.
func maybeClampOOMScoreAdj(oomScoreValue int) (int, error) {
2979
v, err := os.ReadFile("/proc/self/oom_score_adj")
2981
return oomScoreValue, err
2983
// The file content ends with a newline, which is stripped before parsing.
currentValue, err := strconv.Atoi(strings.TrimRight(string(v), "\n"))
2985
return oomScoreValue, err
2987
if currentValue > oomScoreValue {
2988
logrus.Warnf("Requested oom_score_adj=%d is lower than the current one, changing to %d", oomScoreValue, currentValue)
2989
return currentValue, nil
2991
return oomScoreValue, nil