podman
1985 lines · 62.0 KB
1//go:build linux || freebsd
2// +build linux freebsd
3
4package buildah5
6import (7"bytes"8"encoding/json"9"errors"10"fmt"11"io"12"io/fs"13"net"14"os"15"os/exec"16"os/signal"17"path/filepath"18"runtime"19"strconv"20"strings"21"sync"22"sync/atomic"23"syscall"24"time"25
26"github.com/containers/buildah/bind"27"github.com/containers/buildah/copier"28"github.com/containers/buildah/define"29"github.com/containers/buildah/internal"30internalUtil "github.com/containers/buildah/internal/util"31"github.com/containers/buildah/internal/volumes"32"github.com/containers/buildah/pkg/overlay"33"github.com/containers/buildah/pkg/sshagent"34"github.com/containers/buildah/util"35"github.com/containers/common/libnetwork/etchosts"36"github.com/containers/common/libnetwork/network"37"github.com/containers/common/libnetwork/resolvconf"38netTypes "github.com/containers/common/libnetwork/types"39netUtil "github.com/containers/common/libnetwork/util"40"github.com/containers/common/pkg/config"41"github.com/containers/common/pkg/subscriptions"42imageTypes "github.com/containers/image/v5/types"43"github.com/containers/storage"44"github.com/containers/storage/pkg/fileutils"45"github.com/containers/storage/pkg/idtools"46"github.com/containers/storage/pkg/ioutils"47"github.com/containers/storage/pkg/lockfile"48"github.com/containers/storage/pkg/reexec"49"github.com/containers/storage/pkg/unshare"50storageTypes "github.com/containers/storage/types"51"github.com/opencontainers/go-digest"52"github.com/opencontainers/runtime-spec/specs-go"53"github.com/opencontainers/runtime-tools/generate"54"github.com/opencontainers/selinux/go-selinux/label"55"github.com/sirupsen/logrus"56"golang.org/x/sys/unix"57"golang.org/x/term"58)
59
60func (b *Builder) createResolvConf(rdir string, chownOpts *idtools.IDPair) (string, error) {61cfile := filepath.Join(rdir, "resolv.conf")62f, err := os.Create(cfile)63if err != nil {64return "", err65}66defer f.Close()67
68uid := 069gid := 070if chownOpts != nil {71uid = chownOpts.UID72gid = chownOpts.GID73}74if err = f.Chown(uid, gid); err != nil {75return "", err76}77
78if err := relabel(cfile, b.MountLabel, false); err != nil {79return "", err80}81return cfile, nil82}
83
84// addResolvConf copies files from host and sets them up to bind mount into container
85func (b *Builder) addResolvConfEntries(file string, networkNameServer []string,86namespaces []specs.LinuxNamespace, keepHostServers, ipv6 bool) error {87defaultConfig, err := config.Default()88if err != nil {89return fmt.Errorf("failed to get config: %w", err)90}91
92dnsServers, dnsSearch, dnsOptions := b.CommonBuildOpts.DNSServers, b.CommonBuildOpts.DNSSearch, b.CommonBuildOpts.DNSOptions93nameservers := make([]string, 0, len(defaultConfig.Containers.DNSServers.Get())+len(dnsServers))94nameservers = append(nameservers, defaultConfig.Containers.DNSServers.Get()...)95nameservers = append(nameservers, dnsServers...)96
97searches := make([]string, 0, len(defaultConfig.Containers.DNSSearches.Get())+len(dnsSearch))98searches = append(searches, defaultConfig.Containers.DNSSearches.Get()...)99searches = append(searches, dnsSearch...)100
101options := make([]string, 0, len(defaultConfig.Containers.DNSOptions.Get())+len(dnsOptions))102options = append(options, defaultConfig.Containers.DNSOptions.Get()...)103options = append(options, dnsOptions...)104
105if len(nameservers) == 0 {106nameservers = networkNameServer107}108
109if err := resolvconf.New(&resolvconf.Params{110Path: file,111Namespaces: namespaces,112IPv6Enabled: ipv6,113KeepHostServers: keepHostServers,114Nameservers: nameservers,115Searches: searches,116Options: options,117}); err != nil {118return fmt.Errorf("building resolv.conf for container %s: %w", b.ContainerID, err)119}120
121return nil122}
123
124// createHostsFile creates a containers hosts file
125func (b *Builder) createHostsFile(rdir string, chownOpts *idtools.IDPair) (string, error) {126targetfile := filepath.Join(rdir, "hosts")127f, err := os.Create(targetfile)128if err != nil {129return "", err130}131defer f.Close()132uid := 0133gid := 0134if chownOpts != nil {135uid = chownOpts.UID136gid = chownOpts.GID137}138if err := f.Chown(uid, gid); err != nil {139return "", err140}141if err := relabel(targetfile, b.MountLabel, false); err != nil {142return "", err143}144
145return targetfile, nil146}
147
148func (b *Builder) addHostsEntries(file, imageRoot string, entries etchosts.HostEntries, exculde []net.IP) error {149conf, err := config.Default()150if err != nil {151return err152}153
154base, err := etchosts.GetBaseHostFile(conf.Containers.BaseHostsFile, imageRoot)155if err != nil {156return err157}158return etchosts.New(&etchosts.Params{159BaseFile: base,160ExtraHosts: b.CommonBuildOpts.AddHost,161HostContainersInternalIP: etchosts.GetHostContainersInternalIPExcluding(conf, nil, nil, exculde),162TargetFile: file,163ContainerIPs: entries,164})165}
166
167// generateHostname creates a containers /etc/hostname file
168func (b *Builder) generateHostname(rdir, hostname string, chownOpts *idtools.IDPair) (string, error) {169var err error170hostnamePath := "/etc/hostname"171
172var hostnameBuffer bytes.Buffer173hostnameBuffer.Write([]byte(fmt.Sprintf("%s\n", hostname)))174
175cfile := filepath.Join(rdir, filepath.Base(hostnamePath))176if err = ioutils.AtomicWriteFile(cfile, hostnameBuffer.Bytes(), 0644); err != nil {177return "", fmt.Errorf("writing /etc/hostname into the container: %w", err)178}179
180uid := 0181gid := 0182if chownOpts != nil {183uid = chownOpts.UID184gid = chownOpts.GID185}186if err = os.Chown(cfile, uid, gid); err != nil {187return "", err188}189if err := relabel(cfile, b.MountLabel, false); err != nil {190return "", err191}192
193return cfile, nil194}
195
196func setupTerminal(g *generate.Generator, terminalPolicy TerminalPolicy, terminalSize *specs.Box) {197switch terminalPolicy {198case DefaultTerminal:199onTerminal := term.IsTerminal(unix.Stdin) && term.IsTerminal(unix.Stdout) && term.IsTerminal(unix.Stderr)200if onTerminal {201logrus.Debugf("stdio is a terminal, defaulting to using a terminal")202} else {203logrus.Debugf("stdio is not a terminal, defaulting to not using a terminal")204}205g.SetProcessTerminal(onTerminal)206case WithTerminal:207g.SetProcessTerminal(true)208case WithoutTerminal:209g.SetProcessTerminal(false)210}211if terminalSize != nil {212g.SetProcessConsoleSize(terminalSize.Width, terminalSize.Height)213}214}
215
216// Search for a command that isn't given as an absolute path using the $PATH
217// under the rootfs. We can't resolve absolute symbolic links without
218// chroot()ing, which we may not be able to do, so just accept a link as a
219// valid resolution.
220func runLookupPath(g *generate.Generator, command []string) []string {221// Look for the configured $PATH.222spec := g.Config223envPath := ""224for i := range spec.Process.Env {225if strings.HasPrefix(spec.Process.Env[i], "PATH=") {226envPath = spec.Process.Env[i]227}228}229// If there is no configured $PATH, supply one.230if envPath == "" {231defaultPath := "/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin"232envPath = "PATH=" + defaultPath233g.AddProcessEnv("PATH", defaultPath)234}235// No command, nothing to do.236if len(command) == 0 {237return command238}239// Command is already an absolute path, use it as-is.240if filepath.IsAbs(command[0]) {241return command242}243// For each element in the PATH,244for _, pathEntry := range filepath.SplitList(envPath[5:]) {245// if it's the empty string, it's ".", which is the Cwd,246if pathEntry == "" {247pathEntry = spec.Process.Cwd248}249// build the absolute path which it might be,250candidate := filepath.Join(pathEntry, command[0])251// check if it's there,252if fi, err := os.Lstat(filepath.Join(spec.Root.Path, candidate)); fi != nil && err == nil {253// and if it's not a directory, and either a symlink or executable,254if !fi.IsDir() && ((fi.Mode()&os.ModeSymlink != 0) || (fi.Mode()&0111 != 0)) {255// use that.256return append([]string{candidate}, command[1:]...)257}258}259}260return command261}
262
// configureUIDGID sets the process UID/GID, supplemental group list, and
// capability sets on the spec for the user requested in options.User
// (resolved against the container rootfs at mountPoint), and returns that
// user's home directory so the caller can set $HOME.
func (b *Builder) configureUIDGID(g *generate.Generator, mountPoint string, options RunOptions) (string, error) {
	// Set the user UID/GID/supplemental group list/capabilities lists.
	user, homeDir, err := b.userForRun(mountPoint, options.User)
	if err != nil {
		return "", err
	}
	if err := setupCapabilities(g, b.Capabilities, options.AddCapabilities, options.DropCapabilities); err != nil {
		return "", err
	}
	g.SetProcessUID(user.UID)
	g.SetProcessGID(user.GID)
	g.AddProcessAdditionalGid(user.GID)
	for _, gid := range user.AdditionalGids {
		g.AddProcessAdditionalGid(gid)
	}
	for _, group := range b.GroupAdd {
		if group == "keep-groups" {
			// "keep-groups" is mutually exclusive with any other
			// --group-add value; it is passed down to the runtime as an
			// annotation instead of a numeric GID.
			if len(b.GroupAdd) > 1 {
				return "", errors.New("the '--group-add keep-groups' option is not allowed with any other --group-add options")
			}
			g.AddAnnotation("run.oci.keep_original_groups", "1")
			continue
		}
		// Anything other than "keep-groups" must be a numeric GID.
		gid, err := strconv.ParseUint(group, 10, 32)
		if err != nil {
			return "", err
		}
		g.AddProcessAdditionalGid(uint32(gid))
	}

	// Remove capabilities if not running as root except Bounding set
	if user.UID != 0 && g.Config.Process.Capabilities != nil {
		bounding := g.Config.Process.Capabilities.Bounding
		g.ClearProcessCapabilities()
		g.Config.Process.Capabilities.Bounding = bounding
	}

	return homeDir, nil
}
302
303func (b *Builder) configureEnvironment(g *generate.Generator, options RunOptions, defaultEnv []string) {304g.ClearProcessEnv()305
306if b.CommonBuildOpts.HTTPProxy {307for _, envSpec := range config.ProxyEnv {308if envVal, ok := os.LookupEnv(envSpec); ok {309g.AddProcessEnv(envSpec, envVal)310}311}312}313
314for _, envSpec := range util.MergeEnv(util.MergeEnv(defaultEnv, b.Env()), options.Env) {315env := strings.SplitN(envSpec, "=", 2)316if len(env) > 1 {317g.AddProcessEnv(env[0], env[1])318}319}320}
321
322// getNetworkInterface creates the network interface
323func getNetworkInterface(store storage.Store, cniConfDir, cniPluginPath string) (netTypes.ContainerNetwork, error) {324conf, err := config.Default()325if err != nil {326return nil, err327}328// copy the config to not modify the default by accident329newconf := *conf330if len(cniConfDir) > 0 {331newconf.Network.NetworkConfigDir = cniConfDir332}333if len(cniPluginPath) > 0 {334plugins := strings.Split(cniPluginPath, string(os.PathListSeparator))335newconf.Network.CNIPluginDirs.Set(plugins)336}337
338_, netInt, err := network.NetworkBackend(store, &newconf, false)339if err != nil {340return nil, err341}342return netInt, nil343}
344
345func netStatusToNetResult(netStatus map[string]netTypes.StatusBlock, hostnames []string) *netResult {346result := &netResult{347keepHostResolvers: false,348}349for _, status := range netStatus {350for _, dns := range status.DNSServerIPs {351result.dnsServers = append(result.dnsServers, dns.String())352}353for _, netInt := range status.Interfaces {354for _, netAddress := range netInt.Subnets {355e := etchosts.HostEntry{IP: netAddress.IPNet.IP.String(), Names: hostnames}356result.entries = append(result.entries, e)357if !result.ipv6 && netUtil.IsIPv6(netAddress.IPNet.IP) {358result.ipv6 = true359}360}361}362}363return result364}
365
// DefaultNamespaceOptions returns the default namespace settings from the
// runtime-tools generator library.
func DefaultNamespaceOptions() (define.NamespaceOptions, error) {
	cfg, err := config.Default()
	if err != nil {
		return nil, fmt.Errorf("failed to get container config: %w", err)
	}
	// Host == true means "share the host's namespace" rather than creating
	// a new one; the defaults are read from containers.conf. The mount
	// namespace is always private.
	options := define.NamespaceOptions{
		{Name: string(specs.CgroupNamespace), Host: cfg.CgroupNS() == "host"},
		{Name: string(specs.IPCNamespace), Host: cfg.IPCNS() == "host"},
		{Name: string(specs.MountNamespace), Host: false},
		{Name: string(specs.NetworkNamespace), Host: cfg.NetNS() == "host"},
		{Name: string(specs.PIDNamespace), Host: cfg.PidNS() == "host"},
		{Name: string(specs.UserNamespace), Host: cfg.Containers.UserNS == "" || cfg.Containers.UserNS == "host"},
		{Name: string(specs.UTSNamespace), Host: cfg.UTSNS() == "host"},
	}
	return options, nil
}
384
385func checkAndOverrideIsolationOptions(isolation define.Isolation, options *RunOptions) error {386switch isolation {387case IsolationOCIRootless:388// only change the netns if the caller did not set it389if ns := options.NamespaceOptions.Find(string(specs.NetworkNamespace)); ns == nil {390if _, err := exec.LookPath("slirp4netns"); err != nil {391// if slirp4netns is not installed we have to use the hosts net namespace392options.NamespaceOptions.AddOrReplace(define.NamespaceOption{Name: string(specs.NetworkNamespace), Host: true})393}394}395fallthrough396case IsolationOCI:397pidns := options.NamespaceOptions.Find(string(specs.PIDNamespace))398userns := options.NamespaceOptions.Find(string(specs.UserNamespace))399if (pidns != nil && pidns.Host) && (userns != nil && !userns.Host) {400return fmt.Errorf("not allowed to mix host PID namespace with container user namespace")401}402case IsolationChroot:403logrus.Info("network namespace isolation not supported with chroot isolation, forcing host network")404options.NamespaceOptions.AddOrReplace(define.NamespaceOption{Name: string(specs.NetworkNamespace), Host: true})405}406return nil407}
408
// fileCloser is a helper struct to prevent closing the file twice in the code
// users must call (fileCloser).Close() and not fileCloser.File.Close()
type fileCloser struct {
	file   *os.File
	closed bool // set once Close() has run; guards against a double close
}

// Close closes the wrapped file at most once; an error from the underlying
// close is logged rather than returned.
func (f *fileCloser) Close() {
	if !f.closed {
		if err := f.file.Close(); err != nil {
			logrus.Errorf("failed to close file: %v", err)
		}
		f.closed = true
	}
}
424
425// waitForSync waits for a maximum of 4 minutes to read something from the file
426func waitForSync(pipeR *os.File) error {427if err := pipeR.SetDeadline(time.Now().Add(4 * time.Minute)); err != nil {428return err429}430b := make([]byte, 16)431_, err := pipeR.Read(b)432return err433}
434
// runUsingRuntime creates, starts, and waits for a container using an OCI
// runtime binary: it serializes the spec into bundlePath/config.json, wires
// up stdio (a console socket in terminal mode, plain pipes otherwise),
// coordinates with the parent over containerCreateW/containerStartR when
// network configuration is requested, polls the runtime's "state" command
// until the container stops, and deletes the container on the way out. It
// returns the container process's wait status.
func runUsingRuntime(options RunOptions, configureNetwork bool, moreCreateArgs []string, spec *specs.Spec, bundlePath, containerName string,
	containerCreateW io.WriteCloser, containerStartR io.ReadCloser) (wstatus unix.WaitStatus, err error) {
	if options.Logger == nil {
		options.Logger = logrus.StandardLogger()
	}

	// Lock the caller to a single OS-level thread.
	runtime.LockOSThread()

	// Set up bind mounts for things that a namespaced user might not be able to get to directly.
	unmountAll, err := bind.SetupIntermediateMountNamespace(spec, bundlePath)
	if unmountAll != nil {
		// Clean up the mounts even if setup returned an error alongside them.
		defer func() {
			if err := unmountAll(); err != nil {
				options.Logger.Error(err)
			}
		}()
	}
	if err != nil {
		return 1, err
	}

	// Write the runtime configuration.
	specbytes, err := json.Marshal(spec)
	if err != nil {
		return 1, fmt.Errorf("encoding configuration %#v as json: %w", spec, err)
	}
	if err = ioutils.AtomicWriteFile(filepath.Join(bundlePath, "config.json"), specbytes, 0600); err != nil {
		return 1, fmt.Errorf("storing runtime configuration: %w", err)
	}

	logrus.Debugf("config = %v", string(specbytes))

	// Decide which runtime to use.
	// NOTE: this local shadows the "runtime" package for the rest of the function.
	runtime := options.Runtime
	if runtime == "" {
		runtime = util.Runtime()
	}
	localRuntime := util.FindLocalRuntime(runtime)
	if localRuntime != "" {
		runtime = localRuntime
	}

	// Default to just passing down our stdio.
	getCreateStdio := func() (io.ReadCloser, io.WriteCloser, io.WriteCloser) {
		return os.Stdin, os.Stdout, os.Stderr
	}

	// Figure out how we're doing stdio handling, and create pipes and sockets.
	var stdio sync.WaitGroup
	var consoleListener *net.UnixListener
	var errorFds, closeBeforeReadingErrorFds []int
	stdioPipe := make([][]int, 3)
	copyConsole := false
	copyPipes := false
	// finishCopy is a pipe used to tell the stdio-copying goroutine to stop.
	finishCopy := make([]int, 2)
	if err = unix.Pipe(finishCopy); err != nil {
		return 1, fmt.Errorf("creating pipe for notifying to stop stdio: %w", err)
	}
	// finishedCopy is signaled by the stdio-copying goroutine when it exits.
	finishedCopy := make(chan struct{}, 1)
	var pargs []string
	if spec.Process != nil {
		pargs = spec.Process.Args
		if spec.Process.Terminal {
			copyConsole = true
			// Create a listening socket for accepting the container's terminal's PTY master.
			socketPath := filepath.Join(bundlePath, "console.sock")
			consoleListener, err = net.ListenUnix("unix", &net.UnixAddr{Name: socketPath, Net: "unix"})
			if err != nil {
				return 1, fmt.Errorf("creating socket %q to receive terminal descriptor: %w", consoleListener.Addr(), err)
			}
			// Add console socket arguments.
			moreCreateArgs = append(moreCreateArgs, "--console-socket", socketPath)
		} else {
			copyPipes = true
			// Figure out who should own the pipes.
			uid, gid, err := util.GetHostRootIDs(spec)
			if err != nil {
				return 1, err
			}
			// Create stdio pipes.
			if stdioPipe, err = runMakeStdioPipe(int(uid), int(gid)); err != nil {
				return 1, err
			}
			if spec.Linux != nil {
				if err = runLabelStdioPipes(stdioPipe, spec.Process.SelinuxLabel, spec.Linux.MountLabel); err != nil {
					return 1, err
				}
			}
			// Remember which ends to read error output from, and which
			// write ends to close before reading it.
			errorFds = []int{stdioPipe[unix.Stdout][0], stdioPipe[unix.Stderr][0]}
			closeBeforeReadingErrorFds = []int{stdioPipe[unix.Stdout][1], stdioPipe[unix.Stderr][1]}
			// Set stdio to our pipes.
			getCreateStdio = func() (io.ReadCloser, io.WriteCloser, io.WriteCloser) {
				stdin := os.NewFile(uintptr(stdioPipe[unix.Stdin][0]), "/dev/stdin")
				stdout := os.NewFile(uintptr(stdioPipe[unix.Stdout][1]), "/dev/stdout")
				stderr := os.NewFile(uintptr(stdioPipe[unix.Stderr][1]), "/dev/stderr")
				return stdin, stdout, stderr
			}
		}
	} else {
		if options.Quiet {
			// Discard stdout.
			getCreateStdio = func() (io.ReadCloser, io.WriteCloser, io.WriteCloser) {
				return os.Stdin, nil, os.Stderr
			}
		}
	}

	// NOTE(review): runtimeArgs shares options.Args' backing array, and
	// "args" below is also built by appending to options.Args; confirm the
	// appends cannot alias each other's contents.
	runtimeArgs := options.Args[:]
	if options.CgroupManager == config.SystemdCgroupsManager {
		runtimeArgs = append(runtimeArgs, "--systemd-cgroup")
	}

	// Build the commands that we'll execute.
	pidFile := filepath.Join(bundlePath, "pid")
	args := append(append(append(runtimeArgs, "create", "--bundle", bundlePath, "--pid-file", pidFile), moreCreateArgs...), containerName)
	create := exec.Command(runtime, args...)
	setPdeathsig(create)
	create.Dir = bundlePath
	stdin, stdout, stderr := getCreateStdio()
	create.Stdin, create.Stdout, create.Stderr = stdin, stdout, stderr

	args = append(options.Args, "start", containerName)
	start := exec.Command(runtime, args...)
	setPdeathsig(start)
	start.Dir = bundlePath
	start.Stderr = os.Stderr

	// kill builds a "<runtime> kill" command, optionally with a signal name.
	kill := func(signal string) *exec.Cmd {
		args := append(options.Args, "kill", containerName)
		if signal != "" {
			args = append(args, signal)
		}
		kill := exec.Command(runtime, args...)
		kill.Dir = bundlePath
		kill.Stderr = os.Stderr
		return kill
	}

	args = append(options.Args, "delete", containerName)
	del := exec.Command(runtime, args...)
	del.Dir = bundlePath
	del.Stderr = os.Stderr

	// Actually create the container.
	logrus.Debugf("Running %q", create.Args)
	err = create.Run()
	if err != nil {
		return 1, fmt.Errorf("from %s creating container for %v: %s: %w", runtime, pargs, runCollectOutput(options.Logger, errorFds, closeBeforeReadingErrorFds), err)
	}
	// From here on, always delete the container on the way out; a delete
	// failure replaces a nil return error but is only logged otherwise.
	defer func() {
		err2 := del.Run()
		if err2 != nil {
			if err == nil {
				err = fmt.Errorf("deleting container: %w", err2)
			} else {
				options.Logger.Infof("error from %s deleting container: %v", runtime, err2)
			}
		}
	}()

	// Make sure we read the container's exit status when it exits.
	pidValue, err := os.ReadFile(pidFile)
	if err != nil {
		return 1, err
	}
	pid, err := strconv.Atoi(strings.TrimSpace(string(pidValue)))
	if err != nil {
		return 1, fmt.Errorf("parsing pid %s as a number: %w", string(pidValue), err)
	}
	// stopped is flipped (atomically) once we know the container is gone.
	var stopped uint32
	var reaping sync.WaitGroup
	reaping.Add(1)
	go func() {
		defer reaping.Done()
		var err error
		// Reap the container process and capture its wait status.
		_, err = unix.Wait4(pid, &wstatus, 0, nil)
		if err != nil {
			wstatus = 0
			options.Logger.Errorf("error waiting for container child process %d: %v\n", pid, err)
		}
		atomic.StoreUint32(&stopped, 1)
	}()

	if configureNetwork {
		// Tell the parent the container was created, then wait for it to
		// finish network setup before we start the container.
		if _, err := containerCreateW.Write([]byte{1}); err != nil {
			return 1, err
		}
		containerCreateW.Close()
		logrus.Debug("waiting for parent start message")
		b := make([]byte, 1)
		if _, err := containerStartR.Read(b); err != nil {
			return 1, fmt.Errorf("did not get container start message from parent: %w", err)
		}
		containerStartR.Close()
	}

	if copyPipes {
		// We don't need the ends of the pipes that belong to the container.
		stdin.Close()
		if stdout != nil {
			stdout.Close()
		}
		stderr.Close()
	}

	// Handle stdio for the container in the background.
	stdio.Add(1)
	go runCopyStdio(options.Logger, &stdio, copyPipes, stdioPipe, copyConsole, consoleListener, finishCopy, finishedCopy, spec)

	// Start the container.
	logrus.Debugf("Running %q", start.Args)
	err = start.Run()
	if err != nil {
		return 1, fmt.Errorf("from %s starting container: %w", runtime, err)
	}
	// If we leave while the container still appears to be running, kill it.
	defer func() {
		if atomic.LoadUint32(&stopped) == 0 {
			if err := kill("").Run(); err != nil {
				options.Logger.Infof("error from %s stopping container: %v", runtime, err)
			}
			atomic.StoreUint32(&stopped, 1)
		}
	}()

	// Wait for the container to exit.
	// Relay HUP/INT/TERM to the container as SIGKILL for as long as we wait.
	interrupted := make(chan os.Signal, 100)
	go func() {
		for range interrupted {
			if err := kill("SIGKILL").Run(); err != nil {
				logrus.Errorf("%v sending SIGKILL", err)
			}
		}
	}()
	signal.Notify(interrupted, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)
	// Poll the runtime's view of the container state (roughly every 100ms)
	// until it stops or the stdio copier finishes.
	for {
		now := time.Now()
		var state specs.State
		args = append(options.Args, "state", containerName)
		stat := exec.Command(runtime, args...)
		stat.Dir = bundlePath
		stat.Stderr = os.Stderr
		stateOutput, err := stat.Output()
		if err != nil {
			if atomic.LoadUint32(&stopped) != 0 {
				// container exited
				break
			}
			return 1, fmt.Errorf("reading container state from %s (got output: %q): %w", runtime, string(stateOutput), err)
		}
		if err = json.Unmarshal(stateOutput, &state); err != nil {
			return 1, fmt.Errorf("parsing container state %q from %s: %w", string(stateOutput), runtime, err)
		}
		switch state.Status {
		case "running":
		case "stopped":
			atomic.StoreUint32(&stopped, 1)
		default:
			return 1, fmt.Errorf("container status unexpectedly changed to %q", state.Status)
		}
		if atomic.LoadUint32(&stopped) != 0 {
			break
		}
		select {
		case <-finishedCopy:
			atomic.StoreUint32(&stopped, 1)
		case <-time.After(time.Until(now.Add(100 * time.Millisecond))):
			continue
		}
		if atomic.LoadUint32(&stopped) != 0 {
			break
		}
	}
	signal.Stop(interrupted)
	close(interrupted)

	// Close the writing end of the stop-handling-stdio notification pipe.
	unix.Close(finishCopy[1])
	// Wait for the stdio copy goroutine to flush.
	stdio.Wait()
	// Wait until we finish reading the exit status.
	reaping.Wait()

	return wstatus, nil
}
720
721func runCollectOutput(logger *logrus.Logger, fds, closeBeforeReadingFds []int) string { //nolint:interfacer722for _, fd := range closeBeforeReadingFds {723unix.Close(fd)724}725var b bytes.Buffer726buf := make([]byte, 8192)727for _, fd := range fds {728nread, err := unix.Read(fd, buf)729if err != nil {730if errno, isErrno := err.(syscall.Errno); isErrno {731switch errno {732default:733logger.Errorf("error reading from pipe %d: %v", fd, err)734case syscall.EINTR, syscall.EAGAIN:735}736} else {737logger.Errorf("unable to wait for data from pipe %d: %v", fd, err)738}739continue740}741for nread > 0 {742r := buf[:nread]743if nwritten, err := b.Write(r); err != nil || nwritten != len(r) {744if nwritten != len(r) {745logger.Errorf("error buffering data from pipe %d: %v", fd, err)746break747}748}749nread, err = unix.Read(fd, buf)750if err != nil {751if errno, isErrno := err.(syscall.Errno); isErrno {752switch errno {753default:754logger.Errorf("error reading from pipe %d: %v", fd, err)755case syscall.EINTR, syscall.EAGAIN:756}757} else {758logger.Errorf("unable to wait for data from pipe %d: %v", fd, err)759}760break761}762}763}764return b.String()765}
766
767func setNonblock(logger *logrus.Logger, fd int, description string, nonblocking bool) (bool, error) { //nolint:interfacer768mask, err := unix.FcntlInt(uintptr(fd), unix.F_GETFL, 0)769if err != nil {770return false, err771}772blocked := mask&unix.O_NONBLOCK == 0773
774if err := unix.SetNonblock(fd, nonblocking); err != nil {775if nonblocking {776logger.Errorf("error setting %s to nonblocking: %v", description, err)777} else {778logger.Errorf("error setting descriptor %s blocking: %v", description, err)779}780}781return blocked, err782}
783
// runCopyStdio relays stdio between this process and the container: either
// via the PTY master received over consoleListener (terminal mode) or via
// the stdio pipes. It builds the relay maps and buffers, puts the relevant
// descriptors into nonblocking mode, and hands off to runCopyStdioPassData.
// On return it closes its pipe ends, signals stdio.Done(), and notifies
// finishedCopy.
func runCopyStdio(logger *logrus.Logger, stdio *sync.WaitGroup, copyPipes bool, stdioPipe [][]int, copyConsole bool, consoleListener *net.UnixListener, finishCopy []int, finishedCopy chan struct{}, spec *specs.Spec) {
	defer func() {
		unix.Close(finishCopy[0])
		if copyPipes {
			unix.Close(stdioPipe[unix.Stdin][1])
			unix.Close(stdioPipe[unix.Stdout][0])
			unix.Close(stdioPipe[unix.Stderr][0])
		}
		stdio.Done()
		finishedCopy <- struct{}{}
		close(finishedCopy)
	}()
	// Map describing where data on an incoming descriptor should go.
	relayMap := make(map[int]int)
	// Map describing incoming and outgoing descriptors.
	readDesc := make(map[int]string)
	writeDesc := make(map[int]string)
	// Buffers.
	relayBuffer := make(map[int]*bytes.Buffer)
	// Set up the terminal descriptor or pipes for polling.
	if copyConsole {
		// Accept a connection over our listening socket.
		fd, err := runAcceptTerminal(logger, consoleListener, spec.Process.ConsoleSize)
		if err != nil {
			logger.Errorf("%v", err)
			return
		}
		terminalFD := fd
		// Input from our stdin, output from the terminal descriptor.
		relayMap[unix.Stdin] = terminalFD
		readDesc[unix.Stdin] = "stdin"
		relayBuffer[terminalFD] = new(bytes.Buffer)
		writeDesc[terminalFD] = "container terminal input"
		relayMap[terminalFD] = unix.Stdout
		readDesc[terminalFD] = "container terminal output"
		relayBuffer[unix.Stdout] = new(bytes.Buffer)
		writeDesc[unix.Stdout] = "output"
		// Set our terminal's mode to raw, to pass handling of special
		// terminal input to the terminal in the container.
		if term.IsTerminal(unix.Stdin) {
			if state, err := term.MakeRaw(unix.Stdin); err != nil {
				logger.Warnf("error setting terminal state: %v", err)
			} else {
				defer func() {
					if err = term.Restore(unix.Stdin, state); err != nil {
						logger.Errorf("unable to restore terminal state: %v", err)
					}
				}()
			}
		}
	}
	if copyPipes {
		// Input from our stdin, output from the stdout and stderr pipes.
		relayMap[unix.Stdin] = stdioPipe[unix.Stdin][1]
		readDesc[unix.Stdin] = "stdin"
		relayBuffer[stdioPipe[unix.Stdin][1]] = new(bytes.Buffer)
		writeDesc[stdioPipe[unix.Stdin][1]] = "container stdin"
		relayMap[stdioPipe[unix.Stdout][0]] = unix.Stdout
		readDesc[stdioPipe[unix.Stdout][0]] = "container stdout"
		relayBuffer[unix.Stdout] = new(bytes.Buffer)
		writeDesc[unix.Stdout] = "stdout"
		relayMap[stdioPipe[unix.Stderr][0]] = unix.Stderr
		readDesc[stdioPipe[unix.Stderr][0]] = "container stderr"
		relayBuffer[unix.Stderr] = new(bytes.Buffer)
		writeDesc[unix.Stderr] = "stderr"
	}
	// Set our reading descriptors to non-blocking.
	for rfd, wfd := range relayMap {
		blocked, err := setNonblock(logger, rfd, readDesc[rfd], true)
		if err != nil {
			return
		}
		if blocked {
			// Restore the original blocking mode when we leave.
			defer setNonblock(logger, rfd, readDesc[rfd], false) // nolint:errcheck
		}
		setNonblock(logger, wfd, writeDesc[wfd], false) // nolint:errcheck
	}

	if copyPipes {
		setNonblock(logger, stdioPipe[unix.Stdin][1], writeDesc[stdioPipe[unix.Stdin][1]], true) // nolint:errcheck
	}

	runCopyStdioPassData(copyPipes, stdioPipe, finishCopy, relayMap, relayBuffer, readDesc, writeDesc)
}
868
869func canRetry(err error) bool {870if errno, isErrno := err.(syscall.Errno); isErrno {871return errno == syscall.EINTR || errno == syscall.EAGAIN872}873return false874}
875
// runCopyStdioPassData is the poll/copy loop behind runCopyStdio. It shuttles
// data between the descriptors paired in relayMap, staging everything in
// relayBuffer so a slow writer cannot stall reads, until every relayed
// descriptor has been closed or the finishCopy pipe signals that it is time
// to stop. readDesc/writeDesc hold human-readable names used in log messages.
func runCopyStdioPassData(copyPipes bool, stdioPipe [][]int, finishCopy []int, relayMap map[int]int, relayBuffer map[int]*bytes.Buffer, readDesc map[int]string, writeDesc map[int]string) {
	// Set when our stdin hits EOF, so the container's stdin pipe can be
	// closed once its buffer drains.
	closeStdin := false

	// Pass data back and forth.
	pollTimeout := -1
	for len(relayMap) > 0 {
		// Start building the list of descriptors to poll.
		pollFds := make([]unix.PollFd, 0, len(relayMap)+1)
		// Poll for a notification that we should stop handling stdio.
		pollFds = append(pollFds, unix.PollFd{Fd: int32(finishCopy[0]), Events: unix.POLLIN | unix.POLLHUP})
		// Poll on our reading descriptors.
		for rfd := range relayMap {
			pollFds = append(pollFds, unix.PollFd{Fd: int32(rfd), Events: unix.POLLIN | unix.POLLHUP})
		}
		buf := make([]byte, 8192)
		// Wait for new data from any input descriptor, or a notification that we're done.
		_, err := unix.Poll(pollFds, pollTimeout)
		if !util.LogIfNotRetryable(err, fmt.Sprintf("error waiting for stdio/terminal data to relay: %v", err)) {
			return
		}
		removes := make(map[int]struct{})
		for _, pollFd := range pollFds {
			// If this descriptor's just been closed from the other end, mark it for
			// removal from the set that we're checking for.
			if pollFd.Revents&unix.POLLHUP == unix.POLLHUP {
				removes[int(pollFd.Fd)] = struct{}{}
			}
			// If the descriptor was closed elsewhere, remove it from our list.
			if pollFd.Revents&unix.POLLNVAL != 0 {
				logrus.Debugf("error polling descriptor %s: closed?", readDesc[int(pollFd.Fd)])
				removes[int(pollFd.Fd)] = struct{}{}
			}
			// If the POLLIN flag isn't set, then there's no data to be read from this descriptor.
			if pollFd.Revents&unix.POLLIN == 0 {
				continue
			}
			// Read whatever there is to be read.
			readFD := int(pollFd.Fd)
			writeFD, needToRelay := relayMap[readFD]
			if needToRelay {
				n, err := unix.Read(readFD, buf)
				if !util.LogIfNotRetryable(err, fmt.Sprintf("unable to read %s data: %v", readDesc[readFD], err)) {
					return
				}
				// If it's zero-length on our stdin and we're
				// using pipes, it's an EOF, so close the stdin
				// pipe's writing end.
				if n == 0 && !canRetry(err) && int(pollFd.Fd) == unix.Stdin {
					removes[int(pollFd.Fd)] = struct{}{}
				} else if n > 0 {
					// Buffer the data in case we get blocked on where they need to go.
					nwritten, err := relayBuffer[writeFD].Write(buf[:n])
					if err != nil {
						logrus.Debugf("buffer: %v", err)
						continue
					}
					if nwritten != n {
						logrus.Debugf("buffer: expected to buffer %d bytes, wrote %d", n, nwritten)
						continue
					}
					// If this is the last of the data we'll be able to read from this
					// descriptor, read all that there is to read.
					for pollFd.Revents&unix.POLLHUP == unix.POLLHUP {
						nr, err := unix.Read(readFD, buf)
						util.LogIfUnexpectedWhileDraining(err, fmt.Sprintf("read %s: %v", readDesc[readFD], err))
						if nr <= 0 {
							break
						}
						nwritten, err := relayBuffer[writeFD].Write(buf[:nr])
						if err != nil {
							logrus.Debugf("buffer: %v", err)
							break
						}
						if nwritten != nr {
							logrus.Debugf("buffer: expected to buffer %d bytes, wrote %d", nr, nwritten)
							break
						}
					}
				}
			}
		}
		// Try to drain the output buffers. Set the default timeout
		// for the next poll() to 100ms if we still have data to write.
		pollTimeout = -1
		for writeFD := range relayBuffer {
			if relayBuffer[writeFD].Len() > 0 {
				n, err := unix.Write(writeFD, relayBuffer[writeFD].Bytes())
				if !util.LogIfNotRetryable(err, fmt.Sprintf("unable to write %s data: %v", writeDesc[writeFD], err)) {
					return
				}
				if n > 0 {
					relayBuffer[writeFD].Next(n)
				}
				// Our stdin hit EOF earlier and the container's stdin
				// buffer is now empty: close the write end (-1 marks it
				// closed so we never close twice).
				if closeStdin && writeFD == stdioPipe[unix.Stdin][1] && stdioPipe[unix.Stdin][1] >= 0 && relayBuffer[stdioPipe[unix.Stdin][1]].Len() == 0 {
					logrus.Debugf("closing stdin")
					unix.Close(stdioPipe[unix.Stdin][1])
					stdioPipe[unix.Stdin][1] = -1
				}
			}
			if relayBuffer[writeFD].Len() > 0 {
				pollTimeout = 100
			}
		}
		// Remove any descriptors which we don't need to poll any more from the poll descriptor list.
		for remove := range removes {
			if copyPipes && remove == unix.Stdin {
				closeStdin = true
				if relayBuffer[stdioPipe[unix.Stdin][1]].Len() == 0 {
					logrus.Debugf("closing stdin")
					unix.Close(stdioPipe[unix.Stdin][1])
					stdioPipe[unix.Stdin][1] = -1
				}
			}
			delete(relayMap, remove)
		}
		// If the we-can-return pipe had anything for us, we're done.
		for _, pollFd := range pollFds {
			if int(pollFd.Fd) == finishCopy[0] && pollFd.Revents != 0 {
				// The pipe is closed, indicating that we can stop now.
				return
			}
		}
	}
}
1000
1001func runAcceptTerminal(logger *logrus.Logger, consoleListener *net.UnixListener, terminalSize *specs.Box) (int, error) {1002defer consoleListener.Close()1003c, err := consoleListener.AcceptUnix()1004if err != nil {1005return -1, fmt.Errorf("accepting socket descriptor connection: %w", err)1006}1007defer c.Close()1008// Expect a control message over our new connection.1009b := make([]byte, 8192)1010oob := make([]byte, 8192)1011n, oobn, _, _, err := c.ReadMsgUnix(b, oob)1012if err != nil {1013return -1, fmt.Errorf("reading socket descriptor: %w", err)1014}1015if n > 0 {1016logrus.Debugf("socket descriptor is for %q", string(b[:n]))1017}1018if oobn > len(oob) {1019return -1, fmt.Errorf("too much out-of-bounds data (%d bytes)", oobn)1020}1021// Parse the control message.1022scm, err := unix.ParseSocketControlMessage(oob[:oobn])1023if err != nil {1024return -1, fmt.Errorf("parsing out-of-bound data as a socket control message: %w", err)1025}1026logrus.Debugf("control messages: %v", scm)1027// Expect to get a descriptor.1028terminalFD := -11029for i := range scm {1030fds, err := unix.ParseUnixRights(&scm[i])1031if err != nil {1032return -1, fmt.Errorf("parsing unix rights control message: %v: %w", &scm[i], err)1033}1034logrus.Debugf("fds: %v", fds)1035if len(fds) == 0 {1036continue1037}1038terminalFD = fds[0]1039break1040}1041if terminalFD == -1 {1042return -1, fmt.Errorf("unable to read terminal descriptor")1043}1044// Set the pseudoterminal's size to the configured size, or our own.1045winsize := &unix.Winsize{}1046if terminalSize != nil {1047// Use configured sizes.1048winsize.Row = uint16(terminalSize.Height)1049winsize.Col = uint16(terminalSize.Width)1050} else {1051if term.IsTerminal(unix.Stdin) {1052// Use the size of our terminal.1053if winsize, err = unix.IoctlGetWinsize(unix.Stdin, unix.TIOCGWINSZ); err != nil {1054logger.Warnf("error reading size of controlling terminal: %v", err)1055winsize.Row = 01056winsize.Col = 01057}1058}1059}1060if winsize.Row != 0 
&& winsize.Col != 0 {1061if err = unix.IoctlSetWinsize(terminalFD, unix.TIOCSWINSZ, winsize); err != nil {1062logger.Warnf("error setting size of container pseudoterminal: %v", err)1063}1064// FIXME - if we're connected to a terminal, we should1065// be passing the updated terminal size down when we1066// receive a SIGWINCH.1067}1068return terminalFD, nil1069}
1070
1071func runUsingRuntimeMain() {1072var options runUsingRuntimeSubprocOptions1073// Set logging.1074if level := os.Getenv("LOGLEVEL"); level != "" {1075if ll, err := strconv.Atoi(level); err == nil {1076logrus.SetLevel(logrus.Level(ll))1077}1078}1079// Unpack our configuration.1080confPipe := os.NewFile(3, "confpipe")1081if confPipe == nil {1082fmt.Fprintf(os.Stderr, "error reading options pipe\n")1083os.Exit(1)1084}1085defer confPipe.Close()1086if err := json.NewDecoder(confPipe).Decode(&options); err != nil {1087fmt.Fprintf(os.Stderr, "error decoding options: %v\n", err)1088os.Exit(1)1089}1090// Set ourselves up to read the container's exit status. We're doing this in a child process1091// so that we won't mess with the setting in a caller of the library.1092if err := setChildProcess(); err != nil {1093os.Exit(1)1094}1095ospec := options.Spec1096if ospec == nil {1097fmt.Fprintf(os.Stderr, "options spec not specified\n")1098os.Exit(1)1099}1100
1101// open the pipes used to communicate with the parent process1102var containerCreateW *os.File1103var containerStartR *os.File1104if options.ConfigureNetwork {1105containerCreateW = os.NewFile(4, "containercreatepipe")1106if containerCreateW == nil {1107fmt.Fprintf(os.Stderr, "could not open fd 4\n")1108os.Exit(1)1109}1110containerStartR = os.NewFile(5, "containerstartpipe")1111if containerStartR == nil {1112fmt.Fprintf(os.Stderr, "could not open fd 5\n")1113os.Exit(1)1114}1115}1116
1117// Run the container, start to finish.1118status, err := runUsingRuntime(options.Options, options.ConfigureNetwork, options.MoreCreateArgs, ospec, options.BundlePath, options.ContainerName, containerCreateW, containerStartR)1119if err != nil {1120fmt.Fprintf(os.Stderr, "error running container: %v\n", err)1121os.Exit(1)1122}1123// Pass the container's exit status back to the caller by exiting with the same status.1124if status.Exited() {1125os.Exit(status.ExitStatus())1126} else if status.Signaled() {1127fmt.Fprintf(os.Stderr, "container exited on %s\n", status.Signal())1128os.Exit(1)1129}1130os.Exit(1)1131}
1132
// runUsingRuntimeSubproc runs the container in a reexec'd child process
// (runUsingRuntimeMain). It marshals the run configuration to JSON and feeds
// it to the child over a pipe (the child's fd 3), forwards SIGHUP/SIGINT/
// SIGTERM to the child, and — when configureNetwork is set — performs network
// setup from this side once the child reports (over the child's fd 4) that
// the container has been created, then signals the child (over the child's
// fd 5) that it may start the container. Returns the first error from the
// runtime, or any error encountered while piping the configuration down.
func (b *Builder) runUsingRuntimeSubproc(isolation define.Isolation, options RunOptions, configureNetwork bool, networkString string,
	moreCreateArgs []string, spec *specs.Spec, rootPath, bundlePath, containerName, buildContainerName, hostsFile, resolvFile string) (err error) {
	// Lock the caller to a single OS-level thread.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// confwg tracks the goroutine that copies the JSON configuration to the
	// child; conferr carries any error it hits.
	var confwg sync.WaitGroup
	config, conferr := json.Marshal(runUsingRuntimeSubprocOptions{
		Options:          options,
		Spec:             spec,
		RootPath:         rootPath,
		BundlePath:       bundlePath,
		ConfigureNetwork: configureNetwork,
		MoreCreateArgs:   moreCreateArgs,
		ContainerName:    containerName,
		Isolation:        isolation,
	})
	if conferr != nil {
		return fmt.Errorf("encoding configuration for %q: %w", runUsingRuntimeCommand, conferr)
	}
	cmd := reexec.Command(runUsingRuntimeCommand)
	// Make sure the child dies if we do.
	setPdeathsig(cmd)
	cmd.Dir = bundlePath
	cmd.Stdin = options.Stdin
	if cmd.Stdin == nil {
		cmd.Stdin = os.Stdin
	}
	cmd.Stdout = options.Stdout
	if cmd.Stdout == nil {
		cmd.Stdout = os.Stdout
	}
	cmd.Stderr = options.Stderr
	if cmd.Stderr == nil {
		cmd.Stderr = os.Stderr
	}
	// Propagate our log level to the child (read back via LOGLEVEL in
	// runUsingRuntimeMain).
	cmd.Env = util.MergeEnv(os.Environ(), []string{fmt.Sprintf("LOGLEVEL=%d", logrus.GetLevel())})
	preader, pwriter, err := os.Pipe()
	if err != nil {
		return fmt.Errorf("creating configuration pipe: %w", err)
	}
	confwg.Add(1)
	go func() {
		_, conferr = io.Copy(pwriter, bytes.NewReader(config))
		if conferr != nil {
			conferr = fmt.Errorf("while copying configuration down pipe to child process: %w", conferr)
		}
		confwg.Done()
	}()

	// create network configuration pipes
	var containerCreateR, containerCreateW fileCloser
	var containerStartR, containerStartW fileCloser
	if configureNetwork {
		containerCreateR.file, containerCreateW.file, err = os.Pipe()
		if err != nil {
			return fmt.Errorf("creating container create pipe: %w", err)
		}
		defer containerCreateR.Close()
		defer containerCreateW.Close()

		containerStartR.file, containerStartW.file, err = os.Pipe()
		if err != nil {
			return fmt.Errorf("creating container start pipe: %w", err)
		}
		defer containerStartR.Close()
		defer containerStartW.Close()
		// These become the child's fds 4 and 5 (after preader is prepended
		// below as fd 3).
		cmd.ExtraFiles = []*os.File{containerCreateW.file, containerStartR.file}
	}

	cmd.ExtraFiles = append([]*os.File{preader}, cmd.ExtraFiles...)
	defer preader.Close()
	defer pwriter.Close()
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("while starting runtime: %w", err)
	}

	// Forward termination-type signals to the child for as long as it runs.
	interrupted := make(chan os.Signal, 100)
	go func() {
		for receivedSignal := range interrupted {
			if err := cmd.Process.Signal(receivedSignal); err != nil {
				logrus.Infof("%v while attempting to forward %v to child process", err, receivedSignal)
			}
		}
	}()
	signal.Notify(interrupted, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)

	if configureNetwork {
		// we already passed the fd to the child, now close the writer so we do not hang if the child closes it
		containerCreateW.Close()
		if err := waitForSync(containerCreateR.file); err != nil {
			// we do not want to return here since we want to capture the exit code from the child via cmd.Wait()
			// close the pipes here so that the child will not hang forever
			containerCreateR.Close()
			containerStartW.Close()
			logrus.Errorf("did not get container create message from subprocess: %v", err)
		} else {
			// The child wrote its PID into the bundle directory; use it to
			// set up the network namespace from this side.
			pidFile := filepath.Join(bundlePath, "pid")
			pidValue, err := os.ReadFile(pidFile)
			if err != nil {
				return err
			}
			pid, err := strconv.Atoi(strings.TrimSpace(string(pidValue)))
			if err != nil {
				return fmt.Errorf("parsing pid %s as a number: %w", string(pidValue), err)
			}

			teardown, netResult, err := b.runConfigureNetwork(pid, isolation, options, networkString, containerName, []string{spec.Hostname, buildContainerName})
			if teardown != nil {
				defer teardown()
			}
			if err != nil {
				return fmt.Errorf("setup network: %w", err)
			}

			// only add hosts if we manage the hosts file
			if hostsFile != "" {
				err = b.addHostsEntries(hostsFile, rootPath, netResult.entries, netResult.excludeIPs)
				if err != nil {
					return err
				}
			}

			if resolvFile != "" {
				err = b.addResolvConfEntries(resolvFile, netResult.dnsServers, spec.Linux.Namespaces, netResult.keepHostResolvers, netResult.ipv6)
				if err != nil {
					return err
				}
			}

			logrus.Debug("network namespace successfully setup, send start message to child")
			_, err = containerStartW.file.Write([]byte{1})
			if err != nil {
				return err
			}
		}
	}

	if err := cmd.Wait(); err != nil {
		return fmt.Errorf("while running runtime: %w", err)
	}
	confwg.Wait()
	signal.Stop(interrupted)
	close(interrupted)
	// Prefer reporting a runtime error (the named return err); otherwise
	// surface any error from writing the configuration to the child.
	if err == nil {
		return conferr
	}
	if conferr != nil {
		logrus.Debugf("%v", conferr)
	}
	return err
}
1284
// runUsingRuntimeSubprocOptions is the configuration that
// runUsingRuntimeSubproc marshals to JSON and sends down a pipe (the child's
// fd 3) to the reexec'd child, which decodes it in runUsingRuntimeMain. The
// field names are part of that wire format.
type runUsingRuntimeSubprocOptions struct {
	Options          RunOptions       // RunOptions for this Run() invocation
	Spec             *specs.Spec      // runtime spec for the container; must be non-nil
	RootPath         string           // path to the container's root filesystem
	BundlePath       string           // OCI bundle directory; also the child's working directory
	ConfigureNetwork bool             // if set, the child opens fds 4/5 to sync network setup
	MoreCreateArgs   []string         // extra arguments for the runtime's create command
	ContainerName    string           // name to give the container
	Isolation        define.Isolation // isolation mechanism in use
}
1295
1296func init() {1297reexec.Register(runUsingRuntimeCommand, runUsingRuntimeMain)1298}
1299
// setupMounts assembles the full list of mounts for the container's spec:
// special spec mounts, bound files (bindFiles), subscription mounts, per-RUN
// mounts (secret/ssh/bind/tmpfs/cache from runFileMounts), built-in volumes,
// and explicitly-requested volume mounts — sorted, with later entries dropped
// when an earlier one already claims the same destination. It stores the
// result in spec.Mounts and returns the artifacts needed to undo the per-RUN
// pieces. If this succeeds, the caller must call cleanupMounts().
func (b *Builder) setupMounts(mountPoint string, spec *specs.Spec, bundlePath string, optionMounts []specs.Mount, bindFiles map[string]string, builtinVolumes, volumeMounts []string, runFileMounts []string, runMountInfo runMountInfo) (*runMountArtifacts, error) {
	// Start building a new list of mounts.
	var mounts []specs.Mount
	haveMount := func(destination string) bool {
		for _, mount := range mounts {
			if mount.Destination == destination {
				// Already have something to mount there.
				return true
			}
		}
		return false
	}

	specMounts, err := setupSpecialMountSpecChanges(spec, b.CommonBuildOpts.ShmSize)
	if err != nil {
		return nil, err
	}

	// Get the list of files we need to bind into the container.
	bindFileMounts := runSetupBoundFiles(bundlePath, bindFiles)

	// After this point we need to know the per-container persistent storage directory.
	cdir, err := b.store.ContainerDirectory(b.ContainerID)
	if err != nil {
		return nil, fmt.Errorf("determining work directory for container %q: %w", b.ContainerID, err)
	}

	// Figure out which UID and GID to tell the subscriptions package to use
	// for files that it creates.
	rootUID, rootGID, err := util.GetHostRootIDs(spec)
	if err != nil {
		return nil, err
	}

	// Get host UID and GID of the container process.
	var uidMap = []specs.LinuxIDMapping{}
	var gidMap = []specs.LinuxIDMapping{}
	if spec.Linux != nil {
		uidMap = spec.Linux.UIDMappings
		gidMap = spec.Linux.GIDMappings
	}
	processUID, processGID, err := util.GetHostIDs(uidMap, gidMap, spec.Process.User.UID, spec.Process.User.GID)
	if err != nil {
		return nil, err
	}

	// Get the list of subscriptions mounts.
	subscriptionMounts := subscriptions.MountsWithUIDGID(b.MountLabel, cdir, b.DefaultMountsFilePath, mountPoint, int(rootUID), int(rootGID), unshare.IsRootless(), false)

	idMaps := IDMaps{
		uidmap:     uidMap,
		gidmap:     gidMap,
		rootUID:    int(rootUID),
		rootGID:    int(rootGID),
		processUID: int(processUID),
		processGID: int(processGID),
	}
	// Get the list of mounts that are just for this Run() call.
	runMounts, mountArtifacts, err := b.runSetupRunMounts(mountPoint, runFileMounts, runMountInfo, idMaps)
	if err != nil {
		return nil, err
	}
	// On any later failure, release the cache-target locks that
	// runSetupRunMounts acquired; on success they stay held for the caller.
	succeeded := false
	defer func() {
		if !succeeded {
			volumes.UnlockLockArray(mountArtifacts.TargetLocks)
		}
	}()
	// Add temporary copies of the contents of volume locations at the
	// volume locations, unless we already have something there.
	builtins, err := runSetupBuiltinVolumes(b.MountLabel, mountPoint, cdir, builtinVolumes, int(rootUID), int(rootGID))
	if err != nil {
		return nil, err
	}

	// Get the list of explicitly-specified volume mounts.
	var mountLabel = ""
	if spec.Linux != nil {
		mountLabel = spec.Linux.MountLabel
	}
	volumes, err := b.runSetupVolumeMounts(mountLabel, volumeMounts, optionMounts, idMaps)
	if err != nil {
		return nil, err
	}

	// prepare list of mount destinations which can be cleaned up safely.
	// we can clean bindFiles, subscriptionMounts and specMounts
	// everything other than these might have users content
	mountArtifacts.RunMountTargets = append(append(append(mountArtifacts.RunMountTargets, cleanableDestinationListFromMounts(bindFileMounts)...), cleanableDestinationListFromMounts(subscriptionMounts)...), cleanableDestinationListFromMounts(specMounts)...)

	allMounts := util.SortMounts(append(append(append(append(append(volumes, builtins...), runMounts...), subscriptionMounts...), bindFileMounts...), specMounts...))
	// Add them all, in the preferred order, except where they conflict with something that was previously added.
	for _, mount := range allMounts {
		if haveMount(mount.Destination) {
			// Already mounting something there, no need to bother with this one.
			continue
		}
		// Add the mount.
		mounts = append(mounts, mount)
	}

	// Set the list in the spec.
	spec.Mounts = mounts
	succeeded = true
	return mountArtifacts, nil
}
1407
// runSetupBuiltinVolumes returns bind mounts for the image's built-in
// volumes. Each volume is backed by a directory under containerDir
// ("buildah-volumes/<hex digest of the volume path>"). When that backing
// directory is created for the first time, its mode and ownership are copied
// from the corresponding location in the rootfs and it is populated with
// whatever content the rootfs already has there.
func runSetupBuiltinVolumes(mountLabel, mountPoint, containerDir string, builtinVolumes []string, rootUID, rootGID int) ([]specs.Mount, error) {
	var mounts []specs.Mount
	hostOwner := idtools.IDPair{UID: rootUID, GID: rootGID}
	// Add temporary copies of the contents of volume locations at the
	// volume locations, unless we already have something there.
	for _, volume := range builtinVolumes {
		// Hash the volume path so it maps to a stable, filesystem-safe
		// directory name under containerDir.
		volumePath := filepath.Join(containerDir, "buildah-volumes", digest.Canonical.FromString(volume).Hex())
		initializeVolume := false
		// If we need to, create the directory that we'll use to hold
		// the volume contents. If we do need to create it, then we'll
		// need to populate it, too, so make a note of that.
		if err := fileutils.Exists(volumePath); err != nil {
			if !errors.Is(err, fs.ErrNotExist) {
				return nil, err
			}
			logrus.Debugf("setting up built-in volume path at %q for %q", volumePath, volume)
			if err = os.MkdirAll(volumePath, 0755); err != nil {
				return nil, err
			}
			if err = relabel(volumePath, mountLabel, false); err != nil {
				return nil, err
			}
			initializeVolume = true
		}
		// Make sure the volume exists in the rootfs and read its attributes.
		createDirPerms := os.FileMode(0755)
		err := copier.Mkdir(mountPoint, filepath.Join(mountPoint, volume), copier.MkdirOptions{
			ChownNew: &hostOwner,
			ChmodNew: &createDirPerms,
		})
		if err != nil {
			return nil, fmt.Errorf("ensuring volume path %q: %w", filepath.Join(mountPoint, volume), err)
		}
		// Resolve any symlinks in the volume path relative to the rootfs.
		srcPath, err := copier.Eval(mountPoint, filepath.Join(mountPoint, volume), copier.EvalOptions{})
		if err != nil {
			return nil, fmt.Errorf("evaluating path %q: %w", srcPath, err)
		}
		stat, err := os.Stat(srcPath)
		if err != nil && !errors.Is(err, os.ErrNotExist) {
			return nil, err
		}
		// If we need to populate the mounted volume's contents with
		// content from the rootfs, set it up now.
		// NOTE(review): if srcPath does not exist, stat is nil here and the
		// branch below would dereference it; copier.Mkdir above appears to
		// guarantee existence — confirm.
		if initializeVolume {
			if err = os.Chmod(volumePath, stat.Mode().Perm()); err != nil {
				return nil, err
			}
			if err = os.Chown(volumePath, int(stat.Sys().(*syscall.Stat_t).Uid), int(stat.Sys().(*syscall.Stat_t).Gid)); err != nil {
				return nil, err
			}
			logrus.Debugf("populating directory %q for volume %q using contents of %q", volumePath, volume, srcPath)
			if err = extractWithTar(mountPoint, srcPath, volumePath); err != nil && !errors.Is(err, os.ErrNotExist) {
				return nil, fmt.Errorf("populating directory %q for volume %q using contents of %q: %w", volumePath, volume, srcPath, err)
			}
		}
		// Add the bind mount.
		mounts = append(mounts, specs.Mount{
			Source:      volumePath,
			Destination: volume,
			Type:        define.TypeBind,
			Options:     define.BindOptions,
		})
	}
	return mounts, nil
}
1473
1474// Destinations which can be cleaned up after every RUN
1475func cleanableDestinationListFromMounts(mounts []specs.Mount) []string {1476mountDest := []string{}1477for _, mount := range mounts {1478// Add all destination to mountArtifacts so that they can be cleaned up later1479if mount.Destination != "" {1480cleanPath := true1481for _, prefix := range nonCleanablePrefixes {1482if strings.HasPrefix(mount.Destination, prefix) {1483cleanPath = false1484break1485}1486}1487if cleanPath {1488mountDest = append(mountDest, mount.Destination)1489}1490}1491}1492return mountDest1493}
1494
1495func checkIfMountDestinationPreExists(root string, dest string) (bool, error) {1496statResults, err := copier.Stat(root, "", copier.StatOptions{}, []string{dest})1497if err != nil {1498return false, err1499}1500if len(statResults) > 0 {1501// We created exact path for globbing so it will1502// return only one result.1503if statResults[0].Error != "" && len(statResults[0].Globbed) == 0 {1504// Path do not exist.1505return false, nil1506}1507// Path exists.1508return true, nil1509}1510return false, nil1511}
1512
// runSetupRunMounts sets up mounts that exist only in this RUN, not in subsequent runs
//
// If this function succeeds, the caller must unlock runMountArtifacts.TargetLocks
// once the RUN step's mounts are torn down; cleanupRunMounts does this.
func (b *Builder) runSetupRunMounts(mountPoint string, mounts []string, sources runMountInfo, idMaps IDMaps) ([]specs.Mount, *runMountArtifacts, error) {
	mountTargets := make([]string, 0, 10)
	tmpFiles := make([]string, 0, len(mounts))
	mountImages := make([]string, 0, 10)
	finalMounts := make([]specs.Mount, 0, len(mounts))
	agents := make([]*sshagent.AgentServer, 0, len(mounts))
	sshCount := 0
	defaultSSHSock := ""
	targetLocks := []*lockfile.LockFile{}
	// If we fail partway through, release any cache-target locks we've taken.
	succeeded := false
	defer func() {
		if !succeeded {
			volumes.UnlockLockArray(targetLocks)
		}
	}()
	for _, mount := range mounts {
		var mountSpec *specs.Mount
		var err error
		var envFile, image string
		var agent *sshagent.AgentServer
		var tl *lockfile.LockFile
		tokens := strings.Split(mount, ",")

		// If `type` is not set default to TypeBind
		mountType := define.TypeBind

		// Scan the tokens for an explicit type= option before dispatching.
		for _, field := range tokens {
			if strings.HasPrefix(field, "type=") {
				kv := strings.Split(field, "=")
				if len(kv) != 2 {
					return nil, nil, errors.New("invalid mount type")
				}
				mountType = kv[1]
			}
		}
		switch mountType {
		case "secret":
			// mountSpec is nil when the secret is optional and absent.
			mountSpec, envFile, err = b.getSecretMount(tokens, sources.Secrets, idMaps, sources.WorkDir)
			if err != nil {
				return nil, nil, err
			}
			if mountSpec != nil {
				finalMounts = append(finalMounts, *mountSpec)
				if envFile != "" {
					// Env-sourced secrets leave a temp file to delete later.
					tmpFiles = append(tmpFiles, envFile)
				}
			}
		case "ssh":
			mountSpec, agent, err = b.getSSHMount(tokens, sshCount, sources.SSHSources, idMaps)
			if err != nil {
				return nil, nil, err
			}
			if mountSpec != nil {
				finalMounts = append(finalMounts, *mountSpec)
				agents = append(agents, agent)
				if sshCount == 0 {
					defaultSSHSock = mountSpec.Destination
				}
				// Count is needed as the default destination of the ssh sock inside the container is /run/buildkit/ssh_agent.{i}
				sshCount++
			}
		case define.TypeBind:
			mountSpec, image, err = b.getBindMount(tokens, sources.SystemContext, sources.ContextDir, sources.StageMountPoints, idMaps, sources.WorkDir)
			if err != nil {
				return nil, nil, err
			}
			finalMounts = append(finalMounts, *mountSpec)
			// only perform cleanup if image was mounted ignore everything else
			if image != "" {
				mountImages = append(mountImages, image)
			}
		case "tmpfs":
			mountSpec, err = b.getTmpfsMount(tokens, idMaps)
			if err != nil {
				return nil, nil, err
			}
			finalMounts = append(finalMounts, *mountSpec)
		case "cache":
			mountSpec, tl, err = b.getCacheMount(tokens, sources.StageMountPoints, idMaps, sources.WorkDir)
			if err != nil {
				return nil, nil, err
			}
			finalMounts = append(finalMounts, *mountSpec)
			if tl != nil {
				targetLocks = append(targetLocks, tl)
			}
		default:
			return nil, nil, fmt.Errorf("invalid mount type %q", mountType)
		}

		if mountSpec != nil {
			pathPreExists, err := checkIfMountDestinationPreExists(mountPoint, mountSpec.Destination)
			if err != nil {
				return nil, nil, err
			}
			if !pathPreExists {
				// The path did not exist before we created any new mounts,
				// so we must remove the newly-created directory after this
				// step.
				mountTargets = append(mountTargets, mountSpec.Destination)
			}
		}
	}
	succeeded = true
	artifacts := &runMountArtifacts{
		RunMountTargets: mountTargets,
		TmpFiles:        tmpFiles,
		Agents:          agents,
		MountedImages:   mountImages,
		SSHAuthSock:     defaultSSHSock,
		TargetLocks:     targetLocks,
	}
	return finalMounts, artifacts, nil
}
1630
1631func (b *Builder) getBindMount(tokens []string, context *imageTypes.SystemContext, contextDir string, stageMountPoints map[string]internal.StageMountDetails, idMaps IDMaps, workDir string) (*specs.Mount, string, error) {1632if contextDir == "" {1633return nil, "", errors.New("Context Directory for current run invocation is not configured")1634}1635var optionMounts []specs.Mount1636mount, image, err := volumes.GetBindMount(context, tokens, contextDir, b.store, b.MountLabel, stageMountPoints, workDir)1637if err != nil {1638return nil, image, err1639}1640optionMounts = append(optionMounts, mount)1641volumes, err := b.runSetupVolumeMounts(b.MountLabel, nil, optionMounts, idMaps)1642if err != nil {1643return nil, image, err1644}1645return &volumes[0], image, nil1646}
1647
1648func (b *Builder) getTmpfsMount(tokens []string, idMaps IDMaps) (*specs.Mount, error) {1649var optionMounts []specs.Mount1650mount, err := volumes.GetTmpfsMount(tokens)1651if err != nil {1652return nil, err1653}1654optionMounts = append(optionMounts, mount)1655volumes, err := b.runSetupVolumeMounts(b.MountLabel, nil, optionMounts, idMaps)1656if err != nil {1657return nil, err1658}1659return &volumes[0], nil1660}
1661
1662func (b *Builder) getSecretMount(tokens []string, secrets map[string]define.Secret, idMaps IDMaps, workdir string) (*specs.Mount, string, error) {1663errInvalidSyntax := errors.New("secret should have syntax id=id[,target=path,required=bool,mode=uint,uid=uint,gid=uint")1664if len(tokens) == 0 {1665return nil, "", errInvalidSyntax1666}1667var err error1668var id, target string1669var required bool1670var uid, gid uint321671var mode uint32 = 04001672for _, val := range tokens {1673kv := strings.SplitN(val, "=", 2)1674switch kv[0] {1675case "type":1676// This is already processed1677continue1678case "id":1679id = kv[1]1680case "target", "dst", "destination":1681target = kv[1]1682if !filepath.IsAbs(target) {1683target = filepath.Join(workdir, target)1684}1685case "required":1686required = true1687if len(kv) > 1 {1688required, err = strconv.ParseBool(kv[1])1689if err != nil {1690return nil, "", errInvalidSyntax1691}1692}1693case "mode":1694mode64, err := strconv.ParseUint(kv[1], 8, 32)1695if err != nil {1696return nil, "", errInvalidSyntax1697}1698mode = uint32(mode64)1699case "uid":1700uid64, err := strconv.ParseUint(kv[1], 10, 32)1701if err != nil {1702return nil, "", errInvalidSyntax1703}1704uid = uint32(uid64)1705case "gid":1706gid64, err := strconv.ParseUint(kv[1], 10, 32)1707if err != nil {1708return nil, "", errInvalidSyntax1709}1710gid = uint32(gid64)1711default:1712return nil, "", errInvalidSyntax1713}1714}1715
1716if id == "" {1717return nil, "", errInvalidSyntax1718}1719// Default location for secretis is /run/secrets/id1720if target == "" {1721target = "/run/secrets/" + id1722}1723
1724secr, ok := secrets[id]1725if !ok {1726if required {1727return nil, "", fmt.Errorf("secret required but no secret with id %s found", id)1728}1729return nil, "", nil1730}1731var data []byte1732var envFile string1733var ctrFileOnHost string1734
1735switch secr.SourceType {1736case "env":1737data = []byte(os.Getenv(secr.Source))1738tmpFile, err := os.CreateTemp(define.TempDir, "buildah*")1739if err != nil {1740return nil, "", err1741}1742envFile = tmpFile.Name()1743ctrFileOnHost = tmpFile.Name()1744case "file":1745containerWorkingDir, err := b.store.ContainerDirectory(b.ContainerID)1746if err != nil {1747return nil, "", err1748}1749data, err = os.ReadFile(secr.Source)1750if err != nil {1751return nil, "", err1752}1753ctrFileOnHost = filepath.Join(containerWorkingDir, "secrets", id)1754default:1755return nil, "", errors.New("invalid source secret type")1756}1757
1758// Copy secrets to container working dir (or tmp dir if it's an env), since we need to chmod,1759// chown and relabel it for the container user and we don't want to mess with the original file1760if err := os.MkdirAll(filepath.Dir(ctrFileOnHost), 0755); err != nil {1761return nil, "", err1762}1763if err := os.WriteFile(ctrFileOnHost, data, 0644); err != nil {1764return nil, "", err1765}1766
1767if err := relabel(ctrFileOnHost, b.MountLabel, false); err != nil {1768return nil, "", err1769}1770hostUID, hostGID, err := util.GetHostIDs(idMaps.uidmap, idMaps.gidmap, uid, gid)1771if err != nil {1772return nil, "", err1773}1774if err := os.Lchown(ctrFileOnHost, int(hostUID), int(hostGID)); err != nil {1775return nil, "", err1776}1777if err := os.Chmod(ctrFileOnHost, os.FileMode(mode)); err != nil {1778return nil, "", err1779}1780newMount := specs.Mount{1781Destination: target,1782Type: define.TypeBind,1783Source: ctrFileOnHost,1784Options: append(define.BindOptions, "rprivate", "ro"),1785}1786return &newMount, envFile, nil1787}
1788
1789// getSSHMount parses the --mount type=ssh flag in the Containerfile, checks if there's an ssh source provided, and creates and starts an ssh-agent to be forwarded into the container
1790func (b *Builder) getSSHMount(tokens []string, count int, sshsources map[string]*sshagent.Source, idMaps IDMaps) (*specs.Mount, *sshagent.AgentServer, error) {1791errInvalidSyntax := errors.New("ssh should have syntax id=id[,target=path,required=bool,mode=uint,uid=uint,gid=uint")1792
1793var err error1794var id, target string1795var required bool1796var uid, gid uint321797var mode uint32 = 4001798for _, val := range tokens {1799kv := strings.SplitN(val, "=", 2)1800if len(kv) < 2 {1801return nil, nil, errInvalidSyntax1802}1803switch kv[0] {1804case "type":1805// This is already processed1806continue1807case "id":1808id = kv[1]1809case "target", "dst", "destination":1810target = kv[1]1811case "required":1812required, err = strconv.ParseBool(kv[1])1813if err != nil {1814return nil, nil, errInvalidSyntax1815}1816case "mode":1817mode64, err := strconv.ParseUint(kv[1], 8, 32)1818if err != nil {1819return nil, nil, errInvalidSyntax1820}1821mode = uint32(mode64)1822case "uid":1823uid64, err := strconv.ParseUint(kv[1], 10, 32)1824if err != nil {1825return nil, nil, errInvalidSyntax1826}1827uid = uint32(uid64)1828case "gid":1829gid64, err := strconv.ParseUint(kv[1], 10, 32)1830if err != nil {1831return nil, nil, errInvalidSyntax1832}1833gid = uint32(gid64)1834default:1835return nil, nil, errInvalidSyntax1836}1837}1838
1839if id == "" {1840id = "default"1841}1842// Default location for secretis is /run/buildkit/ssh_agent.{i}1843if target == "" {1844target = fmt.Sprintf("/run/buildkit/ssh_agent.%d", count)1845}1846
1847sshsource, ok := sshsources[id]1848if !ok {1849if required {1850return nil, nil, fmt.Errorf("ssh required but no ssh with id %s found", id)1851}1852return nil, nil, nil1853}1854// Create new agent from keys or socket1855fwdAgent, err := sshagent.NewAgentServer(sshsource)1856if err != nil {1857return nil, nil, err1858}1859// Start ssh server, and get the host sock we're mounting in the container1860hostSock, err := fwdAgent.Serve(b.ProcessLabel)1861if err != nil {1862return nil, nil, err1863}1864
1865if err := relabel(filepath.Dir(hostSock), b.MountLabel, false); err != nil {1866if shutdownErr := fwdAgent.Shutdown(); shutdownErr != nil {1867b.Logger.Errorf("error shutting down agent: %v", shutdownErr)1868}1869return nil, nil, err1870}1871if err := relabel(hostSock, b.MountLabel, false); err != nil {1872if shutdownErr := fwdAgent.Shutdown(); shutdownErr != nil {1873b.Logger.Errorf("error shutting down agent: %v", shutdownErr)1874}1875return nil, nil, err1876}1877hostUID, hostGID, err := util.GetHostIDs(idMaps.uidmap, idMaps.gidmap, uid, gid)1878if err != nil {1879if shutdownErr := fwdAgent.Shutdown(); shutdownErr != nil {1880b.Logger.Errorf("error shutting down agent: %v", shutdownErr)1881}1882return nil, nil, err1883}1884if err := os.Lchown(hostSock, int(hostUID), int(hostGID)); err != nil {1885if shutdownErr := fwdAgent.Shutdown(); shutdownErr != nil {1886b.Logger.Errorf("error shutting down agent: %v", shutdownErr)1887}1888return nil, nil, err1889}1890if err := os.Chmod(hostSock, os.FileMode(mode)); err != nil {1891if shutdownErr := fwdAgent.Shutdown(); shutdownErr != nil {1892b.Logger.Errorf("error shutting down agent: %v", shutdownErr)1893}1894return nil, nil, err1895}1896newMount := specs.Mount{1897Destination: target,1898Type: define.TypeBind,1899Source: hostSock,1900Options: append(define.BindOptions, "rprivate", "ro"),1901}1902return &newMount, fwdAgent, nil1903}
1904
1905func (b *Builder) cleanupTempVolumes() {1906for tempVolume, val := range b.TempVolumes {1907if val {1908if err := overlay.RemoveTemp(tempVolume); err != nil {1909b.Logger.Errorf(err.Error())1910}1911b.TempVolumes[tempVolume] = false1912}1913}1914}
1915
1916// cleanupRunMounts cleans up run mounts so they only appear in this run.
1917func (b *Builder) cleanupRunMounts(context *imageTypes.SystemContext, mountpoint string, artifacts *runMountArtifacts) error {1918for _, agent := range artifacts.Agents {1919err := agent.Shutdown()1920if err != nil {1921return err1922}1923}1924
1925//cleanup any mounted images for this run1926for _, image := range artifacts.MountedImages {1927if image != "" {1928// if flow hits here some image was mounted for this run1929i, err := internalUtil.LookupImage(context, b.store, image)1930if err == nil {1931// silently try to unmount and do nothing1932// if image is being used by something else1933_ = i.Unmount(false)1934}1935if errors.Is(err, storageTypes.ErrImageUnknown) {1936// Ignore only if ErrImageUnknown1937// Reason: Image is already unmounted do nothing1938continue1939}1940return err1941}1942}1943opts := copier.RemoveOptions{1944All: true,1945}1946for _, path := range artifacts.RunMountTargets {1947err := copier.Remove(mountpoint, path, opts)1948if err != nil {1949return err1950}1951}1952var prevErr error1953for _, path := range artifacts.TmpFiles {1954err := os.Remove(path)1955if !errors.Is(err, os.ErrNotExist) {1956if prevErr != nil {1957logrus.Error(prevErr)1958}1959prevErr = err1960}1961}1962// unlock if any locked files from this RUN statement1963volumes.UnlockLockArray(artifacts.TargetLocks)1964return prevErr1965}
1966
1967// setPdeathsig sets a parent-death signal for the process
1968// the goroutine that starts the child process should lock itself to
1969// a native thread using runtime.LockOSThread() until the child exits
1970func setPdeathsig(cmd *exec.Cmd) {1971if cmd.SysProcAttr == nil {1972cmd.SysProcAttr = &syscall.SysProcAttr{}1973}1974cmd.SysProcAttr.Pdeathsig = syscall.SIGKILL1975}
1976
1977func relabel(path, mountLabel string, recurse bool) error {1978if err := label.Relabel(path, mountLabel, recurse); err != nil {1979if !errors.Is(err, syscall.ENOTSUP) {1980return err1981}1982logrus.Debugf("Labeling not supported on %q", path)1983}1984return nil1985}
1986