podman

Форк
0
839 строк · 26.5 Кб
1
//go:build linux || freebsd
2
// +build linux freebsd
3

4
package chroot
5

6
import (
7
	"bytes"
8
	"encoding/json"
9
	"fmt"
10
	"io"
11
	"os"
12
	"os/exec"
13
	"os/signal"
14
	"path/filepath"
15
	"runtime"
16
	"strconv"
17
	"strings"
18
	"sync"
19
	"syscall"
20

21
	"github.com/containers/buildah/bind"
22
	"github.com/containers/buildah/util"
23
	"github.com/containers/storage/pkg/ioutils"
24
	"github.com/containers/storage/pkg/reexec"
25
	"github.com/containers/storage/pkg/unshare"
26
	"github.com/opencontainers/runtime-spec/specs-go"
27
	"github.com/sirupsen/logrus"
28
	"golang.org/x/sys/unix"
29
	"golang.org/x/term"
30
)
31

32
const (
	// runUsingChrootCommand is the reexec key under which the grandparent
	// subprocess entry point (runUsingChrootMain) is registered.
	runUsingChrootCommand = "buildah-chroot-runtime"
	// runUsingChrootExecCommand is the reexec key under which the parent
	// subprocess entry point (runUsingChrootExecMain) is registered.
	runUsingChrootExecCommand = "buildah-chroot-exec"
	// containersConfEnv names the environment variable that is forwarded to
	// the re-executed helper processes whenever it is set for us.
	containersConfEnv = "CONTAINERS_CONF"
)
40

41
func init() {
42
	reexec.Register(runUsingChrootCommand, runUsingChrootMain)
43
	reexec.Register(runUsingChrootExecCommand, runUsingChrootExecMain)
44
	for limitName, limitNumber := range rlimitsMap {
45
		rlimitsReverseMap[limitNumber] = limitName
46
	}
47
}
48

49
type runUsingChrootExecSubprocOptions struct {
50
	Spec       *specs.Spec
51
	BundlePath string
52
}
53

54
// RunUsingChroot runs a chrooted process, using some of the settings from the
55
// passed-in spec, and using the specified bundlePath to hold temporary files,
56
// directories, and mountpoints.
57
func RunUsingChroot(spec *specs.Spec, bundlePath, homeDir string, stdin io.Reader, stdout, stderr io.Writer) (err error) {
58
	var confwg sync.WaitGroup
59
	var homeFound bool
60
	for _, env := range spec.Process.Env {
61
		if strings.HasPrefix(env, "HOME=") {
62
			homeFound = true
63
			break
64
		}
65
	}
66
	if !homeFound {
67
		spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("HOME=%s", homeDir))
68
	}
69
	runtime.LockOSThread()
70
	defer runtime.UnlockOSThread()
71

72
	// Write the runtime configuration, mainly for debugging.
73
	specbytes, err := json.Marshal(spec)
74
	if err != nil {
75
		return err
76
	}
77
	if err = ioutils.AtomicWriteFile(filepath.Join(bundlePath, "config.json"), specbytes, 0600); err != nil {
78
		return fmt.Errorf("storing runtime configuration: %w", err)
79
	}
80
	logrus.Debugf("config = %v", string(specbytes))
81

82
	// Default to using stdin/stdout/stderr if we weren't passed objects to use.
83
	if stdin == nil {
84
		stdin = os.Stdin
85
	}
86
	if stdout == nil {
87
		stdout = os.Stdout
88
	}
89
	if stderr == nil {
90
		stderr = os.Stderr
91
	}
92

93
	// Create a pipe for passing configuration down to the next process.
94
	preader, pwriter, err := os.Pipe()
95
	if err != nil {
96
		return fmt.Errorf("creating configuration pipe: %w", err)
97
	}
98
	config, conferr := json.Marshal(runUsingChrootSubprocOptions{
99
		Spec:       spec,
100
		BundlePath: bundlePath,
101
	})
102
	if conferr != nil {
103
		return fmt.Errorf("encoding configuration for %q: %w", runUsingChrootCommand, conferr)
104
	}
105

106
	// Set our terminal's mode to raw, to pass handling of special
107
	// terminal input to the terminal in the container.
108
	if spec.Process.Terminal && term.IsTerminal(unix.Stdin) {
109
		state, err := term.MakeRaw(unix.Stdin)
110
		if err != nil {
111
			logrus.Warnf("error setting terminal state: %v", err)
112
		} else {
113
			defer func() {
114
				if err = term.Restore(unix.Stdin, state); err != nil {
115
					logrus.Errorf("unable to restore terminal state: %v", err)
116
				}
117
			}()
118
		}
119
	}
120

121
	// Raise any resource limits that are higher than they are now, before
122
	// we drop any more privileges.
123
	if err = setRlimits(spec, false, true); err != nil {
124
		return err
125
	}
126

127
	// Start the grandparent subprocess.
128
	cmd := unshare.Command(runUsingChrootCommand)
129
	setPdeathsig(cmd.Cmd)
130
	cmd.Stdin, cmd.Stdout, cmd.Stderr = stdin, stdout, stderr
131
	cmd.Dir = "/"
132
	cmd.Env = []string{fmt.Sprintf("LOGLEVEL=%d", logrus.GetLevel())}
133
	if _, ok := os.LookupEnv(containersConfEnv); ok {
134
		cmd.Env = append(cmd.Env, containersConfEnv+"="+os.Getenv(containersConfEnv))
135
	}
136

137
	interrupted := make(chan os.Signal, 100)
138
	cmd.Hook = func(int) error {
139
		signal.Notify(interrupted, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)
140
		go func() {
141
			for receivedSignal := range interrupted {
142
				if err := cmd.Process.Signal(receivedSignal); err != nil {
143
					logrus.Infof("%v while attempting to forward %v to child process", err, receivedSignal)
144
				}
145
			}
146
		}()
147
		return nil
148
	}
149

150
	logrus.Debugf("Running %#v in %#v", cmd.Cmd, cmd)
151
	confwg.Add(1)
152
	go func() {
153
		_, conferr = io.Copy(pwriter, bytes.NewReader(config))
154
		pwriter.Close()
155
		confwg.Done()
156
	}()
157
	cmd.ExtraFiles = append([]*os.File{preader}, cmd.ExtraFiles...)
158
	err = cmd.Run()
159
	confwg.Wait()
160
	signal.Stop(interrupted)
161
	close(interrupted)
162
	if err == nil {
163
		return conferr
164
	}
165
	return err
166
}
167

168
// main() for grandparent subprocess.  Its main job is to shuttle stdio back
169
// and forth, managing a pseudo-terminal if we want one, for our child, the
170
// parent subprocess.
171
func runUsingChrootMain() {
172
	var options runUsingChrootSubprocOptions
173

174
	runtime.LockOSThread()
175

176
	// Set logging.
177
	if level := os.Getenv("LOGLEVEL"); level != "" {
178
		if ll, err := strconv.Atoi(level); err == nil {
179
			logrus.SetLevel(logrus.Level(ll))
180
		}
181
		os.Unsetenv("LOGLEVEL")
182
	}
183

184
	// Unpack our configuration.
185
	confPipe := os.NewFile(3, "confpipe")
186
	if confPipe == nil {
187
		fmt.Fprintf(os.Stderr, "error reading options pipe\n")
188
		os.Exit(1)
189
	}
190
	defer confPipe.Close()
191
	if err := json.NewDecoder(confPipe).Decode(&options); err != nil {
192
		fmt.Fprintf(os.Stderr, "error decoding options: %v\n", err)
193
		os.Exit(1)
194
	}
195

196
	if options.Spec == nil || options.Spec.Process == nil {
197
		fmt.Fprintf(os.Stderr, "invalid options spec in runUsingChrootMain\n")
198
		os.Exit(1)
199
	}
200

201
	// Prepare to shuttle stdio back and forth.
202
	rootUID32, rootGID32, err := util.GetHostRootIDs(options.Spec)
203
	if err != nil {
204
		logrus.Errorf("error determining ownership for container stdio")
205
		os.Exit(1)
206
	}
207
	rootUID := int(rootUID32)
208
	rootGID := int(rootGID32)
209
	relays := make(map[int]int)
210
	closeOnceRunning := []*os.File{}
211
	var ctty *os.File
212
	var stdin io.Reader
213
	var stdinCopy io.WriteCloser
214
	var stdout io.Writer
215
	var stderr io.Writer
216
	fdDesc := make(map[int]string)
217
	if options.Spec.Process.Terminal {
218
		ptyMasterFd, ptyFd, err := getPtyDescriptors()
219
		if err != nil {
220
			logrus.Errorf("error opening PTY descriptors: %v", err)
221
			os.Exit(1)
222
		}
223
		// Make notes about what's going where.
224
		relays[ptyMasterFd] = unix.Stdout
225
		relays[unix.Stdin] = ptyMasterFd
226
		fdDesc[ptyMasterFd] = "container terminal"
227
		fdDesc[unix.Stdin] = "stdin"
228
		fdDesc[unix.Stdout] = "stdout"
229
		winsize := &unix.Winsize{}
230
		// Set the pseudoterminal's size to the configured size, or our own.
231
		if options.Spec.Process.ConsoleSize != nil {
232
			// Use configured sizes.
233
			winsize.Row = uint16(options.Spec.Process.ConsoleSize.Height)
234
			winsize.Col = uint16(options.Spec.Process.ConsoleSize.Width)
235
		} else {
236
			if term.IsTerminal(unix.Stdin) {
237
				// Use the size of our terminal.
238
				winsize, err = unix.IoctlGetWinsize(unix.Stdin, unix.TIOCGWINSZ)
239
				if err != nil {
240
					logrus.Debugf("error reading current terminal's size")
241
					winsize.Row = 0
242
					winsize.Col = 0
243
				}
244
			}
245
		}
246
		if winsize.Row != 0 && winsize.Col != 0 {
247
			if err = unix.IoctlSetWinsize(ptyFd, unix.TIOCSWINSZ, winsize); err != nil {
248
				logrus.Warnf("error setting terminal size for pty")
249
			}
250
			// FIXME - if we're connected to a terminal, we should
251
			// be passing the updated terminal size down when we
252
			// receive a SIGWINCH.
253
		}
254
		// Open an *os.File object that we can pass to our child.
255
		ctty = os.NewFile(uintptr(ptyFd), "/dev/tty")
256
		// Set ownership for the PTY.
257
		if err = ctty.Chown(rootUID, rootGID); err != nil {
258
			var cttyInfo unix.Stat_t
259
			err2 := unix.Fstat(ptyFd, &cttyInfo)
260
			from := ""
261
			op := "setting"
262
			if err2 == nil {
263
				op = "changing"
264
				from = fmt.Sprintf("from %d/%d ", cttyInfo.Uid, cttyInfo.Gid)
265
			}
266
			logrus.Warnf("error %s ownership of container PTY %sto %d/%d: %v", op, from, rootUID, rootGID, err)
267
		}
268
		// Set permissions on the PTY.
269
		if err = ctty.Chmod(0620); err != nil {
270
			logrus.Errorf("error setting permissions of container PTY: %v", err)
271
			os.Exit(1)
272
		}
273
		// Make a note that our child (the parent subprocess) should
274
		// have the PTY connected to its stdio, and that we should
275
		// close it once it's running.
276
		stdin = ctty
277
		stdout = ctty
278
		stderr = ctty
279
		closeOnceRunning = append(closeOnceRunning, ctty)
280
	} else {
281
		// Create pipes for stdio.
282
		stdinRead, stdinWrite, err := os.Pipe()
283
		if err != nil {
284
			logrus.Errorf("error opening pipe for stdin: %v", err)
285
		}
286
		stdoutRead, stdoutWrite, err := os.Pipe()
287
		if err != nil {
288
			logrus.Errorf("error opening pipe for stdout: %v", err)
289
		}
290
		stderrRead, stderrWrite, err := os.Pipe()
291
		if err != nil {
292
			logrus.Errorf("error opening pipe for stderr: %v", err)
293
		}
294
		// Make notes about what's going where.
295
		relays[unix.Stdin] = int(stdinWrite.Fd())
296
		relays[int(stdoutRead.Fd())] = unix.Stdout
297
		relays[int(stderrRead.Fd())] = unix.Stderr
298
		fdDesc[int(stdinWrite.Fd())] = "container stdin pipe"
299
		fdDesc[int(stdoutRead.Fd())] = "container stdout pipe"
300
		fdDesc[int(stderrRead.Fd())] = "container stderr pipe"
301
		fdDesc[unix.Stdin] = "stdin"
302
		fdDesc[unix.Stdout] = "stdout"
303
		fdDesc[unix.Stderr] = "stderr"
304
		// Set ownership for the pipes.
305
		if err = stdinRead.Chown(rootUID, rootGID); err != nil {
306
			logrus.Errorf("error setting ownership of container stdin pipe: %v", err)
307
			os.Exit(1)
308
		}
309
		if err = stdoutWrite.Chown(rootUID, rootGID); err != nil {
310
			logrus.Errorf("error setting ownership of container stdout pipe: %v", err)
311
			os.Exit(1)
312
		}
313
		if err = stderrWrite.Chown(rootUID, rootGID); err != nil {
314
			logrus.Errorf("error setting ownership of container stderr pipe: %v", err)
315
			os.Exit(1)
316
		}
317
		// Make a note that our child (the parent subprocess) should
318
		// have the pipes connected to its stdio, and that we should
319
		// close its ends of them once it's running.
320
		stdin = stdinRead
321
		stdout = stdoutWrite
322
		stderr = stderrWrite
323
		closeOnceRunning = append(closeOnceRunning, stdinRead, stdoutWrite, stderrWrite)
324
		stdinCopy = stdinWrite
325
		defer stdoutRead.Close()
326
		defer stderrRead.Close()
327
	}
328
	for readFd, writeFd := range relays {
329
		if err := unix.SetNonblock(readFd, true); err != nil {
330
			logrus.Errorf("error setting descriptor %d (%s) non-blocking: %v", readFd, fdDesc[readFd], err)
331
			return
332
		}
333
		if err := unix.SetNonblock(writeFd, false); err != nil {
334
			logrus.Errorf("error setting descriptor %d (%s) blocking: %v", relays[writeFd], fdDesc[writeFd], err)
335
			return
336
		}
337
	}
338
	if err := unix.SetNonblock(relays[unix.Stdin], true); err != nil {
339
		logrus.Errorf("error setting %d to nonblocking: %v", relays[unix.Stdin], err)
340
	}
341
	go func() {
342
		buffers := make(map[int]*bytes.Buffer)
343
		for _, writeFd := range relays {
344
			buffers[writeFd] = new(bytes.Buffer)
345
		}
346
		pollTimeout := -1
347
		stdinClose := false
348
		for len(relays) > 0 {
349
			fds := make([]unix.PollFd, 0, len(relays))
350
			for fd := range relays {
351
				fds = append(fds, unix.PollFd{Fd: int32(fd), Events: unix.POLLIN | unix.POLLHUP})
352
			}
353
			_, err := unix.Poll(fds, pollTimeout)
354
			if !util.LogIfNotRetryable(err, fmt.Sprintf("poll: %v", err)) {
355
				return
356
			}
357
			removeFds := make(map[int]struct{})
358
			for _, rfd := range fds {
359
				if rfd.Revents&unix.POLLHUP == unix.POLLHUP {
360
					removeFds[int(rfd.Fd)] = struct{}{}
361
				}
362
				if rfd.Revents&unix.POLLNVAL == unix.POLLNVAL {
363
					logrus.Debugf("error polling descriptor %s: closed?", fdDesc[int(rfd.Fd)])
364
					removeFds[int(rfd.Fd)] = struct{}{}
365
				}
366
				if rfd.Revents&unix.POLLIN == 0 {
367
					if stdinClose && stdinCopy == nil {
368
						continue
369
					}
370
					continue
371
				}
372
				b := make([]byte, 8192)
373
				nread, err := unix.Read(int(rfd.Fd), b)
374
				util.LogIfNotRetryable(err, fmt.Sprintf("read %s: %v", fdDesc[int(rfd.Fd)], err))
375
				if nread > 0 {
376
					if wfd, ok := relays[int(rfd.Fd)]; ok {
377
						nwritten, err := buffers[wfd].Write(b[:nread])
378
						if err != nil {
379
							logrus.Debugf("buffer: %v", err)
380
							continue
381
						}
382
						if nwritten != nread {
383
							logrus.Debugf("buffer: expected to buffer %d bytes, wrote %d", nread, nwritten)
384
							continue
385
						}
386
					}
387
					// If this is the last of the data we'll be able to read
388
					// from this descriptor, read as much as there is to read.
389
					for rfd.Revents&unix.POLLHUP == unix.POLLHUP {
390
						nr, err := unix.Read(int(rfd.Fd), b)
391
						util.LogIfUnexpectedWhileDraining(err, fmt.Sprintf("read %s: %v", fdDesc[int(rfd.Fd)], err))
392
						if nr <= 0 {
393
							break
394
						}
395
						if wfd, ok := relays[int(rfd.Fd)]; ok {
396
							nwritten, err := buffers[wfd].Write(b[:nr])
397
							if err != nil {
398
								logrus.Debugf("buffer: %v", err)
399
								break
400
							}
401
							if nwritten != nr {
402
								logrus.Debugf("buffer: expected to buffer %d bytes, wrote %d", nr, nwritten)
403
								break
404
							}
405
						}
406
					}
407
				}
408
				if nread == 0 {
409
					removeFds[int(rfd.Fd)] = struct{}{}
410
				}
411
			}
412
			pollTimeout = -1
413
			for wfd, buffer := range buffers {
414
				if buffer.Len() > 0 {
415
					nwritten, err := unix.Write(wfd, buffer.Bytes())
416
					util.LogIfNotRetryable(err, fmt.Sprintf("write %s: %v", fdDesc[wfd], err))
417
					if nwritten >= 0 {
418
						_ = buffer.Next(nwritten)
419
					}
420
				}
421
				if buffer.Len() > 0 {
422
					pollTimeout = 100
423
				}
424
				if wfd == relays[unix.Stdin] && stdinClose && buffer.Len() == 0 {
425
					stdinCopy.Close()
426
					delete(relays, unix.Stdin)
427
				}
428
			}
429
			for rfd := range removeFds {
430
				if rfd == unix.Stdin {
431
					buffer, found := buffers[relays[unix.Stdin]]
432
					if found && buffer.Len() > 0 {
433
						stdinClose = true
434
						continue
435
					}
436
				}
437
				if !options.Spec.Process.Terminal && rfd == unix.Stdin {
438
					stdinCopy.Close()
439
				}
440
				delete(relays, rfd)
441
			}
442
		}
443
	}()
444

445
	// Set up mounts and namespaces, and run the parent subprocess.
446
	status, err := runUsingChroot(options.Spec, options.BundlePath, ctty, stdin, stdout, stderr, closeOnceRunning)
447
	if err != nil {
448
		fmt.Fprintf(os.Stderr, "error running subprocess: %v\n", err)
449
		os.Exit(1)
450
	}
451

452
	// Pass the process's exit status back to the caller by exiting with the same status.
453
	if status.Exited() {
454
		if status.ExitStatus() != 0 {
455
			fmt.Fprintf(os.Stderr, "subprocess exited with status %d\n", status.ExitStatus())
456
		}
457
		os.Exit(status.ExitStatus())
458
	} else if status.Signaled() {
459
		fmt.Fprintf(os.Stderr, "subprocess exited on %s\n", status.Signal())
460
		os.Exit(1)
461
	}
462
}
463

464
// runUsingChroot, still in the grandparent process, sets up various bind
465
// mounts and then runs the parent process in its own user namespace with the
466
// necessary ID mappings.
467
func runUsingChroot(spec *specs.Spec, bundlePath string, ctty *os.File, stdin io.Reader, stdout, stderr io.Writer, closeOnceRunning []*os.File) (wstatus unix.WaitStatus, err error) {
468
	var confwg sync.WaitGroup
469

470
	// Create a new mount namespace for ourselves and bind mount everything to a new location.
471
	undoIntermediates, err := bind.SetupIntermediateMountNamespace(spec, bundlePath)
472
	if err != nil {
473
		return 1, err
474
	}
475
	defer func() {
476
		if undoErr := undoIntermediates(); undoErr != nil {
477
			logrus.Debugf("error cleaning up intermediate mount NS: %v", err)
478
		}
479
	}()
480

481
	// Bind mount in our filesystems.
482
	undoChroots, err := setupChrootBindMounts(spec, bundlePath)
483
	if err != nil {
484
		return 1, err
485
	}
486
	defer func() {
487
		if undoErr := undoChroots(); undoErr != nil {
488
			logrus.Debugf("error cleaning up intermediate chroot bind mounts: %v", err)
489
		}
490
	}()
491

492
	// Create a pipe for passing configuration down to the next process.
493
	preader, pwriter, err := os.Pipe()
494
	if err != nil {
495
		return 1, fmt.Errorf("creating configuration pipe: %w", err)
496
	}
497
	config, conferr := json.Marshal(runUsingChrootExecSubprocOptions{
498
		Spec:       spec,
499
		BundlePath: bundlePath,
500
	})
501
	if conferr != nil {
502
		fmt.Fprintf(os.Stderr, "error re-encoding configuration for %q", runUsingChrootExecCommand)
503
		os.Exit(1)
504
	}
505

506
	// Apologize for the namespace configuration that we're about to ignore.
507
	logNamespaceDiagnostics(spec)
508

509
	// We need to lock the thread so that PR_SET_PDEATHSIG won't trigger if the current thread exits.
510
	runtime.LockOSThread()
511
	defer runtime.UnlockOSThread()
512

513
	// Start the parent subprocess.
514
	cmd := unshare.Command(append([]string{runUsingChrootExecCommand}, spec.Process.Args...)...)
515
	setPdeathsig(cmd.Cmd)
516
	cmd.Stdin, cmd.Stdout, cmd.Stderr = stdin, stdout, stderr
517
	cmd.Dir = "/"
518
	cmd.Env = []string{fmt.Sprintf("LOGLEVEL=%d", logrus.GetLevel())}
519
	if _, ok := os.LookupEnv(containersConfEnv); ok {
520
		cmd.Env = append(cmd.Env, containersConfEnv+"="+os.Getenv(containersConfEnv))
521
	}
522
	if ctty != nil {
523
		cmd.Setsid = true
524
		cmd.Ctty = ctty
525
	}
526
	cmd.ExtraFiles = append([]*os.File{preader}, cmd.ExtraFiles...)
527
	if err := setPlatformUnshareOptions(spec, cmd); err != nil {
528
		return 1, fmt.Errorf("setting platform unshare options: %w", err)
529

530
	}
531
	interrupted := make(chan os.Signal, 100)
532
	cmd.Hook = func(int) error {
533
		for _, f := range closeOnceRunning {
534
			f.Close()
535
		}
536
		signal.Notify(interrupted, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)
537
		go func() {
538
			for receivedSignal := range interrupted {
539
				if err := cmd.Process.Signal(receivedSignal); err != nil {
540
					logrus.Infof("%v while attempting to forward %v to child process", err, receivedSignal)
541
				}
542
			}
543
		}()
544
		return nil
545
	}
546

547
	logrus.Debugf("Running %#v in %#v", cmd.Cmd, cmd)
548
	confwg.Add(1)
549
	go func() {
550
		_, conferr = io.Copy(pwriter, bytes.NewReader(config))
551
		pwriter.Close()
552
		confwg.Done()
553
	}()
554
	err = cmd.Run()
555
	confwg.Wait()
556
	signal.Stop(interrupted)
557
	close(interrupted)
558
	if err != nil {
559
		if exitError, ok := err.(*exec.ExitError); ok {
560
			if waitStatus, ok := exitError.ProcessState.Sys().(syscall.WaitStatus); ok {
561
				if waitStatus.Exited() {
562
					if waitStatus.ExitStatus() != 0 {
563
						fmt.Fprintf(os.Stderr, "subprocess exited with status %d\n", waitStatus.ExitStatus())
564
					}
565
					os.Exit(waitStatus.ExitStatus())
566
				} else if waitStatus.Signaled() {
567
					fmt.Fprintf(os.Stderr, "subprocess exited on %s\n", waitStatus.Signal())
568
					os.Exit(1)
569
				}
570
			}
571
		}
572
		fmt.Fprintf(os.Stderr, "process exited with error: %v", err)
573
		os.Exit(1)
574
	}
575

576
	return 0, nil
577
}
578

579
// main() for parent subprocess.  Its main job is to try to make our
580
// environment look like the one described by the runtime configuration blob,
581
// and then launch the intended command as a child.
582
func runUsingChrootExecMain() {
583
	args := os.Args[1:]
584
	var options runUsingChrootExecSubprocOptions
585
	var err error
586

587
	runtime.LockOSThread()
588

589
	// Set logging.
590
	if level := os.Getenv("LOGLEVEL"); level != "" {
591
		if ll, err := strconv.Atoi(level); err == nil {
592
			logrus.SetLevel(logrus.Level(ll))
593
		}
594
		os.Unsetenv("LOGLEVEL")
595
	}
596

597
	// Unpack our configuration.
598
	confPipe := os.NewFile(3, "confpipe")
599
	if confPipe == nil {
600
		fmt.Fprintf(os.Stderr, "error reading options pipe\n")
601
		os.Exit(1)
602
	}
603
	defer confPipe.Close()
604
	if err := json.NewDecoder(confPipe).Decode(&options); err != nil {
605
		fmt.Fprintf(os.Stderr, "error decoding options: %v\n", err)
606
		os.Exit(1)
607
	}
608

609
	// Set the hostname.  We're already in a distinct UTS namespace and are admins in the user
610
	// namespace which created it, so we shouldn't get a permissions error, but seccomp policy
611
	// might deny our attempt to call sethostname() anyway, so log a debug message for that.
612
	if options.Spec == nil || options.Spec.Process == nil {
613
		fmt.Fprintf(os.Stderr, "invalid options spec passed in\n")
614
		os.Exit(1)
615
	}
616

617
	if options.Spec.Hostname != "" {
618
		setContainerHostname(options.Spec.Hostname)
619
	}
620

621
	// Try to chroot into the root.  Do this before we potentially
622
	// block the syscall via the seccomp profile. Allow the
623
	// platform to override this - on FreeBSD, we use a simple
624
	// jail to set the hostname in the container
625
	if err := createPlatformContainer(options); err != nil {
626
		var oldst, newst unix.Stat_t
627
		if err := unix.Stat(options.Spec.Root.Path, &oldst); err != nil {
628
			fmt.Fprintf(os.Stderr, "error stat()ing intended root directory %q: %v\n", options.Spec.Root.Path, err)
629
			os.Exit(1)
630
		}
631
		if err := unix.Chdir(options.Spec.Root.Path); err != nil {
632
			fmt.Fprintf(os.Stderr, "error chdir()ing to intended root directory %q: %v\n", options.Spec.Root.Path, err)
633
			os.Exit(1)
634
		}
635
		if err := unix.Chroot(options.Spec.Root.Path); err != nil {
636
			fmt.Fprintf(os.Stderr, "error chroot()ing into directory %q: %v\n", options.Spec.Root.Path, err)
637
			os.Exit(1)
638
		}
639
		if err := unix.Stat("/", &newst); err != nil {
640
			fmt.Fprintf(os.Stderr, "error stat()ing current root directory: %v\n", err)
641
			os.Exit(1)
642
		}
643
		if oldst.Dev != newst.Dev || oldst.Ino != newst.Ino {
644
			fmt.Fprintf(os.Stderr, "unknown error chroot()ing into directory %q: %v\n", options.Spec.Root.Path, err)
645
			os.Exit(1)
646
		}
647
		logrus.Debugf("chrooted into %q", options.Spec.Root.Path)
648
	}
649

650
	// not doing because it's still shared: creating devices
651
	// not doing because it's not applicable: setting annotations
652
	// not doing because it's still shared: setting sysctl settings
653
	// not doing because cgroupfs is read only: configuring control groups
654
	// -> this means we can use the freezer to make sure there aren't any lingering processes
655
	// -> this means we ignore cgroups-based controls
656
	// not doing because we don't set any in the config: running hooks
657
	// not doing because we don't set it in the config: setting rootfs read-only
658
	// not doing because we don't set it in the config: setting rootfs propagation
659
	logrus.Debugf("setting apparmor profile")
660
	if err = setApparmorProfile(options.Spec); err != nil {
661
		fmt.Fprintf(os.Stderr, "error setting apparmor profile for process: %v\n", err)
662
		os.Exit(1)
663
	}
664
	if err = setSelinuxLabel(options.Spec); err != nil {
665
		fmt.Fprintf(os.Stderr, "error setting SELinux label for process: %v\n", err)
666
		os.Exit(1)
667
	}
668

669
	logrus.Debugf("setting resource limits")
670
	if err = setRlimits(options.Spec, false, false); err != nil {
671
		fmt.Fprintf(os.Stderr, "error setting process resource limits for process: %v\n", err)
672
		os.Exit(1)
673
	}
674

675
	// Try to change to the directory.
676
	cwd := options.Spec.Process.Cwd
677
	if !filepath.IsAbs(cwd) {
678
		cwd = "/" + cwd
679
	}
680
	cwd = filepath.Clean(cwd)
681
	if err := unix.Chdir("/"); err != nil {
682
		fmt.Fprintf(os.Stderr, "error chdir()ing into new root directory %q: %v\n", options.Spec.Root.Path, err)
683
		os.Exit(1)
684
	}
685
	if err := unix.Chdir(cwd); err != nil {
686
		fmt.Fprintf(os.Stderr, "error chdir()ing into directory %q under root %q: %v\n", cwd, options.Spec.Root.Path, err)
687
		os.Exit(1)
688
	}
689
	logrus.Debugf("changed working directory to %q", cwd)
690

691
	// Drop privileges.
692
	user := options.Spec.Process.User
693
	if len(user.AdditionalGids) > 0 {
694
		gids := make([]int, len(user.AdditionalGids))
695
		for i := range user.AdditionalGids {
696
			gids[i] = int(user.AdditionalGids[i])
697
		}
698
		logrus.Debugf("setting supplemental groups")
699
		if err = syscall.Setgroups(gids); err != nil {
700
			fmt.Fprintf(os.Stderr, "error setting supplemental groups list: %v", err)
701
			os.Exit(1)
702
		}
703
	} else {
704
		setgroups, _ := os.ReadFile("/proc/self/setgroups")
705
		if strings.Trim(string(setgroups), "\n") != "deny" {
706
			logrus.Debugf("clearing supplemental groups")
707
			if err = syscall.Setgroups([]int{}); err != nil {
708
				fmt.Fprintf(os.Stderr, "error clearing supplemental groups list: %v", err)
709
				os.Exit(1)
710
			}
711
		}
712
	}
713

714
	logrus.Debugf("setting gid")
715
	if err = unix.Setresgid(int(user.GID), int(user.GID), int(user.GID)); err != nil {
716
		fmt.Fprintf(os.Stderr, "error setting GID: %v", err)
717
		os.Exit(1)
718
	}
719

720
	if err = setSeccomp(options.Spec); err != nil {
721
		fmt.Fprintf(os.Stderr, "error setting seccomp filter for process: %v\n", err)
722
		os.Exit(1)
723
	}
724

725
	logrus.Debugf("setting capabilities")
726
	var keepCaps []string
727
	if user.UID != 0 {
728
		keepCaps = []string{"CAP_SETUID"}
729
	}
730
	if err := setCapabilities(options.Spec, keepCaps...); err != nil {
731
		fmt.Fprintf(os.Stderr, "error setting capabilities for process: %v\n", err)
732
		os.Exit(1)
733
	}
734

735
	logrus.Debugf("setting uid")
736
	if err = unix.Setresuid(int(user.UID), int(user.UID), int(user.UID)); err != nil {
737
		fmt.Fprintf(os.Stderr, "error setting UID: %v", err)
738
		os.Exit(1)
739
	}
740

741
	// Actually run the specified command.
742
	cmd := exec.Command(args[0], args[1:]...)
743
	setPdeathsig(cmd)
744
	cmd.Env = options.Spec.Process.Env
745
	cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, os.Stdout, os.Stderr
746
	cmd.Dir = cwd
747
	logrus.Debugf("Running %#v (PATH = %q)", cmd, os.Getenv("PATH"))
748
	interrupted := make(chan os.Signal, 100)
749
	if err = cmd.Start(); err != nil {
750
		fmt.Fprintf(os.Stderr, "process failed to start with error: %v", err)
751
	}
752
	go func() {
753
		for range interrupted {
754
			if err := cmd.Process.Signal(syscall.SIGKILL); err != nil {
755
				logrus.Infof("%v while attempting to send SIGKILL to child process", err)
756
			}
757
		}
758
	}()
759
	signal.Notify(interrupted, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)
760
	err = cmd.Wait()
761
	signal.Stop(interrupted)
762
	close(interrupted)
763
	if err != nil {
764
		if exitError, ok := err.(*exec.ExitError); ok {
765
			if waitStatus, ok := exitError.ProcessState.Sys().(syscall.WaitStatus); ok {
766
				if waitStatus.Exited() {
767
					if waitStatus.ExitStatus() != 0 {
768
						fmt.Fprintf(os.Stderr, "subprocess exited with status %d\n", waitStatus.ExitStatus())
769
					}
770
					os.Exit(waitStatus.ExitStatus())
771
				} else if waitStatus.Signaled() {
772
					fmt.Fprintf(os.Stderr, "subprocess exited on %s\n", waitStatus.Signal())
773
					os.Exit(1)
774
				}
775
			}
776
		}
777
		fmt.Fprintf(os.Stderr, "process exited with error: %v", err)
778
		os.Exit(1)
779
	}
780
}
781

782
// parses the resource limits for ourselves and any processes that
783
// we'll start into a format that's more in line with the kernel APIs
784
func parseRlimits(spec *specs.Spec) (map[int]unix.Rlimit, error) {
785
	if spec.Process == nil {
786
		return nil, nil
787
	}
788
	parsed := make(map[int]unix.Rlimit)
789
	for _, limit := range spec.Process.Rlimits {
790
		resource, recognized := rlimitsMap[strings.ToUpper(limit.Type)]
791
		if !recognized {
792
			return nil, fmt.Errorf("parsing limit type %q", limit.Type)
793
		}
794
		parsed[resource] = makeRlimit(limit)
795
	}
796
	return parsed, nil
797
}
798

799
// setRlimits sets any resource limits that we want to apply to processes that
800
// we'll start.
801
func setRlimits(spec *specs.Spec, onlyLower, onlyRaise bool) error {
802
	limits, err := parseRlimits(spec)
803
	if err != nil {
804
		return err
805
	}
806
	for resource, desired := range limits {
807
		var current unix.Rlimit
808
		if err := unix.Getrlimit(resource, &current); err != nil {
809
			return fmt.Errorf("reading %q limit: %w", rlimitsReverseMap[resource], err)
810
		}
811
		if desired.Max > current.Max && onlyLower {
812
			// this would raise a hard limit, and we're only here to lower them
813
			continue
814
		}
815
		if desired.Max < current.Max && onlyRaise {
816
			// this would lower a hard limit, and we're only here to raise them
817
			continue
818
		}
819
		if err := unix.Setrlimit(resource, &desired); err != nil {
820
			return fmt.Errorf("setting %q limit to soft=%d,hard=%d (was soft=%d,hard=%d): %w", rlimitsReverseMap[resource], desired.Cur, desired.Max, current.Cur, current.Max, err)
821
		}
822
	}
823
	return nil
824
}
825

826
func isDevNull(dev os.FileInfo) bool {
827
	if dev.Mode()&os.ModeCharDevice != 0 {
828
		stat, _ := dev.Sys().(*syscall.Stat_t)
829
		nullStat := syscall.Stat_t{}
830
		if err := syscall.Stat(os.DevNull, &nullStat); err != nil {
831
			logrus.Warnf("unable to stat /dev/null: %v", err)
832
			return false
833
		}
834
		if stat.Rdev == nullStat.Rdev {
835
			return true
836
		}
837
	}
838
	return false
839
}
840

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.