istio

Форк
0
/
implementation_linux.go 
277 строк · 12.0 Кб
1
// Copyright Istio Authors
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
package dependencies
16

17
import (
18
	"bytes"
19
	"fmt"
20
	"io"
21
	"os/exec"
22
	"runtime"
23
	"strings"
24
	"syscall"
25

26
	netns "github.com/containernetworking/plugins/pkg/ns"
27
	"golang.org/x/sys/unix"
28
	utilversion "k8s.io/apimachinery/pkg/util/version"
29

30
	"istio.io/istio/pkg/log"
31
	"istio.io/istio/tools/istio-iptables/pkg/constants"
32
)
33

34
// TODO the entire `istio-iptables` package is linux-specific, I'm not sure we really need
35
// platform-differentiators for the `dependencies` package itself.
36

37
// NoLocks returns true if this version does not use or support locks
38
func (v IptablesVersion) NoLocks() bool {
39
	// nf_tables does not use locks
40
	// legacy added locks in 1.6.2
41
	return !v.Legacy || v.Version.LessThan(IptablesRestoreLocking)
42
}
43

44
var (
45
	// IptablesRestoreLocking is the version where locking and -w is added to iptables-restore
46
	IptablesRestoreLocking = utilversion.MustParseGeneric("1.6.2")
47
	// IptablesLockfileEnv is the version where XTABLES_LOCKFILE is added to iptables.
48
	IptablesLockfileEnv = utilversion.MustParseGeneric("1.8.6")
49
)
50

51
func shouldUseBinaryForCurrentContext(iptablesBin string) (IptablesVersion, error) {
52
	// We assume that whatever `iptablesXXX` binary you pass us also has a `iptablesXXX-save` and `iptablesXXX-restore`
53
	// binary - which should always be true for any valid iptables installation
54
	// (we use both in our iptables code later on anyway)
55
	//
56
	// We could explicitly check for all 3 every time to be sure, but that's likely not necessary,
57
	// if we find one unless the host OS is badly broken we will find the others.
58
	iptablesSaveBin := fmt.Sprintf("%s-save", iptablesBin)
59
	iptablesRestoreBin := fmt.Sprintf("%s-restore", iptablesBin)
60
	var parsedVer *utilversion.Version
61
	var isNft bool
62
	// does the "xx-save" binary exist?
63
	rulesDump, binExistsErr := exec.Command(iptablesSaveBin).CombinedOutput()
64
	if binExistsErr != nil {
65
		return IptablesVersion{}, fmt.Errorf("binary %s not found in path: %w", iptablesSaveBin, binExistsErr)
66
	}
67

68
	// Binary is there, so try to parse version
69
	verCmd := exec.Command(iptablesSaveBin, "--version")
70
	// shockingly, `iptables-save` returns 0 if you pass it an unrecognized/bad option, so
71
	// `os/exec` will return a *nil* error, even if the command fails. So, we must slurp stderr, and check it to
72
	// see if the command *actually* failed due to not recognizing the version flag.
73
	var verStdOut bytes.Buffer
74
	var verStdErr bytes.Buffer
75
	verCmd.Stdout = &verStdOut
76
	verCmd.Stderr = &verStdErr
77

78
	verExec := verCmd.Run()
79
	if verExec == nil && !strings.Contains(verStdErr.String(), "unrecognized option") {
80
		var parseErr error
81
		// we found the binary - extract the version, then try to detect if rules already exist for that variant
82
		parsedVer, parseErr = parseIptablesVer(verStdOut.String())
83
		if parseErr != nil {
84
			return IptablesVersion{}, fmt.Errorf("iptables version %q is not a valid version string: %v", verStdOut.Bytes(), parseErr)
85
		}
86
		// Legacy will have no marking or 'legacy', so just look for nf_tables
87
		isNft = strings.Contains(verStdOut.String(), "nf_tables")
88
	} else {
89
		log.Warnf("found iptables binary %s, but it does not appear to support the '--version' flag, assuming very old legacy version", iptablesSaveBin)
90
		// Some really old iptables-legacy-save versions (1.6.1, ubuntu bionic) don't support any arguments at all, including `--version`
91
		// So if we get here, we found `iptables-save` in PATH, but it's too outdated to understand `--version`.
92
		//
93
		// We can eventually remove this.
94
		//
95
		// So assume it's legacy/an unknown version, but assume we can use it since it's in PATH
96
		parsedVer = utilversion.MustParseGeneric("0.0.0")
97
		isNft = false
98
	}
99

100
	// if binary seems to exist, check the dump of rules in our netns, and see if any rules exist there
101
	// Note that this is highly dependent on context.
102
	// new pod netns? probably no rules. Hostnetns? probably rules
103
	// So this is mostly just a "hint"/heuristic as to which version we should be using, if more than one binary is present.
104
	// `xx-save` should return _no_ output (0 lines) if no rules are defined in this netns for that binary variant.
105
	// `xx-save` should return at least 3 output lines if at least one rule is defined in this netns for that binary variant.
106
	existingRules := false
107
	if strings.Count(string(rulesDump), "\n") >= 3 {
108
		existingRules = true
109
		log.Debugf("found existing rules for %s", iptablesSaveBin)
110
	}
111
	return IptablesVersion{
112
		DetectedBinary:        iptablesBin,
113
		DetectedSaveBinary:    iptablesSaveBin,
114
		DetectedRestoreBinary: iptablesRestoreBin,
115
		Version:               parsedVer,
116
		Legacy:                !isNft,
117
		ExistingRules:         existingRules,
118
	}, nil
119
}
120

121
// runInSandbox builds a lightweight sandbox ("container") to build a suitable environment to run iptables commands in.
122
// This is used in CNI, where commands are executed from the host but from within the container network namespace.
123
// This puts us in somewhat unconventionally territory.
124
func runInSandbox(lockFile string, f func() error) error {
125
	chErr := make(chan error, 1)
126
	n, nerr := netns.GetCurrentNS()
127
	if nerr != nil {
128
		return fmt.Errorf("failed to get current namespace: %v", nerr)
129
	}
130
	// setupSandbox builds the sandbox.
131
	setupSandbox := func() error {
132
		// First, unshare the mount namespace. This allows us to create custom mounts without impacting the host
133
		if err := unix.Unshare(unix.CLONE_NEWNS); err != nil {
134
			return fmt.Errorf("failed to unshare to new mount namespace: %v", err)
135
		}
136
		if err := n.Set(); err != nil {
137
			return fmt.Errorf("failed to reset network namespace: %v", err)
138
		}
139
		// Remount / as a private mount so that our mounts do not impact outside the namespace
140
		// (see https://unix.stackexchange.com/questions/246312/why-is-my-bind-mount-visible-outside-its-mount-namespace).
141
		if err := unix.Mount("", "/", "", unix.MS_PRIVATE|unix.MS_REC, ""); err != nil {
142
			return fmt.Errorf("failed to remount /: %v", err)
143
		}
144
		// In CNI, we are running the pod network namespace, but the host filesystem. Locking the host is both useless and harmful,
145
		// as it opens the risk of lock contention with other node actors (such as kube-proxy), and isn't actually needed at all.
146
		// Older iptables cannot turn off the lock explicitly, so we hack around it...
147
		// Overwrite the lock file with the network namespace file (which is assumed to be unique).
148
		// We are setting the lockfile to `r.NetworkNamespace`.
149
		// /dev/null looks like a good option, but actually doesn't work (it will ensure only one actor can access it)
150
		if lockFile != "" {
151
			if err := mount(lockFile, "/run/xtables.lock"); err != nil {
152
				return fmt.Errorf("bind mount of %q failed: %v", lockFile, err)
153
			}
154
		}
155

156
		// In some setups, iptables can make remote network calls(!!). Since these come from a partially initialized pod network namespace,
157
		// these calls can be blocked (or NetworkPolicy, etc could block them anyways).
158
		// This is triggered by NSS, which allows various things to use arbitrary code to lookup configuration that typically comes from files.
159
		// In our case, the culprit is the `xt_owner` (`-m owner`) module in iptables calls the `passwd` service to lookup the user.
160
		// To disallow this, bindmount /dev/null over nsswitch.conf so this never happens.
161
		// This should be safe to do, even if the user has an nsswitch entry that would work fine: we always use a numeric ID
162
		// so the passwd lookup doesn't need to succeed at all for Istio to function.
163
		// Effectively, we want a mini-container. In fact, running in a real container would be ideal but it is hard to do portably.
164
		// See https://github.com/istio/istio/issues/48416 for a real world example of this case.
165
		if err := mount("/dev/null", "/etc/nsswitch.conf"); err != nil {
166
			return fmt.Errorf("bind mount to %q failed: %v", "/etc/nsswitch.conf", err)
167
		}
168
		return nil
169
	}
170

171
	executed := false
172
	// Once we call unshare(CLONE_NEWNS), we cannot undo it explicitly. Instead, we need to unshare on a specific thread,
173
	// then kill that thread when we are done (or rather, let Go runtime kill the thread).
174
	// Unfortunately, making a new thread breaks us out of the network namespace we entered previously, so we need to restore that as well
175
	go func() {
176
		chErr <- func() error {
177
			// We now have exclusive access to this thread. Once the goroutine exits without calling UnlockOSThread, the go runtime will kill the thread for us
178
			// Warning: Do not call UnlockOSThread! Notably, netns.Do does call this.
179
			runtime.LockOSThread()
180
			if err := setupSandbox(); err != nil {
181
				return err
182
			}
183
			// Mark we have actually run the command. This lets us distinguish from a failure in setupSandbox() vs f()
184
			executed = true
185
			return f()
186
		}()
187
	}()
188
	err := <-chErr
189
	if err != nil && !executed {
190
		// We failed to setup the environment. Now we go into best effort mode.
191
		// Users running into this may have IPTables lock used unexpectedly or make unexpected NSS calls.
192
		// This is to support environments with restrictive access (from SELinux, but possibly others) that block these calls
193
		// See https://github.com/istio/istio/issues/48746
194
		log.Warnf("failed to setup execution environment, attempting to continue anyways: %v", err)
195
		// Try to execute as-is
196
		return f()
197
	}
198
	// Otherwise, we did execute; return the error from that execution.
199
	return err
200
}
201

202
// mount bind-mounts src onto dst, passing the MS_BIND and MS_RDONLY flags, and returns
// whatever error the mount syscall reports.
func mount(src, dst string) error {
	const bindReadOnly = syscall.MS_BIND | syscall.MS_RDONLY
	return syscall.Mount(src, dst, "", bindReadOnly, "")
}
205

206
// executeXTables runs the binary that iptVer maps cmd to, with args as its arguments and stdin
// as its standard input.
//
// In CNI mode the command is run through runInSandbox. Outside CNI mode, write commands on
// versions that support locking get "--wait=30" appended so they wait for the xtables lock.
//
// Standard output, if any, is logged at info level. Unless ignoreErrors is set, a failed run or
// any stderr output is logged at error level. The returned error is the one from running the
// command, regardless of ignoreErrors; an error is also returned up front when iptVer has no
// binary for cmd.
func (r *RealDependencies) executeXTables(cmd constants.IptablesCmd, iptVer *IptablesVersion, ignoreErrors bool, stdin io.ReadSeeker, args ...string) error {
	cmdBin := iptVer.CmdToString(cmd)
	if cmdBin == "" {
		return fmt.Errorf("called without iptables binary, cannot execute!: %+v", iptVer)
	}
	needLock := iptVer.IsWriteCmd(cmd) && !iptVer.NoLocks()

	var command *exec.Cmd
	// By default the command is simply run directly.
	execute := (*exec.Cmd).Run
	mode := "without lock"

	switch {
	case r.CNIMode:
		command = exec.Command(cmdBin, args...)
		// In CNI, we are running the pod network namespace, but the host filesystem, so we need to do some tricks.
		// We do not shell out and call `mount` since this and sh are not available on all systems.
		lockFile := ""
		switch {
		case !needLock:
			mode = "without nss"
		case iptVer.Version.LessThan(IptablesLockfileEnv):
			// Too old for XTABLES_LOCKFILE: have the sandbox mount over the lock file instead.
			mode = "without lock by mount and nss"
			lockFile = r.NetworkNamespace
		default:
			mode = "without lock by env and nss"
			// NOTE(review): command.Env is nil up to this point, so after this append the child
			// process receives only XTABLES_LOCKFILE and not the parent's environment - confirm
			// this is intended.
			command.Env = append(command.Env, "XTABLES_LOCKFILE="+r.NetworkNamespace)
		}
		execute = func(c *exec.Cmd) error {
			return runInSandbox(lockFile, c.Run)
		}
	case needLock:
		// We want the lock. Wait up to 30s for it.
		args = append(args, "--wait=30")
		command = exec.Command(cmdBin, args...)
		log.Debugf("running with lock")
		mode = "with wait lock"
	default:
		// No locking supported/needed, just run as is. Nothing special.
		command = exec.Command(cmdBin, args...)
	}

	log.Infof("Running command (%s): %s %s", mode, cmdBin, strings.Join(args, " "))
	var outBuf, errBuf bytes.Buffer
	command.Stdout = &outBuf
	command.Stderr = &errBuf
	command.Stdin = stdin
	runErr := execute(command)
	if outBuf.Len() != 0 {
		log.Infof("Command output: \n%v", outBuf.String())
	}

	// TODO Check naming and redirection logic
	if ignoreErrors {
		return runErr
	}
	if runErr == nil && errBuf.Len() == 0 {
		// Clean run with nothing on stderr: nothing to report.
		return nil
	}

	stderrStr := errBuf.String()
	// Transform to xtables-specific error messages with more useful and actionable hints.
	if runErr != nil {
		stderrStr = transformToXTablesErrorMessage(stderrStr, runErr)
	}
	log.Errorf("Command error output: %v", stderrStr)

	return runErr
}
278

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.