1
//go:build !remote
2

3
package libpod
4

5
import (
6
	"context"
7
	"errors"
8
	"fmt"
9
	"math/rand"
10
	"os"
11
	"reflect"
12
	"sort"
13
	"strconv"
14
	"strings"
15
	"time"
16

17
	"github.com/containers/common/libnetwork/types"
18
	"github.com/containers/common/pkg/config"
19
	"github.com/containers/podman/v5/libpod/define"
20
	"github.com/containers/podman/v5/pkg/domain/entities"
21
	"github.com/containers/podman/v5/pkg/env"
22
	v1 "github.com/containers/podman/v5/pkg/k8s.io/api/core/v1"
23
	"github.com/containers/podman/v5/pkg/k8s.io/apimachinery/pkg/api/resource"
24
	v12 "github.com/containers/podman/v5/pkg/k8s.io/apimachinery/pkg/apis/meta/v1"
25
	"github.com/containers/podman/v5/pkg/k8s.io/apimachinery/pkg/util/intstr"
26
	"github.com/containers/podman/v5/pkg/lookup"
27
	"github.com/containers/podman/v5/pkg/namespaces"
28
	"github.com/containers/podman/v5/pkg/specgen"
29
	"github.com/containers/podman/v5/pkg/util"
30
	"github.com/opencontainers/runtime-spec/specs-go"
31
	"github.com/sirupsen/logrus"
32
	"golang.org/x/exp/slices"
33
)
34

35
// GenerateForKube takes a slice of libpod containers and generates
36
// one v1.Pod description that includes the given containers.
37
func GenerateForKube(ctx context.Context, ctrs []*Container, getService, podmanOnly bool) (*v1.Pod, error) {
38
	// Generate the v1.Pod yaml description
39
	return simplePodWithV1Containers(ctx, ctrs, getService, podmanOnly)
40
}
41

42
// GenerateForKube generates one v1.Pod description and the corresponding
// service ports from the containers of this libpod pod.
44
func (p *Pod) GenerateForKube(ctx context.Context, getService, podmanOnly bool) (*v1.Pod, []v1.ServicePort, error) {
45
	// Generate the v1.Pod yaml description
46
	var (
47
		ports        []v1.ContainerPort
48
		servicePorts []v1.ServicePort
49
	)
50

51
	allContainers, err := p.allContainers()
52
	if err != nil {
53
		return nil, servicePorts, err
54
	}
55
	// If the pod has no containers, no sense to generate YAML
56
	if len(allContainers) == 0 {
57
		return nil, servicePorts, fmt.Errorf("pod %s has no containers", p.ID())
58
	}
59
	// If only an infra container is present, makes no sense to generate YAML
60
	if len(allContainers) == 1 && p.HasInfraContainer() {
61
		return nil, servicePorts, fmt.Errorf("pod %s only has an infra container", p.ID())
62
	}
63

64
	extraHost := make([]v1.HostAlias, 0)
65
	hostNetwork := false
66
	hostUsers := true
67
	infraName := ""
68
	if p.HasInfraContainer() {
69
		infraContainer, err := p.getInfraContainer()
70
		if err != nil {
71
			return nil, servicePorts, err
72
		}
73
		for _, host := range infraContainer.config.ContainerNetworkConfig.HostAdd {
74
			hostname, ip, hasIP := strings.Cut(host, ":")
75
			if !hasIP {
76
				return nil, servicePorts, errors.New("invalid hostAdd")
77
			}
78
			extraHost = append(extraHost, v1.HostAlias{
79
				IP:        ip,
80
				Hostnames: []string{hostname},
81
			})
82
		}
83
		ports, err = portMappingToContainerPort(infraContainer.config.PortMappings, getService)
84
		if err != nil {
85
			return nil, servicePorts, err
86
		}
87
		spState := newServicePortState()
88
		servicePorts, err = spState.containerPortsToServicePorts(ports)
89
		if err != nil {
90
			return nil, servicePorts, err
91
		}
92
		hostNetwork = infraContainer.NetworkMode() == string(namespaces.NetworkMode(specgen.Host))
93
		hostUsers = infraContainer.IDMappings().HostUIDMapping && infraContainer.IDMappings().HostGIDMapping
94
		infraName = infraContainer.config.Name
95
	}
96
	pod, err := p.podWithContainers(ctx, allContainers, ports, hostNetwork, hostUsers, getService, podmanOnly, infraName)
97
	if err != nil {
98
		return nil, servicePorts, err
99
	}
100
	pod.Spec.HostAliases = extraHost
101

102
	// Set the pod's restart policy
103
	pod.Spec.RestartPolicy = getPodRestartPolicy(p.config.RestartPolicy)
104

105
	if p.SharesPID() {
106
		// unfortunately, go doesn't have a nice way to specify a pointer to a bool
107
		b := true
108
		pod.Spec.ShareProcessNamespace = &b
109
	}
110

111
	return pod, servicePorts, nil
112
}
113

114
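// getInfraContainer returns the pod's infra container, looked up by its ID.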
func (p *Pod) getInfraContainer() (*Container, error) {
115
	infraID, err := p.InfraContainerID()
116
	if err != nil {
117
		return nil, err
118
	}
119
	return p.runtime.GetContainer(infraID)
120
}
121

122
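// GenerateForKubeDaemonSet returns a YAMLDaemonSet from a YAMLPod that is then used to create a kubernetes DaemonSet
// kind YAML.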
func GenerateForKubeDaemonSet(ctx context.Context, pod *YAMLPod, options entities.GenerateKubeOptions) (*YAMLDaemonSet, error) {
123
	// Restart policy for DaemonSets can only be set to Always
124
	if !(pod.Spec.RestartPolicy == "" || pod.Spec.RestartPolicy == v1.RestartPolicyAlways) {
125
		return nil, fmt.Errorf("k8s DaemonSets can only have restartPolicy set to Always")
126
	}
127

128
	// Error out if the user tries to set replica count
129
	if options.Replicas > 1 {
130
		return nil, fmt.Errorf("k8s DaemonSets don't allow setting replicas")
131
	}
132

133
	// Create label map that will be added to podSpec and DaemonSet metadata
134
	// The matching label lets the daemonset know which pod to manage
135
	appKey := "app"
136
	matchLabels := map[string]string{appKey: pod.Name}
137
	// Add the key:value (app:pod-name) to the podSpec labels
138
	if pod.Labels == nil {
139
		pod.Labels = matchLabels
140
	} else {
141
		pod.Labels[appKey] = pod.Name
142
	}
143

144
	depSpec := YAMLDaemonSetSpec{
145
		DaemonSetSpec: v1.DaemonSetSpec{
146
			Selector: &v12.LabelSelector{
147
				MatchLabels: matchLabels,
148
			},
149
		},
150
		Template: &YAMLPodTemplateSpec{
151
			PodTemplateSpec: v1.PodTemplateSpec{
152
				ObjectMeta: pod.ObjectMeta,
153
			},
154
			Spec: pod.Spec,
155
		},
156
	}
157

158
	// Create the DaemonSet object
159
	dep := YAMLDaemonSet{
160
		DaemonSet: v1.DaemonSet{
161
			ObjectMeta: v12.ObjectMeta{
162
				Name:              pod.Name + "-daemonset",
163
				CreationTimestamp: pod.CreationTimestamp,
164
				Labels:            pod.Labels,
165
			},
166
			TypeMeta: v12.TypeMeta{
167
				Kind:       "DaemonSet",
168
				APIVersion: "apps/v1",
169
			},
170
		},
171
		Spec: &depSpec,
172
	}
173

174
	return &dep, nil
175
}
176

177
// GenerateForKubeDeployment returns a YAMLDeployment from a YAMLPod that is then used to create a kubernetes Deployment
178
// kind YAML.
179
func GenerateForKubeDeployment(ctx context.Context, pod *YAMLPod, options entities.GenerateKubeOptions) (*YAMLDeployment, error) {
180
	// Restart policy for Deployments can only be set to Always
181
	if options.Type == define.K8sKindDeployment && !(pod.Spec.RestartPolicy == "" || pod.Spec.RestartPolicy == v1.RestartPolicyAlways) {
182
		return nil, fmt.Errorf("k8s Deployments can only have restartPolicy set to Always")
183
	}
184

185
	// Create label map that will be added to podSpec and Deployment metadata
186
	// The matching label lets the deployment know which pods to manage
187
	appKey := "app"
188
	matchLabels := map[string]string{appKey: pod.Name}
189
	// Add the key:value (app:pod-name) to the podSpec labels
190
	if pod.Labels == nil {
191
		pod.Labels = matchLabels
192
	} else {
193
		pod.Labels[appKey] = pod.Name
194
	}
195

196
	depSpec := YAMLDeploymentSpec{
197
		DeploymentSpec: v1.DeploymentSpec{
198
			Selector: &v12.LabelSelector{
199
				MatchLabels: matchLabels,
200
			},
201
		},
202
		Template: &YAMLPodTemplateSpec{
203
			PodTemplateSpec: v1.PodTemplateSpec{
204
				ObjectMeta: pod.ObjectMeta,
205
			},
206
			Spec: pod.Spec,
207
		},
208
	}
209

210
	// Add the replica count if the user passed --replicas with a value greater than 1.
	// If replicas is set to 1, there is no need to add it to the generated yaml as k8s
	// automatically defaults to that. Podman also sets replicas to 1 by default.
213
	if options.Replicas > 1 {
214
		depSpec.Replicas = &options.Replicas
215
	}
216

217
	// Create the Deployment object
218
	dep := YAMLDeployment{
219
		Deployment: v1.Deployment{
220
			ObjectMeta: v12.ObjectMeta{
221
				Name:              pod.Name + "-deployment",
222
				CreationTimestamp: pod.CreationTimestamp,
223
				Labels:            pod.Labels,
224
			},
225
			TypeMeta: v12.TypeMeta{
226
				Kind:       "Deployment",
227
				APIVersion: "apps/v1",
228
			},
229
		},
230
		Spec: &depSpec,
231
	}
232

233
	return &dep, nil
234
}
235

236
// GenerateForKube generates a v1.PersistentVolumeClaim from a libpod volume.
237
func (v *Volume) GenerateForKube() *v1.PersistentVolumeClaim {
238
	annotations := make(map[string]string)
239
	annotations[util.VolumeDriverAnnotation] = v.Driver()
240

241
	for k, v := range v.Options() {
242
		switch k {
243
		case "o":
244
			annotations[util.VolumeMountOptsAnnotation] = v
245
		case "device":
246
			annotations[util.VolumeDeviceAnnotation] = v
247
		case "type":
248
			annotations[util.VolumeTypeAnnotation] = v
249
		case "UID":
250
			annotations[util.VolumeUIDAnnotation] = v
251
		case "GID":
252
			annotations[util.VolumeGIDAnnotation] = v
253
		}
254
	}
255

256
	return &v1.PersistentVolumeClaim{
257
		TypeMeta: v12.TypeMeta{
258
			Kind:       "PersistentVolumeClaim",
259
			APIVersion: "v1",
260
		},
261
		ObjectMeta: v12.ObjectMeta{
262
			Name:              v.Name(),
263
			Labels:            v.Labels(),
264
			Annotations:       annotations,
265
			CreationTimestamp: v12.Now(),
266
		},
267
		Spec: v1.PersistentVolumeClaimSpec{
268
			Resources: v1.ResourceRequirements{
269
				Requests: map[v1.ResourceName]resource.Quantity{
270
					v1.ResourceStorage: resource.MustParse("1Gi"),
271
				},
272
			},
273
			AccessModes: []v1.PersistentVolumeAccessMode{
274
				v1.ReadWriteOnce,
275
			},
276
		},
277
	}
278
}
279

280
// YAMLPodSpec represents the same k8s API core PodSpec struct with a small
281
// change and that is having Containers as a slice of pointers to YAMLContainer.
282
// Because Go doesn't omit empty struct and we want to omit Status in YAML
283
// if it's empty. Fixes: GH-11998
284
type YAMLPodSpec struct {
285
	v1.PodSpec
286
	Containers []*YAMLContainer `json:"containers"`
287
}
288

289
// YAMLPod represents the same k8s API core Pod struct with a small
290
// change and that is having Spec as a pointer to YAMLPodSpec and
291
// Status as a pointer to k8s API core PodStatus.
292
// Because Go doesn't omit empty struct and we want to omit Status in YAML
293
// if it's empty. Fixes: GH-11998
294
type YAMLPod struct {
295
	v1.Pod
296
	Spec   *YAMLPodSpec  `json:"spec,omitempty"`
297
	Status *v1.PodStatus `json:"status,omitempty"`
298
}
299

300
// YAMLPodTemplateSpec represents the same k8s API core PodTemplateSpec struct with a
// small change and that is having Spec as a pointer to YAMLPodSpec.
// Because Go doesn't omit empty structs and we want to omit any empty structs in the
// Pod yaml. This is used when generating a Deployment kind.
304
type YAMLPodTemplateSpec struct {
305
	v1.PodTemplateSpec
306
	Spec *YAMLPodSpec `json:"spec,omitempty"`
307
}
308

309
// YAMLDeploymentSpec represents the same k8s API core DeploymentSpec with a small
310
// change and that is having Template as a pointer to YAMLPodTemplateSpec and Strategy
311
// as a pointer to k8s API core DeploymentStrategy.
312
// Because Go doesn't omit empty struct and we want to omit Strategy and any fields in the Pod YAML
313
// if it's empty.
314
type YAMLDeploymentSpec struct {
315
	v1.DeploymentSpec
316
	Template *YAMLPodTemplateSpec   `json:"template,omitempty"`
317
	Strategy *v1.DeploymentStrategy `json:"strategy,omitempty"`
318
}
319

320
// YAMLDaemonSetSpec represents the same k8s API core DaemonSetSpec with a small
// change and that is having Template as a pointer to YAMLPodTemplateSpec and Strategy
// as a pointer to k8s API core DaemonSetUpdateStrategy.
323
// Because Go doesn't omit empty struct and we want to omit Strategy and any fields in the Pod YAML
324
// if it's empty.
325
type YAMLDaemonSetSpec struct {
326
	v1.DaemonSetSpec
327
	Template *YAMLPodTemplateSpec        `json:"template,omitempty"`
328
	Strategy *v1.DaemonSetUpdateStrategy `json:"strategy,omitempty"`
329
}
330

331
// YAMLDaemonSet represents the same k8s API core DaemonSet with a small change
332
// and that is having Spec as a pointer to YAMLDaemonSetSpec and Status as a pointer to
333
// k8s API core DaemonSetStatus.
334
// Because Go doesn't omit empty struct and we want to omit Status and any fields in the DaemonSetSpec
335
// if it's empty.
336
type YAMLDaemonSet struct {
337
	v1.DaemonSet
338
	Spec   *YAMLDaemonSetSpec  `json:"spec,omitempty"`
339
	Status *v1.DaemonSetStatus `json:"status,omitempty"`
340
}
341

342
// YAMLDeployment represents the same k8s API core Deployment with a small change
343
// and that is having Spec as a pointer to YAMLDeploymentSpec and Status as a pointer to
344
// k8s API core DeploymentStatus.
345
// Because Go doesn't omit empty struct and we want to omit Status and any fields in the DeploymentSpec
346
// if it's empty.
347
type YAMLDeployment struct {
348
	v1.Deployment
349
	Spec   *YAMLDeploymentSpec  `json:"spec,omitempty"`
350
	Status *v1.DeploymentStatus `json:"status,omitempty"`
351
}
352

353
// YAMLService represents the same k8s API core Service struct with a small
354
// change and that is having Status as a pointer to k8s API core ServiceStatus.
355
// Because Go doesn't omit empty struct and we want to omit Status in YAML
356
// if it's empty. Fixes: GH-11998
357
type YAMLService struct {
358
	v1.Service
359
	Status *v1.ServiceStatus `json:"status,omitempty"`
360
}
361

362
// YAMLContainer represents the same k8s API core Container struct with a small
363
// change and that is having Resources as a pointer to k8s API core ResourceRequirements.
364
// Because Go doesn't omit empty structs and we want to omit Resources in YAML
365
// if it's empty. Fixes: GH-11998
366
type YAMLContainer struct {
367
	v1.Container
368
	Resources *v1.ResourceRequirements `json:"resources,omitempty"`
369
}
370

371
// ConvertV1PodToYAMLPod takes k8s API core Pod and returns a pointer to YAMLPod
372
func ConvertV1PodToYAMLPod(pod *v1.Pod) *YAMLPod {
373
	cs := []*YAMLContainer{}
374
	for _, cc := range pod.Spec.Containers {
375
		var res *v1.ResourceRequirements
376
		if len(cc.Resources.Limits) > 0 || len(cc.Resources.Requests) > 0 {
377
			res = &cc.Resources
378
		}
379
		cs = append(cs, &YAMLContainer{Container: cc, Resources: res})
380
	}
381
	mpo := &YAMLPod{Pod: *pod}
382
	mpo.Spec = &YAMLPodSpec{PodSpec: pod.Spec, Containers: cs}
383
	for _, ctr := range pod.Spec.Containers {
384
		if ctr.SecurityContext == nil || ctr.SecurityContext.SELinuxOptions == nil {
385
			continue
386
		}
387
		selinuxOpts := ctr.SecurityContext.SELinuxOptions
388
		if selinuxOpts.User == "" && selinuxOpts.Role == "" && selinuxOpts.Type == "" && selinuxOpts.Level == "" && selinuxOpts.FileType == "" {
389
			ctr.SecurityContext.SELinuxOptions = nil
390
		}
391
	}
392
	dnsCfg := pod.Spec.DNSConfig
393
	if dnsCfg != nil && (len(dnsCfg.Nameservers)+len(dnsCfg.Searches)+len(dnsCfg.Options) > 0) {
394
		mpo.Spec.DNSConfig = dnsCfg
395
	}
396
	status := pod.Status
397
	if status.Phase != "" || len(status.Conditions) > 0 ||
398
		status.Message != "" || status.Reason != "" ||
399
		status.NominatedNodeName != "" || status.HostIP != "" ||
400
		status.PodIP != "" || status.StartTime != nil ||
401
		len(status.InitContainerStatuses) > 0 || len(status.ContainerStatuses) > 0 || status.QOSClass != "" || len(status.EphemeralContainerStatuses) > 0 {
402
		mpo.Status = &status
403
	}
404
	return mpo
405
}
406

407
// GenerateKubeServiceFromV1Pod creates a v1 service object from a v1 pod object
408
func GenerateKubeServiceFromV1Pod(pod *v1.Pod, servicePorts []v1.ServicePort) (YAMLService, error) {
409
	service := YAMLService{}
410
	selector := make(map[string]string)
411
	selector["app"] = pod.Labels["app"]
412
	ports := servicePorts
413
	if len(ports) == 0 {
414
		p, err := containersToServicePorts(pod.Spec.Containers)
415
		if err != nil {
416
			return service, err
417
		}
418
		ports = p
419
	}
420
	serviceSpec := v1.ServiceSpec{
421
		Ports:    ports,
422
		Selector: selector,
423
		Type:     v1.ServiceTypeNodePort,
424
	}
425
	service.Spec = serviceSpec
426
	service.ObjectMeta = pod.ObjectMeta
427
	// Reset the annotations for the service as the pod annotations are not needed for the service
428
	service.ObjectMeta.Annotations = nil
429
	tm := v12.TypeMeta{
430
		Kind:       "Service",
431
		APIVersion: pod.TypeMeta.APIVersion,
432
	}
433
	service.TypeMeta = tm
434
	return service, nil
435
}
436

437
// servicePortState allows calling containerPortsToServicePorts for a single service
438
type servicePortState struct {
439
	// A program using the shared math/rand state with the default seed will produce the same sequence of pseudo-random numbers
440
	// for each execution. Use a private RNG state so as not to interfere with other users.
441
	rng       *rand.Rand
442
	usedPorts map[int]struct{}
443
}
444

445
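// newServicePortState returns a servicePortState with its own time-seeded RNG and an empty set of used NodePorts.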
func newServicePortState() servicePortState {
446
	return servicePortState{
447
		rng:       rand.New(rand.NewSource(time.Now().UnixNano())),
448
		usedPorts: map[int]struct{}{},
449
	}
450
}
451

452
// containerPortsToServicePorts takes a slice of container ports and generates a
453
// slice of service ports
454
func (state *servicePortState) containerPortsToServicePorts(containerPorts []v1.ContainerPort) ([]v1.ServicePort, error) {
455
	sps := make([]v1.ServicePort, 0, len(containerPorts))
456
	for _, cp := range containerPorts {
457
		var nodePort int
458
		attempt := 0
459
		for {
460
			// Legal nodeport range is 30000-32767
461
			nodePort = 30000 + state.rng.Intn(32767-30000+1)
462
			if _, found := state.usedPorts[nodePort]; !found {
463
				state.usedPorts[nodePort] = struct{}{}
464
				break
465
			}
466
			attempt++
467
			if attempt >= 100 {
468
				return nil, fmt.Errorf("too many attempts trying to generate a unique NodePort number")
469
			}
470
		}
471
		servicePort := v1.ServicePort{
472
			Protocol:   cp.Protocol,
473
			Port:       cp.ContainerPort,
474
			NodePort:   int32(nodePort),
475
			Name:       strconv.Itoa(int(cp.ContainerPort)),
476
			TargetPort: intstr.Parse(strconv.Itoa(int(cp.ContainerPort))),
477
		}
478
		sps = append(sps, servicePort)
479
	}
480
	return sps, nil
481
}
482

483
// containersToServicePorts takes a slice of v1.Containers and generates an
484
// inclusive list of service ports to expose
485
func containersToServicePorts(containers []v1.Container) ([]v1.ServicePort, error) {
486
	state := newServicePortState()
487
	sps := make([]v1.ServicePort, 0, len(containers))
488
	for _, ctr := range containers {
489
		ports, err := state.containerPortsToServicePorts(ctr.Ports)
490
		if err != nil {
491
			return nil, err
492
		}
493
		sps = append(sps, ports...)
494
	}
495
	return sps, nil
496
}
497

498
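// podWithContainers converts the pod's containers into v1.Containers and assembles them, together with
// their volumes, annotations, and DNS settings, into a single v1.Pod description.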
func (p *Pod) podWithContainers(ctx context.Context, containers []*Container, ports []v1.ContainerPort, hostNetwork, hostUsers, getService, podmanOnly bool, infraName string) (*v1.Pod, error) {
499
	deDupPodVolumes := make(map[string]*v1.Volume)
500
	first := true
501
	podContainers := make([]v1.Container, 0, len(containers))
502
	podInitCtrs := []v1.Container{}
503
	podAnnotations := make(map[string]string)
504
	dnsInfo := v1.PodDNSConfig{}
505
	var (
506
		hostname    string
507
		stopTimeout *uint
508
	)
509

510
	// Let's sort the containers in order of created time
511
	// This will ensure that the init containers are defined in the correct order in the kube yaml
512
	sort.Slice(containers, func(i, j int) bool { return containers[i].CreatedTime().Before(containers[j].CreatedTime()) })
513

514
	for _, ctr := range containers {
515
		if !ctr.IsInfra() {
516
			for k, v := range ctr.config.Spec.Annotations {
517
				if !podmanOnly && (define.IsReservedAnnotation(k)) {
518
					continue
519
				}
520
				podAnnotations[fmt.Sprintf("%s/%s", k, removeUnderscores(ctr.Name()))] = v
521
			}
522
			// Convert auto-update labels into kube annotations
523
			for k, v := range getAutoUpdateAnnotations(ctr.Name(), ctr.Labels()) {
524
				podAnnotations[k] = v
525
			}
526
			isInit := ctr.IsInitCtr()
527
			// Since hostname is only set at pod level, set the hostname to the hostname of the first container we encounter
528
			if hostname == "" {
529
				// Only set the hostname if it is not set to the truncated container ID, which we do by default if no
530
				// hostname is specified for the container and if it is not set to the pod name.
531
				if !strings.Contains(ctr.ID(), ctr.Hostname()) && ctr.Hostname() != p.Name() {
532
					hostname = ctr.Hostname()
533
				}
534
			}
535

536
			// Pick the first container that has a stop-timeout set and use that value
537
			// Ignore podman's default
538
			if ctr.config.StopTimeout != util.DefaultContainerConfig().Engine.StopTimeout && stopTimeout == nil {
539
				stopTimeout = &ctr.config.StopTimeout
540
			}
541

542
			ctr, volumes, _, annotations, err := containerToV1Container(ctx, ctr, getService)
543
			if err != nil {
544
				return nil, err
545
			}
546
			for k, v := range annotations {
547
				podAnnotations[define.BindMountPrefix] = k + ":" + v
548
			}
549
			// Since port bindings for the pod are handled by the
550
			// infra container, wipe them here only if we are sharing the net namespace
551
			// If the network namespace is not being shared in the pod, then containers
552
			// can have their own network configurations
553
			if p.SharesNet() {
554
				ctr.Ports = nil
555

556
				// We add the original port declarations from the libpod infra container
557
				// to the first kubernetes container description because otherwise we lose
558
				// the original container/port bindings.
559
				// Add the port configuration to the first regular container or the first
560
				// init container if only init containers have been created in the pod.
561
				if first && len(ports) > 0 && (!isInit || len(containers) == 2) {
562
					ctr.Ports = ports
563
					first = false
564
				}
565
			}
566
			if isInit {
567
				podInitCtrs = append(podInitCtrs, ctr)
568
				continue
569
			}
570
			podContainers = append(podContainers, ctr)
571
			// Deduplicate volumes, so if containers in the pod share a volume, it's only
572
			// listed in the volumes section once
573
			for _, vol := range volumes {
574
				vol := vol
575
				deDupPodVolumes[vol.Name] = &vol
576
			}
577
		} else {
578
			_, _, infraDNS, _, err := containerToV1Container(ctx, ctr, getService)
579
			if err != nil {
580
				return nil, err
581
			}
582
			if infraDNS != nil {
583
				if servers := infraDNS.Nameservers; len(servers) > 0 {
584
					dnsInfo.Nameservers = servers
585
				}
586
				if searches := infraDNS.Searches; len(searches) > 0 {
587
					dnsInfo.Searches = searches
588
				}
589
				if options := infraDNS.Options; len(options) > 0 {
590
					dnsInfo.Options = options
591
				}
592
			}
593
			// If the infraName is not the podID-infra, that means the user set another infra name using
594
			// --infra-name during pod creation
595
			if infraName != "" && infraName != p.ID()[:12]+"-infra" {
596
				podAnnotations[define.InfraNameAnnotation] = infraName
597
			}
598
		}
599
	}
600
	podVolumes := []v1.Volume{}
601
	for _, vol := range deDupPodVolumes {
602
		podVolumes = append(podVolumes, *vol)
603
	}
604
	podName := removeUnderscores(p.Name())
605

606
	return newPodObject(
607
		podName,
608
		podAnnotations,
609
		podInitCtrs,
610
		podContainers,
611
		podVolumes,
612
		&dnsInfo,
613
		hostNetwork,
614
		hostUsers,
615
		hostname,
616
		stopTimeout), nil
617
}
618

619
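// newPodObject assembles a v1.Pod from the given metadata, containers, volumes, DNS options, and host settings.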
func newPodObject(podName string, annotations map[string]string, initCtrs, containers []v1.Container, volumes []v1.Volume, dnsOptions *v1.PodDNSConfig, hostNetwork, hostUsers bool, hostname string, stopTimeout *uint) *v1.Pod {
620
	tm := v12.TypeMeta{
621
		Kind:       "Pod",
622
		APIVersion: "v1",
623
	}
624

625
	// Add a label called "app" with the pod's name as a value
626
	labels := make(map[string]string)
627
	labels["app"] = removeUnderscores(podName)
628
	om := v12.ObjectMeta{
629
		// The name of the pod is container_name-libpod
630
		Name:   podName,
631
		Labels: labels,
632
		// CreationTimestamp seems to be required, so adding it; in doing so, the timestamp
633
		// will reflect time this is run (not container create time) because the conversion
634
		// of the container create time to v1 Time is probably not warranted nor worthwhile.
635
		CreationTimestamp: v12.Now(),
636
		Annotations:       annotations,
637
	}
638
	ps := v1.PodSpec{
639
		Containers:     containers,
640
		Hostname:       hostname,
641
		HostNetwork:    hostNetwork,
642
		InitContainers: initCtrs,
643
		Volumes:        volumes,
644
	}
645
	if !hostUsers {
646
		ps.HostUsers = &hostUsers
647
	}
648
	if dnsOptions != nil && (len(dnsOptions.Nameservers)+len(dnsOptions.Searches)+len(dnsOptions.Options) > 0) {
649
		ps.DNSConfig = dnsOptions
650
	}
651
	if stopTimeout != nil {
652
		terminationGracePeriod := int64(*stopTimeout)
653
		ps.TerminationGracePeriodSeconds = &terminationGracePeriod
654
	}
655
	p := v1.Pod{
656
		TypeMeta:   tm,
657
		ObjectMeta: om,
658
		Spec:       ps,
659
	}
660
	return &p
661
}
662

663
// simplePodWithV1Containers is a function used by inspect when kube yaml needs to be generated
664
// for a single container.  We "insert" that container description in a pod.
665
func simplePodWithV1Containers(ctx context.Context, ctrs []*Container, getService, podmanOnly bool) (*v1.Pod, error) {
666
	kubeCtrs := make([]v1.Container, 0, len(ctrs))
667
	kubeInitCtrs := []v1.Container{}
668
	kubeVolumes := make([]v1.Volume, 0)
669
	hostUsers := true
670
	hostNetwork := true
671
	podDNS := v1.PodDNSConfig{}
672
	kubeAnnotations := make(map[string]string)
673
	ctrNames := make([]string, 0, len(ctrs))
674
	var (
675
		hostname      string
676
		restartPolicy *string
677
		stopTimeout   *uint
678
	)
679
	for _, ctr := range ctrs {
680
		ctrNames = append(ctrNames, removeUnderscores(ctr.Name()))
681
		for k, v := range ctr.config.Spec.Annotations {
682
			if !podmanOnly && define.IsReservedAnnotation(k) {
683
				continue
684
			}
685
			kubeAnnotations[fmt.Sprintf("%s/%s", k, removeUnderscores(ctr.Name()))] = v
686
		}
687

688
		// Convert auto-update labels into kube annotations
689
		for k, v := range getAutoUpdateAnnotations(ctr.Name(), ctr.Labels()) {
690
			kubeAnnotations[k] = v
691
		}
692

693
		isInit := ctr.IsInitCtr()
694
		// Since hostname is only set at pod level, set the hostname to the hostname of the first container we encounter
695
		if hostname == "" {
696
			// Only set the hostname if it is not set to the truncated container ID, which we do by default if no
697
			// hostname is specified for the container
698
			if !strings.Contains(ctr.ID(), ctr.Hostname()) {
699
				hostname = ctr.Hostname()
700
			}
701
		}
702

703
		// Pick the first container that has a stop-timeout set and use that value
704
		// Ignore podman's default
705
		if ctr.config.StopTimeout != util.DefaultContainerConfig().Engine.StopTimeout && stopTimeout == nil {
706
			stopTimeout = &ctr.config.StopTimeout
707
		}
708

709
		// Use the restart policy of the first non-init container
710
		if !isInit && restartPolicy == nil {
711
			restartPolicy = &ctr.config.RestartPolicy
712
		}
713

714
		if ctr.config.Spec.Process != nil {
715
			var ulimitArr []string
716
			defaultUlimits := util.DefaultContainerConfig().Ulimits()
717
			for _, ulimit := range ctr.config.Spec.Process.Rlimits {
718
				finalUlimit := strings.ToLower(strings.ReplaceAll(ulimit.Type, "RLIMIT_", "")) + "=" + strconv.Itoa(int(ulimit.Soft)) + ":" + strconv.Itoa(int(ulimit.Hard))
719
				// compare ulimit with default list so we don't add it twice
720
				if slices.Contains(defaultUlimits, finalUlimit) {
721
					continue
722
				}
723

724
				ulimitArr = append(ulimitArr, finalUlimit)
725
			}
726

727
			if len(ulimitArr) > 0 {
728
				kubeAnnotations[define.UlimitAnnotation] = strings.Join(ulimitArr, ",")
729
			}
730
		}
731

732
		if !ctr.HostNetwork() {
733
			hostNetwork = false
734
		}
735
		if !(ctr.IDMappings().HostUIDMapping && ctr.IDMappings().HostGIDMapping) {
736
			hostUsers = false
737
		}
738
		kubeCtr, kubeVols, ctrDNS, annotations, err := containerToV1Container(ctx, ctr, getService)
739
		if err != nil {
740
			return nil, err
741
		}
742
		for k, v := range annotations {
743
			kubeAnnotations[define.BindMountPrefix] = k + ":" + v
744
		}
745
		if isInit {
746
			kubeInitCtrs = append(kubeInitCtrs, kubeCtr)
747
		} else {
748
			kubeCtrs = append(kubeCtrs, kubeCtr)
749
		}
750
		kubeVolumes = append(kubeVolumes, kubeVols...)
751
		// Combine DNS information into the summed structure
752
		if ctrDNS != nil {
753
			// nameservers
754
			if servers := ctrDNS.Nameservers; servers != nil {
755
				if podDNS.Nameservers == nil {
756
					podDNS.Nameservers = make([]string, 0)
757
				}
758
				for _, s := range servers {
759
					if !slices.Contains(podDNS.Nameservers, s) { // only append if it does not exist
760
						podDNS.Nameservers = append(podDNS.Nameservers, s)
761
					}
762
				}
763
			}
764
			// search domains
765
			if domains := ctrDNS.Searches; domains != nil {
766
				if podDNS.Searches == nil {
767
					podDNS.Searches = make([]string, 0)
768
				}
769
				for _, d := range domains {
770
					if !slices.Contains(podDNS.Searches, d) { // only append if it does not exist
771
						podDNS.Searches = append(podDNS.Searches, d)
772
					}
773
				}
774
			}
775
			// dns options
776
			if options := ctrDNS.Options; options != nil {
777
				if podDNS.Options == nil {
778
					podDNS.Options = make([]v1.PodDNSConfigOption, 0)
779
				}
780
				podDNS.Options = append(podDNS.Options, options...)
781
			}
782
		} // end if ctrDNS
783
	}
784
	podName := removeUnderscores(ctrs[0].Name())
785
	// Check if the pod name and container name will end up conflicting
786
	// Append -pod if so
787
	if slices.Contains(ctrNames, podName) {
788
		podName += "-pod"
789
	}
790

791
	pod := newPodObject(
792
		podName,
793
		kubeAnnotations,
794
		kubeInitCtrs,
795
		kubeCtrs,
796
		kubeVolumes,
797
		&podDNS,
798
		hostNetwork,
799
		hostUsers,
800
		hostname,
801
		stopTimeout)
802

803
	// Set the pod's restart policy
804
	policy := ""
805
	if restartPolicy != nil {
806
		policy = *restartPolicy
807
	}
808
	pod.Spec.RestartPolicy = getPodRestartPolicy(policy)
809

810
	return pod, nil
811
}
812

813
// getPodRestartPolicy returns the pod restart policy to be set in the generated kube yaml
814
func getPodRestartPolicy(policy string) v1.RestartPolicy {
815
	switch policy {
816
	case define.RestartPolicyNo:
817
		return v1.RestartPolicyNever
818
	case define.RestartPolicyAlways:
819
		return v1.RestartPolicyAlways
820
	case define.RestartPolicyOnFailure:
821
		return v1.RestartPolicyOnFailure
822
	default: // some pods/ctrs created from the cmdline have policy "" - keep it as "" and let k8s handle the defaults
823
		return ""
824
	}
825
}
826

827
// containerToV1Container converts information we know about a libpod container
828
// to a V1.Container specification.
829
func containerToV1Container(ctx context.Context, c *Container, getService bool) (v1.Container, []v1.Volume, *v1.PodDNSConfig, map[string]string, error) {
830
	kubeContainer := v1.Container{}
831
	kubeVolumes := []v1.Volume{}
832
	annotations := make(map[string]string)
833
	kubeSec, hasSecData, err := generateKubeSecurityContext(c)
834
	if err != nil {
835
		return kubeContainer, kubeVolumes, nil, annotations, err
836
	}
837

838
	// NOTE: a privileged container mounts all of /dev/*.
839
	if !c.Privileged() && c.config.Spec.Linux != nil && len(c.config.Spec.Linux.Devices) > 0 {
840
		// TODO Enable when we can support devices and their names
841
		kubeContainer.VolumeDevices = generateKubeVolumeDeviceFromLinuxDevice(c.config.Spec.Linux.Devices)
842
		return kubeContainer, kubeVolumes, nil, annotations, fmt.Errorf("linux devices: %w", define.ErrNotImplemented)
843
	}
844

845
	if len(c.config.UserVolumes) > 0 {
846
		volumeMounts, volumes, localAnnotations, err := libpodMountsToKubeVolumeMounts(c)
847
		if err != nil {
848
			return kubeContainer, kubeVolumes, nil, nil, err
849
		}
850
		annotations = localAnnotations
851
		kubeContainer.VolumeMounts = volumeMounts
852
		kubeVolumes = append(kubeVolumes, volumes...)
853
	}
854

855
	portmappings, err := c.PortMappings()
856
	if err != nil {
857
		return kubeContainer, kubeVolumes, nil, annotations, err
858
	}
859
	ports, err := portMappingToContainerPort(portmappings, getService)
860
	if err != nil {
861
		return kubeContainer, kubeVolumes, nil, annotations, err
862
	}
863

864
	// Handle command and arguments.
865
	if ep := c.Entrypoint(); len(ep) > 0 {
866
		// If we have an entrypoint, set the container's command as
867
		// arguments.
868
		kubeContainer.Command = ep
869
		kubeContainer.Args = c.Command()
870
	} else {
871
		kubeContainer.Command = c.Command()
872
	}
873

874
	kubeContainer.Name = removeUnderscores(c.Name())
875
	_, image := c.Image()
876

877
	// The infra container may have been created with an overlay root FS
878
	// instead of an infra image.  If so, set the image to the default K8s
879
	// pause one and make sure it's in the storage by pulling it down if
880
	// missing.
881
	if image == "" && c.IsInfra() {
882
		image = c.runtime.config.Engine.InfraImage
883
		if _, err := c.runtime.libimageRuntime.Pull(ctx, image, config.PullPolicyMissing, nil); err != nil {
884
			return kubeContainer, nil, nil, nil, err
885
		}
886
	}
887

888
	kubeContainer.Image = image
889
	kubeContainer.Stdin = c.Stdin()
890
	img, _, err := c.runtime.libimageRuntime.LookupImage(image, nil)
891
	if err != nil {
892
		return kubeContainer, kubeVolumes, nil, annotations, fmt.Errorf("looking up image %q of container %q: %w", image, c.ID(), err)
893
	}
894
	imgData, err := img.Inspect(ctx, nil)
895
	if err != nil {
896
		return kubeContainer, kubeVolumes, nil, annotations, err
897
	}
898
	// If the user doesn't set a command/entrypoint when creating the container with podman and
899
	// is using the command or entrypoint from the image, don't add it to the generated kube yaml
900
	if reflect.DeepEqual(imgData.Config.Cmd, kubeContainer.Command) || reflect.DeepEqual(imgData.Config.Entrypoint, kubeContainer.Command) {
901
		kubeContainer.Command = nil
902
	}
903

904
	if c.WorkingDir() != "/" && imgData.Config.WorkingDir != c.WorkingDir() {
905
		kubeContainer.WorkingDir = c.WorkingDir()
906
	}
907

908
	if imgData.User == c.User() && hasSecData {
909
		kubeSec.RunAsGroup, kubeSec.RunAsUser = nil, nil
910
	}
911
	// If the image has user set as a positive integer value, then set runAsNonRoot to true
912
	// in the kube yaml
913
	imgUserID, err := strconv.Atoi(imgData.User)
914
	if err == nil && imgUserID > 0 {
915
		trueBool := true
916
		kubeSec.RunAsNonRoot = &trueBool
917
	}
918

919
	envVariables, err := libpodEnvVarsToKubeEnvVars(c.config.Spec.Process.Env, imgData.Config.Env)
920
	if err != nil {
921
		return kubeContainer, kubeVolumes, nil, annotations, err
922
	}
923
	kubeContainer.Env = envVariables
924

925
	kubeContainer.Ports = ports
926
	// This should not be applicable
927
	// container.EnvFromSource =
928
	if hasSecData {
929
		kubeContainer.SecurityContext = kubeSec
930
	}
931
	kubeContainer.StdinOnce = false
932
	kubeContainer.TTY = c.Terminal()
933

934
	resources := c.LinuxResources()
935
	if resources != nil {
936
		if resources.Memory != nil &&
937
			resources.Memory.Limit != nil {
938
			if kubeContainer.Resources.Limits == nil {
939
				kubeContainer.Resources.Limits = v1.ResourceList{}
940
			}
941

942
			qty := kubeContainer.Resources.Limits.Memory()
943
			qty.Set(*c.config.Spec.Linux.Resources.Memory.Limit)
944
			kubeContainer.Resources.Limits[v1.ResourceMemory] = *qty
945
		}
946

947
		if resources.CPU != nil &&
948
			resources.CPU.Quota != nil &&
949
			resources.CPU.Period != nil {
950
			quota := *resources.CPU.Quota
951
			period := *resources.CPU.Period
952

953
			if quota > 0 && period > 0 {
954
				cpuLimitMilli := int64(1000 * util.PeriodAndQuotaToCores(period, quota))
955

956
				// Kubernetes: precision finer than 1m is not allowed
957
				if cpuLimitMilli >= 1 {
958
					if kubeContainer.Resources.Limits == nil {
959
						kubeContainer.Resources.Limits = v1.ResourceList{}
960
					}
961

962
					qty := kubeContainer.Resources.Limits.Cpu()
963
					qty.SetMilli(cpuLimitMilli)
964
					kubeContainer.Resources.Limits[v1.ResourceCPU] = *qty
965
				}
966
			}
967
		}
968
	}
969

970
	// Obtain the DNS entries from the container
971
	dns := v1.PodDNSConfig{}
972

973
	// DNS servers
974
	if servers := c.config.DNSServer; len(servers) > 0 {
975
		dnsServers := make([]string, 0)
976
		for _, server := range servers {
977
			dnsServers = append(dnsServers, server.String())
978
		}
979
		dns.Nameservers = dnsServers
980
	}
981

982
	// DNS search domains
983
	if searches := c.config.DNSSearch; len(searches) > 0 {
984
		dns.Searches = searches
985
	}
986

987
	// DNS options
988
	if options := c.config.DNSOption; len(options) > 0 {
989
		dnsOptions := make([]v1.PodDNSConfigOption, 0)
990
		for _, option := range options {
991
			// the option can be "k:v" or just "k", no delimiter is required
992
			name, value, _ := strings.Cut(option, ":")
993
			dnsOpt := v1.PodDNSConfigOption{
994
				Name:  name,
995
				Value: &value,
996
			}
997
			dnsOptions = append(dnsOptions, dnsOpt)
998
		}
999
		dns.Options = dnsOptions
1000
	}
1001
	return kubeContainer, kubeVolumes, &dns, annotations, nil
1002
}
1003

1004
// portMappingToContainerPort takes a portmapping and converts
1005
// it to a v1.ContainerPort format for kube output
1006
func portMappingToContainerPort(portMappings []types.PortMapping, getService bool) ([]v1.ContainerPort, error) {
1007
	containerPorts := make([]v1.ContainerPort, 0, len(portMappings))
1008
	for _, p := range portMappings {
1009
		protocols := strings.Split(p.Protocol, ",")
1010
		for _, proto := range protocols {
1011
			var protocol v1.Protocol
1012
			switch strings.ToUpper(proto) {
1013
			case "TCP":
1014
				// do nothing as it is the default protocol in k8s, there is no need to explicitly
1015
				// add it to the generated yaml
1016
			case "UDP":
1017
				protocol = v1.ProtocolUDP
1018
			case "SCTP":
1019
				protocol = v1.ProtocolSCTP
1020
			default:
1021
				return containerPorts, fmt.Errorf("unknown network protocol %s", p.Protocol)
1022
			}
1023
			for i := uint16(0); i < p.Range; i++ {
1024
				cp := v1.ContainerPort{
1025
					// Name will not be supported
1026
					HostIP:        p.HostIP,
1027
					ContainerPort: int32(p.ContainerPort + i),
1028
					Protocol:      protocol,
1029
				}
1030
				if !getService {
1031
					cp.HostPort = int32(p.HostPort + i)
1032
				}
1033
				containerPorts = append(containerPorts, cp)
1034
			}
1035
		}
1036
	}
1037
	return containerPorts, nil
1038
}
1039

1040
// libpodEnvVarsToKubeEnvVars converts a key=value string slice to []v1.EnvVar
1041
func libpodEnvVarsToKubeEnvVars(envs []string, imageEnvs []string) ([]v1.EnvVar, error) {
1042
	defaultEnv := env.DefaultEnvVariables()
1043
	envVars := make([]v1.EnvVar, 0, len(envs))
1044
	imageMap := make(map[string]string, len(imageEnvs))
1045
	for _, ie := range imageEnvs {
1046
		key, val, _ := strings.Cut(ie, "=")
1047
		imageMap[key] = val
1048
	}
1049
	for _, e := range envs {
1050
		envName, envValue, hasValue := strings.Cut(e, "=")
1051
		if !hasValue {
1052
			return envVars, fmt.Errorf("environment variable %s is malformed; should be key=value", e)
1053
		}
1054
		if defaultEnv[envName] == envValue {
1055
			continue
1056
		}
1057
		if imageMap[envName] == envValue {
1058
			continue
1059
		}
1060
		ev := v1.EnvVar{
1061
			Name:  envName,
1062
			Value: envValue,
1063
		}
1064
		envVars = append(envVars, ev)
1065
	}
1066
	return envVars, nil
1067
}
1068

1069
// libpodMountsToKubeVolumeMounts converts the container's mounts to a struct kube understands
1070
func libpodMountsToKubeVolumeMounts(c *Container) ([]v1.VolumeMount, []v1.Volume, map[string]string, error) {
1071
	namedVolumes, mounts := c.SortUserVolumes(c.config.Spec)
1072
	vms := make([]v1.VolumeMount, 0, len(mounts))
1073
	vos := make([]v1.Volume, 0, len(mounts))
1074
	annotations := make(map[string]string)
1075

1076
	var suffix string
1077
	for index, m := range mounts {
1078
		for _, opt := range m.Options {
1079
			if opt == "Z" || opt == "z" {
1080
				annotations[m.Source] = opt
1081
				break
1082
			}
1083
		}
1084
		vm, vo, err := generateKubeVolumeMount(m)
1085
		if err != nil {
1086
			return vms, vos, annotations, err
1087
		}
1088
		// Name will be the same, so use the index as suffix
1089
		suffix = fmt.Sprintf("-%d", index)
1090
		vm.Name += suffix
1091
		vo.Name += suffix
1092
		vms = append(vms, vm)
1093
		vos = append(vos, vo)
1094
	}
1095
	for _, v := range namedVolumes {
1096
		vm, vo := generateKubePersistentVolumeClaim(v)
1097
		vms = append(vms, vm)
1098
		vos = append(vos, vo)
1099
	}
1100
	return vms, vos, annotations, nil
1101
}
1102

1103
// generateKubePersistentVolumeClaim converts a ContainerNamedVolume to a Kubernetes PersistentVolumeClaim
1104
func generateKubePersistentVolumeClaim(v *ContainerNamedVolume) (v1.VolumeMount, v1.Volume) {
1105
	ro := slices.Contains(v.Options, "ro")
1106

1107
	// To avoid naming conflicts with any host path mounts, add a unique suffix to the volume's name.
1108
	name := v.Name + "-pvc"
1109

1110
	vm := v1.VolumeMount{}
1111
	vm.Name = name
1112
	vm.MountPath = v.Dest
1113
	vm.ReadOnly = ro
1114

1115
	pvc := v1.PersistentVolumeClaimVolumeSource{ClaimName: v.Name, ReadOnly: ro}
1116
	vs := v1.VolumeSource{}
1117
	vs.PersistentVolumeClaim = &pvc
1118
	vo := v1.Volume{Name: name, VolumeSource: vs}
1119

1120
	return vm, vo
1121
}
1122

1123
// generateKubeVolumeMount takes a user specified mount and returns
1124
// a kubernetes VolumeMount (to be added to the container) and a kubernetes Volume
1125
// (to be added to the pod)
1126
func generateKubeVolumeMount(m specs.Mount) (v1.VolumeMount, v1.Volume, error) {
1127
	vm := v1.VolumeMount{}
1128
	vo := v1.Volume{}
1129

1130
	var (
1131
		name string
1132
		err  error
1133
	)
1134
	if m.Type == define.TypeTmpfs {
1135
		name = "tmp"
1136
		vo.EmptyDir = &v1.EmptyDirVolumeSource{
1137
			Medium: v1.StorageMediumMemory,
1138
		}
1139
		vo.Name = name
1140
	} else {
1141
		name, err = convertVolumePathToName(m.Source)
1142
		if err != nil {
1143
			return vm, vo, err
1144
		}
1145
		// To avoid naming conflicts with any persistent volume mounts, add a unique suffix to the volume's name.
1146
		name += "-host"
1147
		vo.Name = name
1148
		vo.HostPath = &v1.HostPathVolumeSource{}
1149
		vo.HostPath.Path = m.Source
1150
		isDir, err := isHostPathDirectory(m.Source)
1151
		// neither a directory nor a file lives here, default to creating a directory
1152
		// TODO should this be an error instead?
1153
		var hostPathType v1.HostPathType
1154
		switch {
1155
		case err != nil:
1156
			hostPathType = v1.HostPathDirectoryOrCreate
1157
		case isDir:
1158
			hostPathType = v1.HostPathDirectory
1159
		default:
1160
			hostPathType = v1.HostPathFile
1161
		}
1162
		vo.HostPath.Type = &hostPathType
1163
	}
1164
	vm.Name = name
1165
	vm.MountPath = m.Destination
1166
	if slices.Contains(m.Options, "ro") {
1167
		vm.ReadOnly = true
1168
	}
1169

1170
	return vm, vo, nil
1171
}
1172

1173
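// isHostPathDirectory reports whether the given host path is a directory.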
func isHostPathDirectory(hostPathSource string) (bool, error) {
1174
	info, err := os.Stat(hostPathSource)
1175
	if err != nil {
1176
		return false, err
1177
	}
1178
	return info.Mode().IsDir(), nil
1179
}
1180

1181
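// convertVolumePathToName derives a volume name from a host source path,
// e.g. /mnt/data becomes mnt-data.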
func convertVolumePathToName(hostSourcePath string) (string, error) {
1182
	if len(hostSourcePath) == 0 {
1183
		return "", errors.New("hostSourcePath must be specified to generate volume name")
1184
	}
1185
	if len(hostSourcePath) == 1 {
1186
		if hostSourcePath != "/" {
1187
			return "", fmt.Errorf("hostSourcePath malformatted: %s", hostSourcePath)
1188
		}
1189
		// add special case name
1190
		return "root", nil
1191
	}
1192
	// First, trim trailing slashes, then replace slashes with dashes.
1193
	// Thus, /mnt/data/ will become mnt-data
1194
	return strings.ReplaceAll(strings.Trim(hostSourcePath, "/"), "/", "-"), nil
1195
}
1196

1197
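// determineCapAddDropFromCapabilities compares the container's capabilities against the default set and
// returns the capabilities that were added and dropped relative to those defaults.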
func determineCapAddDropFromCapabilities(defaultCaps, containerCaps []string) *v1.Capabilities {
1198
	var (
1199
		drop = []v1.Capability{}
1200
		add  = []v1.Capability{}
1201
	)
1202
	dedupDrop := make(map[string]bool)
1203
	dedupAdd := make(map[string]bool)
1204
	// Find caps in the defaultCaps but not in the container's
1205
	// those indicate a dropped cap
1206
	for _, capability := range defaultCaps {
1207
		if !slices.Contains(containerCaps, capability) {
1208
			if _, ok := dedupDrop[capability]; !ok {
1209
				drop = append(drop, v1.Capability(capability))
1210
				dedupDrop[capability] = true
1211
			}
1212
		}
1213
	}
1214
	// Find caps in the container but not in the defaults; those indicate
1215
	// an added cap
1216
	for _, capability := range containerCaps {
1217
		if !slices.Contains(defaultCaps, capability) {
1218
			if _, ok := dedupAdd[capability]; !ok {
1219
				add = append(add, v1.Capability(capability))
1220
				dedupAdd[capability] = true
1221
			}
1222
		}
1223
	}
1224

1225
	if len(add) > 0 || len(drop) > 0 {
1226
		return &v1.Capabilities{
1227
			Add:  add,
1228
			Drop: drop,
1229
		}
1230
	}
1231
	return nil
1232
}
1233

1234
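// capAddDrop flattens all of the container's capability sets and computes the kube add/drop lists
// relative to the runtime's default capabilities.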
func (c *Container) capAddDrop(caps *specs.LinuxCapabilities) *v1.Capabilities {
1235
	// FreeBSD containers don't have caps so don't dereference if it's nil
1236
	if caps == nil {
1237
		return nil
1238
	}
1239

1240
	// Combine all the container's capabilities into a slice
1241
	containerCaps := make([]string, 0, len(caps.Ambient)+len(caps.Bounding)+len(caps.Effective)+len(caps.Inheritable)+len(caps.Permitted))
1242
	containerCaps = append(containerCaps, caps.Ambient...)
1243
	containerCaps = append(containerCaps, caps.Bounding...)
1244
	containerCaps = append(containerCaps, caps.Effective...)
1245
	containerCaps = append(containerCaps, caps.Inheritable...)
1246
	containerCaps = append(containerCaps, caps.Permitted...)
1247

1248
	calculatedCaps := determineCapAddDropFromCapabilities(c.runtime.config.Containers.DefaultCapabilities.Get(), containerCaps)
1249
	return calculatedCaps
1250
}
1251

1252
// generateKubeSecurityContext generates a securityContext based on the existing container
1253
func generateKubeSecurityContext(c *Container) (*v1.SecurityContext, bool, error) {
1254
	privileged := c.Privileged()
1255
	ro := c.IsReadOnly()
1256
	allowPrivEscalation := !c.config.Spec.Process.NoNewPrivileges
1257

1258
	var capabilities *v1.Capabilities
1259
	if !privileged {
1260
		// Running privileged adds all caps.
1261
		capabilities = c.capAddDrop(c.config.Spec.Process.Capabilities)
1262
	}
1263

1264
	scHasData := false
1265
	sc := v1.SecurityContext{
1266
		// RunAsNonRoot is an optional parameter; our first implementations should be root only; however
1267
		// I'm leaving this as a bread-crumb for later
1268
		//RunAsNonRoot:             &nonRoot,
1269
	}
1270
	if capabilities != nil {
1271
		scHasData = true
1272
		sc.Capabilities = capabilities
1273
	}
1274
	var selinuxOpts v1.SELinuxOptions
1275
	selinuxHasData := false
1276
	for _, label := range strings.Split(c.config.Spec.Annotations[define.InspectAnnotationLabel], ",label=") {
1277
		opt, val, hasVal := strings.Cut(label, ":")
1278
		if hasVal {
1279
			switch opt {
1280
			case "filetype":
1281
				selinuxOpts.FileType = val
1282
				selinuxHasData = true
1283
			case "type":
1284
				selinuxOpts.Type = val
1285
				selinuxHasData = true
1286
			case "level":
1287
				selinuxOpts.Level = val
1288
				selinuxHasData = true
1289
			}
1290
		} else if opt == "disable" {
1291
			selinuxOpts.Type = "spc_t"
1292
			selinuxHasData = true
1293
		}
1294
	}
1295
	if selinuxHasData {
1296
		sc.SELinuxOptions = &selinuxOpts
1297
		scHasData = true
1298
	}
1299
	if !allowPrivEscalation {
1300
		scHasData = true
1301
		sc.AllowPrivilegeEscalation = &allowPrivEscalation
1302
	}
1303
	if privileged {
1304
		scHasData = true
1305
		sc.Privileged = &privileged
1306
	}
1307
	if ro {
1308
		scHasData = true
1309
		sc.ReadOnlyRootFilesystem = &ro
1310
	}
1311
	if c.config.Spec.Linux.MaskedPaths == nil {
1312
		scHasData = true
1313
		unmask := v1.UnmaskedProcMount
1314
		sc.ProcMount = &unmask
1315
	}
1316

1317
	if c.User() != "" {
1318
		if !c.batched {
1319
			c.lock.Lock()
1320
			defer c.lock.Unlock()
1321
		}
1322
		if err := c.syncContainer(); err != nil {
1323
			return nil, false, fmt.Errorf("unable to sync container during YAML generation: %w", err)
1324
		}
1325

1326
		mountpoint := c.state.Mountpoint
1327
		if mountpoint == "" {
1328
			var err error
1329
			mountpoint, err = c.mount()
1330
			if err != nil {
1331
				return nil, false, fmt.Errorf("failed to mount %s mountpoint: %w", c.ID(), err)
1332
			}
1333
			defer func() {
1334
				if err := c.unmount(false); err != nil {
1335
					logrus.Errorf("Failed to unmount container: %v", err)
1336
				}
1337
			}()
1338
		}
1339
		logrus.Debugf("Looking in container for user: %s", c.User())
1340

1341
		execUser, err := lookup.GetUserGroupInfo(mountpoint, c.User(), nil)
1342
		if err != nil {
1343
			return nil, false, err
1344
		}
1345
		uid := int64(execUser.Uid)
1346
		gid := int64(execUser.Gid)
1347
		scHasData = true
1348
		sc.RunAsUser = &uid
1349
		sc.RunAsGroup = &gid
1350
	}
1351

1352
	return &sc, scHasData, nil
1353
}
1354

1355
// generateKubeVolumeDeviceFromLinuxDevice takes a list of devices and makes a VolumeDevice struct for kube
1356
func generateKubeVolumeDeviceFromLinuxDevice(devices []specs.LinuxDevice) []v1.VolumeDevice {
1357
	volumeDevices := make([]v1.VolumeDevice, 0, len(devices))
1358
	for _, d := range devices {
1359
		vd := v1.VolumeDevice{
1360
			// TBD How are we going to sync up these names
1361
			//Name:
1362
			DevicePath: d.Path,
1363
		}
1364
		volumeDevices = append(volumeDevices, vd)
1365
	}
1366
	return volumeDevices
1367
}
1368

1369
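// removeUnderscores strips all underscores from the given name.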
func removeUnderscores(s string) string {
1370
	return strings.ReplaceAll(s, "_", "")
1371
}
1372

1373
// getAutoUpdateAnnotations searches for auto-update container labels
1374
// and returns them as kube annotations
1375
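// For example, the label "io.containers.autoupdate" with value "registry" on container "foo"
// becomes the annotation "io.containers.autoupdate/foo": "registry".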
func getAutoUpdateAnnotations(ctrName string, ctrLabels map[string]string) map[string]string {
1376
	autoUpdateLabel := "io.containers.autoupdate"
1377
	annotations := make(map[string]string)
1378

1379
	ctrName = removeUnderscores(ctrName)
1380
	for k, v := range ctrLabels {
1381
		if strings.Contains(k, autoUpdateLabel) {
1382
			// since labels can vary between containers within a pod, they will be
1383
			// identified with the container name when converted into kube annotations
1384
			kc := fmt.Sprintf("%s/%s", k, ctrName)
1385
			annotations[kc] = v
1386
		}
1387
	}
1388

1389
	return annotations
1390
}
1391
