1
//go:build !remote && (linux || freebsd)
11
"github.com/containers/common/libnetwork/etchosts"
12
"github.com/containers/common/libnetwork/types"
13
"github.com/containers/common/pkg/config"
14
"github.com/containers/common/pkg/machine"
15
"github.com/containers/podman/v5/libpod/define"
16
"github.com/containers/podman/v5/libpod/events"
17
"github.com/containers/podman/v5/pkg/namespaces"
18
"github.com/containers/podman/v5/pkg/rootless"
19
"github.com/containers/storage/pkg/lockfile"
20
"github.com/sirupsen/logrus"
21
"golang.org/x/exp/slices"
24
// convertPortMappings will remove the HostIP part from the ports when running inside podman machine.
25
// This is needed because a HostIP of 127.0.0.1 would not allow the gvproxy forwarder to reach the open ports.
26
// For machine the HostIP must only be used by gvproxy and never in the VM.
27
func (c *Container) convertPortMappings() []types.PortMapping {
28
// Nothing to convert outside a gvproxy based machine or without port mappings:
// the configured slice is returned as is (not a copy).
if !machine.IsGvProxyBased() || len(c.config.PortMappings) == 0 {
29
return c.config.PortMappings
31
// if we run in a machine VM we have to ignore the host IP part
32
newPorts := make([]types.PortMapping, 0, len(c.config.PortMappings))
33
// port is a per-iteration copy of the element, so the stored config entries
// are not modified through it.
for _, port := range c.config.PortMappings {
35
newPorts = append(newPorts, port)
40
// getNetworkOptions builds the types.NetworkOptions for this container: the
// container ID, the name returned by getNetworkPodName, the DNS servers, the
// port mappings returned by convertPortMappings and the per network options.
func (c *Container) getNetworkOptions(networkOpts map[string]types.PerNetworkOptions) types.NetworkOptions {
41
// DNS servers from the runtime config come first, followed by the servers
// configured on the container itself.
nameservers := make([]string, 0, len(c.runtime.config.Containers.DNSServers.Get())+len(c.config.DNSServer))
42
nameservers = append(nameservers, c.runtime.config.Containers.DNSServers.Get()...)
43
for _, ip := range c.config.DNSServer {
44
nameservers = append(nameservers, ip.String())
46
opts := types.NetworkOptions{
47
ContainerID: c.config.ID,
48
ContainerName: getNetworkPodName(c),
49
DNSServers: nameservers,
51
opts.PortMappings = c.convertPortMappings()
53
// If the container requested special network options use this instead of the config.
54
// This is the case for container restore or network reload.
55
if c.perNetworkOpts != nil {
56
opts.Networks = c.perNetworkOpts
58
// Otherwise the networks passed in by the caller are used.
opts.Networks = networkOpts
63
// setUpNetwork will set up the networks, on error it will also tear down the cni
64
// networks. If rootless it will join/create the rootless network namespace.
65
func (r *Runtime) setUpNetwork(ns string, opts types.NetworkOptions) (map[string]types.StatusBlock, error) {
66
// Wraps opts in types.SetupOptions and hands it, together with the namespace ns,
// to the runtime's network backend; its result and error are returned unchanged.
return r.network.Setup(ns, types.SetupOptions{NetworkOptions: opts})
69
// getNetworkPodName returns the pod name (hostname) used by the dns backend.
70
// If we are in the pod network namespace use the pod name otherwise the container name
71
func getNetworkPodName(c *Container) string {
72
// The pod is only looked up when the container uses the pod network mode
// or is the infra container.
if c.config.NetMode.IsPod() || c.IsInfra() {
73
pod, err := c.runtime.state.Pod(c.PodID())
81
// teardownNetworkBackend tears down a container's network configuration and joins the
82
// rootless net ns as rootless user
83
func (r *Runtime) teardownNetworkBackend(ns string, opts types.NetworkOptions) error {
84
// Wraps opts in types.TeardownOptions and hands it, together with the namespace ns,
// to the runtime's network backend; its error is returned unchanged.
return r.network.Teardown(ns, types.TeardownOptions{NetworkOptions: opts})
87
// Tear down a container's network backend configuration, but do not tear down the
89
func (r *Runtime) teardownNetwork(ctr *Container) error {
90
if ctr.state.NetNS == "" {
91
// The container has no network namespace, we're set
95
logrus.Debugf("Tearing down network namespace at %s for container %s", ctr.state.NetNS, ctr.ID())
97
networks, err := ctr.networks()
102
// The backend teardown is only requested when the container uses neither
// slirp4netns nor pasta and is attached to at least one network.
if !ctr.config.NetMode.IsSlirp4netns() &&
103
!ctr.config.NetMode.IsPasta() && len(networks) > 0 {
104
netOpts := ctr.getNetworkOptions(networks)
105
return r.teardownNetworkBackend(ctr.state.NetNS, netOpts)
110
// isBridgeNetMode checks if the given network mode is bridge.
111
// It returns nil when it is set to bridge and an error otherwise.
112
func isBridgeNetMode(n namespaces.NetworkMode) error {
114
// The error wraps define.ErrNetworkModeInvalid via %w, so callers can match it with errors.Is.
return fmt.Errorf("%q is not supported: %w", n, define.ErrNetworkModeInvalid)
119
// reloadContainerNetwork only works with containers with a configured network.
120
// It will tear down, and then reconfigure, the network of the container.
121
// This is mainly used when a reload of firewall rules wipes out existing
122
// firewall configuration.
123
// Efforts will be made to preserve MAC and IP addresses.
124
// Only works on containers with bridge networking at present, though in the future we could
125
// extend this to stop + restart slirp4netns
126
func (r *Runtime) reloadContainerNetwork(ctr *Container) (map[string]types.StatusBlock, error) {
127
// Without a network namespace there is nothing to reload; the error wraps define.ErrCtrStateInvalid.
if ctr.state.NetNS == "" {
128
return nil, fmt.Errorf("container %s network is not configured, refusing to reload: %w", ctr.ID(), define.ErrCtrStateInvalid)
130
if err := isBridgeNetMode(ctr.config.NetMode); err != nil {
133
logrus.Infof("Going to reload container %s network", ctr.ID())
135
err := r.teardownNetwork(ctr)
137
// teardownNetwork will error if the iptables rules do not exist and this is the case after
138
// a firewall reload. The purpose of network reload is to recreate the rules if they do
139
// not exist so we should not log this specific error as error. This would confuse users otherwise.
140
// iptables-legacy and iptables-nft will create different errors. Make sure to match both.
141
// NOTE(review): regexp.MatchString compiles the pattern on every call; a package level
// regexp.MustCompile would avoid the repeated compilation.
b, rerr := regexp.MatchString("Couldn't load target `CNI-[a-f0-9]{24}':No such file or directory|Chain 'CNI-[a-f0-9]{24}' does not exist", err.Error())
142
if rerr == nil && !b {
149
networkOpts, err := ctr.networks()
154
// Set the same network settings as before..
155
netStatus := ctr.getNetworkStatus()
156
for network, perNetOpts := range networkOpts {
157
for name, netInt := range netStatus[network].Interfaces {
158
perNetOpts.InterfaceName = name
159
perNetOpts.StaticMAC = netInt.MacAddress
160
for _, netAddress := range netInt.Subnets {
161
perNetOpts.StaticIPs = append(perNetOpts.StaticIPs, netAddress.IPNet.IP)
163
// Normally interfaces have a length of 1, only for some special cni configs we could get more.
164
// For now just use the first interface to get the ips this should be good enough for most cases.
167
// perNetOpts is a copy of the map value, so it has to be written back to take effect.
networkOpts[network] = perNetOpts
169
// Stored on the container so that getNetworkOptions prefers these options over the ones passed to it.
ctr.perNetworkOpts = networkOpts
171
return r.configureNetNS(ctr, ctr.state.NetNS)
174
// Produce an InspectNetworkSettings containing information on the container
176
func (c *Container) getContainerNetworkInfo() (*define.InspectNetworkSettings, error) {
177
// When the container uses the network namespace of another container, the
// information is retrieved from that container instead (see the return below).
if c.config.NetNsCtr != "" {
178
netNsCtr, err := c.runtime.GetContainer(c.config.NetNsCtr)
182
// see https://github.com/containers/podman/issues/10090
183
// the container has to be locked for syncContainer()
185
defer netNsCtr.lock.Unlock()
186
// Have to sync to ensure that state is populated
187
if err := netNsCtr.syncContainer(); err != nil {
190
logrus.Debugf("Container %s shares network namespace, retrieving network info of container %s", c.ID(), c.config.NetNsCtr)
192
return netNsCtr.getContainerNetworkInfo()
195
settings := new(define.InspectNetworkSettings)
196
settings.Ports = makeInspectPorts(c.config.PortMappings, c.config.ExposedPorts)
198
networks, err := c.networks()
203
// setDefaultNetworks registers a single entry keyed by c.NetworkMode() with
// only the NetworkID set.
setDefaultNetworks := func() {
204
settings.Networks = make(map[string]*define.InspectAdditionalNetwork, 1)
205
name := c.NetworkMode()
206
addedNet := new(define.InspectAdditionalNetwork)
207
addedNet.NetworkID = name
208
settings.Networks[name] = addedNet
211
if c.state.NetNS == "" {
212
if networkNSPath, set := c.joinedNetworkNSPath(); networkNSPath != "" {
213
if result, err := c.inspectJoinedNetworkNS(networkNSPath); err == nil {
214
// fallback to dummy configuration
215
settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result)
217
// do not propagate error inspecting a joined network ns
218
logrus.Errorf("Inspecting network namespace: %s of container %s: %v", networkNSPath, c.ID(), err)
222
// network none case, if running allow user to join netns via sandbox key
223
// https://github.com/containers/podman/issues/16716
225
settings.SandboxKey = fmt.Sprintf("/proc/%d/ns/net", c.state.PID)
228
// We can't do more if the network is down.
229
// We still want to make dummy configurations for each network
230
// the container joined.
231
if len(networks) > 0 {
232
settings.Networks = make(map[string]*define.InspectAdditionalNetwork, len(networks))
233
for net, opts := range networks {
234
cniNet := new(define.InspectAdditionalNetwork)
235
cniNet.NetworkID = net
236
cniNet.Aliases = opts.Aliases
237
settings.Networks[net] = cniNet
246
// Set network namespace path
247
settings.SandboxKey = c.state.NetNS
249
netStatus := c.getNetworkStatus()
250
// If this is empty, we're probably slirp4netns
251
if len(netStatus) == 0 {
255
// If we have networks - handle that here
256
if len(networks) > 0 {
257
// The number of configured networks must match the number of status entries;
// a mismatch is reported as define.ErrInternal.
if len(networks) != len(netStatus) {
258
return nil, fmt.Errorf("network inspection mismatch: asked to join %d network(s) %v, but have information on %d network(s): %w", len(networks), networks, len(netStatus), define.ErrInternal)
261
settings.Networks = make(map[string]*define.InspectAdditionalNetwork, len(networks))
263
for name, opts := range networks {
264
result := netStatus[name]
265
addedNet := new(define.InspectAdditionalNetwork)
266
addedNet.NetworkID = name
267
addedNet.Aliases = opts.Aliases
268
addedNet.InspectBasicNetworkConfig = resultToBasicNetworkConfig(result)
270
settings.Networks[name] = addedNet
273
// if not only the default network is connected we can return here
274
// otherwise we have to populate the InspectBasicNetworkConfig settings
275
_, isDefaultNet := networks[c.runtime.config.Network.DefaultNetwork]
276
if !(len(networks) == 1 && isDefaultNet) {
283
// If not joining networks, we should have at most 1 result
284
if len(netStatus) > 1 {
285
return nil, fmt.Errorf("should have at most 1 network status result if not joining networks, instead got %d: %w", len(netStatus), define.ErrInternal)
288
if len(netStatus) == 1 {
289
// netStatus holds exactly one entry here; the loop only extracts it without knowing its key.
for _, status := range netStatus {
290
settings.InspectBasicNetworkConfig = resultToBasicNetworkConfig(status)
296
// resultToBasicNetworkConfig produces an InspectBasicNetworkConfig from a CNI
298
func resultToBasicNetworkConfig(result types.StatusBlock) define.InspectBasicNetworkConfig {
299
config := define.InspectBasicNetworkConfig{}
300
// Collect the interface names first: map iteration order is random in Go,
// so they are sorted below before being processed.
interfaceNames := make([]string, 0, len(result.Interfaces))
301
for interfaceName := range result.Interfaces {
302
interfaceNames = append(interfaceNames, interfaceName)
304
// ensure consistent inspect results by sorting
305
sort.Strings(interfaceNames)
306
for _, interfaceName := range interfaceNames {
307
netInt := result.Interfaces[interfaceName]
308
for _, netAddress := range netInt.Subnets {
309
// Only the prefix length is needed; the total bit count returned by Size is dropped.
size, _ := netAddress.IPNet.Mask.Size()
310
// To4 returns nil for addresses that are not IPv4.
if netAddress.IPNet.IP.To4() != nil {
312
// The first IPv4 address seen fills the primary address fields.
if config.IPAddress == "" {
313
config.IPAddress = netAddress.IPNet.IP.String()
314
config.IPPrefixLen = size
315
config.Gateway = netAddress.Gateway.String()
317
config.SecondaryIPAddresses = append(config.SecondaryIPAddresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size})
321
// The first IPv6 address seen fills the primary IPv6 fields.
if config.GlobalIPv6Address == "" {
322
config.GlobalIPv6Address = netAddress.IPNet.IP.String()
323
config.GlobalIPv6PrefixLen = size
324
config.IPv6Gateway = netAddress.Gateway.String()
326
config.SecondaryIPv6Addresses = append(config.SecondaryIPv6Addresses, define.Address{Addr: netAddress.IPNet.IP.String(), PrefixLength: size})
330
// The first MAC address seen becomes the primary one.
if config.MacAddress == "" {
331
config.MacAddress = netInt.MacAddress.String()
333
config.AdditionalMacAddresses = append(config.AdditionalMacAddresses, netInt.MacAddress.String())
339
// NetworkDisconnect removes a container from the network
// NOTE(review): force is not referenced in the visible part of this function - confirm whether it is still needed.
340
func (c *Container) NetworkDisconnect(nameOrID, netName string, force bool) error {
341
// only the bridge mode supports cni networks
342
if err := isBridgeNetMode(c.config.NetMode); err != nil {
347
defer c.lock.Unlock()
349
networks, err := c.networks()
354
// check if network exists and if the input is an ID we get the name
355
// CNI and netavark and the libpod db only uses names so it is important that we only use the name
356
netName, _, err = c.runtime.normalizeNetworkName(netName)
361
_, nameExists := networks[netName]
362
// The "not connected" error is only returned when the container has at least one network.
if !nameExists && len(networks) > 0 {
363
return fmt.Errorf("container %s is not connected to network %s", nameOrID, netName)
366
if err := c.syncContainer(); err != nil {
369
// get network status before we disconnect
370
networkStatus := c.getNetworkStatus()
372
if err := c.runtime.state.NetworkDisconnect(c, netName); err != nil {
376
c.newNetworkEvent(events.NetworkDisconnect, netName)
377
if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
381
if c.state.NetNS == "" {
382
return fmt.Errorf("unable to disconnect %s from %s: %w", nameOrID, netName, define.ErrNoNetwork)
385
opts := types.NetworkOptions{
386
ContainerID: c.config.ID,
387
ContainerName: getNetworkPodName(c),
389
opts.PortMappings = c.convertPortMappings()
390
// Only the network that is being disconnected is passed to the backend teardown.
opts.Networks = map[string]types.PerNetworkOptions{
391
netName: networks[netName],
394
if err := c.runtime.teardownNetworkBackend(c.state.NetNS, opts); err != nil {
398
// update network status if container is running
399
// oldStatus is kept because it is still needed below to clean up resolv.conf and /etc/hosts.
oldStatus, statusExist := networkStatus[netName]
400
delete(networkStatus, netName)
401
c.state.NetworkStatus = networkStatus
407
// Reload ports when there are still connected networks, maybe we removed the network interface with the child ip.
408
// Reloading without connected networks does not make sense, so we can skip this step.
409
if rootless.IsRootless() && len(networkStatus) > 0 {
410
if err := c.reloadRootlessRLKPortMapping(); err != nil {
415
// Update resolv.conf if required
417
stringIPs := make([]string, 0, len(oldStatus.DNSServerIPs))
418
for _, ip := range oldStatus.DNSServerIPs {
419
stringIPs = append(stringIPs, ip.String())
421
if len(stringIPs) > 0 {
422
logrus.Debugf("Removing DNS Servers %v from resolv.conf", stringIPs)
423
if err := c.removeNameserver(stringIPs); err != nil {
428
// update /etc/hosts file
429
if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok {
430
// sync the names with c.getHostsEntries()
431
names := []string{c.Hostname(), c.config.Name}
432
rm := etchosts.GetNetworkHostEntries(map[string]types.StatusBlock{netName: oldStatus}, names...)
434
// make sure to lock this file to prevent concurrent writes when
435
// this is used as a net dependency container
436
lock, err := lockfile.GetLockFile(file)
438
return fmt.Errorf("failed to lock hosts file: %w", err)
440
logrus.Debugf("Remove /etc/hosts entries %v", rm)
442
err = etchosts.Remove(file, rm)
453
// NetworkConnect connects a container to a given network
454
func (c *Container) NetworkConnect(nameOrID, netName string, netOpts types.PerNetworkOptions) error {
455
// only the bridge mode supports networks
456
if err := isBridgeNetMode(c.config.NetMode); err != nil {
461
defer c.lock.Unlock()
463
networks, err := c.networks()
468
// check if network exists and if the input is an ID we get the name
469
// CNI and netavark and the libpod db only uses names so it is important that we only use the name
471
netName, nicName, err = c.runtime.normalizeNetworkName(netName)
476
if err := c.syncContainer(); err != nil {
480
// get network status before we connect
481
networkStatus := c.getNetworkStatus()
483
// The aliases from getExtraNetworkAliases are appended to the ones the caller supplied.
netOpts.Aliases = append(netOpts.Aliases, getExtraNetworkAliases(c)...)
485
// check whether interface is to be named as the network_interface
486
// when name left unspecified
487
if netOpts.InterfaceName == "" {
488
netOpts.InterfaceName = nicName
491
// set default interface name
492
if netOpts.InterfaceName == "" {
493
netOpts.InterfaceName = getFreeInterfaceName(networks)
494
if netOpts.InterfaceName == "" {
495
return errors.New("could not find free network interface name")
499
if err := c.runtime.state.NetworkConnect(c, netName, netOpts); err != nil {
500
// Docker compat: treat requests to attach already attached networks as a no-op, ignoring opts
501
if errors.Is(err, define.ErrNetworkConnected) && !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
507
c.newNetworkEvent(events.NetworkConnect, netName)
508
if !c.ensureState(define.ContainerStateRunning, define.ContainerStateCreated) {
511
if c.state.NetNS == "" {
512
return fmt.Errorf("unable to connect %s to %s: %w", nameOrID, netName, define.ErrNoNetwork)
515
opts := types.NetworkOptions{
516
ContainerID: c.config.ID,
517
ContainerName: getNetworkPodName(c),
519
opts.PortMappings = c.convertPortMappings()
520
opts.Networks = map[string]types.PerNetworkOptions{
524
results, err := c.runtime.setUpNetwork(c.state.NetNS, opts)
528
// Exactly one result is expected from the setup call.
if len(results) != 1 {
529
return errors.New("when adding aliases, results must be of length 1")
532
// we need to get the old host entries before we add the new one to the status
533
// if we do not do it here we will get the wrong existing entries which will throw off the logic
534
// we could also copy the map but this does not seem worth it
535
// sync the hostNames with c.getHostsEntries()
536
hostNames := []string{c.Hostname(), c.config.Name}
537
oldHostEntries := etchosts.GetNetworkHostEntries(networkStatus, hostNames...)
539
// update network status
540
// Writing to a nil map would panic, so allocate one first if needed.
if networkStatus == nil {
541
networkStatus = make(map[string]types.StatusBlock, 1)
543
networkStatus[netName] = results[netName]
544
c.state.NetworkStatus = networkStatus
551
// The first network needs a port reload to set the correct child ip for the rootlessport process.
552
// Adding a second network does not require a port reload because the child ip is still valid.
553
if rootless.IsRootless() && len(networks) == 0 {
554
if err := c.reloadRootlessRLKPortMapping(); err != nil {
559
ipv6 := c.checkForIPv6(networkStatus)
561
// Update resolv.conf if required
562
stringIPs := make([]string, 0, len(results[netName].DNSServerIPs))
563
for _, ip := range results[netName].DNSServerIPs {
564
// To4 returns nil for addresses that are not IPv4; this matches them when ipv6 is false.
if (ip.To4() == nil) && !ipv6 {
567
stringIPs = append(stringIPs, ip.String())
569
if len(stringIPs) > 0 {
570
logrus.Debugf("Adding DNS Servers %v to resolv.conf", stringIPs)
571
if err := c.addNameserver(stringIPs); err != nil {
576
// update /etc/hosts file
577
if file, ok := c.state.BindMounts[config.DefaultHostsFile]; ok {
578
// make sure to lock this file to prevent concurrent writes when
579
// this is used as a net dependency container
580
lock, err := lockfile.GetLockFile(file)
582
return fmt.Errorf("failed to lock hosts file: %w", err)
584
new := etchosts.GetNetworkHostEntries(results, hostNames...)
585
logrus.Debugf("Add /etc/hosts entries %v", new)
586
// use special AddIfExists API to make sure we only add new entries if an old one exists
587
// see the AddIfExists() comment for more information
589
err = etchosts.AddIfExists(file, oldHostEntries, new)
599
// getFreeInterfaceName gets a free interface name for a new network
600
// return an empty string if no free name was found
601
func getFreeInterfaceName(networks map[string]types.PerNetworkOptions) string {
602
// Collect the interface names that are already used by the given networks.
ifNames := make([]string, 0, len(networks))
603
for _, opts := range networks {
604
ifNames = append(ifNames, opts.InterfaceName)
606
// Candidates eth0, eth1, ... up to eth99999 are tried in ascending order.
for i := 0; i < 100000; i++ {
607
ifName := fmt.Sprintf("eth%d", i)
608
if !slices.Contains(ifNames, ifName) {
615
// getExtraNetworkAliases collects additional network aliases for the container:
// the first 12 characters of its ID and, if set, the hostname from its spec.
func getExtraNetworkAliases(c *Container) []string {
616
// always add the short id as alias for docker compat
617
// NOTE(review): the [:12] slice assumes the ID has at least 12 characters; a shorter ID would panic here.
alias := []string{c.config.ID[:12]}
618
// if an explicit hostname was set add it as well
619
if c.config.Spec.Hostname != "" {
620
alias = append(alias, c.config.Spec.Hostname)
625
// DisconnectContainerFromNetwork removes a container from its network
626
func (r *Runtime) DisconnectContainerFromNetwork(nameOrID, netName string, force bool) error {
627
// Resolve the container first, then delegate to its NetworkDisconnect method.
ctr, err := r.LookupContainer(nameOrID)
631
return ctr.NetworkDisconnect(nameOrID, netName, force)
634
// ConnectContainerToNetwork connects a container to a network
635
func (r *Runtime) ConnectContainerToNetwork(nameOrID, netName string, netOpts types.PerNetworkOptions) error {
636
// Resolve the container first, then delegate to its NetworkConnect method.
ctr, err := r.LookupContainer(nameOrID)
640
return ctr.NetworkConnect(nameOrID, netName, netOpts)
643
// normalizeNetworkName takes a network name, a partial or a full network ID and
644
// returns: 1) the network name and 2) the network_interface name for macvlan
645
// and ipvlan drivers if the naming pattern is "device" defined in the
646
// containers.conf file. Else, "".
647
// If the network is not found an error is returned.
648
func (r *Runtime) normalizeNetworkName(nameOrID string) (string, string, error) {
649
net, err := r.network.NetworkInspect(nameOrID)
655
namingPattern := r.config.Containers.InterfaceName
656
// The interface name is only taken from the network for macvlan and ipvlan
// drivers, and only when the configured naming pattern is "device".
if namingPattern == "device" && (net.Driver == types.MacVLANNetworkDriver || net.Driver == types.IPVLANNetworkDriver) {
657
netIface = net.NetworkInterface
660
return net.Name, netIface, nil
663
// ocicniPortsToNetTypesPorts convert the old port format to the new one
664
// while deduplicating ports into ranges
665
func ocicniPortsToNetTypesPorts(ports []types.OCICNIPortMapping) []types.PortMapping {
670
newPorts := make([]types.PortMapping, 0, len(ports))
672
// first sort the ports
673
// Note that sort.Slice reorders the caller's slice in place.
sort.Slice(ports, func(i, j int) bool {
674
return compareOCICNIPorts(ports[i], ports[j])
677
// we already check if the slice is empty so we can use the first element
678
// The old format uses int32 ports (see the int32 arithmetic below); the uint16
// conversions assume the values are within the valid port range.
currentPort := types.PortMapping{
679
HostIP: ports[0].HostIP,
680
HostPort: uint16(ports[0].HostPort),
681
ContainerPort: uint16(ports[0].ContainerPort),
682
Protocol: ports[0].Protocol,
686
for i := 1; i < len(ports); i++ {
687
// A port continues the current range when host IP and protocol match and both
// the host and the container port are exactly currentPort.Range past the range start.
if ports[i].HostIP == currentPort.HostIP &&
688
ports[i].Protocol == currentPort.Protocol &&
689
ports[i].HostPort-int32(currentPort.Range) == int32(currentPort.HostPort) &&
690
ports[i].ContainerPort-int32(currentPort.Range) == int32(currentPort.ContainerPort) {
693
// Store the current mapping and start a new one at ports[i].
newPorts = append(newPorts, currentPort)
694
currentPort = types.PortMapping{
695
HostIP: ports[i].HostIP,
696
HostPort: uint16(ports[i].HostPort),
697
ContainerPort: uint16(ports[i].ContainerPort),
698
Protocol: ports[i].Protocol,
703
// The mapping that is still being built after the loop has to be stored as well.
newPorts = append(newPorts, currentPort)
707
// compareOCICNIPorts will sort the ocicni ports by
712
func compareOCICNIPorts(i, j types.OCICNIPortMapping) bool {
713
// Comparison order as implemented below: host IP, then protocol, then host
// port, then container port. Reports whether i sorts before j.
if i.HostIP != j.HostIP {
714
return i.HostIP < j.HostIP
717
if i.Protocol != j.Protocol {
718
return i.Protocol < j.Protocol
721
if i.HostPort != j.HostPort {
722
return i.HostPort < j.HostPort
725
return i.ContainerPort < j.ContainerPort