inspektor-gadget
393 строки · 10.2 Кб
1// Copyright 2022-2023 The Inspektor Gadget authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Package networktracer installs the dispatcher ebpf program in each network
16// namespace of interest. The dispatcher program runs a tail call to the actual
17// gadget program.
18//
19// This is done both for builtin gadgets and containerized gadgets. In the case
20// of containerized gadgets, the dispatcher program is installed before
21// knowing the actual gadget program. Once it knows the actual gadget program,
22// the tail call map is updated.
23//
24// In the case of builtin gadgets, the Run() method can be called to fetch and
25// process events from ebpf. The containerized gadgets won't call Run() because
26// run/tracer.go fetches and processes the events themselves. Instead, it will
27// just call AttachProg().
28//
29// The actual gadget program is instantiated only once for performance reason.
30// The network namespace is passed to the actual gadget program via the
31// skb->cb[0] variable.
32//
33// https://github.com/inspektor-gadget/inspektor-gadget/blob/main/docs/devel/network-gadget-dispatcher.png
34package networktracer35
36import (37"errors"38"fmt"39"os"40"strings"41"sync"42"syscall"43"unsafe"44
45"github.com/cilium/ebpf"46"github.com/cilium/ebpf/perf"47"golang.org/x/sys/unix"48
49containercollection "github.com/inspektor-gadget/inspektor-gadget/pkg/container-collection"50containerutils "github.com/inspektor-gadget/inspektor-gadget/pkg/container-utils"51"github.com/inspektor-gadget/inspektor-gadget/pkg/gadgets"52"github.com/inspektor-gadget/inspektor-gadget/pkg/rawsock"53"github.com/inspektor-gadget/inspektor-gadget/pkg/socketenricher"54"github.com/inspektor-gadget/inspektor-gadget/pkg/types"55)
56
57//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -target bpfel -cc clang -cflags ${CFLAGS} dispatcher ./bpf/dispatcher.bpf.c -- -I./bpf/ -I../socketenricher/bpf
58
59type attachment struct {60dispatcherObjs dispatcherObjects
61
62sockFd int63
64// users keeps track of the users' pid that have called Attach(). This can65// happen for two reasons:66// 1. several containers in a pod (sharing the netns)67// 2. pods with networkHost=true68// In both cases, we want to attach the BPF program only once.69users map[uint32]struct{}70}
71
72type Tracer[Event any] struct {73socketEnricherMap *ebpf.Map74dispatcherMap *ebpf.Map75collection *ebpf.Collection76prog *ebpf.Program77perfRd *perf.Reader78
79// key: network namespace inode number80// value: Tracelet81attachments map[uint64]*attachment82
83eventHandler func(ev *Event)84
85// mu protects attachments from concurrent access86// AttachContainer and DetachContainer can be called in parallel87mu sync.Mutex88}
89
90func (t *Tracer[Event]) newAttachment(91pid uint32,92netns uint64,93) (_ *attachment, err error) {94a := &attachment{95sockFd: -1,96users: map[uint32]struct{}{pid: {}},97}98defer func() {99if err != nil {100if a.sockFd != -1 {101unix.Close(a.sockFd)102}103a.dispatcherObjs.Close()104}105}()106
107dispatcherSpec, err := loadDispatcher()108if err != nil {109return nil, err110}111
112u32netns := uint32(netns)113consts := map[string]interface{}{114"current_netns": u32netns,115}116if err := dispatcherSpec.RewriteConstants(consts); err != nil {117return nil, fmt.Errorf("RewriteConstants while attaching to pid %d: %w", pid, err)118}119opts := ebpf.CollectionOptions{120MapReplacements: map[string]*ebpf.Map{121"tail_call": t.dispatcherMap,122},123}124if err = dispatcherSpec.LoadAndAssign(&a.dispatcherObjs, &opts); err != nil {125return nil, fmt.Errorf("loading ebpf program: %w", err)126}127
128a.sockFd, err = rawsock.OpenRawSock(pid)129if err != nil {130return nil, fmt.Errorf("opening raw socket: %w", err)131}132
133if err := syscall.SetsockoptInt(a.sockFd, syscall.SOL_SOCKET, unix.SO_ATTACH_BPF, a.dispatcherObjs.IgNetDisp.FD()); err != nil {134return nil, fmt.Errorf("attaching BPF program: %w", err)135}136return a, nil137}
138
139func NewTracer[Event any]() (_ *Tracer[Event], err error) {140t := &Tracer[Event]{141attachments: make(map[uint64]*attachment),142}143
144// Keep in sync with tail_call map in bpf/dispatcher.bpf.c145dispatcherMapSpec := ebpf.MapSpec{146Name: "tail_call",147Type: ebpf.ProgramArray,148KeySize: 4,149ValueSize: 4,150MaxEntries: 1,151}152t.dispatcherMap, err = ebpf.NewMap(&dispatcherMapSpec)153if err != nil {154return nil, fmt.Errorf("creating tail_call map: %w", err)155}156return t, nil157}
158
159func (t *Tracer[Event]) SetSocketEnricherMap(m *ebpf.Map) {160t.socketEnricherMap = m161}
162
163func (t *Tracer[Event]) Run(164spec *ebpf.CollectionSpec,165baseEvent func(ev types.Event) *Event,166processEvent func(rawSample []byte, netns uint64) (*Event, error),167) (err error) {168gadgets.FixBpfKtimeGetBootNs(spec.Programs)169
170defer func() {171if err != nil {172if t.perfRd != nil {173t.perfRd.Close()174}175if t.collection != nil {176t.collection.Close()177}178}179}()180
181var opts ebpf.CollectionOptions182
183// Automatically find the socket program184bpfProgName := ""185for progName, p := range spec.Programs {186if p.Type == ebpf.SocketFilter && strings.HasPrefix(p.SectionName, "socket") {187if bpfProgName != "" {188return fmt.Errorf("multiple socket programs found: %s, %s", bpfProgName, progName)189}190bpfProgName = progName191}192}193if bpfProgName == "" {194return fmt.Errorf("no socket program found")195}196
197// Automatically find the perf map198bpfPerfMapName := ""199for mapName, m := range spec.Maps {200if m.Type == ebpf.PerfEventArray {201if bpfPerfMapName != "" {202return fmt.Errorf("multiple perf maps found: %s, %s", bpfPerfMapName, mapName)203}204bpfPerfMapName = mapName205}206}207if bpfPerfMapName == "" {208return fmt.Errorf("no perf map found")209}210
211usesSocketEnricher := false212for _, m := range spec.Maps {213if m.Name == socketenricher.SocketsMapName {214usesSocketEnricher = true215break216}217}218
219if usesSocketEnricher && t.socketEnricherMap != nil {220mapReplacements := map[string]*ebpf.Map{}221mapReplacements[socketenricher.SocketsMapName] = t.socketEnricherMap222opts.MapReplacements = mapReplacements223}224
225t.collection, err = ebpf.NewCollectionWithOptions(spec, opts)226if err != nil {227return fmt.Errorf("creating BPF collection: %w", err)228}229
230t.perfRd, err = perf.NewReader(t.collection.Maps[bpfPerfMapName], gadgets.PerfBufferPages*os.Getpagesize())231if err != nil {232return fmt.Errorf("getting a perf reader: %w", err)233}234
235var ok bool236t.prog, ok = t.collection.Programs[bpfProgName]237if !ok {238return fmt.Errorf("BPF program %q not found", bpfProgName)239}240
241err = t.AttachProg(t.prog)242if err != nil {243return fmt.Errorf("updating tail call map: %w", err)244}245
246go t.listen(baseEvent, processEvent)247
248return nil249}
250
251// AttachProg is used directly by containerized gadgets
252func (t *Tracer[Event]) AttachProg(prog *ebpf.Program) error {253return t.dispatcherMap.Update(uint32(0), uint32(prog.FD()), ebpf.UpdateAny)254}
255
256func (t *Tracer[Event]) Attach(pid uint32) error {257t.mu.Lock()258defer t.mu.Unlock()259
260netns, err := containerutils.GetNetNs(int(pid))261if err != nil {262return fmt.Errorf("getting network namespace of pid %d: %w", pid, err)263}264if a, ok := t.attachments[netns]; ok {265a.users[pid] = struct{}{}266return nil267}268
269a, err := t.newAttachment(pid, netns)270if err != nil {271return fmt.Errorf("creating network tracer attachment for pid %d: %w", pid, err)272}273t.attachments[netns] = a274
275return nil276}
277
278func (t *Tracer[Event]) SetEventHandler(handler any) {279if t.eventHandler != nil {280panic("handler already set")281}282
283nh, ok := handler.(func(ev *Event))284if !ok {285panic("event handler invalid")286}287t.eventHandler = nh288}
289
290// EventCallback provides support for legacy pkg/gadget-collection
291func (t *Tracer[Event]) EventCallback(event any) {292e, ok := event.(*Event)293if !ok {294panic("event handler argument invalid")295}296if t.eventHandler == nil {297return298}299t.eventHandler(e)300}
301
302func (t *Tracer[Event]) AttachContainer(container *containercollection.Container) error {303return t.Attach(container.Pid)304}
305
306func (t *Tracer[Event]) DetachContainer(container *containercollection.Container) error {307return t.Detach(container.Pid)308}
309
310func (t *Tracer[Event]) GetMap(name string) *ebpf.Map {311return t.collection.Maps[name]312}
313
314func (t *Tracer[Event]) listen(315baseEvent func(ev types.Event) *Event,316processEvent func(rawSample []byte, netns uint64) (*Event, error),317) {318for {319record, err := t.perfRd.Read()320if err != nil {321if errors.Is(err, perf.ErrClosed) {322return323}324
325msg := fmt.Sprintf("Error reading perf ring buffer: %s", err)326t.eventHandler(baseEvent(types.Err(msg)))327return328}329
330if record.LostSamples != 0 {331msg := fmt.Sprintf("lost %d samples", record.LostSamples)332t.eventHandler(baseEvent(types.Warn(msg)))333continue334}335
336if len(record.RawSample) < 4 {337t.eventHandler(baseEvent(types.Err("record too small")))338continue339}340
341// all networking gadgets have netns as first field342netns := *(*uint32)(unsafe.Pointer(&record.RawSample[0]))343event, err := processEvent(record.RawSample, uint64(netns))344if err != nil {345t.eventHandler(baseEvent(types.Err(err.Error())))346continue347}348if event == nil {349continue350}351t.eventHandler(event)352}353}
354
355func (t *Tracer[Event]) releaseAttachment(netns uint64, a *attachment) {356unix.Close(a.sockFd)357a.dispatcherObjs.Close()358delete(t.attachments, netns)359}
360
361func (t *Tracer[Event]) Detach(pid uint32) error {362t.mu.Lock()363defer t.mu.Unlock()364
365for netns, a := range t.attachments {366if _, ok := a.users[pid]; ok {367delete(a.users, pid)368if len(a.users) == 0 {369t.releaseAttachment(netns, a)370}371return nil372}373}374return fmt.Errorf("pid %d is not attached", pid)375}
376
377func (t *Tracer[Event]) Close() {378t.mu.Lock()379defer t.mu.Unlock()380
381if t.perfRd != nil {382t.perfRd.Close()383}384if t.collection != nil {385t.collection.Close()386}387for key, l := range t.attachments {388t.releaseAttachment(key, l)389}390if t.dispatcherMap != nil {391t.dispatcherMap.Close()392}393}
394