tetragon
1// SPDX-License-Identifier: Apache-2.0
2// Copyright Authors of Tetragon
3
4package sensors5
6import (7"context"8"errors"9"fmt"10"strings"11
12"github.com/cilium/tetragon/api/v1/tetragon"13"github.com/cilium/tetragon/pkg/k8s/apis/cilium.io/v1alpha1"14"github.com/cilium/tetragon/pkg/logger"15"github.com/cilium/tetragon/pkg/policyfilter"16"github.com/cilium/tetragon/pkg/tracingpolicy"17)
18
// SensorStatus describes one sensor, as returned by Manager.ListSensors.
type SensorStatus struct {
	// Name is the name of the sensor.
	Name string
	// Enabled presumably reports whether the sensor is currently enabled —
	// TODO confirm against the code that fills it in.
	Enabled bool
	// Collection is presumably the name of the collection the sensor belongs
	// to — TODO confirm against the code that fills it in.
	Collection string
}
24
25// StartSensorManager initializes the sensorCtlHandle by spawning a sensor
26// controller goroutine.
27//
28// The purpose of this goroutine is to serialize loading and unloading of
29// sensors as requested from different goroutines (e.g., different GRPC
30// clients).
31//
32// if waitChan is not nil, the serving of sensor requests will block until
33// something is received. The intention of this is to allow the main function
34// to first load the base sensor before the sensor manager starts loading other sensors.
35func StartSensorManager(36bpfDir string,37waitChan chan struct{},38) (*Manager, error) {39pfState, err := policyfilter.GetState()40if err != nil {41return nil, fmt.Errorf("failed to initialize policy filter state: %w", err)42}43
44colMap := newCollectionMap()45
46handler, err := newHandler(pfState, colMap, bpfDir)47if err != nil {48return nil, err49}50
51// NB: pass handler.collections as a policy lister so that the manager can list policies52// without having to go via the manager goroutine.53return startSensorManager(handler, handler.collections, waitChan)54}
55
56func startSensorManager(57handler *handler,58policyLister policyLister,59waitChan chan struct{},60) (*Manager, error) {61c := make(chan sensorOp)62m := Manager{63sensorCtl: c,64policyLister: policyLister,65}66
67go func() {68
69// wait until start serving requests70if waitChan != nil {71logger.GetLogger().Infof("sensor controller waiting on channel")72<-waitChan73logger.GetLogger().Infof("sensor controller starts")74}75
76done := false77for !done {78op_ := <-c79err := errors.New("BUG in SensorCtl: unset error value") // nolint80switch op := op_.(type) {81case *tracingPolicyAdd:82err = handler.addTracingPolicy(op)83case *tracingPolicyDelete:84err = handler.deleteTracingPolicy(op)85case *tracingPolicyEnable:86err = handler.enableTracingPolicy(op)87case *tracingPolicyDisable:88err = handler.disableTracingPolicy(op)89case *sensorAdd:90err = handler.addSensor(op)91case *sensorRemove:92err = handler.removeSensor(op)93case *sensorEnable:94err = handler.enableSensor(op)95case *sensorDisable:96err = handler.disableSensor(op)97case *sensorList:98err = handler.listSensors(op)99case *sensorCtlStop:100logger.GetLogger().Debugf("stopping sensor controller...")101done = true102err = nil103default:104err = fmt.Errorf("unknown sensorOp: %v", op)105}106op_.sensorOpDone(err)107}108}()109return &m, nil110}
111
112/*
113* Sensor operations
114*/
115
116// EnableSensor enables a sensor by name
117func (h *Manager) EnableSensor(ctx context.Context, name string) error {118retc := make(chan error)119op := &sensorEnable{120ctx: ctx,121name: name,122retChan: retc,123}124
125h.sensorCtl <- op126err := <-retc127
128return err129}
130
131// AddSensor adds a sensor
132func (h *Manager) AddSensor(ctx context.Context, name string, sensor *Sensor) error {133retc := make(chan error)134op := &sensorAdd{135ctx: ctx,136name: name,137sensor: sensor,138retChan: retc,139}140
141h.sensorCtl <- op142return <-retc143}
144
145// DisableSensor disables a sensor by name
146func (h *Manager) DisableSensor(ctx context.Context, name string) error {147retc := make(chan error)148op := &sensorDisable{149ctx: ctx,150name: name,151retChan: retc,152}153
154h.sensorCtl <- op155return <-retc156}
157
158func (h *Manager) ListSensors(ctx context.Context) (*[]SensorStatus, error) {159retc := make(chan error)160op := &sensorList{161ctx: ctx,162retChan: retc,163}164
165h.sensorCtl <- op166err := <-retc167if err == nil {168return op.result, nil169}170
171return nil, err172}
173
174// TracingPolicy is an interface for a tracing policy
175// This is implemented by v1alpha1.types.TracingPolicy and
176// config.GenericTracingConf. The former is what is the k8s API server uses,
177// and the latter is used when we load files directly (e.g., via the cli).
178type TracingPolicy interface {179// TpName returns the name of the policy.180TpName() string181// TpSpec returns the specification of the policy182TpSpec() *v1alpha1.TracingPolicySpec183// TpInfo returns a description of the policy184TpInfo() string185}
186
187// AddTracingPolicy adds a new sensor based on a tracing policy
188// NB: if tp implements tracingpolicy.TracingPolicyNamespaced, it will be
189// treated as a namespaced policy
190func (h *Manager) AddTracingPolicy(ctx context.Context, tp tracingpolicy.TracingPolicy) error {191retc := make(chan error)192var namespace string193if tpNs, ok := tp.(tracingpolicy.TracingPolicyNamespaced); ok {194namespace = tpNs.TpNamespace()195}196ck := collectionKey{tp.TpName(), namespace}197op := &tracingPolicyAdd{198ctx: ctx,199ck: ck,200tp: tp,201retChan: retc,202}203
204h.sensorCtl <- op205err := <-retc206
207return err208}
209
210// DeleteTracingPolicy deletes a new sensor based on a tracing policy
211func (h *Manager) DeleteTracingPolicy(ctx context.Context, name string, namespace string) error {212retc := make(chan error)213ck := collectionKey{name, namespace}214op := &tracingPolicyDelete{215ctx: ctx,216ck: ck,217retChan: retc,218}219
220h.sensorCtl <- op221err := <-retc222
223return err224}
225
226func (h *Manager) EnableTracingPolicy(ctx context.Context, name, namespace string) error {227ck := collectionKey{name, namespace}228retc := make(chan error)229op := &tracingPolicyEnable{230ctx: ctx,231ck: ck,232retChan: retc,233}234
235h.sensorCtl <- op236err := <-retc237
238return err239}
240
241func (h *Manager) DisableTracingPolicy(ctx context.Context, name, namespace string) error {242ck := collectionKey{name, namespace}243retc := make(chan error)244op := &tracingPolicyDisable{245ctx: ctx,246ck: ck,247retChan: retc,248}249
250h.sensorCtl <- op251err := <-retc252
253return err254}
255
256// ListTracingPolicies returns a list of the active tracing policies
257func (h *Manager) ListTracingPolicies(_ context.Context) (*tetragon.ListTracingPoliciesResponse, error) {258ret := &tetragon.ListTracingPoliciesResponse{}259ret.Policies = h.listPolicies()260return ret, nil261}
262
263func (h *Manager) RemoveSensor(ctx context.Context, sensorName string) error {264retc := make(chan error)265op := &sensorRemove{266ctx: ctx,267name: sensorName,268retChan: retc,269}270
271h.sensorCtl <- op272err := <-retc273
274return err275}
276
277func (h *Manager) RemoveAllSensors(ctx context.Context) error {278retc := make(chan error)279op := &sensorRemove{280ctx: ctx,281all: true,282retChan: retc,283}284
285h.sensorCtl <- op286err := <-retc287
288return err289}
290
291func (h *Manager) StopSensorManager(ctx context.Context) error {292retc := make(chan error)293op := &sensorCtlStop{294ctx: ctx,295retChan: retc,296}297
298h.sensorCtl <- op299return <-retc300}
301
302func (h *Manager) LogSensorsAndProbes(ctx context.Context) {303log := logger.GetLogger()304sensors, err := h.ListSensors(ctx)305if err != nil {306log.WithError(err).Warn("failed to list sensors")307}308
309names := []string{}310for _, s := range *sensors {311names = append(names, s.Name)312}313log.WithField("sensors", strings.Join(names, ", ")).Info("Available sensors")314
315names = []string{}316for n := range registeredPolicyHandlers {317names = append(names, n)318}319log.WithField("policy-handlers", strings.Join(names, ", ")).Info("Registered sensors (policy-handlers)")320
321names = []string{}322for n := range registeredProbeLoad {323names = append(names, n)324}325log.WithField("types", strings.Join(names, ", ")).Info("Registered probe types")326}
327
328// policyLister allows read-only access to the collections map
329type policyLister interface {330listPolicies() []*tetragon.TracingPolicyStatus331}
332
333// Manager handles dynamic sensor management, such as adding / removing sensors
334// at runtime.
335type Manager struct {336// channel to communicate with the controller goroutine337sensorCtl sensorCtlHandle
338// policyLister is used to list policies without going via the controller goroutine by339// directly accessing the collection.340policyLister
341}
342
// The following commands can be passed to the controller goroutine:
// - tracingPolicyAdd
// - tracingPolicyDelete
// - tracingPolicyEnable
// - tracingPolicyDisable
// - sensorAdd
// - sensorRemove
// - sensorEnable
// - sensorDisable
// - sensorList
// - sensorCtlStop
351
352// tracingPolicyAdd adds a sensor based on a the provided tracing policy
353type tracingPolicyAdd struct {354ctx context.Context355ck collectionKey
356tp tracingpolicy.TracingPolicy357retChan chan error358}
359
360type tracingPolicyDelete struct {361ctx context.Context362ck collectionKey
363retChan chan error364}
365
366type tracingPolicyDisable struct {367ctx context.Context368ck collectionKey
369retChan chan error370}
371
372type tracingPolicyEnable struct {373ctx context.Context374ck collectionKey
375retChan chan error376}
377
// sensorOp is the interface implemented by every command that can be sent to
// the controller goroutine.
// It is not strictly needed, but it allows for better type checking.
type sensorOp interface {
	// sensorOpDone is called with the outcome of the command once it has been
	// served.
	sensorOpDone(error)
}
383
384// sensorAdd adds a sensor
385type sensorAdd struct {386ctx context.Context387name string388sensor *Sensor389retChan chan error390}
391
// sensorRemove is the command for removing a sensor (for now, used only for
// tracing policies).
type sensorRemove struct {
	// ctx is the context of the caller that submitted the command.
	ctx context.Context
	// name is the name of the sensor to remove.
	name string
	// all is set when every sensor should be removed; presumably name is
	// ignored in that case — TODO confirm against the handler.
	all bool
	// retChan carries the outcome of the command back to the caller.
	retChan chan error
}
399
// sensorEnable is the command for enabling a sensor.
type sensorEnable struct {
	// ctx is the context of the caller that submitted the command.
	ctx context.Context
	// name is the name of the sensor to enable.
	name string
	// retChan carries the outcome of the command back to the caller.
	retChan chan error
}
406
// sensorDisable is the command for disabling a sensor.
type sensorDisable struct {
	// ctx is the context of the caller that submitted the command.
	ctx context.Context
	// name is the name of the sensor to disable.
	name string
	// retChan carries the outcome of the command back to the caller.
	retChan chan error
}
413
414// sensorList returns a list of the active sensors
415type sensorList struct {416ctx context.Context417result *[]SensorStatus418retChan chan error419}
420
// sensorCtlStop is the command for stopping the controller goroutine.
type sensorCtlStop struct {
	// ctx is the context of the caller that submitted the command.
	ctx context.Context
	// retChan carries the acknowledgement of the command back to the caller.
	retChan chan error
}
426
type (
	// LoadArg is an empty struct type.
	LoadArg struct{}
	// UnloadArg is an alias of LoadArg.
	UnloadArg = LoadArg
)

430// trivial sensorOpDone implementations for commands
431func (s *tracingPolicyAdd) sensorOpDone(e error) { s.retChan <- e }432func (s *tracingPolicyDelete) sensorOpDone(e error) { s.retChan <- e }433func (s *tracingPolicyEnable) sensorOpDone(e error) { s.retChan <- e }434func (s *tracingPolicyDisable) sensorOpDone(e error) { s.retChan <- e }435func (s *sensorAdd) sensorOpDone(e error) { s.retChan <- e }436func (s *sensorRemove) sensorOpDone(e error) { s.retChan <- e }437func (s *sensorEnable) sensorOpDone(e error) { s.retChan <- e }438func (s *sensorDisable) sensorOpDone(e error) { s.retChan <- e }439func (s *sensorList) sensorOpDone(e error) { s.retChan <- e }440func (s *sensorCtlStop) sensorOpDone(e error) { s.retChan <- e }441
442type sensorCtlHandle = chan<- sensorOp443