tetragon
1// SPDX-License-Identifier: Apache-2.0
2// Copyright Authors of Tetragon
3
4package test5
6import (7"os/exec"8"runtime"9"testing"10
11"github.com/cilium/tetragon/api/v1/tetragon"12"github.com/cilium/tetragon/pkg/testutils"13"github.com/sirupsen/logrus"14
15ec "github.com/cilium/tetragon/api/v1/tetragon/codegen/eventchecker"16)
17
18//revive:disable
19
20// TestEventChecker is a checker that relies on:
21// - the test sensor being loaded
22// - user-space executing hooks that trigger the test sensor on all cores
23// (see contrib/tester-progs/trigger-test-events).
24//
25// The typical structure of a test is:
26// 1. start observer
27// 2. do some things on user-space
28// 3. check that we get the expected events from tetragon
29//
30// In such a test there is no way to determine when to stop looking for events. Hence, we retry
31// step 3 a number of times to gain confidence that all events from step 2 have been processed.
32// These retries induce a significant time cost in failing tests or in tests that check the absence
33// of events (e.g., when doing filtering).
34//
35// TestEventChecker enables testing without timeouts. Timeouts are still used for robustness, but
36// assuming TestEventChecker works correctly they are not needed.
37//
38// After step 2, we trigger the hook of the test sensor (a simple sensor that generates test events)
39// on all CPUs. Once we have seen all the test events (on all CPUs), then we know that if we expect
40// any events, they are not there because events cannot be reordered on the same CPU.
41type TestEventChecker struct {42// eventChecker is the underlying event checker43eventChecker ec.MultiEventChecker44completionChecker *CompletionChecker45}
46
47// TestCheckerMarkEnd executes the necessary operations to mark the end of event stream on all CPUs
48func TestCheckerMarkEnd(t *testing.T) {49testBin := testutils.RepoRootPath("contrib/tester-progs/trigger-test-events")50testCmd := exec.Command(testBin)51err := testCmd.Run()52if err != nil {53t.Fatalf("error executing command: %v", err)54}55}
56
57//revive:enable
58
59func NewTestChecker(c ec.MultiEventChecker) *TestEventChecker {60ret := TestEventChecker{61eventChecker: c,62completionChecker: NewCompletionChecker(),63}64
65return &ret66}
67
68// update updates the state bsaed on the given event
69func (tc *TestEventChecker) update(ev ec.Event) {70switch ev := ev.(type) {71case *tetragon.Test:72cpu := ev.Arg073tc.completionChecker.Update(cpu)74default:75}76}
77
78func (tc *TestEventChecker) NextEventCheck(ev ec.Event, l *logrus.Logger) (bool, error) {79if tc.completionChecker.Done() {80l.Info("seen events on all CPUs, finalizing test")81return true, tc.eventChecker.FinalCheck(l)82}83
84done, err := tc.eventChecker.NextEventCheck(ev, l)85if done {86// underlying checker done, just return its values87return true, err88}89
90// just update the state. In the next event, we wil check91// whether it's time to terminate or not.92tc.update(ev)93
94return false, err95}
96
97func (tc *TestEventChecker) FinalCheck(l *logrus.Logger) error {98// this means that we run out of events before seeing all test events.99// Just return what the underlying checker returns100tc.completionChecker.Reset()101return tc.eventChecker.FinalCheck(l)102}
103
// CompletionChecker tracks which CPUs have produced a test event and reports
// when as many distinct CPUs as expected have been seen.
type CompletionChecker struct {
	// cpuDone holds the ids of the CPUs seen since creation or the last
	// Reset.
	cpuDone map[uint64]bool

	// remCount is the number of distinct CPUs that still have to be seen.
	remCount int

	// ncpus is the number of distinct CPUs to wait for. Reset restores
	// remCount to this value.
	ncpus int
}

// NewCompletionChecker returns a CompletionChecker that waits for
// runtime.NumCPU() distinct CPU ids.
//
// NB: CPU ids are not assumed to be consecutive; only the number of distinct
// ids is tracked. The checker still assumes that the number of CPUs producing
// test events equals runtime.NumCPU(). There are systems where this is not
// the case (e.g., cores getting offline), but we ignore them for now.
func NewCompletionChecker() *CompletionChecker {
	ncpus := runtime.NumCPU()
	return &CompletionChecker{
		cpuDone:  make(map[uint64]bool, ncpus),
		remCount: ncpus,
		ncpus:    ncpus,
	}
}

// Update records that a test event was seen on the given cpu. Repeated
// updates for the same cpu are counted only once.
func (cc *CompletionChecker) Update(cpu uint64) {
	if cc.cpuDone[cpu] {
		return
	}
	cc.cpuDone[cpu] = true
	if cc.remCount > 0 {
		cc.remCount--
	}
}

// Reset forgets all CPUs seen so far and restores the checker to its initial
// state.
func (cc *CompletionChecker) Reset() {
	// Drop every entry rather than flipping it to false. Ids outside
	// 0..ncpus-1 may have been recorded, and the remaining count must not
	// depend on how many distinct ids were ever observed.
	for cpu := range cc.cpuDone {
		delete(cc.cpuDone, cpu)
	}
	cc.remCount = cc.ncpus
}

// Done reports whether test events from the expected number of distinct CPUs
// have been seen.
func (cc *CompletionChecker) Done() bool {
	return cc.remCount == 0
}
145