From bc6520e633213284fec6a9d8b5fb2227ccf3b3fa Mon Sep 17 00:00:00 2001
From: Djalal Harouni <tixxdz@gmail.com>
Date: Wed, 31 Jul 2024 10:53:08 +0100
Subject: [PATCH] collections: take snapshot for debugging

Signed-off-by: Djalal Harouni <tixxdz@gmail.com>
---
 pkg/defaults/defaults.go  |  3 +++
 pkg/sensors/collection.go | 52 +++++++++++++++++++++++++++++++++++++++
 pkg/sensors/handler.go    | 17 ++++++++++---
 pkg/sensors/manager.go    |  6 +++++
 4 files changed, 74 insertions(+), 4 deletions(-)

diff --git a/pkg/defaults/defaults.go b/pkg/defaults/defaults.go
index cb26d0eb420..d2cbc443836 100644
--- a/pkg/defaults/defaults.go
+++ b/pkg/defaults/defaults.go
@@ -45,6 +45,9 @@ const (
 
 	// Pid file where to write tetragon main PID
 	DefaultPidFile = DefaultRunDir + "tetragon.pid"
+
+	// Collections snapshot directory
+	DefaultColDir = DefaultRunDir + "collections"
 )
 
 var (
diff --git a/pkg/sensors/collection.go b/pkg/sensors/collection.go
index 979517b618e..797e656fd16 100644
--- a/pkg/sensors/collection.go
+++ b/pkg/sensors/collection.go
@@ -4,12 +4,16 @@
 package sensors
 
 import (
+	"bufio"
 	"fmt"
+	"os"
+	"path/filepath"
 	"sync"
 
 	"github.com/cilium/tetragon/api/v1/tetragon"
 	"github.com/cilium/tetragon/pkg/tracingpolicy"
 	"go.uber.org/multierr"
+	"sigs.k8s.io/yaml"
 )
 
 type TracingPolicyState int
@@ -144,6 +148,54 @@ func (c *collection) destroy() {
 	}
 }
 
+// snapshot writes the collection state to the file path/filename, creating
+// the directory path if needed. The name, id, state, error and tracing policy
+// spec are written to a temporary file in path, which is renamed to filename
+// only after it was flushed and closed successfully. Nothing is written for
+// collections without a tracing policy, so their snapshot file is empty.
+func (c *collection) snapshot(path string, filename string) error {
+	if err := os.MkdirAll(path, 0700); err != nil {
+		return err
+	}
+	f, err := os.CreateTemp(path, "col-temp-*")
+	if err != nil {
+		return err
+	}
+	name := f.Name()
+	// Removes the temporary file on failure; fails harmlessly after the rename.
+	defer os.Remove(name)
+	// Covers the early error returns; a second Close only returns an error.
+	defer f.Close()
+
+	w := bufio.NewWriter(f)
+	if c.tracingpolicy != nil {
+		spec, err := yaml.Marshal(c.tracingpolicy.TpSpec())
+		if err != nil {
+			return err
+		}
+		// Write errors are sticky in bufio.Writer and reported by Flush below.
+		fmt.Fprintf(w, "name=%s\nid=%d\nstate=%s\nerr=%v\nTracingPolicy=%s\n",
+			c.name, c.tracingpolicyID, c.state.ToTetragonState(), c.err, spec)
+	}
+
+	if err := w.Flush(); err != nil {
+		return err
+	}
+	if err := f.Close(); err != nil {
+		return err
+	}
+
+	// CreateTemp already uses mode 0600; keep it explicit before publishing.
+	if err := os.Chmod(name, 0600); err != nil {
+		return err
+	}
+	return os.Rename(name, filepath.Join(path, filename))
+}
+
+// deleteSnapshot removes the snapshot file at path; path is the full file
+// path (directory and file name), unlike the arguments of snapshot.
+func (c *collection) deleteSnapshot(path string) error { return os.Remove(path) }
+
 func (cm *collectionMap) listPolicies() []*tetragon.TracingPolicyStatus {
 	cm.mu.RLock()
 	defer cm.mu.RUnlock()
diff --git a/pkg/sensors/handler.go b/pkg/sensors/handler.go
index c7cda782cb9..3d5fbf7b531 100644
--- a/pkg/sensors/handler.go
+++ b/pkg/sensors/handler.go
@@ -5,12 +5,18 @@ package sensors
 
 import (
 	"fmt"
+	"path/filepath"
 
 	slimv1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/apis/meta/v1"
+	"github.com/cilium/tetragon/pkg/defaults"
 	"github.com/cilium/tetragon/pkg/policyfilter"
 	"github.com/cilium/tetragon/pkg/tracingpolicy"
 )
 
+// colRunPath is the root directory for collection snapshots; it must match
+// the directory that the sensor manager creates and removes.
+const colRunPath = defaults.DefaultColDir
+
 type handler struct {
 	collections *collectionMap
 	bpfDir      string
@@ -142,13 +148,14 @@ func (h *handler) addTracingPolicy(op *tracingPolicyAdd) error {
 	err = col.load(h.bpfDir)
 	h.collections.mu.Lock()
 
-	if err != nil {
+	if err == nil {
+		col.state = EnabledState
+	} else {
 		col.err = err
 		col.state = LoadErrorState
-		return err
 	}
-	col.state = EnabledState
-	return nil
+	col.snapshot(filepath.Join(colRunPath, op.ck.namespace), col.name)
+	return err
 }
 
 func (h *handler) deleteTracingPolicy(op *tracingPolicyDelete) error {
@@ -172,6 +179,8 @@ func (h *handler) deleteTracingPolicy(op *tracingPolicyDelete) error {
 		return fmt.Errorf("failed to remove from policyfilter: %w", err)
 	}
 
+	snapShotPath := filepath.Join(colRunPath, op.ck.namespace, col.name)
+	col.deleteSnapshot(snapShotPath)
 	return nil
 }
 
diff --git a/pkg/sensors/manager.go b/pkg/sensors/manager.go
index e080c879928..5275098aa8b 100644
--- a/pkg/sensors/manager.go
+++ b/pkg/sensors/manager.go
@@ -7,9 +7,11 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"os"
 	"strings"
 
 	"github.com/cilium/tetragon/api/v1/tetragon"
+	"github.com/cilium/tetragon/pkg/defaults"
 	"github.com/cilium/tetragon/pkg/k8s/apis/cilium.io/v1alpha1"
 	"github.com/cilium/tetragon/pkg/logger"
 	"github.com/cilium/tetragon/pkg/policyfilter"
@@ -48,6 +50,9 @@ func StartSensorManager(
 		return nil, err
 	}
 
+	// Initialize collections snapshot directory
+	os.MkdirAll(defaults.DefaultColDir, 0700)
+
 	// NB: pass handler.collections as a policy lister so that the manager can list policies
 	// without having to go via the manager goroutine.
 	return startSensorManager(handler, handler.collections, waitChan)
@@ -106,6 +111,7 @@ func startSensorManager(
 			op_.sensorOpDone(err)
 		}
 	}()
+	os.RemoveAll(defaults.DefaultColDir)
 	return &m, nil
 }