From 9c8587d1dbb9295ac62df1e56c96199c29025a4c Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Tue, 30 Jan 2024 14:20:20 +0100 Subject: [PATCH 01/18] feat: boilerplate --- go.mod | 2 + go.sum | 2 + pkg/checks/udptraceroute/traceroute.go | 159 +++++++++++++++++++++++++ 3 files changed, 163 insertions(+) create mode 100644 pkg/checks/udptraceroute/traceroute.go diff --git a/go.mod b/go.mod index 6adde4de..f7a06fe7 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,8 @@ require ( gopkg.in/yaml.v3 v3.0.1 ) +require github.com/aeden/traceroute v0.0.0-20210211061815-03f5f7cb7908 // indirect + require ( github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect diff --git a/go.sum b/go.sum index 7c135d54..3ed8ef82 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/aeden/traceroute v0.0.0-20210211061815-03f5f7cb7908 h1:6suDyKbvZ5r2G/gblQLV9Cdv7rdqNlUxsRXpLOF0rKM= +github.com/aeden/traceroute v0.0.0-20210211061815-03f5f7cb7908/go.mod h1:HPBB/4vaPt7NcN9l72/+IwsmDVQsa6AWM6ZDKJCLB9U= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= diff --git a/pkg/checks/udptraceroute/traceroute.go b/pkg/checks/udptraceroute/traceroute.go new file mode 100644 index 00000000..49c0661f --- /dev/null +++ b/pkg/checks/udptraceroute/traceroute.go @@ -0,0 +1,159 @@ +package udptraceroute + +import ( + "context" + "net" + "sync" + "time" + + "github.com/aeden/traceroute" + "github.com/caas-team/sparrow/internal/helper" + "github.com/caas-team/sparrow/internal/logger" + "github.com/caas-team/sparrow/pkg/checks" + "github.com/caas-team/sparrow/pkg/checks/types" + "github.com/getkin/kin-openapi/openapi3" + "github.com/prometheus/client_golang/prometheus" +) + +var _ checks.Check = (*UDPTraceroute)(nil) + +type config struct { + Targets []Target + Interval time.Duration `json:"interval" yaml:"interval" mapstructure:"interval"` + Timeout time.Duration `json:"timeout" yaml:"timeout" mapstructure:"timeout"` + Retry helper.RetryConfig `json:"retry" yaml:"retry" mapstructure:"retry"` +} + +type Target struct { + Addr string +} + +type UDPTraceroute struct { + types.CheckBase + config config + cResult chan<- types.Result +} + +type Result struct { + // The minimum amount of hops required to reach the target + NumHops int + // The path taken to the destination + Hops []Hop +} + +type Hop struct { + Addr net.IP + Latency time.Duration + Success bool +} + +// Run is called once per check interval +// this should error if there is a problem running the check +// Returns an error and a result. Returning a non nil error will cause a shutdown of the system +func (c *UDPTraceroute) Run(ctx context.Context) error { + ctx, cancel := logger.NewContextWithLogger(ctx) + defer cancel() + log := logger.FromContext(ctx) + log.Info("Starting latency check", "interval", c.config.Interval.String()) + + for { + select { + case <-ctx.Done(): + log.Error("Context canceled", "err", ctx.Err()) + return ctx.Err() + case <-c.Done: + return nil + case <-time.After(c.config.Interval): + res := c.check(ctx) + errval := "" + r := types.Result{ + Data: res, + Err: errval, + Timestamp: time.Now(), + } + + c.CResult <- r + log.Debug("Successfully finished latency check run") + } + } + + return nil +} + +func (c *UDPTraceroute) check(ctx context.Context) map[string]Result { + res := make(map[string]Result) + var resMu sync.Mutex + for _, t := range c.config.Targets { + tr, err := traceroute.Traceroute(t.Addr, nil) + if err != nil { + panic(err) + } + + // ip := net.IPv4(res.DestinationAddress[0], res.DestinationAddress[1], res.DestinationAddress[2], res.DestinationAddress[3]) + + r := Result{ + NumHops: len(tr.Hops), + Hops: []Hop{}, + } + + for _, h := range tr.Hops { + r.Hops = append(r.Hops, Hop{ + Addr: h.Address[:], + Latency: h.ElapsedTime, + }) + } + + resMu.Lock() + defer resMu.Unlock() + + res[t.Addr] = r + } + return +} + +// Startup is called once when the check is registered +// In the Run() method, the check should send results to the cResult channel +// this will cause sparrow to update its data store with the results +func (c *UDPTraceroute) Startup(ctx context.Context, cResult chan<- types.Result) error { + c.Mu.Lock() + defer c.Mu.Unlock() + c.cResult = cResult + return nil +} + +// Shutdown is called once when the check is unregistered or sparrow shuts down +func (c *UDPTraceroute) Shutdown(ctx context.Context) error { + return nil +} + +// SetConfig is called once when the check is registered +// This is also called while the check is running, if the remote config is updated +// This should return an error if the config is invalid +func (c *UDPTraceroute) SetConfig(ctx context.Context, cfg any) error { + decoded, err := helper.Decode[config](cfg) + if err != nil { + return err + } + err = validateConfig(decoded) + if err != nil { + return err + } + c.Mu.Lock() + defer c.Mu.Unlock() + c.config = decoded + return nil +} + +// Schema returns an openapi3.SchemaRef of the result type returned by the check +func (c *UDPTraceroute) Schema() (*openapi3.SchemaRef, error) { + return checks.OpenapiFromPerfData[Result](Result{}) +} + +// GetMetricCollectors allows the check to provide prometheus metric collectors +func (c *UDPTraceroute) GetMetricCollectors() []prometheus.Collector { + return []prometheus.Collector{} +} + +func validateConfig(c config) error { + return nil +} From e3301dddde3f2dcb0477a87a57e04cb6dcc80089 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Fri, 16 Feb 2024 18:48:49 +0100 Subject: [PATCH 02/18] feat: register udptraceroute --- go.mod | 5 +- go.sum | 4 +- pkg/checks/runtime/runtime.go | 21 ++- pkg/checks/udptraceroute/traceroute.go | 73 ++++++--- pkg/checks/udptraceroute/traceroute_test.go | 168 ++++++++++++++++++++ pkg/factory/factory.go | 8 +- 6 files changed, 252 insertions(+), 27 deletions(-) create mode 100644 pkg/checks/udptraceroute/traceroute_test.go diff --git a/go.mod b/go.mod index b5a12830..09669822 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,10 @@ require ( gopkg.in/yaml.v3 v3.0.1 ) -require github.com/aeden/traceroute v0.0.0-20210211061815-03f5f7cb7908 +require ( + github.com/aeden/traceroute v0.0.0-20210211061815-03f5f7cb7908 + github.com/google/go-cmp v0.6.0 +) require github.com/mitchellh/mapstructure v1.5.0 // indirect diff --git a/go.sum b/go.sum index 3ed8ef82..b1c89f63 100644 --- a/go.sum +++ b/go.sum @@ -29,8 +29,8 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= -github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= diff --git a/pkg/checks/runtime/runtime.go b/pkg/checks/runtime/runtime.go index bf33e7a7..31537771 100644 --- a/pkg/checks/runtime/runtime.go +++ b/pkg/checks/runtime/runtime.go @@ -23,15 +23,17 @@ import ( "github.com/caas-team/sparrow/pkg/checks/dns" "github.com/caas-team/sparrow/pkg/checks/health" "github.com/caas-team/sparrow/pkg/checks/latency" + "github.com/caas-team/sparrow/pkg/checks/udptraceroute" ) // Config holds the runtime configuration // for the various checks // the sparrow supports type Config struct { - Health *health.Config `yaml:"health" json:"health"` - Latency *latency.Config `yaml:"latency" json:"latency"` - Dns *dns.Config `yaml:"dns" json:"dns"` + Health *health.Config `yaml:"health" json:"health"` + Latency *latency.Config `yaml:"latency" json:"latency"` + Dns *dns.Config `yaml:"dns" json:"dns"` + Traceroute *udptraceroute.Config `yaml:"udptraceroute" json:"udptraceroute"` } // Empty returns true if no checks are configured @@ -51,6 +53,9 @@ func (c Config) Iter() []checks.Runtime { if c.Dns != nil { configs = append(configs, c.Dns) } + if c.Traceroute != nil { + configs = append(configs, c.Traceroute) + } return configs } @@ -66,6 +71,9 @@ func (c Config) size() int { if c.HasDNSCheck() { size++ } + if c.HasUdpTracerouteCheck() { + size++ + } return size } @@ -84,6 +92,11 @@ func (c Config) HasDNSCheck() bool { return c.Dns != nil } +// HasUdpTracerouteCheck returns true if the check has a udptraceroute check configured +func (c Config) HasUdpTracerouteCheck() bool { + return c.Traceroute != nil +} + // HasCheck returns true if the check has a check with the given name configured func (c Config) HasCheck(name string) bool { switch name { @@ -93,6 +106,8 @@ func (c Config) HasCheck(name string) bool { return c.HasLatencyCheck() case dns.CheckName: return c.HasDNSCheck() + case udptraceroute.CheckName: + return c.HasUdpTracerouteCheck() default: return false } diff --git a/pkg/checks/udptraceroute/traceroute.go b/pkg/checks/udptraceroute/traceroute.go index ba1e2c58..2f9cb018 100644 --- a/pkg/checks/udptraceroute/traceroute.go +++ b/pkg/checks/udptraceroute/traceroute.go @@ -2,7 +2,9 @@ package udptraceroute import ( "context" + "fmt" "net" + "sync" "time" "github.com/aeden/traceroute" @@ -15,7 +17,7 @@ import ( var ( _ checks.Check = (*UDPTraceroute)(nil) - checkname = "udptraceroute" + CheckName = "udptraceroute" ) type config struct { @@ -26,17 +28,35 @@ type config struct { } func (c config) For() string { - return checkname + return CheckName } type Target struct { Addr string } +func NewCheck() checks.Check { + return &UDPTraceroute{ + config: config{}, + traceroute: newTraceroute, + CheckBase: checks.CheckBase{ + Mu: sync.Mutex{}, + CResult: make(chan checks.Result), + Done: make(chan bool), + }, + } +} + type UDPTraceroute struct { checks.CheckBase - config config - cResult chan<- checks.Result + config config + traceroute tracerouteFactory +} + +type tracerouteFactory func(dest string) (traceroute.TracerouteResult, error) + +func newTraceroute(dest string) (traceroute.TracerouteResult, error) { + return traceroute.Traceroute(dest, nil) } type Result struct { @@ -69,7 +89,7 @@ func (c *UDPTraceroute) Run(ctx context.Context) error { case <-c.Done: return nil case <-time.After(c.config.Interval): - res := c.check(ctx) + res, _ := c.check(ctx) errval := "" r := checks.Result{ Data: res, @@ -89,30 +109,47 @@ func (c *UDPTraceroute) GetConfig() checks.Runtime { return c.config } -func (c *UDPTraceroute) check(_ context.Context) map[string]Result { +func (c *UDPTraceroute) check(_ context.Context) (map[string]Result, error) { res := make(map[string]Result) + var err error for _, t := range c.config.Targets { - tr, err := traceroute.Traceroute(t.Addr, nil) - if err != nil { - panic(err) + tr, trerr := c.traceroute(t.Addr) + if trerr != nil { + err = trerr + continue } - r := Result{ + result := Result{ NumHops: len(tr.Hops), Hops: []Hop{}, } for _, h := range tr.Hops { - r.Hops = append(r.Hops, Hop{ - Addr: h.Address[:], + result.Hops = append(result.Hops, Hop{ + Addr: net.IPv4(h.Address[0], h.Address[1], h.Address[2], h.Address[3]), Latency: h.ElapsedTime, Success: h.Success, }) } + res[t.Addr] = result + } + return res, err +} - res[t.Addr] = r +type MultiError struct { + errors []error +} + +func (m MultiError) Error() string { + result := "[" + if len(m.errors) > 0 { + result = m.errors[0].Error() + } + for _, err := range m.errors { + result = fmt.Sprintf("%s, %s", result, err.Error()) } - return res + result += "]" + return result } // Startup is called once when the check is registered @@ -121,7 +158,7 @@ func (c *UDPTraceroute) check(_ context.Context) map[string]Result { func (c *UDPTraceroute) Startup(_ context.Context, cResult chan<- checks.Result) error { c.Mu.Lock() defer c.Mu.Unlock() - c.cResult = cResult + c.CheckBase.CResult = cResult return nil } @@ -140,13 +177,13 @@ func (c *UDPTraceroute) SetConfig(cfg checks.Runtime) error { defer c.Mu.Unlock() c.config = *newConfg return checks.ErrConfigMismatch{ - Expected: checkname, + Expected: CheckName, Current: cfg.For(), } } if err := validateConfig(*newConfg); err != nil { return checks.ErrConfigMismatch{ - Expected: checkname, + Expected: CheckName, Current: cfg.For(), } } @@ -165,7 +202,7 @@ func (c *UDPTraceroute) GetMetricCollectors() []prometheus.Collector { } func (c *UDPTraceroute) Name() string { - return checkname + return CheckName } func validateConfig(_ config) error { diff --git a/pkg/checks/udptraceroute/traceroute_test.go b/pkg/checks/udptraceroute/traceroute_test.go new file mode 100644 index 00000000..61ef8882 --- /dev/null +++ b/pkg/checks/udptraceroute/traceroute_test.go @@ -0,0 +1,168 @@ +package udptraceroute + +import ( + "context" + "net" + "sync" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + + "github.com/aeden/traceroute" + "github.com/caas-team/sparrow/pkg/checks" +) + +func TestCheck(t *testing.T) { + type want struct { + expected map[string]Result + wantErr bool + err error + } + type testcase struct { + name string + c *UDPTraceroute + want want + } + + cases := []testcase{ + { + name: "Success 5 hops", + c: newForTest(success(5), []string{"8.8.8.8"}), + want: want{ + expected: map[string]Result{ + "8.8.8.8": { + NumHops: 5, + Hops: []Hop{ + {Addr: net.IPv4(0, 0, 0, 0), Latency: 0 * time.Second, Success: false}, + {Addr: net.IPv4(0, 0, 0, 1), Latency: 1 * time.Second, Success: false}, + {Addr: net.IPv4(0, 0, 0, 2), Latency: 2 * time.Second, Success: false}, + {Addr: net.IPv4(0, 0, 0, 3), Latency: 3 * time.Second, Success: false}, + {Addr: net.IPv4(8, 8, 8, 8), Latency: 69 * time.Second, Success: true}, + }, + }, + }, + wantErr: false, + }, + }, + { + name: "Traceroute internal error", + c: newForTest(returnError(&net.DNSError{Err: "no such host", Name: "google.com", IsNotFound: true}), []string{"google.com"}), + want: want{ + wantErr: true, + expected: map[string]Result{}, + err: &net.DNSError{Err: "no such host", Name: "google.com", IsNotFound: true}, + }, + }, + } + + for _, c := range cases { + res, err := c.c.check(context.Background()) + + if c.want.wantErr { + if err == nil { + t.Errorf("expected error, got nil") + } else { + if c.want.err.Error() != err.Error() { + t.Errorf("expected: %v, got: %v", c.want.err, err) + } + } + } + if !cmp.Equal(res, c.want.expected) { + diff := cmp.Diff(res, c.want.expected) + t.Errorf("unexpected result: -want +got\n%s", diff) + + } + + } + +} + +func newForTest(f tracerouteFactory, targets []string) *UDPTraceroute { + t := make([]Target, len(targets)) + for i, target := range targets { + t[i] = Target{Addr: target} + } + return &UDPTraceroute{ + config: config{ + Targets: t, + }, + traceroute: f, + CheckBase: checks.CheckBase{ + Mu: sync.Mutex{}, + CResult: make(chan checks.Result), + Done: make(chan bool), + }, + } + +} + +// success produces a tracerouteFactory that returns a traceroute result with nHops hops +func success(nHops int) tracerouteFactory { + return func(dest string) (traceroute.TracerouteResult, error) { + hops := make([]traceroute.TracerouteHop, nHops) + for i := 0; i < nHops-1; i++ { + hops[i] = traceroute.TracerouteHop{ + Success: false, + Address: ipFromInt(i), + N: nHops, + Host: "", + ElapsedTime: time.Duration(i) * time.Second, + TTL: i, + } + } + hops[nHops-1] = traceroute.TracerouteHop{ + Success: true, + Address: [4]byte{8, 8, 8, 8}, + N: nHops, + Host: "google-public-dns-a.google.com", + ElapsedTime: 69 * time.Second, + TTL: nHops, + } + + return traceroute.TracerouteResult{ + DestinationAddress: hops[nHops-1].Address, + Hops: hops, + }, nil + + } +} + +func returnError(err error) tracerouteFactory { + return func(dest string) (traceroute.TracerouteResult, error) { + return traceroute.TracerouteResult{}, err + } +} + +// ipFromInt takes in an int and builds an IP address from it +// Example: +// ipFromInt(300) -> 0.0.1.44 +func ipFromInt(i int) [4]byte { + b1 := i >> 24 & 0xFF + b2 := i >> 16 & 0xFF + b3 := i >> 8 & 0xFF + b4 := i & 0xFF + + return [4]byte{byte(b1), byte(b2), byte(b3), byte(b4)} +} + +func TestIpFromInt(t *testing.T) { + type testcase struct { + In int + Expected [4]byte + } + cases := []testcase{ + {In: 300, Expected: [4]byte{0, 0, 1, 44}}, + {In: 0, Expected: [4]byte{0, 0, 0, 0}}, + {In: (1 << 33) - 1, Expected: [4]byte{255, 255, 255, 255}}, + } + + for _, c := range cases { + t.Run("ipFromInt", func(t *testing.T) { + actual := ipFromInt(c.In) + if c.Expected != actual { + t.Errorf("expected: %v, actual: %v", c.Expected, actual) + } + }) + } +} diff --git a/pkg/factory/factory.go b/pkg/factory/factory.go index 9d28bdf2..87f9ecf1 100644 --- a/pkg/factory/factory.go +++ b/pkg/factory/factory.go @@ -26,6 +26,7 @@ import ( "github.com/caas-team/sparrow/pkg/checks/health" "github.com/caas-team/sparrow/pkg/checks/latency" "github.com/caas-team/sparrow/pkg/checks/runtime" + "github.com/caas-team/sparrow/pkg/checks/udptraceroute" ) // newCheck creates a new check instance from the given name @@ -57,7 +58,8 @@ func NewChecksFromConfig(cfg runtime.Config) (map[string]checks.Check, error) { // registry is a convenience map to create new checks var registry = map[string]func() checks.Check{ - health.CheckName: health.NewCheck, - latency.CheckName: latency.NewCheck, - dns.CheckName: dns.NewCheck, + health.CheckName: health.NewCheck, + latency.CheckName: latency.NewCheck, + dns.CheckName: dns.NewCheck, + udptraceroute.CheckName: udptraceroute.NewCheck, } From b1b79fcddd6d6d5172a76b055f4120c72711d482 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Fri, 16 Feb 2024 18:55:39 +0100 Subject: [PATCH 03/18] chore: fix nits --- pkg/checks/udptraceroute/traceroute.go | 57 ++++++++++++--------- pkg/checks/udptraceroute/traceroute_test.go | 13 ++--- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/pkg/checks/udptraceroute/traceroute.go b/pkg/checks/udptraceroute/traceroute.go index 2f9cb018..3eec685e 100644 --- a/pkg/checks/udptraceroute/traceroute.go +++ b/pkg/checks/udptraceroute/traceroute.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net" + "net/url" "sync" "time" @@ -20,43 +21,42 @@ var ( CheckName = "udptraceroute" ) -type config struct { - Targets []Target +type Config struct { + Targets []Target `json:"targets" yaml:"targets" mapstructure:"targets"` Interval time.Duration `json:"interval" yaml:"interval" mapstructure:"interval"` Timeout time.Duration `json:"timeout" yaml:"timeout" mapstructure:"timeout"` Retry helper.RetryConfig `json:"retry" yaml:"retry" mapstructure:"retry"` } -func (c config) For() string { +func (c Config) For() string { return CheckName } type Target struct { - Addr string + Addr string `json:"addr" yaml:"addr" mapstructure:"addr"` } func NewCheck() checks.Check { return &UDPTraceroute{ - config: config{}, + config: Config{}, traceroute: newTraceroute, CheckBase: checks.CheckBase{ - Mu: sync.Mutex{}, - CResult: make(chan checks.Result), - Done: make(chan bool), + Mu: sync.Mutex{}, + Done: make(chan bool), }, } } type UDPTraceroute struct { checks.CheckBase - config config + config Config traceroute tracerouteFactory } type tracerouteFactory func(dest string) (traceroute.TracerouteResult, error) func newTraceroute(dest string) (traceroute.TracerouteResult, error) { - return traceroute.Traceroute(dest, nil) + return traceroute.Traceroute(dest, &traceroute.TracerouteOptions{}) } type Result struct { @@ -79,7 +79,7 @@ func (c *UDPTraceroute) Run(ctx context.Context) error { ctx, cancel := logger.NewContextWithLogger(ctx) defer cancel() log := logger.FromContext(ctx) - log.Info("Starting latency check", "interval", c.config.Interval.String()) + log.Info("Starting traceroute check", "interval", c.config.Interval.String()) for { select { @@ -98,7 +98,7 @@ func (c *UDPTraceroute) Run(ctx context.Context) error { } c.CResult <- r - log.Debug("Successfully finished latency check run") + log.Debug("Successfully finished traceroute check run") } } } @@ -106,7 +106,7 @@ func (c *UDPTraceroute) Run(ctx context.Context) error { func (c *UDPTraceroute) GetConfig() checks.Runtime { c.Mu.Lock() defer c.Mu.Unlock() - return c.config + return &c.config } func (c *UDPTraceroute) check(_ context.Context) (map[string]Result, error) { @@ -171,23 +171,21 @@ func (c *UDPTraceroute) Shutdown(_ context.Context) error { // This is also called while the check is running, if the remote config is updated // This should return an error if the config is invalid func (c *UDPTraceroute) SetConfig(cfg checks.Runtime) error { - newConfg, ok := cfg.(*config) - if ok { - c.Mu.Lock() - defer c.Mu.Unlock() - c.config = *newConfg + newConfig, ok := cfg.(*Config) + if !ok { return checks.ErrConfigMismatch{ Expected: CheckName, Current: cfg.For(), } } - if err := validateConfig(*newConfg); err != nil { - return checks.ErrConfigMismatch{ - Expected: CheckName, - Current: cfg.For(), - } + if err := validateConfig(*newConfig); err != nil { + return err } + c.Mu.Lock() + defer c.Mu.Unlock() + c.config = *newConfig + return nil } @@ -205,6 +203,17 @@ func (c *UDPTraceroute) Name() string { return CheckName } -func validateConfig(_ config) error { +func validateConfig(cfg Config) error { + if cfg.Timeout <= 0 { + return fmt.Errorf("timeout must be greater than 0") + } + if cfg.Interval <= 0 { + return fmt.Errorf("interval must be greater than 0") + } + for _, t := range cfg.Targets { + if _, err := url.Parse(t.Addr); err != nil { + return fmt.Errorf("%s is not a valid url", t.Addr) + } + } return nil } diff --git a/pkg/checks/udptraceroute/traceroute_test.go b/pkg/checks/udptraceroute/traceroute_test.go index 61ef8882..6cbb355d 100644 --- a/pkg/checks/udptraceroute/traceroute_test.go +++ b/pkg/checks/udptraceroute/traceroute_test.go @@ -62,20 +62,15 @@ func TestCheck(t *testing.T) { if c.want.wantErr { if err == nil { t.Errorf("expected error, got nil") - } else { - if c.want.err.Error() != err.Error() { - t.Errorf("expected: %v, got: %v", c.want.err, err) - } + } else if c.want.err.Error() != err.Error() { + t.Errorf("expected: %v, got: %v", c.want.err, err) } } if !cmp.Equal(res, c.want.expected) { diff := cmp.Diff(res, c.want.expected) t.Errorf("unexpected result: -want +got\n%s", diff) - } - } - } func newForTest(f tracerouteFactory, targets []string) *UDPTraceroute { @@ -84,7 +79,7 @@ func newForTest(f tracerouteFactory, targets []string) *UDPTraceroute { t[i] = Target{Addr: target} } return &UDPTraceroute{ - config: config{ + config: Config{ Targets: t, }, traceroute: f, @@ -94,7 +89,6 @@ func newForTest(f tracerouteFactory, targets []string) *UDPTraceroute { Done: make(chan bool), }, } - } // success produces a tracerouteFactory that returns a traceroute result with nHops hops @@ -124,7 +118,6 @@ func success(nHops int) tracerouteFactory { DestinationAddress: hops[nHops-1].Address, Hops: hops, }, nil - } } From 821a1008549d88ef9c49998f310e06ef6bb84fa0 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Wed, 21 Feb 2024 14:26:06 +0100 Subject: [PATCH 04/18] fix: unit tests breaking --- .vscode/launch.json | 9 ++ pkg/checks/runtime/runtime.go | 20 +-- .../traceroute.go | 130 ++++++++++++------ .../traceroute_test.go | 47 +++---- pkg/factory/factory.go | 10 +- scripts/debug-elevated.sh | 13 ++ 6 files changed, 150 insertions(+), 79 deletions(-) rename pkg/checks/{udptraceroute => traceroute}/traceroute.go (54%) rename pkg/checks/{udptraceroute => traceroute}/traceroute_test.go (70%) create mode 100644 scripts/debug-elevated.sh diff --git a/.vscode/launch.json b/.vscode/launch.json index 751c7cb4..680f7f55 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,6 +1,15 @@ { "version": "0.2.0", "configurations": [ + { + "name": "Connect to server", + "type": "go", + "request": "attach", + "mode": "remote", + "remotePath": "${workspaceFolder}", + "port": 2345, + "host": "127.0.0.1" + }, { "name": "Launch Package", "type": "go", diff --git a/pkg/checks/runtime/runtime.go b/pkg/checks/runtime/runtime.go index 31537771..65af045e 100644 --- a/pkg/checks/runtime/runtime.go +++ b/pkg/checks/runtime/runtime.go @@ -23,17 +23,17 @@ import ( "github.com/caas-team/sparrow/pkg/checks/dns" "github.com/caas-team/sparrow/pkg/checks/health" "github.com/caas-team/sparrow/pkg/checks/latency" - "github.com/caas-team/sparrow/pkg/checks/udptraceroute" + "github.com/caas-team/sparrow/pkg/checks/traceroute" ) // Config holds the runtime configuration // for the various checks // the sparrow supports type Config struct { - Health *health.Config `yaml:"health" json:"health"` - Latency *latency.Config `yaml:"latency" json:"latency"` - Dns *dns.Config `yaml:"dns" json:"dns"` - Traceroute *udptraceroute.Config `yaml:"udptraceroute" json:"udptraceroute"` + Health *health.Config `yaml:"health" json:"health"` + Latency *latency.Config `yaml:"latency" json:"latency"` + Dns *dns.Config `yaml:"dns" json:"dns"` + Traceroute *traceroute.Config `yaml:"traceroute" json:"traceroute"` } // Empty returns true if no checks are configured @@ -71,7 +71,7 @@ func (c Config) size() int { if c.HasDNSCheck() { size++ } - if c.HasUdpTracerouteCheck() { + if c.HasTracerouteCheck() { size++ } return size @@ -92,8 +92,8 @@ func (c Config) HasDNSCheck() bool { return c.Dns != nil } -// HasUdpTracerouteCheck returns true if the check has a udptraceroute check configured -func (c Config) HasUdpTracerouteCheck() bool { +// HasTracerouteCheck returns true if the check has a traceroute check configured +func (c Config) HasTracerouteCheck() bool { return c.Traceroute != nil } @@ -106,8 +106,8 @@ func (c Config) HasCheck(name string) bool { return c.HasLatencyCheck() case dns.CheckName: return c.HasDNSCheck() - case udptraceroute.CheckName: - return c.HasUdpTracerouteCheck() + case traceroute.CheckName: + return c.HasTracerouteCheck() default: return false } diff --git a/pkg/checks/udptraceroute/traceroute.go b/pkg/checks/traceroute/traceroute.go similarity index 54% rename from pkg/checks/udptraceroute/traceroute.go rename to pkg/checks/traceroute/traceroute.go index 3eec685e..e2c6c98f 100644 --- a/pkg/checks/udptraceroute/traceroute.go +++ b/pkg/checks/traceroute/traceroute.go @@ -1,15 +1,13 @@ -package udptraceroute +package traceroute import ( "context" "fmt" - "net" "net/url" "sync" "time" "github.com/aeden/traceroute" - "github.com/caas-team/sparrow/internal/helper" "github.com/caas-team/sparrow/internal/logger" "github.com/caas-team/sparrow/pkg/checks" "github.com/getkin/kin-openapi/openapi3" @@ -17,15 +15,16 @@ import ( ) var ( - _ checks.Check = (*UDPTraceroute)(nil) - CheckName = "udptraceroute" + _ checks.Check = (*Traceroute)(nil) + CheckName = "traceroute" ) type Config struct { - Targets []Target `json:"targets" yaml:"targets" mapstructure:"targets"` - Interval time.Duration `json:"interval" yaml:"interval" mapstructure:"interval"` - Timeout time.Duration `json:"timeout" yaml:"timeout" mapstructure:"timeout"` - Retry helper.RetryConfig `json:"retry" yaml:"retry" mapstructure:"retry"` + Targets []Target `json:"targets" yaml:"targets" mapstructure:"targets"` + Retries int `json:"retries" yaml:"retries" mapstructure:"retries"` + MaxHops int `json:"maxHops" yaml:"maxHops" mapstructure:"maxHops"` + Interval time.Duration `json:"interval" yaml:"interval" mapstructure:"interval"` + Timeout time.Duration `json:"timeout" yaml:"timeout" mapstructure:"timeout"` } func (c Config) For() string { @@ -34,10 +33,11 @@ func (c Config) For() string { type Target struct { Addr string `json:"addr" yaml:"addr" mapstructure:"addr"` + Port uint16 `json:"port" yaml:"port" mapstructure:"port"` } func NewCheck() checks.Check { - return &UDPTraceroute{ + return &Traceroute{ config: Config{}, traceroute: newTraceroute, CheckBase: checks.CheckBase{ @@ -47,16 +47,21 @@ func NewCheck() checks.Check { } } -type UDPTraceroute struct { +type Traceroute struct { checks.CheckBase config Config traceroute tracerouteFactory } -type tracerouteFactory func(dest string) (traceroute.TracerouteResult, error) +type tracerouteFactory func(dest string, port, timeout, retries, maxHops int) (traceroute.TracerouteResult, error) -func newTraceroute(dest string) (traceroute.TracerouteResult, error) { - return traceroute.Traceroute(dest, &traceroute.TracerouteOptions{}) +func newTraceroute(dest string, port, timeout, retries, maxHops int) (traceroute.TracerouteResult, error) { + opts := &traceroute.TracerouteOptions{} + opts.SetTimeoutMs(timeout) + opts.SetRetries(retries) + opts.SetMaxHops(maxHops) + opts.SetPort(port) + return traceroute.Traceroute(dest, opts) } type Result struct { @@ -67,7 +72,7 @@ type Result struct { } type Hop struct { - Addr net.IP + Addr string Latency time.Duration Success bool } @@ -75,7 +80,7 @@ type Hop struct { // Run is called once per check interval // this should error if there is a problem running the check // Returns an error and a result. Returning a non nil error will cause a shutdown of the system -func (c *UDPTraceroute) Run(ctx context.Context) error { +func (c *Traceroute) Run(ctx context.Context) error { ctx, cancel := logger.NewContextWithLogger(ctx) defer cancel() log := logger.FromContext(ctx) @@ -103,36 +108,79 @@ func (c *UDPTraceroute) Run(ctx context.Context) error { } } -func (c *UDPTraceroute) GetConfig() checks.Runtime { +func (c *Traceroute) GetConfig() checks.Runtime { c.Mu.Lock() defer c.Mu.Unlock() return &c.config } -func (c *UDPTraceroute) check(_ context.Context) (map[string]Result, error) { +func (c *Traceroute) check(ctx context.Context) (map[string]Result, error) { res := make(map[string]Result) + log := logger.FromContext(ctx) + + type internalResult struct { + addr string + res Result + } + var err error + var wg sync.WaitGroup + cErr := make(chan error, len(c.config.Targets)) + cResult := make(chan internalResult, len(c.config.Targets)) + for _, t := range c.config.Targets { - tr, trerr := c.traceroute(t.Addr) - if trerr != nil { - err = trerr - continue - } + wg.Add(1) + go func(t Target) { + defer wg.Done() + log.Debug("Running traceroute", "target", t.Addr) + start := time.Now() + tr, trerr := c.traceroute(t.Addr, int(t.Port), int(c.config.Timeout/time.Millisecond), c.config.Retries, c.config.MaxHops) + duration := time.Since(start) + if trerr != nil { + err = trerr + log.Error("Error running traceroute", "err", trerr, "target", t.Addr) + } - result := Result{ - NumHops: len(tr.Hops), - Hops: []Hop{}, - } + log.Debug("Ran traceroute", "result", tr, "duration", duration) - for _, h := range tr.Hops { - result.Hops = append(result.Hops, Hop{ - Addr: net.IPv4(h.Address[0], h.Address[1], h.Address[2], h.Address[3]), - Latency: h.ElapsedTime, - Success: h.Success, - }) - } - res[t.Addr] = result + result := Result{ + NumHops: len(tr.Hops), + Hops: []Hop{}, + } + + for _, h := range tr.Hops { + result.Hops = append(result.Hops, Hop{ + Addr: h.Host, + Latency: h.ElapsedTime, + Success: h.Success, + }) + } + cResult <- internalResult{addr: t.Addr, res: result} + }(t) + } + + log.Debug("Waiting for traceroute checks to finish") + + go func() { + wg.Wait() + close(cResult) + close(cErr) + }() + + log.Debug("All traceroute checks finished") + + for r := range cResult { + res[r.addr] = r.res } + + log.Debug("Getting errors from traceroute checks") + + for e := range cErr { + err = MultiError{append([]error{}, err, e)} + } + + log.Debug("Finished traceroute checks", "result", res, "err", err) + return res, err } @@ -155,7 +203,7 @@ func (m MultiError) Error() string { // Startup is called once when the check is registered // In the Run() method, the check should send results to the cResult channel // this will cause sparrow to update its data store with the results -func (c *UDPTraceroute) Startup(_ context.Context, cResult chan<- checks.Result) error { +func (c *Traceroute) Startup(_ context.Context, cResult chan<- checks.Result) error { c.Mu.Lock() defer c.Mu.Unlock() c.CheckBase.CResult = cResult @@ -163,14 +211,14 @@ func (c *UDPTraceroute) Startup(_ context.Context, cResult chan<- checks.Result) } // Shutdown is called once when the check is unregistered or sparrow shuts down -func (c *UDPTraceroute) Shutdown(_ context.Context) error { +func (c *Traceroute) Shutdown(_ context.Context) error { return nil } // SetConfig is called once when the check is registered // This is also called while the check is running, if the remote config is updated // This should return an error if the config is invalid -func (c *UDPTraceroute) SetConfig(cfg checks.Runtime) error { +func (c *Traceroute) SetConfig(cfg checks.Runtime) error { newConfig, ok := cfg.(*Config) if !ok { return checks.ErrConfigMismatch{ @@ -190,16 +238,16 @@ func (c *UDPTraceroute) SetConfig(cfg checks.Runtime) error { } // Schema returns an openapi3.SchemaRef of the result type returned by the check -func (c *UDPTraceroute) Schema() (*openapi3.SchemaRef, error) { +func (c *Traceroute) Schema() (*openapi3.SchemaRef, error) { return checks.OpenapiFromPerfData[Result](Result{}) } // GetMetricCollectors allows the check to provide prometheus metric collectors -func (c *UDPTraceroute) GetMetricCollectors() []prometheus.Collector { +func (c *Traceroute) GetMetricCollectors() []prometheus.Collector { return []prometheus.Collector{} } -func (c *UDPTraceroute) Name() string { +func (c *Traceroute) Name() string { return CheckName } diff --git a/pkg/checks/udptraceroute/traceroute_test.go b/pkg/checks/traceroute/traceroute_test.go similarity index 70% rename from pkg/checks/udptraceroute/traceroute_test.go rename to pkg/checks/traceroute/traceroute_test.go index 6cbb355d..41a78437 100644 --- a/pkg/checks/udptraceroute/traceroute_test.go +++ b/pkg/checks/traceroute/traceroute_test.go @@ -1,4 +1,4 @@ -package udptraceroute +package traceroute import ( "context" @@ -21,7 +21,7 @@ func TestCheck(t *testing.T) { } type testcase struct { name string - c *UDPTraceroute + c *Traceroute want want } @@ -34,11 +34,11 @@ func TestCheck(t *testing.T) { "8.8.8.8": { NumHops: 5, Hops: []Hop{ - {Addr: net.IPv4(0, 0, 0, 0), Latency: 0 * time.Second, Success: false}, - {Addr: net.IPv4(0, 0, 0, 1), Latency: 1 * time.Second, Success: false}, - {Addr: net.IPv4(0, 0, 0, 2), Latency: 2 * time.Second, Success: false}, - {Addr: net.IPv4(0, 0, 0, 3), Latency: 3 * time.Second, Success: false}, - {Addr: net.IPv4(8, 8, 8, 8), Latency: 69 * time.Second, Success: true}, + {Addr: "0.0.0.0", Latency: 0 * time.Second, Success: false}, + {Addr: "0.0.0.1", Latency: 1 * time.Second, Success: false}, + {Addr: "0.0.0.2", Latency: 2 * time.Second, Success: false}, + {Addr: "0.0.0.3", Latency: 3 * time.Second, Success: false}, + {Addr: "google-public-dns-a.google.com", Latency: 69 * time.Second, Success: true}, }, }, }, @@ -49,9 +49,11 @@ func TestCheck(t *testing.T) { name: "Traceroute internal error", c: newForTest(returnError(&net.DNSError{Err: "no such host", Name: "google.com", IsNotFound: true}), []string{"google.com"}), want: want{ - wantErr: true, - expected: map[string]Result{}, - err: &net.DNSError{Err: "no such host", Name: "google.com", IsNotFound: true}, + wantErr: true, + expected: map[string]Result{ + "google.com": {Hops: []Hop{}}, + }, + err: &net.DNSError{Err: "no such host", Name: "google.com", IsNotFound: true}, }, }, } @@ -68,17 +70,17 @@ func TestCheck(t *testing.T) { } if !cmp.Equal(res, c.want.expected) { diff := cmp.Diff(res, c.want.expected) - t.Errorf("unexpected result: -want +got\n%s", diff) + t.Errorf("unexpected result: +want -got\n%s", diff) } } } -func newForTest(f tracerouteFactory, targets []string) *UDPTraceroute { +func newForTest(f tracerouteFactory, targets []string) *Traceroute { t := make([]Target, len(targets)) for i, target := range targets { t[i] = Target{Addr: target} } - return &UDPTraceroute{ + return &Traceroute{ config: Config{ Targets: t, }, @@ -93,14 +95,13 @@ func newForTest(f tracerouteFactory, targets []string) *UDPTraceroute { // success produces a tracerouteFactory that returns a traceroute result with nHops hops func success(nHops int) tracerouteFactory { - return func(dest string) (traceroute.TracerouteResult, error) { + return func(dest string, port, timeout, retries, maxHops int) (traceroute.TracerouteResult, error) { hops := make([]traceroute.TracerouteHop, nHops) for i := 0; i < nHops-1; i++ { hops[i] = traceroute.TracerouteHop{ Success: false, - Address: ipFromInt(i), N: nHops, - Host: "", + Host: ipFromInt(i), ElapsedTime: time.Duration(i) * time.Second, TTL: i, } @@ -122,7 +123,7 @@ func success(nHops int) tracerouteFactory { } func returnError(err error) tracerouteFactory { - return func(dest string) (traceroute.TracerouteResult, error) { + return func(dest string, port, timeout, retries, maxHops int) (traceroute.TracerouteResult, error) { return traceroute.TracerouteResult{}, err } } @@ -130,24 +131,24 @@ func returnError(err error) tracerouteFactory { // ipFromInt takes in an int and builds an IP address from it // Example: // ipFromInt(300) -> 0.0.1.44 -func ipFromInt(i int) [4]byte { +func ipFromInt(i int) string { b1 := i >> 24 & 0xFF b2 := i >> 16 & 0xFF b3 := i >> 8 & 0xFF b4 := i & 0xFF - return [4]byte{byte(b1), byte(b2), byte(b3), byte(b4)} + return net.IPv4(byte(b1), byte(b2), byte(b3), byte(b4)).String() } func TestIpFromInt(t *testing.T) { type testcase struct { In int - Expected [4]byte + Expected string } cases := []testcase{ - {In: 300, Expected: [4]byte{0, 0, 1, 44}}, - {In: 0, Expected: [4]byte{0, 0, 0, 0}}, - {In: (1 << 33) - 1, Expected: [4]byte{255, 255, 255, 255}}, + {In: 300, Expected: "0.0.1.44"}, + {In: 0, Expected: "0.0.0.0"}, + {In: (1 << 33) - 1, Expected: "255.255.255.255"}, } for _, c := range cases { diff --git a/pkg/factory/factory.go b/pkg/factory/factory.go index 87f9ecf1..434a17f5 100644 --- a/pkg/factory/factory.go +++ b/pkg/factory/factory.go @@ -26,7 +26,7 @@ import ( "github.com/caas-team/sparrow/pkg/checks/health" "github.com/caas-team/sparrow/pkg/checks/latency" "github.com/caas-team/sparrow/pkg/checks/runtime" - "github.com/caas-team/sparrow/pkg/checks/udptraceroute" + "github.com/caas-team/sparrow/pkg/checks/traceroute" ) // newCheck creates a new check instance from the given name @@ -58,8 +58,8 @@ func NewChecksFromConfig(cfg runtime.Config) (map[string]checks.Check, error) { // registry is a convenience map to create new checks var registry = map[string]func() checks.Check{ - health.CheckName: health.NewCheck, - latency.CheckName: latency.NewCheck, - dns.CheckName: dns.NewCheck, - udptraceroute.CheckName: udptraceroute.NewCheck, + health.CheckName: health.NewCheck, + latency.CheckName: latency.NewCheck, + dns.CheckName: dns.NewCheck, + traceroute.CheckName: traceroute.NewCheck, } diff --git a/scripts/debug-elevated.sh b/scripts/debug-elevated.sh new file mode 100644 index 00000000..e3af8168 --- /dev/null +++ b/scripts/debug-elevated.sh @@ -0,0 +1,13 @@ +# Description: Debug the application with elevated privileges +# This is only necessary when debugging issues with the traceroute check, +# as it requires elevated privileges +# to createa a raw socket +# +# Usage: +# 1. Create a config for debugging in .tmp/config.yaml and a .tmp/runtime.yaml +# +# 2. Run the following command from the root of the project +# ./scripts/debug-elevated.sh +# +# 3. Attach to the debugger with launch.json in vscode +go build -gcflags '-N -l' -o .tmp/app ./ && sudo dlv exec .tmp/app -- run --config .tmp/config.yaml From 93135c5d8aabc3d10e5864b89217a8a06513f109 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Wed, 21 Feb 2024 14:57:02 +0100 Subject: [PATCH 05/18] Merge from main --- pkg/checks/traceroute/traceroute.go | 102 ++++++++--------------- pkg/checks/traceroute/traceroute_test.go | 21 +---- 2 files changed, 37 insertions(+), 86 deletions(-) diff --git a/pkg/checks/traceroute/traceroute.go b/pkg/checks/traceroute/traceroute.go index e2c6c98f..a675d6eb 100644 --- a/pkg/checks/traceroute/traceroute.go +++ b/pkg/checks/traceroute/traceroute.go @@ -41,8 +41,8 @@ func NewCheck() checks.Check { config: Config{}, traceroute: newTraceroute, CheckBase: checks.CheckBase{ - Mu: sync.Mutex{}, - Done: make(chan bool), + Mu: sync.Mutex{}, + DoneChan: make(chan struct{}), }, } } @@ -77,33 +77,30 @@ type Hop struct { Success bool } -// Run is called once per check interval -// this should error if there is a problem running the check -// Returns an error and a result. Returning a non nil error will cause a shutdown of the system -func (c *Traceroute) Run(ctx context.Context) error { +func (d *Traceroute) Run(ctx context.Context, cResult chan checks.ResultDTO) error { ctx, cancel := logger.NewContextWithLogger(ctx) defer cancel() log := logger.FromContext(ctx) - log.Info("Starting traceroute check", "interval", c.config.Interval.String()) + log.Info("Starting dns check", "interval", d.config.Interval.String()) for { select { case <-ctx.Done(): log.Error("Context canceled", "err", ctx.Err()) return ctx.Err() - case <-c.Done: + case <-d.DoneChan: return nil - case <-time.After(c.config.Interval): - res, _ := c.check(ctx) - errval := "" - r := checks.Result{ - Data: res, - Err: errval, - Timestamp: time.Now(), + case <-time.After(d.config.Interval): + res := d.check(ctx) + + cResult <- checks.ResultDTO{ + Name: d.Name(), + Result: &checks.Result{ + Data: res, + Timestamp: time.Now(), + }, } - - c.CResult <- r - log.Debug("Successfully finished traceroute check run") + log.Debug("Successfully finished dns check run") } } } @@ -114,7 +111,7 @@ func (c *Traceroute) GetConfig() checks.Runtime { return &c.config } -func (c *Traceroute) check(ctx context.Context) (map[string]Result, error) { +func (c *Traceroute) check(ctx context.Context) map[string]Result { res := make(map[string]Result) log := logger.FromContext(ctx) @@ -123,9 +120,7 @@ func (c *Traceroute) check(ctx context.Context) (map[string]Result, error) { res Result } - var err error var wg sync.WaitGroup - cErr := make(chan error, len(c.config.Targets)) cResult := make(chan internalResult, len(c.config.Targets)) for _, t := range c.config.Targets { @@ -137,7 +132,6 @@ func (c *Traceroute) check(ctx context.Context) (map[string]Result, error) { tr, trerr := c.traceroute(t.Addr, int(t.Port), int(c.config.Timeout/time.Millisecond), c.config.Retries, c.config.MaxHops) duration := time.Since(start) if trerr != nil { - err = trerr log.Error("Error running traceroute", "err", trerr, "target", t.Addr) } @@ -164,7 +158,6 @@ func (c *Traceroute) check(ctx context.Context) (map[string]Result, error) { go func() { wg.Wait() close(cResult) - close(cErr) }() log.Debug("All traceroute checks finished") @@ -175,43 +168,15 @@ func (c *Traceroute) check(ctx context.Context) (map[string]Result, error) { log.Debug("Getting errors from traceroute checks") - for e := range cErr { - err = MultiError{append([]error{}, err, e)} - } - - log.Debug("Finished traceroute checks", "result", res, "err", err) - - return res, err -} - -type MultiError struct { - errors []error -} + log.Debug("Finished traceroute checks", "result", res) -func (m MultiError) Error() string { - result := "[" - if len(m.errors) > 0 { - result = m.errors[0].Error() - } - for _, err := range m.errors { - result = fmt.Sprintf("%s, %s", result, err.Error()) - } - result += "]" - return result -} - -// Startup is called once when the check is registered -// In the Run() method, the check should send results to the cResult channel -// this will cause sparrow to update its data store with the results -func (c *Traceroute) Startup(_ context.Context, cResult chan<- checks.Result) error { - c.Mu.Lock() - defer c.Mu.Unlock() - c.CheckBase.CResult = cResult - return nil + return res } // Shutdown is called once when the check is unregistered or sparrow shuts down func (c *Traceroute) Shutdown(_ context.Context) error { + c.DoneChan <- struct{}{} + close(c.DoneChan) return nil } @@ -219,22 +184,17 @@ func (c *Traceroute) Shutdown(_ context.Context) error { // This is also called while the check is running, if the remote config is updated // This should return an error if the config is invalid func (c *Traceroute) SetConfig(cfg checks.Runtime) error { - newConfig, ok := cfg.(*Config) - if !ok { - return checks.ErrConfigMismatch{ - Expected: CheckName, - Current: cfg.For(), - } - } - if err := validateConfig(*newConfig); err != nil { - return err + if cfg, ok := cfg.(*Config); ok { + c.Mu.Lock() + defer c.Mu.Unlock() + c.config = *cfg + return nil } - c.Mu.Lock() - defer c.Mu.Unlock() - c.config = *newConfig - - return nil + return checks.ErrConfigMismatch{ + Expected: CheckName, + Current: cfg.For(), + } } // Schema returns an openapi3.SchemaRef of the result type returned by the check @@ -251,6 +211,10 @@ func (c *Traceroute) Name() string { return CheckName } +func (c *Config) Validate() error { + return validateConfig(*c) +} + func validateConfig(cfg Config) error { if cfg.Timeout <= 0 { return fmt.Errorf("timeout must be greater than 0") diff --git a/pkg/checks/traceroute/traceroute_test.go b/pkg/checks/traceroute/traceroute_test.go index 41a78437..c87a0934 100644 --- a/pkg/checks/traceroute/traceroute_test.go +++ b/pkg/checks/traceroute/traceroute_test.go @@ -16,8 +16,6 @@ import ( func TestCheck(t *testing.T) { type want struct { expected map[string]Result - wantErr bool - err error } type testcase struct { name string @@ -42,32 +40,22 @@ func TestCheck(t *testing.T) { }, }, }, - wantErr: false, }, }, { - name: "Traceroute internal error", + name: "Traceroute internal error fails silently", c: newForTest(returnError(&net.DNSError{Err: "no such host", Name: "google.com", IsNotFound: true}), []string{"google.com"}), want: want{ - wantErr: true, expected: map[string]Result{ "google.com": {Hops: []Hop{}}, }, - err: &net.DNSError{Err: "no such host", Name: "google.com", IsNotFound: true}, }, }, } for _, c := range cases { - res, err := c.c.check(context.Background()) + res := c.c.check(context.Background()) - if c.want.wantErr { - if err == nil { - t.Errorf("expected error, got nil") - } else if c.want.err.Error() != err.Error() { - t.Errorf("expected: %v, got: %v", c.want.err, err) - } - } if !cmp.Equal(res, c.want.expected) { diff := cmp.Diff(res, c.want.expected) t.Errorf("unexpected result: +want -got\n%s", diff) @@ -86,9 +74,8 @@ func newForTest(f tracerouteFactory, targets []string) *Traceroute { }, traceroute: f, CheckBase: checks.CheckBase{ - Mu: sync.Mutex{}, - CResult: make(chan checks.Result), - Done: make(chan bool), + Mu: sync.Mutex{}, + DoneChan: make(chan struct{}), }, } } From 2f83b130a8eeaae22c6f02a47dade4dc9b5b5df2 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Wed, 21 Feb 2024 14:59:18 +0100 Subject: [PATCH 06/18] refactor: validation logic --- pkg/checks/traceroute/traceroute.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pkg/checks/traceroute/traceroute.go b/pkg/checks/traceroute/traceroute.go index a675d6eb..8ce80eac 100644 --- a/pkg/checks/traceroute/traceroute.go +++ b/pkg/checks/traceroute/traceroute.go @@ -212,17 +212,13 @@ func (c *Traceroute) Name() string { } func (c *Config) Validate() error { - return validateConfig(*c) -} - -func validateConfig(cfg Config) error { - if cfg.Timeout <= 0 { + if c.Timeout <= 0 { return fmt.Errorf("timeout must be greater than 0") } - if cfg.Interval <= 0 { + if c.Interval <= 0 { return fmt.Errorf("interval must be greater than 0") } - for _, t := range cfg.Targets { + for _, t := range c.Targets { if _, err := url.Parse(t.Addr); err != nil { return fmt.Errorf("%s is not a valid url", t.Addr) } From ce328bbacdc1c5903af135a9b4a808dae393ab4d Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Mon, 26 Feb 2024 14:27:10 +0100 Subject: [PATCH 07/18] review: implement changes requested by @lvlcn-t --- pkg/checks/runtime/config.go | 4 +++ pkg/checks/traceroute/traceroute.go | 40 ++++++++++++------------ pkg/checks/traceroute/traceroute_test.go | 6 ++-- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/pkg/checks/runtime/config.go b/pkg/checks/runtime/config.go index e6356cb2..d922441f 100644 --- a/pkg/checks/runtime/config.go +++ b/pkg/checks/runtime/config.go @@ -140,6 +140,10 @@ func (c Config) For(name string) checks.Runtime { if c.HasDNSCheck() { return c.Dns } + case traceroute.CheckName: + if c.HasTracerouteCheck() { + return c.Traceroute + } } return nil } diff --git a/pkg/checks/traceroute/traceroute.go b/pkg/checks/traceroute/traceroute.go index 8ce80eac..2943b386 100644 --- a/pkg/checks/traceroute/traceroute.go +++ b/pkg/checks/traceroute/traceroute.go @@ -3,6 +3,7 @@ package traceroute import ( "context" "fmt" + "net" "net/url" "sync" "time" @@ -14,10 +15,9 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -var ( - _ checks.Check = (*Traceroute)(nil) - CheckName = "traceroute" -) +var _ checks.Check = (*Traceroute)(nil) + +const CheckName = "traceroute" type Config struct { Targets []Target `json:"targets" yaml:"targets" mapstructure:"targets"` @@ -64,7 +64,7 @@ func newTraceroute(dest string, port, timeout, retries, maxHops int) (traceroute return traceroute.Traceroute(dest, opts) } -type Result struct { +type result struct { // The minimum amount of hops required to reach the target NumHops int // The path taken to the destination @@ -81,7 +81,7 @@ func (d *Traceroute) Run(ctx context.Context, cResult chan checks.ResultDTO) err ctx, cancel := logger.NewContextWithLogger(ctx) defer cancel() log := logger.FromContext(ctx) - log.Info("Starting dns check", "interval", d.config.Interval.String()) + log.Info("Starting traceroute check", "interval", d.config.Interval.String()) for { select { @@ -100,7 +100,7 @@ func (d *Traceroute) Run(ctx context.Context, cResult chan checks.ResultDTO) err Timestamp: time.Now(), }, } - log.Debug("Successfully finished dns check run") + log.Debug("Successfully finished traceroute check run") } } } @@ -111,13 +111,13 @@ func (c *Traceroute) GetConfig() checks.Runtime { return &c.config } -func (c *Traceroute) check(ctx context.Context) map[string]Result { - res := make(map[string]Result) +func (c *Traceroute) check(ctx context.Context) map[string]result { + res := make(map[string]result) log := logger.FromContext(ctx) type internalResult struct { addr string - res Result + res result } var wg sync.WaitGroup @@ -137,19 +137,19 @@ func (c *Traceroute) check(ctx context.Context) map[string]Result { log.Debug("Ran traceroute", "result", tr, "duration", duration) - result := Result{ + r := result{ NumHops: len(tr.Hops), Hops: []Hop{}, } for _, h := range tr.Hops { - result.Hops = append(result.Hops, Hop{ + r.Hops = append(r.Hops, Hop{ Addr: h.Host, Latency: h.ElapsedTime, Success: h.Success, }) } - cResult <- internalResult{addr: t.Addr, res: result} + cResult <- internalResult{addr: t.Addr, res: r} }(t) } @@ -166,8 +166,6 @@ func (c *Traceroute) check(ctx context.Context) map[string]Result { res[r.addr] = r.res } - log.Debug("Getting errors from traceroute checks") - log.Debug("Finished traceroute checks", "result", res) return res @@ -199,7 +197,7 @@ func (c *Traceroute) SetConfig(cfg checks.Runtime) error { // Schema returns an openapi3.SchemaRef of the result type returned by the check func (c *Traceroute) Schema() (*openapi3.SchemaRef, error) { - return checks.OpenapiFromPerfData[Result](Result{}) + return checks.OpenapiFromPerfData[map[string]result](map[string]result{}) } // GetMetricCollectors allows the check to provide prometheus metric collectors @@ -213,14 +211,16 @@ func (c *Traceroute) Name() string { func (c *Config) Validate() error { if c.Timeout <= 0 { - return fmt.Errorf("timeout must be greater than 0") + return checks.ErrInvalidConfig{CheckName: CheckName, Field: "traceroute.timeout", Reason: "must be greater than 0"} } if c.Interval <= 0 { - return fmt.Errorf("interval must be greater than 0") + return checks.ErrInvalidConfig{CheckName: CheckName, Field: "traceroute.interval", Reason: "must be greater than 0"} } - for _, t := range c.Targets { + + for i, t := range c.Targets { + net.ParseIP(t.Addr) if _, err := url.Parse(t.Addr); err != nil { - return fmt.Errorf("%s is not a valid url", t.Addr) + return checks.ErrInvalidConfig{CheckName: CheckName, Field: fmt.Sprintf("traceroute.targets[%d].addr", i), Reason: "invalid url"} } } return nil diff --git a/pkg/checks/traceroute/traceroute_test.go b/pkg/checks/traceroute/traceroute_test.go index c87a0934..e9a8108b 100644 --- a/pkg/checks/traceroute/traceroute_test.go +++ b/pkg/checks/traceroute/traceroute_test.go @@ -15,7 +15,7 @@ import ( func TestCheck(t *testing.T) { type want struct { - expected map[string]Result + expected map[string]result } type testcase struct { name string @@ -28,7 +28,7 @@ func TestCheck(t *testing.T) { name: "Success 5 hops", c: newForTest(success(5), []string{"8.8.8.8"}), want: want{ - expected: map[string]Result{ + expected: map[string]result{ "8.8.8.8": { NumHops: 5, Hops: []Hop{ @@ -46,7 +46,7 @@ func TestCheck(t *testing.T) { name: "Traceroute internal error fails silently", c: newForTest(returnError(&net.DNSError{Err: "no such host", Name: "google.com", IsNotFound: true}), []string{"google.com"}), want: want{ - expected: map[string]Result{ + expected: map[string]result{ "google.com": {Hops: []Hop{}}, }, }, From 662a526a0d209750fbd3432e232788aed3192934 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Mon, 26 Feb 2024 15:20:59 +0100 Subject: [PATCH 08/18] review: implement changes requested by @puffitos --- pkg/checks/traceroute/config.go | 39 ++++++++++ pkg/checks/traceroute/traceroute.go | 94 +++++++++--------------- pkg/checks/traceroute/traceroute_test.go | 4 +- 3 files changed, 74 insertions(+), 63 deletions(-) create mode 100644 pkg/checks/traceroute/config.go diff --git a/pkg/checks/traceroute/config.go b/pkg/checks/traceroute/config.go new file mode 100644 index 00000000..5da6ebfb --- /dev/null +++ b/pkg/checks/traceroute/config.go @@ -0,0 +1,39 @@ +package traceroute + +import ( + "fmt" + "net" + "net/url" + "time" + + "github.com/caas-team/sparrow/pkg/checks" +) + +type Config struct { + Targets []Target `json:"targets" yaml:"targets" mapstructure:"targets"` + Retries int `json:"retries" yaml:"retries" mapstructure:"retries"` + MaxHops int `json:"maxHops" yaml:"maxHops" mapstructure:"maxHops"` + Interval time.Duration `json:"interval" yaml:"interval" mapstructure:"interval"` + Timeout time.Duration `json:"timeout" yaml:"timeout" mapstructure:"timeout"` +} + +func (c *Config) For() string { + return CheckName +} + +func (c *Config) Validate() error { + if c.Timeout <= 0 { + return checks.ErrInvalidConfig{CheckName: CheckName, Field: "traceroute.timeout", Reason: "must be greater than 0"} + } + if c.Interval <= 0 { + return checks.ErrInvalidConfig{CheckName: CheckName, Field: "traceroute.interval", Reason: "must be greater than 0"} + } + + for i, t := range c.Targets { + net.ParseIP(t.Addr) + if _, err := url.Parse(t.Addr); err != nil { + return checks.ErrInvalidConfig{CheckName: CheckName, Field: fmt.Sprintf("traceroute.targets[%d].addr", i), Reason: "invalid url"} + } + } + return nil +} diff --git a/pkg/checks/traceroute/traceroute.go b/pkg/checks/traceroute/traceroute.go index 2943b386..6eef3d63 100644 --- a/pkg/checks/traceroute/traceroute.go +++ b/pkg/checks/traceroute/traceroute.go @@ -2,9 +2,6 @@ package traceroute import ( "context" - "fmt" - "net" - "net/url" "sync" "time" @@ -19,23 +16,12 @@ var _ checks.Check = (*Traceroute)(nil) const CheckName = "traceroute" -type Config struct { - Targets []Target `json:"targets" yaml:"targets" mapstructure:"targets"` - Retries int `json:"retries" yaml:"retries" mapstructure:"retries"` - MaxHops int `json:"maxHops" yaml:"maxHops" mapstructure:"maxHops"` - Interval time.Duration `json:"interval" yaml:"interval" mapstructure:"interval"` - Timeout time.Duration `json:"timeout" yaml:"timeout" mapstructure:"timeout"` -} - -func (c Config) For() string { - return CheckName -} - type Target struct { Addr string `json:"addr" yaml:"addr" mapstructure:"addr"` Port uint16 `json:"port" yaml:"port" mapstructure:"port"` } +// Config is the configuration for the traceroute check func NewCheck() checks.Check { return &Traceroute{ config: Config{}, @@ -65,36 +51,37 @@ func newTraceroute(dest string, port, timeout, retries, maxHops int) (traceroute } type result struct { - // The minimum amount of hops required to reach the target + // The minimum number of hops required to reach the target NumHops int // The path taken to the destination - Hops []Hop + Hops []hop } -type Hop struct { +type hop struct { Addr string Latency time.Duration Success bool } -func (d *Traceroute) Run(ctx context.Context, cResult chan checks.ResultDTO) error { +// Run runs the check in a loop sending results to the provided channel +func (tr *Traceroute) Run(ctx context.Context, cResult chan checks.ResultDTO) error { ctx, cancel := logger.NewContextWithLogger(ctx) defer cancel() log := logger.FromContext(ctx) - log.Info("Starting traceroute check", "interval", d.config.Interval.String()) + log.Info("Starting traceroute check", "interval", tr.config.Interval.String()) for { select { case <-ctx.Done(): log.Error("Context canceled", "err", ctx.Err()) return ctx.Err() - case <-d.DoneChan: + case <-tr.DoneChan: return nil - case <-time.After(d.config.Interval): - res := d.check(ctx) + case <-time.After(tr.config.Interval): + res := tr.check(ctx) cResult <- checks.ResultDTO{ - Name: d.Name(), + Name: tr.Name(), Result: &checks.Result{ Data: res, Timestamp: time.Now(), @@ -105,13 +92,14 @@ func (d *Traceroute) Run(ctx context.Context, cResult chan checks.ResultDTO) err } } -func (c *Traceroute) GetConfig() checks.Runtime { - c.Mu.Lock() - defer c.Mu.Unlock() - return &c.config +// GetConfig returns the current configuration of the check +func (tr *Traceroute) GetConfig() checks.Runtime { + tr.Mu.Lock() + defer tr.Mu.Unlock() + return &tr.config } -func (c *Traceroute) check(ctx context.Context) map[string]result { +func (tr *Traceroute) check(ctx context.Context) map[string]result { res := make(map[string]result) log := logger.FromContext(ctx) @@ -121,15 +109,15 @@ func (c *Traceroute) check(ctx context.Context) map[string]result { } var wg sync.WaitGroup - cResult := make(chan internalResult, len(c.config.Targets)) + cResult := make(chan internalResult, len(tr.config.Targets)) - for _, t := range c.config.Targets { + for _, t := range tr.config.Targets { wg.Add(1) go func(t Target) { defer wg.Done() log.Debug("Running traceroute", "target", t.Addr) start := time.Now() - tr, trerr := c.traceroute(t.Addr, int(t.Port), int(c.config.Timeout/time.Millisecond), c.config.Retries, c.config.MaxHops) + tr, trerr := tr.traceroute(t.Addr, int(t.Port), int(tr.config.Timeout/time.Millisecond), tr.config.Retries, tr.config.MaxHops) duration := time.Since(start) if trerr != nil { log.Error("Error running traceroute", "err", trerr, "target", t.Addr) @@ -139,11 +127,11 @@ func (c *Traceroute) check(ctx context.Context) map[string]result { r := result{ NumHops: len(tr.Hops), - Hops: []Hop{}, + Hops: []hop{}, } for _, h := range tr.Hops { - r.Hops = append(r.Hops, Hop{ + r.Hops = append(r.Hops, hop{ Addr: h.Host, Latency: h.ElapsedTime, Success: h.Success, @@ -172,20 +160,20 @@ func (c *Traceroute) check(ctx context.Context) map[string]result { } // Shutdown is called once when the check is unregistered or sparrow shuts down -func (c *Traceroute) Shutdown(_ context.Context) error { - c.DoneChan <- struct{}{} - close(c.DoneChan) +func (tr *Traceroute) Shutdown(_ context.Context) error { + tr.DoneChan <- struct{}{} + close(tr.DoneChan) return nil } // SetConfig is called once when the check is registered // This is also called while the check is running, if the remote config is updated // This should return an error if the config is invalid -func (c *Traceroute) SetConfig(cfg checks.Runtime) error { +func (tr *Traceroute) SetConfig(cfg checks.Runtime) error { if cfg, ok := cfg.(*Config); ok { - c.Mu.Lock() - defer c.Mu.Unlock() - c.config = *cfg + tr.Mu.Lock() + defer tr.Mu.Unlock() + tr.config = *cfg return nil } @@ -196,32 +184,16 @@ func (c *Traceroute) SetConfig(cfg checks.Runtime) error { } // Schema returns an openapi3.SchemaRef of the result type returned by the check -func (c *Traceroute) Schema() (*openapi3.SchemaRef, error) { +func (tr *Traceroute) Schema() (*openapi3.SchemaRef, error) { return checks.OpenapiFromPerfData[map[string]result](map[string]result{}) } // GetMetricCollectors allows the check to provide prometheus metric collectors -func (c *Traceroute) GetMetricCollectors() []prometheus.Collector { +func (tr *Traceroute) GetMetricCollectors() []prometheus.Collector { return []prometheus.Collector{} } -func (c *Traceroute) Name() string { +// Name returns the name of the check +func (tr *Traceroute) Name() string { return CheckName } - -func (c *Config) Validate() error { - if c.Timeout <= 0 { - return checks.ErrInvalidConfig{CheckName: CheckName, Field: "traceroute.timeout", Reason: "must be greater than 0"} - } - if c.Interval <= 0 { - return checks.ErrInvalidConfig{CheckName: CheckName, Field: "traceroute.interval", Reason: "must be greater than 0"} - } - - for i, t := range c.Targets { - net.ParseIP(t.Addr) - if _, err := url.Parse(t.Addr); err != nil { - return checks.ErrInvalidConfig{CheckName: CheckName, Field: fmt.Sprintf("traceroute.targets[%d].addr", i), Reason: "invalid url"} - } - } - return nil -} diff --git a/pkg/checks/traceroute/traceroute_test.go b/pkg/checks/traceroute/traceroute_test.go index e9a8108b..6ce091a3 100644 --- a/pkg/checks/traceroute/traceroute_test.go +++ b/pkg/checks/traceroute/traceroute_test.go @@ -31,7 +31,7 @@ func TestCheck(t *testing.T) { expected: map[string]result{ "8.8.8.8": { NumHops: 5, - Hops: []Hop{ + Hops: []hop{ {Addr: "0.0.0.0", Latency: 0 * time.Second, Success: false}, {Addr: "0.0.0.1", Latency: 1 * time.Second, Success: false}, {Addr: "0.0.0.2", Latency: 2 * time.Second, Success: false}, @@ -47,7 +47,7 @@ func TestCheck(t *testing.T) { c: newForTest(returnError(&net.DNSError{Err: "no such host", Name: "google.com", IsNotFound: true}), []string{"google.com"}), want: want{ expected: map[string]result{ - "google.com": {Hops: []Hop{}}, + "google.com": {Hops: []hop{}}, }, }, }, From 7baa59da77f6e7794276132daa92c89d5fdbf47b Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Wed, 28 Feb 2024 13:56:13 +0100 Subject: [PATCH 09/18] chore: add target to logger --- pkg/checks/traceroute/traceroute.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/checks/traceroute/traceroute.go b/pkg/checks/traceroute/traceroute.go index 6eef3d63..780a431a 100644 --- a/pkg/checks/traceroute/traceroute.go +++ b/pkg/checks/traceroute/traceroute.go @@ -114,16 +114,17 @@ func (tr *Traceroute) check(ctx context.Context) map[string]result { for _, t := range tr.config.Targets { wg.Add(1) go func(t Target) { + l := log.With("target", t.Addr) defer wg.Done() - log.Debug("Running traceroute", "target", t.Addr) + l.Debug("Running traceroute") start := time.Now() tr, trerr := tr.traceroute(t.Addr, int(t.Port), int(tr.config.Timeout/time.Millisecond), tr.config.Retries, tr.config.MaxHops) duration := time.Since(start) if trerr != nil { - log.Error("Error running traceroute", "err", trerr, "target", t.Addr) + l.Error("Error running traceroute", "err", trerr) } - log.Debug("Ran traceroute", "result", tr, "duration", duration) + l.Debug("Ran traceroute", "result", tr, "duration", duration) r := result{ NumHops: len(tr.Hops), From ad3804bb55c5fe0ba5cfa9662ab263a3a816bf30 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Wed, 28 Feb 2024 14:02:35 +0100 Subject: [PATCH 10/18] chore: remove testcase type Signed-off-by: Niklas Treml --- pkg/checks/traceroute/traceroute_test.go | 46 ++++++++++-------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/pkg/checks/traceroute/traceroute_test.go b/pkg/checks/traceroute/traceroute_test.go index 6ce091a3..608e1c78 100644 --- a/pkg/checks/traceroute/traceroute_test.go +++ b/pkg/checks/traceroute/traceroute_test.go @@ -14,30 +14,23 @@ import ( ) func TestCheck(t *testing.T) { - type want struct { - expected map[string]result - } - type testcase struct { + cases := []struct { name string c *Traceroute - want want - } - - cases := []testcase{ + want map[string]result + }{ { name: "Success 5 hops", c: newForTest(success(5), []string{"8.8.8.8"}), - want: want{ - expected: map[string]result{ - "8.8.8.8": { - NumHops: 5, - Hops: []hop{ - {Addr: "0.0.0.0", Latency: 0 * time.Second, Success: false}, - {Addr: "0.0.0.1", Latency: 1 * time.Second, Success: false}, - {Addr: "0.0.0.2", Latency: 2 * time.Second, Success: false}, - {Addr: "0.0.0.3", Latency: 3 * time.Second, Success: false}, - {Addr: "google-public-dns-a.google.com", Latency: 69 * time.Second, Success: true}, - }, + want: map[string]result{ + "8.8.8.8": { + NumHops: 5, + Hops: []hop{ + {Addr: "0.0.0.0", Latency: 0 * time.Second, Success: false}, + {Addr: "0.0.0.1", Latency: 1 * time.Second, Success: false}, + {Addr: "0.0.0.2", Latency: 2 * time.Second, Success: false}, + {Addr: "0.0.0.3", Latency: 3 * time.Second, Success: false}, + {Addr: "google-public-dns-a.google.com", Latency: 69 * time.Second, Success: true}, }, }, }, @@ -45,10 +38,8 @@ func TestCheck(t *testing.T) { { name: "Traceroute internal error fails silently", c: newForTest(returnError(&net.DNSError{Err: "no such host", Name: "google.com", IsNotFound: true}), []string{"google.com"}), - want: want{ - expected: map[string]result{ - "google.com": {Hops: []hop{}}, - }, + want: map[string]result{ + "google.com": {Hops: []hop{}}, }, }, } @@ -56,8 +47,8 @@ func TestCheck(t *testing.T) { for _, c := range cases { res := c.c.check(context.Background()) - if !cmp.Equal(res, c.want.expected) { - diff := cmp.Diff(res, c.want.expected) + if !cmp.Equal(res, c.want) { + diff := cmp.Diff(res, c.want) t.Errorf("unexpected result: +want -got\n%s", diff) } } @@ -128,11 +119,10 @@ func ipFromInt(i int) string { } func TestIpFromInt(t *testing.T) { - type testcase struct { + cases := []struct { In int Expected string - } - cases := []testcase{ + }{ {In: 300, Expected: "0.0.1.44"}, {In: 0, Expected: "0.0.0.0"}, {In: (1 << 33) - 1, Expected: "255.255.255.255"}, From 48d28b795df049647ab443d7e2a335c4422afacd Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Wed, 28 Feb 2024 14:06:18 +0100 Subject: [PATCH 11/18] chore: rename shadowed variable Signed-off-by: Niklas Treml --- pkg/checks/traceroute/traceroute.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/checks/traceroute/traceroute.go b/pkg/checks/traceroute/traceroute.go index 780a431a..f7552e01 100644 --- a/pkg/checks/traceroute/traceroute.go +++ b/pkg/checks/traceroute/traceroute.go @@ -118,20 +118,20 @@ func (tr *Traceroute) check(ctx context.Context) map[string]result { defer wg.Done() l.Debug("Running traceroute") start := time.Now() - tr, trerr := tr.traceroute(t.Addr, int(t.Port), int(tr.config.Timeout/time.Millisecond), tr.config.Retries, tr.config.MaxHops) + trace, err := tr.traceroute(t.Addr, int(t.Port), int(tr.config.Timeout/time.Millisecond), tr.config.Retries, tr.config.MaxHops) duration := time.Since(start) - if trerr != nil { - l.Error("Error running traceroute", "err", trerr) + if err != nil { + l.Error("Error running traceroute", "err", err) } - l.Debug("Ran traceroute", "result", tr, "duration", duration) + l.Debug("Ran traceroute", "result", trace, "duration", duration) r := result{ - NumHops: len(tr.Hops), + NumHops: len(trace.Hops), Hops: []hop{}, } - for _, h := range tr.Hops { + for _, h := range trace.Hops { r.Hops = append(r.Hops, hop{ Addr: h.Host, Latency: h.ElapsedTime, From b6228b04ed4e81f609477723e74f5a993c5eec5c Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Wed, 28 Feb 2024 14:13:02 +0100 Subject: [PATCH 12/18] fix: validate ip correctly Signed-off-by: Niklas Treml --- pkg/checks/traceroute/config.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pkg/checks/traceroute/config.go b/pkg/checks/traceroute/config.go index 5da6ebfb..92e84f0c 100644 --- a/pkg/checks/traceroute/config.go +++ b/pkg/checks/traceroute/config.go @@ -30,9 +30,15 @@ func (c *Config) Validate() error { } for i, t := range c.Targets { - net.ParseIP(t.Addr) - if _, err := url.Parse(t.Addr); err != nil { - return checks.ErrInvalidConfig{CheckName: CheckName, Field: fmt.Sprintf("traceroute.targets[%d].addr", i), Reason: "invalid url"} + ip := net.ParseIP(t.Addr) + + if ip != nil { + continue + } + + _, err := url.Parse(t.Addr) + if err != nil && ip == nil { + return checks.ErrInvalidConfig{CheckName: CheckName, Field: fmt.Sprintf("traceroute.targets[%d].addr", i), Reason: "invalid url or ip"} } } return nil From 87fa5c48ce72ec38d66ce399b2fab4fadc3e3ce9 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Wed, 28 Feb 2024 14:18:47 +0100 Subject: [PATCH 13/18] docs: add docs for config Signed-off-by: Niklas Treml --- pkg/checks/traceroute/config.go | 14 ++++++++++---- pkg/checks/traceroute/traceroute.go | 1 - 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pkg/checks/traceroute/config.go b/pkg/checks/traceroute/config.go index 92e84f0c..f489e10d 100644 --- a/pkg/checks/traceroute/config.go +++ b/pkg/checks/traceroute/config.go @@ -9,12 +9,18 @@ import ( "github.com/caas-team/sparrow/pkg/checks" ) +// Config is the configuration for the traceroute check type Config struct { - Targets []Target `json:"targets" yaml:"targets" mapstructure:"targets"` - Retries int `json:"retries" yaml:"retries" mapstructure:"retries"` - MaxHops int `json:"maxHops" yaml:"maxHops" mapstructure:"maxHops"` + // Targets is a list of targets to traceroute to + Targets []Target `json:"targets" yaml:"targets" mapstructure:"targets"` + // Retries is the number of times to retry the traceroute for a target, if it fails + Retries int `json:"retries" yaml:"retries" mapstructure:"retries"` + // MaxHops is the maximum number of hops to try before giving up + MaxHops int `json:"maxHops" yaml:"maxHops" mapstructure:"maxHops"` + // Interval is the time to wait between check iterations Interval time.Duration `json:"interval" yaml:"interval" mapstructure:"interval"` - Timeout time.Duration `json:"timeout" yaml:"timeout" mapstructure:"timeout"` + // Timeout is the maximum time to wait for a response from a hop + Timeout time.Duration `json:"timeout" yaml:"timeout" mapstructure:"timeout"` } func (c *Config) For() string { diff --git a/pkg/checks/traceroute/traceroute.go b/pkg/checks/traceroute/traceroute.go index f7552e01..f93dfe2e 100644 --- a/pkg/checks/traceroute/traceroute.go +++ b/pkg/checks/traceroute/traceroute.go @@ -21,7 +21,6 @@ type Target struct { Port uint16 `json:"port" yaml:"port" mapstructure:"port"` } -// Config is the configuration for the traceroute check func NewCheck() checks.Check { return &Traceroute{ config: Config{}, From ea8d65f93cd6253654e934041d5f1291679013ca Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Wed, 28 Feb 2024 14:23:38 +0100 Subject: [PATCH 14/18] chore: log error instead of err Signed-off-by: Niklas Treml --- pkg/checks/traceroute/traceroute.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/checks/traceroute/traceroute.go b/pkg/checks/traceroute/traceroute.go index f93dfe2e..519e850d 100644 --- a/pkg/checks/traceroute/traceroute.go +++ b/pkg/checks/traceroute/traceroute.go @@ -72,7 +72,7 @@ func (tr *Traceroute) Run(ctx context.Context, cResult chan checks.ResultDTO) er for { select { case <-ctx.Done(): - log.Error("Context canceled", "err", ctx.Err()) + log.Error("Context canceled", "error", ctx.Err()) return ctx.Err() case <-tr.DoneChan: return nil @@ -120,7 +120,7 @@ func (tr *Traceroute) check(ctx context.Context) map[string]result { trace, err := tr.traceroute(t.Addr, int(t.Port), int(tr.config.Timeout/time.Millisecond), tr.config.Retries, tr.config.MaxHops) duration := time.Since(start) if err != nil { - l.Error("Error running traceroute", "err", err) + l.Error("Error running traceroute", "error", err) } l.Debug("Ran traceroute", "result", trace, "duration", duration) From 709f75f6489433ab55e452bfc86e54e4c169bfe8 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Fri, 1 Mar 2024 09:17:29 +0100 Subject: [PATCH 15/18] docs: update readme to include traceroute Signed-off-by: Niklas Treml --- README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/README.md b/README.md index de79ed44..cb6160fe 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,9 @@ The following checks are available: 3. [DNS check](#check-dns) - `dns`: The `sparrow` is able to perform DNS resolution checks to monitor domain name system performance and reliability. The check has the ability to target specific domains or IPs for monitoring. +4. [Traceroute Check](#check-traceroute) - `traceroute`: The `sparrow` is able to perform traceroute checks to monitor + the network path to a target. The check has the ability to target specific domains or IPs for monitoring. + Each check is designed to provide comprehensive insights into the various aspects of network and service health, ensuring robust monitoring and quick detection of potential issues. @@ -435,6 +438,33 @@ dns: - Description: Histogram of response times for DNS checks - Labelled with `target` +### Check: Traceroute + +| Field | Type | Description | +| ----------------- | ----------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `interval` | `duration` | Interval to perform the Traceroute check. | +| `timeout` | `duration` | Timeout for every hop. | +| `retries` | `integer` | Number of times to retry the traceroute for a target, if it fails. | +| `maxHops` | `integer` | Maximum number of hops to try before giving up. | +| `targets` | `list of objects` | List of targets to traceroute to. | +| `targets[].addr` | `string` | The address of the target to traceroute to. Can be an IP address or DNS name | +| `targets[].port` | `uint16` | The port of the target to traceroute to. Default is 80 | + +#### Example configuration + +```yaml + traceroute: + interval: 5s + timeout: 3s + retries: 3 + maxHops: 8 + targets: + - addr: 8.8.8.8 + port: 53 + - addr: www.google.com + port: 80 +``` + ## API The `sparrow` exposes an API for accessing the results of various checks. Each check registers its own endpoint From 2fb07cf30de0db9e23c17e1a39e8aff42131e351 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Fri, 1 Mar 2024 16:46:20 +0100 Subject: [PATCH 16/18] docs: capabilities Signed-off-by: Niklas Treml --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index cb6160fe..23f495e5 100644 --- a/README.md +++ b/README.md @@ -465,6 +465,14 @@ dns: port: 80 ``` +#### Required Capabilities +To use this check, sparrow needs to be run with the `CAP_NET_RAW` capability or elevated privileges to be able to send raw packets. +Using the `CAP_NET_RAW` capability is recommended over running sparrow as sudo + +```bash +sudo setcap 'cap_net_raw=ep' sparrow +``` + ## API The `sparrow` exposes an API for accessing the results of various checks. Each check registers its own endpoint From fa2570c20b7ca8ab04a29d61d17d219612ddf071 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Fri, 1 Mar 2024 16:48:22 +0100 Subject: [PATCH 17/18] docs: document target --- pkg/checks/traceroute/traceroute.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/checks/traceroute/traceroute.go b/pkg/checks/traceroute/traceroute.go index 519e850d..97f2790d 100644 --- a/pkg/checks/traceroute/traceroute.go +++ b/pkg/checks/traceroute/traceroute.go @@ -17,7 +17,9 @@ var _ checks.Check = (*Traceroute)(nil) const CheckName = "traceroute" type Target struct { + // The address of the target to traceroute to. Can be a DNS name or an IP address Addr string `json:"addr" yaml:"addr" mapstructure:"addr"` + // The port to traceroute to Port uint16 `json:"port" yaml:"port" mapstructure:"port"` } From b5a7e441377d3b27a93aaae6995c15f5994fc7b6 Mon Sep 17 00:00:00 2001 From: Niklas Treml Date: Fri, 1 Mar 2024 16:53:04 +0100 Subject: [PATCH 18/18] chore: remove duplicate debug log Signed-off-by: Niklas Treml --- pkg/checks/traceroute/traceroute.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/pkg/checks/traceroute/traceroute.go b/pkg/checks/traceroute/traceroute.go index 97f2790d..f2543e7c 100644 --- a/pkg/checks/traceroute/traceroute.go +++ b/pkg/checks/traceroute/traceroute.go @@ -156,8 +156,6 @@ func (tr *Traceroute) check(ctx context.Context) map[string]result { res[r.addr] = r.res } - log.Debug("Finished traceroute checks", "result", res) - return res }