From 94950735bddecfa507fe183761445b37d2aab94d Mon Sep 17 00:00:00 2001 From: Adin Schmahmann Date: Fri, 24 May 2024 18:43:11 -0400 Subject: [PATCH] feat: configurable probabilistic tracing of gateway requests --- docs/environment-variables.md | 16 ++++++--- go.mod | 18 +++++++++- go.sum | 2 -- main.go | 8 ++++- tracing.go | 65 ++++++++++++++++++++++++----------- 5 files changed, 79 insertions(+), 30 deletions(-) diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 39fc50f..1d7a2fe 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -299,11 +299,12 @@ specifications. Configuration environment variables are specified in the [OpenTelemetry Environment Variable Specification] where possible. The [Boxo Tracing] documentation is the basis for tracing here. -Two major distinctions from the more general tracing enabled in boxo are: -1. When tracing is enabled it is restricted to flows through HTTP Gateway requests, -rather than also included background processes -2. Requests are only traced when there is a `Traceparent` header passed that is valid -According to the [Trace Context] specification +A major distinction from the more general tracing enabled in boxo is that when +tracing is enabled it is restricted to flows through HTTP Gateway requests, rather +than also including background processes. + +Note: requests are also traced when there is a `Traceparent` header passed that is valid +according to the [Trace Context] specification, even if the sampling fraction is set to 0. ### `RAINBOW_TRACING_AUTH` @@ -311,6 +312,11 @@ The ability to pass `Traceparent` or `Tracestate` headers is guarded by an `Authorization` header. The value of the `Authorization` header should match the value in the `RAINBOW_TRACING_AUTH` environment variable. +### `RAINBOW_SAMPLING_FRACTION` + +The fraction (between 0 and 1) of requests that should be sampled. +This is calculated independently of any `Traceparent`-based sampling. 
+ [Boxo Tracing]: https://github.com/ipfs/boxo/blob/main/docs/tracing.md [Open Telemetry]: https://opentelemetry.io/ [OpenTelemetry Environment Variable Specification]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/sdk-environment-variables.md diff --git a/go.mod b/go.mod index 48087d7..03731e3 100644 --- a/go.mod +++ b/go.mod @@ -71,7 +71,7 @@ require ( github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect - github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect + github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/gddo v0.0.0-20210115222349-20d68f94ee1f // indirect @@ -146,6 +146,22 @@ require ( github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/openzipkin/zipkin-go v0.4.3 // indirect github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect + github.com/pion/datachannel v1.5.6 // indirect + github.com/pion/dtls/v2 v2.2.11 // indirect + github.com/pion/ice/v2 v2.3.24 // indirect + github.com/pion/interceptor v0.1.29 // indirect + github.com/pion/logging v0.2.2 // indirect + github.com/pion/mdns v0.0.12 // indirect + github.com/pion/randutil v0.1.0 // indirect + github.com/pion/rtcp v1.2.14 // indirect + github.com/pion/rtp v1.8.6 // indirect + github.com/pion/sctp v1.8.16 // indirect + github.com/pion/sdp/v3 v3.0.9 // indirect + github.com/pion/srtp/v2 v2.0.18 // indirect + github.com/pion/stun v0.6.1 // indirect + github.com/pion/transport/v2 v2.2.5 // indirect + github.com/pion/turn/v2 v2.1.6 // indirect + github.com/pion/webrtc/v3 v3.2.40 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/polydawn/refmt v0.89.0 // indirect diff --git a/go.sum b/go.sum index 97b87ce..a059dc1 100644 --- a/go.sum +++ b/go.sum @@ -127,7 +127,6 @@ 
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiU github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= github.com/go-stack/stack v1.6.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0= @@ -668,7 +667,6 @@ github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= -github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= diff --git a/main.go b/main.go index 003cbb8..84f3d2e 100644 --- a/main.go +++ b/main.go @@ -303,6 +303,12 @@ Generate an identity seed and launch a gateway: EnvVars: []string{"RAINBOW_TRACING_AUTH"}, Usage: "If set the key gates use of the Traceparent header by requiring the key to be passed in the Authorization header", }, + &cli.Float64Flag{ + Name: "sampling-fraction", + Value: 0, + EnvVars: []string{"RAINBOW_SAMPLING_FRACTION"}, + Usage: "Rate at which to sample gateway requests. 
Does not include traceheaders which will always sample", + }, } app.Commands = []*cli.Command{ @@ -487,7 +493,7 @@ share the same seed as long as the indexes are different. registerVersionMetric(version) registerIpfsNodeCollector(gnd) - tp, shutdown, err := newTracerProvider(cctx.Context) + tp, shutdown, err := newTracerProvider(cctx.Context, cctx.Float64("sampling-fraction")) if err != nil { return err } diff --git a/tracing.go b/tracing.go index 14499cf..106e1f6 100644 --- a/tracing.go +++ b/tracing.go @@ -2,7 +2,6 @@ package main import ( "context" - "fmt" "github.com/ipfs/boxo/tracing" "go.opentelemetry.io/otel/sdk/resource" "go.opentelemetry.io/otel/sdk/trace" @@ -12,7 +11,7 @@ import ( "strings" ) -func newTracerProvider(ctx context.Context) (traceapi.TracerProvider, func(context.Context) error, error) { +func newTracerProvider(ctx context.Context, traceFraction float64) (traceapi.TracerProvider, func(context.Context) error, error) { exporters, err := tracing.NewSpanExporters(ctx) if err != nil { return nil, nil, err @@ -39,33 +38,57 @@ func newTracerProvider(ctx context.Context) (traceapi.TracerProvider, func(conte return nil, nil, err } - options = append(options, trace.WithResource(r), trace.WithSampler(RootPrefixSampler{RootPrefix: "Gateway", Next: trace.ParentBased(trace.NeverSample())})) + var baseSampler trace.Sampler + if traceFraction == 0 { + baseSampler = trace.NeverSample() + } else { + baseSampler = trace.TraceIDRatioBased(traceFraction) + } + + // Sample all children whose parents are sampled + // Probabilistically sample if the span is a root which is a Gateway request + var sampler trace.Sampler + sampler = trace.ParentBased( + CascadingSamplerFunc(func(parameters trace.SamplingParameters) bool { + return !traceapi.SpanContextFromContext(parameters.ParentContext).IsValid() + }, "root sampler", + CascadingSamplerFunc(func(parameters trace.SamplingParameters) bool { + return strings.HasPrefix(parameters.Name, "Gateway") + }, "gateway request 
sampler", + baseSampler))) + options = append(options, trace.WithResource(r), trace.WithSampler(sampler)) tp := trace.NewTracerProvider(options...) return tp, tp.Shutdown, nil } -type RootPrefixSampler struct { - Next trace.Sampler - RootPrefix string +type funcSampler struct { + next trace.Sampler + fn func(trace.SamplingParameters) trace.SamplingResult + description string } -var _ trace.Sampler = (*RootPrefixSampler)(nil) - -func (s RootPrefixSampler) ShouldSample(parameters trace.SamplingParameters) trace.SamplingResult { - parentSpan := traceapi.SpanContextFromContext(parameters.ParentContext) - if !parentSpan.IsValid() && strings.HasPrefix(parameters.Name, s.RootPrefix) { - res := s.Next.ShouldSample(parameters) - return trace.SamplingResult{ - Decision: res.Decision, - Attributes: res.Attributes, - Tracestate: res.Tracestate, - } - } +func (f funcSampler) ShouldSample(parameters trace.SamplingParameters) trace.SamplingResult { + return f.fn(parameters) +} - return s.Next.ShouldSample(parameters) +func (f funcSampler) Description() string { + return f.description } -func (s RootPrefixSampler) Description() string { - return fmt.Sprintf("root prefix sampler: %s", s.RootPrefix) +// CascadingSamplerFunc will sample with the next tracer if the condition is met, otherwise the sample will be dropped +func CascadingSamplerFunc(shouldSample func(parameters trace.SamplingParameters) bool, description string, next trace.Sampler) trace.Sampler { + return funcSampler{ + next: next, + fn: func(parameters trace.SamplingParameters) trace.SamplingResult { + if shouldSample(parameters) { + return next.ShouldSample(parameters) + } + return trace.SamplingResult{ + Decision: trace.Drop, + Tracestate: traceapi.SpanContextFromContext(parameters.ParentContext).TraceState(), + } + }, + description: description, + } }