From dda5658a1cdbbf7af30f855fe535e3d2dc492fb3 Mon Sep 17 00:00:00 2001 From: David Ashpole Date: Fri, 10 Jan 2025 19:50:46 +0000 Subject: [PATCH] add OTEL_GO_X_SELF_OBSERVABILITY feature gate, and otel.sdk.span.processor.queue_size metric --- CHANGELOG.md | 2 ++ sdk/go.mod | 2 +- sdk/internal/x/README.md | 7 +++++ sdk/internal/x/x.go | 13 +++++++++ sdk/trace/batch_span_processor.go | 47 +++++++++++++++++++++++++++++-- sdk/trace/provider.go | 1 + 6 files changed, 68 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7094c5248cb..9853060747e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Added +- Add `OTEL_GO_X_SELF_OBSERVABILITY` environment variable to control whether self-observability metrics and traces are produced by SDKs. +- Add experimental `otel.sdk.span.processor.queue_size` metric to the trace batch span processor. - Add `ValueFromAttribute` and `KeyValueFromAttribute` in `go.opentelemetry.io/otel/log`. (#6180) - Add `EventName` and `SetEventName` to `Record` in `go.opentelemetry.io/otel/log`. (#6187) - Add `EventName` to `RecordFactory` in `go.opentelemetry.io/otel/log/logtest`.
(#6187) diff --git a/sdk/go.mod b/sdk/go.mod index e8ea4634198..e253a59bf88 100644 --- a/sdk/go.mod +++ b/sdk/go.mod @@ -10,6 +10,7 @@ require ( github.com/google/uuid v1.6.0 github.com/stretchr/testify v1.10.0 go.opentelemetry.io/otel v1.34.0 + go.opentelemetry.io/otel/metric v1.34.0 go.opentelemetry.io/otel/trace v1.34.0 golang.org/x/sys v0.29.0 ) @@ -19,7 +20,6 @@ require ( github.com/go-logr/stdr v1.2.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect - go.opentelemetry.io/otel/metric v1.34.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/sdk/internal/x/README.md b/sdk/internal/x/README.md index fab61647c2d..fcc06312d28 100644 --- a/sdk/internal/x/README.md +++ b/sdk/internal/x/README.md @@ -22,6 +22,13 @@ All other values are ignored. [OpenTelemetry resource semantic conventions]: https://opentelemetry.io/docs/specs/semconv/resource/ [resource detectors]: https://pkg.go.dev/go.opentelemetry.io/otel/sdk/resource#Detector +### SDK Self-Observability + +To enable experimental metric and trace instrumentation in SDKs, set the `OTEL_GO_X_SELF_OBSERVABILITY` environment variable. +If enabled, this instrumentation uses the global `TracerProvider` and `MeterProvider`. +The value set must be the case-insensitive string of `"true"` to enable the feature. +All other values are ignored. + #### Examples Enable experimental resource semantic conventions. diff --git a/sdk/internal/x/x.go b/sdk/internal/x/x.go index 68d296cbed3..dd37df244d1 100644 --- a/sdk/internal/x/x.go +++ b/sdk/internal/x/x.go @@ -25,6 +25,19 @@ var Resource = newFeature("RESOURCE", func(v string) (string, bool) { return "", false }) +// SelfObservability is an experimental feature flag that determines if SDK +// self-observability metrics are enabled. +// +// To enable this feature set the OTEL_GO_X_SELF_OBSERVABILITY environment variable +// to the case-insensitive string value of "true" (i.e.
"True" and "TRUE" +// will also enable this). +var SelfObservability = newFeature("SELF_OBSERVABILITY", func(v string) (string, bool) { + if strings.ToLower(v) == "true" { + return v, true + } + return "", false +}) + // Feature is an experimental feature control flag. It provides a uniform way // to interact with these feature flags and parse their values. type Feature[T any] struct { diff --git a/sdk/trace/batch_span_processor.go b/sdk/trace/batch_span_processor.go index ccc97e1b662..69b60dbaecc 100644 --- a/sdk/trace/batch_span_processor.go +++ b/sdk/trace/batch_span_processor.go @@ -5,13 +5,19 @@ package trace // import "go.opentelemetry.io/otel/sdk/trace" import ( "context" + "errors" + "fmt" "sync" "sync/atomic" "time" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/internal/global" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/noop" "go.opentelemetry.io/otel/sdk/internal/env" + "go.opentelemetry.io/otel/sdk/internal/x" "go.opentelemetry.io/otel/trace" ) @@ -63,8 +69,9 @@ type batchSpanProcessor struct { e SpanExporter o BatchSpanProcessorOptions - queue chan ReadOnlySpan - dropped uint32 + queue chan ReadOnlySpan + dropped uint32 + callbackRegistration metric.Registration batch []ReadOnlySpan batchMutex sync.Mutex @@ -111,6 +118,8 @@ func NewBatchSpanProcessor(exporter SpanExporter, options ...BatchSpanProcessorO stopCh: make(chan struct{}), } + bsp.configureSelfObservability() + bsp.stopWait.Add(1) go func() { defer bsp.stopWait.Done() @@ -121,6 +130,38 @@ func NewBatchSpanProcessor(exporter SpanExporter, options ...BatchSpanProcessorO return bsp } +// configureSelfObservability registers the experimental span queue size metric. +// callbackRegistration is always left non-nil so that Shutdown can call Unregister. +func (bsp *batchSpanProcessor) configureSelfObservability() { + bsp.callbackRegistration = noop.Registration{} + mp := otel.GetMeterProvider() + if !x.SelfObservability.Enabled() { + mp = metric.MeterProvider(noop.NewMeterProvider()) + } + meter := mp.Meter(selfObsScopeName, metric.WithInstrumentationVersion(version())) + queueSizeCounter, err :=
meter.Int64ObservableUpDownCounter("otel.sdk.span.processor.queue_size", + metric.WithUnit("{span}"), + metric.WithDescription("The number of spans in the queue of a given instance of an SDK span processor."), + ) + if err != nil { + otel.Handle(err) + return + } + + attrsOpt := metric.WithAttributes( + attribute.String("otel.sdk.component.name", fmt.Sprintf("batching_span_processor/%p", bsp)), + ) + reg, err := meter.RegisterCallback(func(ctx context.Context, o metric.Observer) error { + o.ObserveInt64(queueSizeCounter, int64(len(bsp.queue)), attrsOpt) + return nil + }, queueSizeCounter) + if err != nil { + otel.Handle(err) + return + } + bsp.callbackRegistration = reg +} + // OnStart method does nothing. func (bsp *batchSpanProcessor) OnStart(parent context.Context, s ReadWriteSpan) {} @@ -162,7 +203,7 @@ func (bsp *batchSpanProcessor) Shutdown(ctx context.Context) error { err = ctx.Err() } }) - return err + return errors.Join(err, bsp.callbackRegistration.Unregister()) } type forceFlushSpan struct { diff --git a/sdk/trace/provider.go b/sdk/trace/provider.go index 185aa7c08f7..405856ce791 100644 --- a/sdk/trace/provider.go +++ b/sdk/trace/provider.go @@ -20,6 +20,7 @@ import ( const ( defaultTracerName = "go.opentelemetry.io/otel/sdk/tracer" + selfObsScopeName = "go.opentelemetry.io/otel/sdk/trace" ) // tracerProviderConfig.