From 3c6811dda06cd226880a25c74a601b023f073b35 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Tue, 9 Jan 2024 11:10:27 -0500 Subject: [PATCH] fix(spans): Handle strings with escaped characters (#2926) --- CHANGELOG.md | 3 +- relay-server/src/actors/store.rs | 3 +- tests/integration/test_store.py | 100 ++++++++++++++++++++++++++----- 3 files changed, 88 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e55dbe094..470a860036 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,7 @@ - Add `duration` metric for mobile app start spans. ([#2906](https://github.com/getsentry/relay/pull/2906)) - Introduce the configuration option `http.global_metrics`. When enabled, Relay submits metric buckets not through regular project-scoped Envelopes, but instead through the global endpoint. When this Relay serves a high number of projects, this can reduce the overall request volume. ([#2902](https://github.com/getsentry/relay/pull/2902)) - Record the size of global metrics requests in statsd as `upstream.metrics.body_size`. ([#2908](https://github.com/getsentry/relay/pull/2908)) -- Make Kafka spans compatible with the Snuba span schema. ([#2917](https://github.com/getsentry/relay/pull/2917)) -- Only extract span metrics / tags when they are needed. ([#2907](https://github.com/getsentry/relay/pull/2907)) +- Make Kafka spans compatible with the Snuba span schema. ([#2917](https://github.com/getsentry/relay/pull/2917), [#2926](https://github.com/getsentry/relay/pull/2926)) - Only extract span metrics / tags when they are needed. ([#2907](https://github.com/getsentry/relay/pull/2907), [#2923](https://github.com/getsentry/relay/pull/2923)) - Normalize metric resource identifiers in `event._metrics_summary` and `span._metrics_summary`. ([#2914](https://github.com/getsentry/relay/pull/2914)) diff --git a/relay-server/src/actors/store.rs b/relay-server/src/actors/store.rs index 2ead7320ee..384ae7e72c 100644 --- a/relay-server/src/actors/store.rs +++ b/relay-server/src/actors/store.rs @@ -1138,7 +1138,8 @@ struct SpanKafkaMessage<'a> { #[serde(rename(deserialize = "timestamp"), skip_serializing)] end_timestamp: f64, - description: &'a str, + #[serde(default, skip_serializing_if = "Option::is_none")] + description: Option<&'a RawValue>, #[serde(default)] duration_ms: u32, /// The ID of the transaction event associated to this span, if any. diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index dd806306a5..c49a563ef4 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -1449,7 +1449,7 @@ def test_span_ingestion( bytes=json.dumps( { "traceId": "89143b0763095bd9c9955e8175d1fb23", - "spanId": "e342abb1214ca181", + "spanId": "a342abb1214ca181", "name": "my 1st OTel span", "startTimeUnixNano": int(start.timestamp() * 1e9), "endTimeUnixNano": int(end.timestamp() * 1e9), @@ -1491,6 +1491,43 @@ def test_span_ingestion( ), ) ) + envelope.add_item( + Item( + type="span", + payload=PayloadRef( + bytes=json.dumps( + { + "description": r"test \" with \" escaped \" chars", + "op": "default", + "span_id": "cd429c44b67a3eb1", + "segment_id": "968cff94913ebb07", + "start_timestamp": start.timestamp(), + "timestamp": end.timestamp() + 1, + "exclusive_time": 345.0, # The SDK knows that this span has a lower exclusive time + "trace_id": "ff62a8b040f340bda5d830223def1d81", + }, + ).encode() + ), + ) + ) + envelope.add_item( + Item( + type="span", + payload=PayloadRef( + bytes=json.dumps( + { + "op": "default", + "span_id": "ed429c44b67a3eb1", + "segment_id": "968cff94913ebb07", + "start_timestamp": start.timestamp(), + "timestamp": end.timestamp() + 1, + "exclusive_time": 345.0, # The SDK knows that this span has a lower exclusive time + "trace_id": "ff62a8b040f340bda5d830223def1d81", + }, + ).encode() + ), + ) + ) relay.send_envelope(project_id, envelope) # 2 - Send OTel span via endpoint @@ -1504,7 +1541,7 @@ def test_span_ingestion( "spans": [ { "traceId": "89143b0763095bd9c9955e8175d1fb24", - "spanId": "e342abb1214ca182", + "spanId": "d342abb1214ca182", "name": "my 2nd OTel span", "startTimeUnixNano": int(start.timestamp() * 1e9), "endTimeUnixNano": int(end.timestamp() * 1e9), @@ -1531,11 +1568,23 @@ def test_span_ingestion( for span in spans: span.pop("received", None) - spans.sort( - key=lambda msg: msg.get("description", "") - ) # endpoint might overtake envelope + spans.sort(key=lambda msg: msg["span_id"]) # endpoint might overtake envelope assert spans == [ + { + "description": "my 1st OTel span", + "duration_ms": 500, + "exclusive_time_ms": 500.0, + "is_segment": True, + "parent_span_id": "", + "project_id": 42, + "retention_days": 90, + "segment_id": "a342abb1214ca181", + "sentry_tags": {"category": "db", "op": "db.query"}, + "span_id": "a342abb1214ca181", + "start_timestamp_ms": int(start.timestamp() * 1e3), + "trace_id": "89143b0763095bd9c9955e8175d1fb23", + }, { "description": "https://example.com/p/blah.js", "duration_ms": 1500, @@ -1557,18 +1606,17 @@ def test_span_ingestion( "trace_id": "ff62a8b040f340bda5d830223def1d81", }, { - "description": "my 1st OTel span", - "duration_ms": 500, - "exclusive_time_ms": 500.0, + "description": r"test \" with \" escaped \" chars", + "duration_ms": 1500, + "exclusive_time_ms": 345.0, "is_segment": True, - "parent_span_id": "", "project_id": 42, "retention_days": 90, - "segment_id": "e342abb1214ca181", - "sentry_tags": {"category": "db", "op": "db.query"}, - "span_id": "e342abb1214ca181", + "segment_id": "cd429c44b67a3eb1", + "sentry_tags": {"op": "default"}, + "span_id": "cd429c44b67a3eb1", "start_timestamp_ms": int(start.timestamp() * 1e3), - "trace_id": "89143b0763095bd9c9955e8175d1fb23", + "trace_id": "ff62a8b040f340bda5d830223def1d81", }, { "description": "my 2nd OTel span", @@ -1578,12 +1626,24 @@ def test_span_ingestion( "parent_span_id": "", "project_id": 42, "retention_days": 90, - "segment_id": "e342abb1214ca182", + "segment_id": "d342abb1214ca182", "sentry_tags": {"op": "default"}, - "span_id": "e342abb1214ca182", + "span_id": "d342abb1214ca182", "start_timestamp_ms": int(start.timestamp() * 1e3), "trace_id": "89143b0763095bd9c9955e8175d1fb24", }, + { + "duration_ms": 1500, + "exclusive_time_ms": 345.0, + "is_segment": True, + "project_id": 42, + "retention_days": 90, + "segment_id": "ed429c44b67a3eb1", + "sentry_tags": {"op": "default"}, + "span_id": "ed429c44b67a3eb1", + "start_timestamp_ms": int(start.timestamp() * 1e3), + "trace_id": "ff62a8b040f340bda5d830223def1d81", + }, ] metrics = [metric for (metric, _headers) in metrics_consumer.get_metrics()] @@ -1627,6 +1687,16 @@ def test_span_ingestion( "tags": {"span.op": "default"}, "retention_days": 90, }, + { + "org_id": 1, + "project_id": 42, + "name": "c:spans/count_per_op@none", + "type": "c", + "value": 2.0, + "timestamp": expected_timestamp + 1, + "tags": {"span.op": "default"}, + "retention_days": 90, + }, { "org_id": 1, "project_id": 42,