From 3c6811dda06cd226880a25c74a601b023f073b35 Mon Sep 17 00:00:00 2001
From: Pierre Massat <pierre.massat@sentry.io>
Date: Tue, 9 Jan 2024 11:10:27 -0500
Subject: [PATCH] fix(spans): Handle strings with escaped characters (#2926)

---
 CHANGELOG.md                     |   3 +-
 relay-server/src/actors/store.rs |   3 +-
 tests/integration/test_store.py  | 100 ++++++++++++++++++++++++++-----
 3 files changed, 88 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7e55dbe094..470a860036 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,8 +12,7 @@
 - Add `duration` metric for mobile app start spans. ([#2906](https://github.com/getsentry/relay/pull/2906))
 - Introduce the configuration option `http.global_metrics`. When enabled, Relay submits metric buckets not through regular project-scoped Envelopes, but instead through the global endpoint. When this Relay serves a high number of projects, this can reduce the overall request volume. ([#2902](https://github.com/getsentry/relay/pull/2902))
 - Record the size of global metrics requests in statsd as `upstream.metrics.body_size`. ([#2908](https://github.com/getsentry/relay/pull/2908))
-- Make Kafka spans compatible with the Snuba span schema. ([#2917](https://github.com/getsentry/relay/pull/2917))
-- Only extract span metrics / tags when they are needed. ([#2907](https://github.com/getsentry/relay/pull/2907))
+- Make Kafka spans compatible with the Snuba span schema. ([#2917](https://github.com/getsentry/relay/pull/2917), [#2926](https://github.com/getsentry/relay/pull/2926))
 - Only extract span metrics / tags when they are needed. ([#2907](https://github.com/getsentry/relay/pull/2907), [#2923](https://github.com/getsentry/relay/pull/2923))
 - Normalize metric resource identifiers in `event._metrics_summary` and `span._metrics_summary`. ([#2914](https://github.com/getsentry/relay/pull/2914))
 
diff --git a/relay-server/src/actors/store.rs b/relay-server/src/actors/store.rs
index 2ead7320ee..384ae7e72c 100644
--- a/relay-server/src/actors/store.rs
+++ b/relay-server/src/actors/store.rs
@@ -1138,7 +1138,8 @@ struct SpanKafkaMessage<'a> {
     #[serde(rename(deserialize = "timestamp"), skip_serializing)]
     end_timestamp: f64,
 
-    description: &'a str,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    description: Option<&'a RawValue>,
     #[serde(default)]
     duration_ms: u32,
     /// The ID of the transaction event associated to this span, if any.
diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py
index dd806306a5..c49a563ef4 100644
--- a/tests/integration/test_store.py
+++ b/tests/integration/test_store.py
@@ -1449,7 +1449,7 @@ def test_span_ingestion(
                 bytes=json.dumps(
                     {
                         "traceId": "89143b0763095bd9c9955e8175d1fb23",
-                        "spanId": "e342abb1214ca181",
+                        "spanId": "a342abb1214ca181",
                         "name": "my 1st OTel span",
                         "startTimeUnixNano": int(start.timestamp() * 1e9),
                         "endTimeUnixNano": int(end.timestamp() * 1e9),
@@ -1491,6 +1491,43 @@ def test_span_ingestion(
             ),
         )
     )
+    envelope.add_item(
+        Item(
+            type="span",
+            payload=PayloadRef(
+                bytes=json.dumps(
+                    {
+                        "description": r"test \" with \" escaped \" chars",
+                        "op": "default",
+                        "span_id": "cd429c44b67a3eb1",
+                        "segment_id": "968cff94913ebb07",
+                        "start_timestamp": start.timestamp(),
+                        "timestamp": end.timestamp() + 1,
+                        "exclusive_time": 345.0,  # The SDK knows that this span has a lower exclusive time
+                        "trace_id": "ff62a8b040f340bda5d830223def1d81",
+                    },
+                ).encode()
+            ),
+        )
+    )
+    envelope.add_item(
+        Item(
+            type="span",
+            payload=PayloadRef(
+                bytes=json.dumps(
+                    {
+                        "op": "default",
+                        "span_id": "ed429c44b67a3eb1",
+                        "segment_id": "968cff94913ebb07",
+                        "start_timestamp": start.timestamp(),
+                        "timestamp": end.timestamp() + 1,
+                        "exclusive_time": 345.0,  # The SDK knows that this span has a lower exclusive time
+                        "trace_id": "ff62a8b040f340bda5d830223def1d81",
+                    },
+                ).encode()
+            ),
+        )
+    )
     relay.send_envelope(project_id, envelope)
 
     # 2 - Send OTel span via endpoint
@@ -1504,7 +1541,7 @@ def test_span_ingestion(
                             "spans": [
                                 {
                                     "traceId": "89143b0763095bd9c9955e8175d1fb24",
-                                    "spanId": "e342abb1214ca182",
+                                    "spanId": "d342abb1214ca182",
                                     "name": "my 2nd OTel span",
                                     "startTimeUnixNano": int(start.timestamp() * 1e9),
                                     "endTimeUnixNano": int(end.timestamp() * 1e9),
@@ -1531,11 +1568,23 @@ def test_span_ingestion(
     for span in spans:
         span.pop("received", None)
 
-    spans.sort(
-        key=lambda msg: msg.get("description", "")
-    )  # endpoint might overtake envelope
+    spans.sort(key=lambda msg: msg["span_id"])  # endpoint might overtake envelope
 
     assert spans == [
+        {
+            "description": "my 1st OTel span",
+            "duration_ms": 500,
+            "exclusive_time_ms": 500.0,
+            "is_segment": True,
+            "parent_span_id": "",
+            "project_id": 42,
+            "retention_days": 90,
+            "segment_id": "a342abb1214ca181",
+            "sentry_tags": {"category": "db", "op": "db.query"},
+            "span_id": "a342abb1214ca181",
+            "start_timestamp_ms": int(start.timestamp() * 1e3),
+            "trace_id": "89143b0763095bd9c9955e8175d1fb23",
+        },
         {
             "description": "https://example.com/p/blah.js",
             "duration_ms": 1500,
@@ -1557,18 +1606,17 @@ def test_span_ingestion(
             "trace_id": "ff62a8b040f340bda5d830223def1d81",
         },
         {
-            "description": "my 1st OTel span",
-            "duration_ms": 500,
-            "exclusive_time_ms": 500.0,
+            "description": r"test \" with \" escaped \" chars",
+            "duration_ms": 1500,
+            "exclusive_time_ms": 345.0,
             "is_segment": True,
-            "parent_span_id": "",
             "project_id": 42,
             "retention_days": 90,
-            "segment_id": "e342abb1214ca181",
-            "sentry_tags": {"category": "db", "op": "db.query"},
-            "span_id": "e342abb1214ca181",
+            "segment_id": "cd429c44b67a3eb1",
+            "sentry_tags": {"op": "default"},
+            "span_id": "cd429c44b67a3eb1",
             "start_timestamp_ms": int(start.timestamp() * 1e3),
-            "trace_id": "89143b0763095bd9c9955e8175d1fb23",
+            "trace_id": "ff62a8b040f340bda5d830223def1d81",
         },
         {
             "description": "my 2nd OTel span",
@@ -1578,12 +1626,24 @@ def test_span_ingestion(
             "parent_span_id": "",
             "project_id": 42,
             "retention_days": 90,
-            "segment_id": "e342abb1214ca182",
+            "segment_id": "d342abb1214ca182",
             "sentry_tags": {"op": "default"},
-            "span_id": "e342abb1214ca182",
+            "span_id": "d342abb1214ca182",
             "start_timestamp_ms": int(start.timestamp() * 1e3),
             "trace_id": "89143b0763095bd9c9955e8175d1fb24",
         },
+        {
+            "duration_ms": 1500,
+            "exclusive_time_ms": 345.0,
+            "is_segment": True,
+            "project_id": 42,
+            "retention_days": 90,
+            "segment_id": "ed429c44b67a3eb1",
+            "sentry_tags": {"op": "default"},
+            "span_id": "ed429c44b67a3eb1",
+            "start_timestamp_ms": int(start.timestamp() * 1e3),
+            "trace_id": "ff62a8b040f340bda5d830223def1d81",
+        },
     ]
 
     metrics = [metric for (metric, _headers) in metrics_consumer.get_metrics()]
@@ -1627,6 +1687,16 @@ def test_span_ingestion(
             "tags": {"span.op": "default"},
             "retention_days": 90,
         },
+        {
+            "org_id": 1,
+            "project_id": 42,
+            "name": "c:spans/count_per_op@none",
+            "type": "c",
+            "value": 2.0,
+            "timestamp": expected_timestamp + 1,
+            "tags": {"span.op": "default"},
+            "retention_days": 90,
+        },
         {
             "org_id": 1,
             "project_id": 42,