From 80f0b3e52ec7bf1221dab6d7caf47d593069b799 Mon Sep 17 00:00:00 2001
From: Georgiy Tarasov
Date: Fri, 17 Jan 2025 22:04:37 +0100
Subject: [PATCH] fix(llm-observability): capture system prompt for anthropic
 (#167)

Co-authored-by: Peter Kirkham
---
 posthog/ai/anthropic/anthropic.py           |  8 ++-
 posthog/ai/anthropic/anthropic_async.py     |  8 ++-
 posthog/ai/utils.py                         | 17 +++++-
 posthog/test/ai/anthropic/test_anthropic.py | 62 +++++++++++++++++++--
 4 files changed, 85 insertions(+), 10 deletions(-)

diff --git a/posthog/ai/anthropic/anthropic.py b/posthog/ai/anthropic/anthropic.py
index a6b12b8..49f5c4f 100644
--- a/posthog/ai/anthropic/anthropic.py
+++ b/posthog/ai/anthropic/anthropic.py
@@ -8,7 +8,7 @@
 import uuid
 from typing import Any, Dict, Optional
 
-from posthog.ai.utils import call_llm_and_track_usage, get_model_params, with_privacy_mode
+from posthog.ai.utils import call_llm_and_track_usage, get_model_params, merge_system_prompt, with_privacy_mode
 from posthog.client import Client as PostHogClient
 
 
@@ -171,7 +171,11 @@ def _capture_streaming_event(
             "$ai_provider": "anthropic",
             "$ai_model": kwargs.get("model"),
             "$ai_model_parameters": get_model_params(kwargs),
-            "$ai_input": with_privacy_mode(self._client._ph_client, posthog_privacy_mode, kwargs.get("messages")),
+            "$ai_input": with_privacy_mode(
+                self._client._ph_client,
+                posthog_privacy_mode,
+                merge_system_prompt(kwargs, "anthropic"),
+            ),
             "$ai_output_choices": with_privacy_mode(
                 self._client._ph_client,
                 posthog_privacy_mode,
diff --git a/posthog/ai/anthropic/anthropic_async.py b/posthog/ai/anthropic/anthropic_async.py
index 31a3b35..d3b87ca 100644
--- a/posthog/ai/anthropic/anthropic_async.py
+++ b/posthog/ai/anthropic/anthropic_async.py
@@ -8,7 +8,7 @@
 import uuid
 from typing import Any, Dict, Optional
 
-from posthog.ai.utils import call_llm_and_track_usage_async, get_model_params, with_privacy_mode
+from posthog.ai.utils import call_llm_and_track_usage_async, get_model_params, merge_system_prompt, with_privacy_mode
 from posthog.client import Client as PostHogClient
 
 
@@ -171,7 +171,11 @@ async def _capture_streaming_event(
             "$ai_provider": "anthropic",
             "$ai_model": kwargs.get("model"),
             "$ai_model_parameters": get_model_params(kwargs),
-            "$ai_input": with_privacy_mode(self._client._ph_client, posthog_privacy_mode, kwargs.get("messages")),
+            "$ai_input": with_privacy_mode(
+                self._client._ph_client,
+                posthog_privacy_mode,
+                merge_system_prompt(kwargs, "anthropic"),
+            ),
             "$ai_output_choices": with_privacy_mode(
                 self._client._ph_client,
                 posthog_privacy_mode,
diff --git a/posthog/ai/utils.py b/posthog/ai/utils.py
index 728ddd4..6a902a2 100644
--- a/posthog/ai/utils.py
+++ b/posthog/ai/utils.py
@@ -86,6 +86,15 @@ def format_response_openai(response):
     return output
 
 
+def merge_system_prompt(kwargs: Dict[str, Any], provider: str):
+    if provider != "anthropic":
+        return kwargs.get("messages")
+    messages = kwargs.get("messages") or []
+    if kwargs.get("system") is None:
+        return messages
+    return [{"role": "system", "content": kwargs.get("system")}] + messages
+
+
 def call_llm_and_track_usage(
     posthog_distinct_id: Optional[str],
     ph_client: PostHogClient,
@@ -123,11 +132,13 @@
     if response and hasattr(response, "usage"):
         usage = get_usage(response, provider)
 
+    messages = merge_system_prompt(kwargs, provider)
+
     event_properties = {
         "$ai_provider": provider,
         "$ai_model": kwargs.get("model"),
         "$ai_model_parameters": get_model_params(kwargs),
-        "$ai_input": with_privacy_mode(ph_client, posthog_privacy_mode, kwargs.get("messages")),
kwargs.get("messages")), + "$ai_input": with_privacy_mode(ph_client, posthog_privacy_mode, messages), "$ai_output_choices": with_privacy_mode( ph_client, posthog_privacy_mode, format_response(response, provider) ), @@ -191,11 +202,13 @@ async def call_llm_and_track_usage_async( if response and hasattr(response, "usage"): usage = get_usage(response, provider) + messages = merge_system_prompt(kwargs, provider) + event_properties = { "$ai_provider": provider, "$ai_model": kwargs.get("model"), "$ai_model_parameters": get_model_params(kwargs), - "$ai_input": with_privacy_mode(ph_client, posthog_privacy_mode, kwargs.get("messages")), + "$ai_input": with_privacy_mode(ph_client, posthog_privacy_mode, messages), "$ai_output_choices": with_privacy_mode( ph_client, posthog_privacy_mode, format_response(response, provider) ), diff --git a/posthog/test/ai/anthropic/test_anthropic.py b/posthog/test/ai/anthropic/test_anthropic.py index ce2169d..a2a3dd9 100644 --- a/posthog/test/ai/anthropic/test_anthropic.py +++ b/posthog/test/ai/anthropic/test_anthropic.py @@ -218,25 +218,29 @@ def test_basic_integration(mock_client): client = Anthropic(posthog_client=mock_client) client.messages.create( model="claude-3-opus-20240229", - messages=[{"role": "user", "content": "You must always answer with 'Bar'."}], + messages=[{"role": "user", "content": "Foo"}], max_tokens=1, temperature=0, posthog_distinct_id="test-id", posthog_properties={"foo": "bar"}, + system="You must always answer with 'Bar'.", ) assert mock_client.capture.call_count == 1 call_args = mock_client.capture.call_args[1] props = call_args["properties"] - assert call_args["distinct_id"] == "test-id" assert call_args["event"] == "$ai_generation" assert props["$ai_provider"] == "anthropic" assert props["$ai_model"] == "claude-3-opus-20240229" - assert props["$ai_input"] == [{"role": "user", "content": "You must always answer with 'Bar'."}] + assert props["$ai_input"] == [ + {"role": "system", "content": "You must always answer with 'Bar'."}, + {"role": "user", "content": "Foo"}, + ] assert props["$ai_output_choices"][0]["role"] == "assistant" - assert props["$ai_input_tokens"] == 16 + assert props["$ai_output_choices"][0]["content"] == "Bar" + assert props["$ai_input_tokens"] == 18 assert props["$ai_output_tokens"] == 1 assert props["$ai_http_status"] == 200 assert props["foo"] == "bar" @@ -271,3 +275,53 @@ async def test_basic_async_integration(mock_client): assert props["$ai_http_status"] == 200 assert props["foo"] == "bar" assert isinstance(props["$ai_latency"], float) + + +def test_streaming_system_prompt(mock_client, mock_anthropic_stream): + with patch("anthropic.resources.Messages.create", return_value=mock_anthropic_stream): + client = Anthropic(api_key="test-key", posthog_client=mock_client) + response = client.messages.create( + model="claude-3-opus-20240229", + system="Foo", + messages=[{"role": "user", "content": "Bar"}], + stream=True, + ) + + # Consume the stream + list(response) + + # Wait a bit to ensure the capture is called + time.sleep(0.1) + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + assert props["$ai_input"] == [{"role": "system", "content": "Foo"}, {"role": "user", "content": "Bar"}] + + +@pytest.mark.skipif(not ANTHROPIC_API_KEY, reason="ANTHROPIC_API_KEY is not set") +async def test_async_streaming_system_prompt(mock_client, mock_anthropic_stream): + client = AsyncAnthropic(posthog_client=mock_client) + response = await client.messages.create( + 
model="claude-3-opus-20240229", + system="You must always answer with 'Bar'.", + messages=[{"role": "user", "content": "Foo"}], + stream=True, + max_tokens=1, + ) + + # Consume the stream + [c async for c in response] + + # Wait a bit to ensure the capture is called + time.sleep(0.1) + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + assert props["$ai_input"] == [ + {"role": "system", "content": "You must always answer with 'Bar'."}, + {"role": "user", "content": "Foo"}, + ]