From 1c87221a91ebe510d707f47edb30c86c3a0f1897 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb
Date: Wed, 21 Jan 2026 15:02:56 +0100
Subject: [PATCH 1/5] feat(langchain): Set system instruction attribute

---
 sentry_sdk/consts.py                          |  6 ++++++
 sentry_sdk/integrations/langchain.py          | 21 +++++++++++++++++++
 .../integrations/langchain/test_langchain.py  |  6 ++++--
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index 93fca6ba3e..4b61a317fb 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -542,6 +542,12 @@ class SPANDATA:
     Example: 2048
     """

+    GEN_AI_SYSTEM_INSTRUCTIONS = "gen_ai.system_instructions"
+    """
+    The system instructions passed to the model.
+    Example: [{"type": "text", "text": "You are a helpful assistant."},{"type": "text", "text": "Be concise and clear."}]
+    """
+
     GEN_AI_REQUEST_MESSAGES = "gen_ai.request.messages"
     """
     The messages passed to the model. The "content" can be a string or an array of objects.
diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 8a9bc5167c..0beebb36a5 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -189,6 +189,22 @@ def _get_current_agent() -> "Optional[str]":
     return None


+def _set_system_prompt(
+    span: "sentry_sdk.tracing.Span", messages: "List[List[BaseMessage]]"
+) -> None:
+    for list_ in messages:
+        for message in list_:
+            if message.type == "system":
+                system_prompt = message.content
+                set_data_normalized(
+                    span,
+                    SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
+                    system_prompt,
+                    unpack=False,
+                )
+                return
+
+
 class LangchainIntegration(Integration):
     identifier = "langchain"
     origin = f"auto.ai.{identifier}"
@@ -430,9 +446,14 @@ def on_chat_model_start(
             _set_tools_on_span(span, all_params.get("tools"))

             if should_send_default_pii() and self.include_prompts:
+                _set_system_prompt(span, messages)
+
                 normalized_messages = []
                 for list_ in messages:
                     for message in list_:
+                        if message.type == "system":
+                            continue
+
                         normalized_messages.append(
                             self._normalize_langchain_message(message)
                         )
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
index 6f5f9f14a1..fd6a4a34d2 100644
--- a/tests/integrations/langchain/test_langchain.py
+++ b/tests/integrations/langchain/test_langchain.py
@@ -219,14 +219,14 @@ def test_langchain_agent(
     if send_default_pii and include_prompts:
         assert (
             "You are very powerful"
-            in chat_spans[0]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+            in chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]
         )
         assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
         assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT]
         assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT])
         assert (
             "You are very powerful"
-            in chat_spans[1]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
+            in chat_spans[1]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]
         )
         assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]

@@ -243,8 +243,10 @@ def test_langchain_agent(
         tool_call_str = str(tool_calls_data)
         assert "get_word_length" in tool_call_str
     else:
+        assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {})
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {})
         assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {})
+        assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {})
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {})
         assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {})
         assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {})

From 306741ef1f9bcbc5ec4ea14bfcd0cb7ac185f92e Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb
Date: Thu, 22 Jan 2026 08:51:49 +0100
Subject: [PATCH 2/5] .

---
 sentry_sdk/_types.py                          |  4 ++
 sentry_sdk/integrations/langchain.py          | 39 +++++++++++++------
 .../integrations/langchain/test_langchain.py  | 20 ++++++----
 3 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py
index 7043bbc2ee..ecb8abcd10 100644
--- a/sentry_sdk/_types.py
+++ b/sentry_sdk/_types.py
@@ -359,3 +359,7 @@ class SDKInfo(TypedDict):
     )

     HttpStatusCodeRange = Union[int, Container[int]]
+
+    class TextPart(TypedDict):
+        type: Literal["text"]
+        content: str
diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 0beebb36a5..1cccfb0fff 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -36,6 +36,7 @@
     from uuid import UUID

     from sentry_sdk.tracing import Span
+    from sentry_sdk._types import TextPart


 try:
@@ -189,20 +190,27 @@ def _get_current_agent() -> "Optional[str]":
     return None


-def _set_system_prompt(
-    span: "sentry_sdk.tracing.Span", messages: "List[List[BaseMessage]]"
-) -> None:
+def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[TextPart]":
+    system_instructions = []
+
     for list_ in messages:
         for message in list_:
             if message.type == "system":
-                system_prompt = message.content
-                set_data_normalized(
-                    span,
-                    SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
-                    system_prompt,
-                    unpack=False,
-                )
-                return
+                system_instructions.append(message)
+
+    return system_instructions
+
+
+def _transform_system_instructions(
+    system_instructions: "List[BaseMessage]",
+) -> "List[TextPart]":
+    return [
+        {
+            "type": "text",
+            "content": instruction.content,
+        }
+        for instruction in system_instructions
+    ]


 class LangchainIntegration(Integration):
@@ -446,7 +454,14 @@ def on_chat_model_start(
             _set_tools_on_span(span, all_params.get("tools"))

             if should_send_default_pii() and self.include_prompts:
-                _set_system_prompt(span, messages)
+                system_instructions = _get_system_instructions(messages)
+                if len(system_instructions) > 0:
+                    set_data_normalized(
+                        span,
+                        SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
+                        _transform_system_instructions(system_instructions),
+                        unpack=False,
+                    )

                 normalized_messages = []
                 for list_ in messages:
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
index fd6a4a34d2..54c4664f3e 100644
--- a/tests/integrations/langchain/test_langchain.py
+++ b/tests/integrations/langchain/test_langchain.py
@@ -217,17 +217,21 @@ def test_langchain_agent(
     assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117

     if send_default_pii and include_prompts:
-        assert (
-            "You are very powerful"
-            in chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]
-        )
+        assert [
+            {
+                "type": "text",
+                "content": "You are very powerful assistant, but don't know current events",
+            }
+        ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
         assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
         assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT]
         assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT])
-        assert (
-            "You are very powerful"
-            in chat_spans[1]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]
-        )
+        assert [
+            {
+                "type": "text",
+                "content": "You are very powerful assistant, but don't know current events",
+            }
+        ] == json.loads(chat_spans[1]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
         assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]

         # Verify tool calls are recorded when PII is enabled

From 6a7c40993d75b513f806d37a12495785273e61a9 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb
Date: Fri, 23 Jan 2026 12:21:48 +0100
Subject: [PATCH 3/5] handle content lists

---
 sentry_sdk/integrations/langchain.py          | 19 +++++--
 .../integrations/langchain/test_langchain.py  | 55 ++++++++++++++-----
 2 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 1cccfb0fff..eb490bd416 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -190,24 +190,33 @@ def _get_current_agent() -> "Optional[str]":
     return None


-def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[TextPart]":
+def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[str]":
     system_instructions = []

     for list_ in messages:
         for message in list_:
-            if message.type == "system":
-                system_instructions.append(message)
+            # type of content: str | list[str | dict] | None
+            if message.type == "system" and isinstance(message.content, str):
+                system_instructions.append(message.content)
+
+            elif message.type == "system" and isinstance(message.content, list):
+                # content_blocks accessor standardizes string and dict elements
+                for block in message.content_blocks:
+                    if block.get("type") == "text":
+                        text = block.get("text", None)
+                        if text is not None:
+                            system_instructions.append(text)

     return system_instructions


 def _transform_system_instructions(
-    system_instructions: "List[BaseMessage]",
+    system_instructions: "List[str]",
 ) -> "List[TextPart]":
     return [
         {
             "type": "text",
-            "content": instruction.content,
+            "content": instruction,
         }
         for instruction in system_instructions
     ]
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
index 54c4664f3e..252b531a87 100644
--- a/tests/integrations/langchain/test_langchain.py
+++ b/tests/integrations/langchain/test_langchain.py
@@ -75,8 +75,26 @@ def _llm_type(self) -> str:
         (False, False, True),
     ],
 )
+@pytest.mark.parametrize(
+    "system_instructions_content",
+    [
+        "You are very powerful assistant, but don't know current events",
+        ["You are a helpful assistant.", "Be concise and clear."],
+        [
+            {"type": "text", "text": "You are a helpful assistant."},
+            {"type": "text", "text": "Be concise and clear."},
+        ],
+    ],
+    ids=["string", "list", "blocks"],
+)
 def test_langchain_agent(
-    sentry_init, capture_events, send_default_pii, include_prompts, use_unknown_llm_type
+    sentry_init,
+    capture_events,
+    send_default_pii,
+    include_prompts,
+    use_unknown_llm_type,
+    system_instructions_content,
+    request,
 ):
     global llm_type
     llm_type = "acme-llm" if use_unknown_llm_type else "openai-chat"
@@ -96,7 +114,7 @@ def test_langchain_agent(
         [
             (
                 "system",
-                "You are very powerful assistant, but don't know current events",
+                system_instructions_content,
             ),
             ("user", "{input}"),
             MessagesPlaceholder(variable_name="agent_scratchpad"),
@@ -217,21 +235,30 @@ def test_langchain_agent(
     assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117

     if send_default_pii and include_prompts:
-        assert [
-            {
-                "type": "text",
-                "content": "You are very powerful assistant, but don't know current events",
-            }
-        ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
         assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
         assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT]
         assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT])
-        assert [
-            {
-                "type": "text",
-                "content": "You are very powerful assistant, but don't know current events",
-            }
-        ] == json.loads(chat_spans[1]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
+
+        param_id = request.node.callspec.id
+        if "string" in param_id:
+            assert [
+                {
+                    "type": "text",
+                    "content": "You are very powerful assistant, but don't know current events",
+                }
+            ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
+        else:
+            assert [
+                {
+                    "type": "text",
+                    "content": "You are a helpful assistant.",
+                },
+                {
+                    "type": "text",
+                    "content": "Be concise and clear.",
+                },
+            ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
+
         assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]

         # Verify tool calls are recorded when PII is enabled

From 1d07d7a46646559f2c65ea47f381788d065a22ed Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb
Date: Fri, 23 Jan 2026 12:34:45 +0100
Subject: [PATCH 4/5] old langchain compatible

---
 sentry_sdk/integrations/langchain.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index eb490bd416..60feecff15 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -200,12 +200,13 @@ def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[str]":
                 system_instructions.append(message.content)

             elif message.type == "system" and isinstance(message.content, list):
-                # content_blocks accessor standardizes string and dict elements
-                for block in message.content_blocks:
-                    if block.get("type") == "text":
-                        text = block.get("text", None)
-                        if text is not None:
-                            system_instructions.append(text)
+                for item in message.content:
+                    if isinstance(item, str):
+                        system_instructions.append(item)
+
+                    elif isinstance(item, dict) and item.get("type") == "text":
+                        if "text" in item:
+                            system_instructions.append(item.get("text"))

     return system_instructions


From 38c52fe1e7f01ee29a136ab126a03ced77cdab4e Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb
Date: Fri, 23 Jan 2026 12:38:02 +0100
Subject: [PATCH 5/5] more defensive handling of content parts

---
 sentry_sdk/integrations/langchain.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 60feecff15..831aeeab91 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -205,8 +205,9 @@ def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[str]":
                         system_instructions.append(item)

                     elif isinstance(item, dict) and item.get("type") == "text":
-                        if "text" in item:
-                            system_instructions.append(item.get("text"))
+                        instruction = item.get("text")
+                        if isinstance(instruction, str):
+                            system_instructions.append(instruction)

     return system_instructions

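
Note (illustration only, not part of the patches above): the sketch below mirrors the extraction rules the series converges on in PATCH 5. A system message's content may be a plain string, a list of strings, or a list of {"type": "text", ...} parts, and only string text is collected. It uses plain dicts in place of LangChain BaseMessage objects so it runs without langchain installed; the helper name extract_system_instructions and the dict-shaped input are assumptions made for this example.

from typing import Any, Dict, List


def extract_system_instructions(messages: List[List[Dict[str, Any]]]) -> List[str]:
    # Collect system-prompt text; "content" may be a string, a list of
    # strings/dicts, or missing entirely, so check each shape defensively.
    system_instructions = []
    for batch in messages:
        for message in batch:
            if message.get("type") != "system":
                continue
            content = message.get("content")
            if isinstance(content, str):
                system_instructions.append(content)
            elif isinstance(content, list):
                for item in content:
                    if isinstance(item, str):
                        system_instructions.append(item)
                    elif isinstance(item, dict) and item.get("type") == "text":
                        text = item.get("text")
                        if isinstance(text, str):
                            system_instructions.append(text)
    return system_instructions


if __name__ == "__main__":
    batches = [
        [
            {"type": "system", "content": "You are a helpful assistant."},
            {"type": "system", "content": [{"type": "text", "text": "Be concise and clear."}]},
            {"type": "human", "content": "What is 2 + 2?"},
        ]
    ]
    # Prints ['You are a helpful assistant.', 'Be concise and clear.']
    print(extract_system_instructions(batches))

In the SDK itself, the collected strings are then wrapped as {"type": "text", "content": ...} parts by _transform_system_instructions and written to the span's gen_ai.system_instructions attribute via set_data_normalized.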