diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
index 8a9bc5167c..831aeeab91 100644
--- a/sentry_sdk/integrations/langchain.py
+++ b/sentry_sdk/integrations/langchain.py
@@ -36,6 +36,7 @@
     from uuid import UUID
 
     from sentry_sdk.tracing import Span
+    from sentry_sdk._types import TextPart
 
 
 try:
@@ -189,6 +190,40 @@ def _get_current_agent() -> "Optional[str]":
     return None
 
 
+def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[str]":
+    system_instructions = []
+
+    for list_ in messages:
+        for message in list_:
+            # type of content: str | list[str | dict] | None
+            if message.type == "system" and isinstance(message.content, str):
+                system_instructions.append(message.content)
+
+            elif message.type == "system" and isinstance(message.content, list):
+                for item in message.content:
+                    if isinstance(item, str):
+                        system_instructions.append(item)
+
+                    elif isinstance(item, dict) and item.get("type") == "text":
+                        instruction = item.get("text")
+                        if isinstance(instruction, str):
+                            system_instructions.append(instruction)
+
+    return system_instructions
+
+
+def _transform_system_instructions(
+    system_instructions: "List[str]",
+) -> "List[TextPart]":
+    return [
+        {
+            "type": "text",
+            "content": instruction,
+        }
+        for instruction in system_instructions
+    ]
+
+
 class LangchainIntegration(Integration):
     identifier = "langchain"
     origin = f"auto.ai.{identifier}"
@@ -430,9 +465,21 @@ def on_chat_model_start(
             _set_tools_on_span(span, all_params.get("tools"))
 
             if should_send_default_pii() and self.include_prompts:
+                system_instructions = _get_system_instructions(messages)
+                if len(system_instructions) > 0:
+                    set_data_normalized(
+                        span,
+                        SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
+                        _transform_system_instructions(system_instructions),
+                        unpack=False,
+                    )
+
                 normalized_messages = []
                 for list_ in messages:
                     for message in list_:
+                        if message.type == "system":
+                            continue
+
                         normalized_messages.append(
                             self._normalize_langchain_message(message)
                         )
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
index 6f5f9f14a1..252b531a87 100644
--- a/tests/integrations/langchain/test_langchain.py
+++ b/tests/integrations/langchain/test_langchain.py
@@ -75,8 +75,26 @@ def _llm_type(self) -> str:
         (False, False, True),
     ],
 )
+@pytest.mark.parametrize(
+    "system_instructions_content",
+    [
+        "You are very powerful assistant, but don't know current events",
+        ["You are a helpful assistant.", "Be concise and clear."],
+        [
+            {"type": "text", "text": "You are a helpful assistant."},
+            {"type": "text", "text": "Be concise and clear."},
+        ],
+    ],
+    ids=["string", "list", "blocks"],
+)
 def test_langchain_agent(
-    sentry_init, capture_events, send_default_pii, include_prompts, use_unknown_llm_type
+    sentry_init,
+    capture_events,
+    send_default_pii,
+    include_prompts,
+    use_unknown_llm_type,
+    system_instructions_content,
+    request,
 ):
     global llm_type
     llm_type = "acme-llm" if use_unknown_llm_type else "openai-chat"
@@ -96,7 +114,7 @@ def test_langchain_agent(
         [
             (
                 "system",
-                "You are very powerful assistant, but don't know current events",
+                system_instructions_content,
             ),
             ("user", "{input}"),
             MessagesPlaceholder(variable_name="agent_scratchpad"),
@@ -217,17 +235,30 @@
     assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117
 
     if send_default_pii and include_prompts:
-        assert (
-            "You are very powerful"
-            in chat_spans[0]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        )
         assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
         assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT]
         assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT])
-        assert (
-            "You are very powerful"
-            in chat_spans[1]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        )
+
+        param_id = request.node.callspec.id
+        if "string" in param_id:
+            assert [
+                {
+                    "type": "text",
+                    "content": "You are very powerful assistant, but don't know current events",
+                }
+            ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
+        else:
+            assert [
+                {
+                    "type": "text",
+                    "content": "You are a helpful assistant.",
+                },
+                {
+                    "type": "text",
+                    "content": "Be concise and clear.",
+                },
+            ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
+
         assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
 
         # Verify tool calls are recorded when PII is enabled
@@ -243,8 +274,10 @@
             tool_call_str = str(tool_calls_data)
             assert "get_word_length" in tool_call_str
     else:
+        assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {})
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {})
         assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {})
+        assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {})
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {})
         assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {})
         assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {})
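
A minimal usage sketch (not part of the diff) of what the two new private helpers are expected to produce for an on_chat_model_start-style payload. It assumes the patch above is applied and that langchain_core is installed; the message objects and their contents below are made up for illustration.

# Sketch only: assumes the patched sentry_sdk.integrations.langchain is importable.
from langchain_core.messages import HumanMessage, SystemMessage

from sentry_sdk.integrations.langchain import (
    _get_system_instructions,
    _transform_system_instructions,
)

# on_chat_model_start receives a list of message lists.
messages = [
    [
        SystemMessage(content="You are a helpful assistant."),
        SystemMessage(content=[{"type": "text", "text": "Be concise and clear."}]),
        HumanMessage(content="How long is the word 'eudca'?"),
    ]
]

instructions = _get_system_instructions(messages)
# -> ["You are a helpful assistant.", "Be concise and clear."]

parts = _transform_system_instructions(instructions)
# -> [{"type": "text", "content": "You are a helpful assistant."},
#     {"type": "text", "content": "Be concise and clear."}]

The JSON-serialized form of parts is roughly what the test asserts against under SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, while the human message still flows into GEN_AI_REQUEST_MESSAGES.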