From 951175fd7d22728a0ae8cbb0594e38e7754f27ac Mon Sep 17 00:00:00 2001
From: Michael Witbrock
Date: Fri, 26 Dec 2025 22:33:14 +1300
Subject: [PATCH] JVNAUTOSCI-841: Add path to aux debug events

---
 .../integrations/internal_mcp/orchestrator.py |  6 ++++++
 tests/backend/test_orchestrator_extraction.py | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/src/backend/integrations/internal_mcp/orchestrator.py b/src/backend/integrations/internal_mcp/orchestrator.py
index 484c6ef..b925dcb 100644
--- a/src/backend/integrations/internal_mcp/orchestrator.py
+++ b/src/backend/integrations/internal_mcp/orchestrator.py
@@ -801,6 +801,7 @@ def _llm_detects_missing_tool_call(
     *,
     fallback_model: Optional[str],
     aux_log: Optional[List[Mapping[str, Any]]] = None,
+    path: str | None = None,
 ) -> Optional[bool]:
     """Run the Vontology-configured detector LLM to classify the response.
 
@@ -859,6 +860,7 @@ def _llm_detects_missing_tool_call(
         aux_log.append(
             {
                 "type": "missing_tool_call_classifier",
+                "path": path or "",
                 "model": model_name or fallback_model or "default",
                 "model_raw": raw_model_name or "",
                 "model_resolved": model_name or "",
@@ -1488,6 +1490,7 @@ def _assess_missing_tool_call(
         llm_client,
         fallback_model=model,
         aux_log=aux_log,
+        path=path,
     )
 
     if llm_flag is not None:
@@ -1565,6 +1568,7 @@ def _persist_trace(*, status: str, error: str | None = None) -> None:
         aux_llm_calls.append(
             {
                 "type": "workflow_execution_trace",
+                "path": "trace",
                 "workflow_id": trace.workflow_id,
                 "execution_id": trace.execution_id,
                 "stored": bool(stored_execution_id),
@@ -1809,6 +1813,7 @@ def _persist_trace(*, status: str, error: str | None = None) -> None:
         aux_llm_calls.append(
             {
                 "type": "missing_tool_call_retry",
+                "path": assessment.path,
                 "stage": "prompt",
                 "retry_reason": assessment.retry_reason,
                 "prompt_preview": retry_prompt[:800],
@@ -1825,6 +1830,7 @@ def _persist_trace(*, status: str, error: str | None = None) -> None:
         aux_llm_calls.append(
             {
                 "type": "missing_tool_call_retry",
+                "path": assessment.path,
                 "stage": "response",
                 "retry_reason": assessment.retry_reason,
                 "response_preview": (
diff --git a/tests/backend/test_orchestrator_extraction.py b/tests/backend/test_orchestrator_extraction.py
index 660fa8b..806dd20 100644
--- a/tests/backend/test_orchestrator_extraction.py
+++ b/tests/backend/test_orchestrator_extraction.py
@@ -331,6 +331,7 @@ def test_llm_detector_returns_true_on_yes():
         llm,
         fallback_model="fallback-model",
         aux_log=aux_log,
+        path="legacy",
     )
 
     assert decision is True
@@ -338,6 +339,7 @@
     assert llm.calls[0]["context"] is None
     assert "search the web" in llm.calls[0]["prompt"]
     assert aux_log and aux_log[0]["type"] == "missing_tool_call_classifier"
+    assert aux_log[0]["path"] == "legacy"
 
 
 def test_llm_detector_strips_vontology_model_prefix_before_calling_llm():
@@ -357,12 +359,14 @@
         llm,
         fallback_model="fallback-model",
         aux_log=aux_log,
+        path="legacy",
     )
 
     assert decision is False
     assert llm.calls[0]["model"] == "gpt-4o-mini"
     assert aux_log and aux_log[0]["model_raw"] == "#V#gpt-4o-mini"
     assert aux_log[0]["model_resolved"] == "gpt-4o-mini"
+    assert aux_log[0]["path"] == "legacy"
 
 
 def test_llm_detector_appends_response_when_placeholder_missing():
@@ -382,12 +386,14 @@
         llm,
         fallback_model="fallback-model",
         aux_log=aux_log,
+        path="legacy",
     )
 
     assert decision is True
     assert "fetch JVNAUTOSCI-803" in llm.calls[0]["prompt"]
     assert aux_log and aux_log[0]["prompt_placeholder_response"] is False
     assert aux_log[0]["prompt_injection_mode"] == "append"
+    assert aux_log[0]["path"] == "legacy"
 
 
 def test_llm_detector_uses_fallback_model_when_missing():
@@ -407,11 +413,13 @@
         llm,
         fallback_model="fallback-model",
         aux_log=aux_log,
+        path="legacy",
     )
 
     assert decision is False
     assert llm.calls[0]["model"] == "fallback-model"
     assert aux_log and aux_log[0]["model"] == "fallback-model"
+    assert aux_log[0]["path"] == "legacy"
 
 
 def test_run_retries_when_llm_detector_flags_missing_tool_call():
@@ -454,6 +462,16 @@
     assert result.response_text == "Final response"
     assert result.aux_llm_calls
 
+    aux_by_type = {}
+    for entry in result.aux_llm_calls:
+        if isinstance(entry, dict) and isinstance(entry.get("type"), str):
+            aux_by_type.setdefault(entry["type"], []).append(entry)
+
+    assert aux_by_type["missing_tool_call_detection"][0]["path"] == "legacy"
+    assert aux_by_type["missing_tool_call_classifier"][0]["path"] == "legacy"
+    for retry_entry in aux_by_type["missing_tool_call_retry"]:
+        assert retry_entry["path"] == "legacy"
+
 
 def test_run_retries_when_classifier_misses_but_heuristic_triggers():
     gateway = _DummyGateway()
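With every aux debug event now stamped with a "path" field (the values seen
in this patch are "trace" for workflow execution traces and "legacy" in the
tests), a consumer of result.aux_llm_calls can tell which code path emitted
a given entry. Below is a minimal sketch of such a consumer, assuming the
events are plain dicts as the tests above exercise; the helper name
group_aux_events_by_path is illustrative and not part of this patch:

    from collections import defaultdict
    from typing import Any, Dict, List, Mapping

    def group_aux_events_by_path(
        aux_llm_calls: List[Mapping[str, Any]],
    ) -> Dict[str, List[Mapping[str, Any]]]:
        """Bucket aux debug events by the code path that emitted them."""
        by_path: Dict[str, List[Mapping[str, Any]]] = defaultdict(list)
        for entry in aux_llm_calls:
            if isinstance(entry, dict):
                # Events written before this change may lack "path"; group
                # them under "" rather than dropping them silently.
                by_path[str(entry.get("path", ""))].append(entry)
        return dict(by_path)

    # Example usage after a run:
    #   by_path = group_aux_events_by_path(result.aux_llm_calls)
    #   retries = [e for e in by_path.get("legacy", [])
    #              if e["type"] == "missing_tool_call_retry"]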