From 951175fd7d22728a0ae8cbb0594e38e7754f27ac Mon Sep 17 00:00:00 2001
From: Michael Witbrock
Date: Fri, 26 Dec 2025 22:33:14 +1300
Subject: [PATCH] JVNAUTOSCI-841: Add path to aux debug events

---
 .../integrations/internal_mcp/orchestrator.py |  6 ++++++
 tests/backend/test_orchestrator_extraction.py | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/src/backend/integrations/internal_mcp/orchestrator.py b/src/backend/integrations/internal_mcp/orchestrator.py
index 484c6ef..b925dcb 100644
--- a/src/backend/integrations/internal_mcp/orchestrator.py
+++ b/src/backend/integrations/internal_mcp/orchestrator.py
@@ -801,6 +801,7 @@ def _llm_detects_missing_tool_call(
     *,
     fallback_model: Optional[str],
     aux_log: Optional[List[Mapping[str, Any]]] = None,
+    path: str | None = None,
 ) -> Optional[bool]:
     """Run the Vontology-configured detector LLM to classify the response.
 
@@ -859,6 +860,7 @@ def _llm_detects_missing_tool_call(
         aux_log.append(
             {
                 "type": "missing_tool_call_classifier",
+                "path": path or "",
                 "model": model_name or fallback_model or "default",
                 "model_raw": raw_model_name or "",
                 "model_resolved": model_name or "",
@@ -1488,6 +1490,7 @@ def _assess_missing_tool_call(
         llm_client,
         fallback_model=model,
         aux_log=aux_log,
+        path=path,
     )
 
     if llm_flag is not None:
@@ -1565,6 +1568,7 @@ def _persist_trace(*, status: str, error: str | None = None) -> None:
         aux_llm_calls.append(
             {
                 "type": "workflow_execution_trace",
+                "path": "trace",
                 "workflow_id": trace.workflow_id,
                 "execution_id": trace.execution_id,
                 "stored": bool(stored_execution_id),
@@ -1809,6 +1813,7 @@ def _persist_trace(*, status: str, error: str | None = None) -> None:
         aux_llm_calls.append(
             {
                 "type": "missing_tool_call_retry",
+                "path": assessment.path,
                 "stage": "prompt",
                 "retry_reason": assessment.retry_reason,
                 "prompt_preview": retry_prompt[:800],
@@ -1825,6 +1830,7 @@ def _persist_trace(*, status: str, error: str | None = None) -> None:
         aux_llm_calls.append(
             {
                 "type": "missing_tool_call_retry",
+                "path": assessment.path,
                 "stage": "response",
                 "retry_reason": assessment.retry_reason,
                 "response_preview": (
diff --git a/tests/backend/test_orchestrator_extraction.py b/tests/backend/test_orchestrator_extraction.py
index 660fa8b..806dd20 100644
--- a/tests/backend/test_orchestrator_extraction.py
+++ b/tests/backend/test_orchestrator_extraction.py
@@ -331,6 +331,7 @@ def test_llm_detector_returns_true_on_yes():
         llm,
         fallback_model="fallback-model",
         aux_log=aux_log,
+        path="legacy",
     )
 
     assert decision is True
@@ -338,6 +339,7 @@
     assert llm.calls[0]["context"] is None
     assert "search the web" in llm.calls[0]["prompt"]
     assert aux_log and aux_log[0]["type"] == "missing_tool_call_classifier"
+    assert aux_log[0]["path"] == "legacy"
 
 
 def test_llm_detector_strips_vontology_model_prefix_before_calling_llm():
@@ -357,12 +359,14 @@
         llm,
         fallback_model="fallback-model",
         aux_log=aux_log,
+        path="legacy",
     )
 
     assert decision is False
     assert llm.calls[0]["model"] == "gpt-4o-mini"
     assert aux_log and aux_log[0]["model_raw"] == "#V#gpt-4o-mini"
     assert aux_log[0]["model_resolved"] == "gpt-4o-mini"
+    assert aux_log[0]["path"] == "legacy"
 
 
 def test_llm_detector_appends_response_when_placeholder_missing():
@@ -382,12 +386,14 @@
         llm,
         fallback_model="fallback-model",
         aux_log=aux_log,
+        path="legacy",
     )
 
     assert decision is True
     assert "fetch JVNAUTOSCI-803" in llm.calls[0]["prompt"]
     assert aux_log and aux_log[0]["prompt_placeholder_response"] is False
     assert aux_log[0]["prompt_injection_mode"] == "append"
+    assert aux_log[0]["path"] == "legacy"
 
 
 def test_llm_detector_uses_fallback_model_when_missing():
@@ -407,11 +413,13 @@
         llm,
         fallback_model="fallback-model",
         aux_log=aux_log,
+        path="legacy",
     )
 
     assert decision is False
     assert llm.calls[0]["model"] == "fallback-model"
     assert aux_log and aux_log[0]["model"] == "fallback-model"
+    assert aux_log[0]["path"] == "legacy"
 
 
 def test_run_retries_when_llm_detector_flags_missing_tool_call():
@@ -454,6 +462,16 @@
     assert result.response_text == "Final response"
     assert result.aux_llm_calls
 
+    aux_by_type = {}
+    for entry in result.aux_llm_calls:
+        if isinstance(entry, dict) and isinstance(entry.get("type"), str):
+            aux_by_type.setdefault(entry["type"], []).append(entry)
+
+    assert aux_by_type["missing_tool_call_detection"][0]["path"] == "legacy"
+    assert aux_by_type["missing_tool_call_classifier"][0]["path"] == "legacy"
+    for retry_entry in aux_by_type["missing_tool_call_retry"]:
+        assert retry_entry["path"] == "legacy"
+
 
 def test_run_retries_when_classifier_misses_but_heuristic_triggers():
     gateway = _DummyGateway()
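With every aux debug event now stamped with a "path" field (the values seen
in this patch are "trace" for workflow execution traces and "legacy" in the
tests), a consumer of result.aux_llm_calls can tell which code path emitted
a given entry. Below is a minimal sketch of such a consumer, assuming the
events are plain dicts as the tests above exercise; the helper name
group_aux_events_by_path is illustrative and not part of this patch:

    from collections import defaultdict
    from typing import Any, Dict, List, Mapping

    def group_aux_events_by_path(
        aux_llm_calls: List[Mapping[str, Any]],
    ) -> Dict[str, List[Mapping[str, Any]]]:
        """Bucket aux debug events by the code path that emitted them."""
        by_path: Dict[str, List[Mapping[str, Any]]] = defaultdict(list)
        for entry in aux_llm_calls:
            if isinstance(entry, dict):
                # Events written before this change may lack "path"; group
                # them under "" rather than dropping them silently.
                by_path[str(entry.get("path", ""))].append(entry)
        return dict(by_path)

    # Example usage after a run:
    #   by_path = group_aux_events_by_path(result.aux_llm_calls)
    #   retries = [e for e in by_path.get("legacy", [])
    #              if e["type"] == "missing_tool_call_retry"]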