Skip to content
Closed
50 changes: 50 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
# Default: databricks
MODEL_PROVIDER=ollama

# Default model to use (overrides provider-specific defaults)
# Without this, defaults to a Databricks Claude model regardless of MODEL_PROVIDER.
# Set this when using Ollama or other local providers to match your installed model.
# MODEL_DEFAULT=qwen2.5-coder:latest

# ==============================================================================
# Ollama Configuration (Hybrid Routing)
# ==============================================================================
Expand All @@ -24,6 +29,13 @@ OLLAMA_MODEL=qwen2.5-coder:latest
# Ollama endpoint (default: http://localhost:11434)
OLLAMA_ENDPOINT=http://localhost:11434

# Ollama request timeout in milliseconds (default: 120000)
# OLLAMA_TIMEOUT_MS=120000

# Ollama keep_alive parameter - how long to keep model loaded in memory
# Values: -1 (forever), 0 (unload immediately), or duration string (e.g. "5m", "1h")
# OLLAMA_KEEP_ALIVE=-1

# Ollama embeddings configuration (for Cursor @Codebase semantic search)
# Embedding models for local, privacy-first semantic search
# Popular models:
Expand Down Expand Up @@ -225,6 +237,13 @@ TOOL_EXECUTION_MODE=server
# <model> - Use a specific model (e.g. "llama3.1" for a lighter model)
SUGGESTION_MODE_MODEL=default

# Topic detection model override
# Redirects topic classification to a lighter/faster model to reduce GPU load.
# Values:
# default - Use the main model (no change)
# <model> - Use a specific model (e.g. "llama3.2:1b" for a lightweight classifier)
TOPIC_DETECTION_MODEL=default

# Enable/disable automatic tool injection for local models
INJECT_TOOLS_LLAMACPP=true
INJECT_TOOLS_OLLAMA=true
Expand Down Expand Up @@ -328,6 +347,37 @@ HOT_RELOAD_ENABLED=true
# Debounce delay in ms (prevents rapid reloads)
HOT_RELOAD_DEBOUNCE_MS=1000

# ==============================================================================
# LLM Audit Logging
# ==============================================================================

# Enable LLM audit logging (default: false)
# Logs all LLM requests/responses for debugging and analysis
LLM_AUDIT_ENABLED=false

# Audit log file path
# LLM_AUDIT_LOG_FILE=./logs/llm-audit.log

# Maximum content length per field (characters) - controls log file size
# LLM_AUDIT_MAX_CONTENT_LENGTH=5000
# LLM_AUDIT_MAX_SYSTEM_LENGTH=2000
# LLM_AUDIT_MAX_USER_LENGTH=3000
# LLM_AUDIT_MAX_RESPONSE_LENGTH=3000

# Log rotation settings
# LLM_AUDIT_MAX_FILES=30
# LLM_AUDIT_MAX_SIZE=100M

# Include annotation metadata in audit logs (default: true)
# LLM_AUDIT_ANNOTATIONS=true

# Deduplication - reduces log size by referencing repeated content
# LLM_AUDIT_DEDUP_ENABLED=true
# LLM_AUDIT_DEDUP_MIN_SIZE=500
# LLM_AUDIT_DEDUP_CACHE_SIZE=100
# LLM_AUDIT_DEDUP_SANITIZE=true
# LLM_AUDIT_DEDUP_SESSION_CACHE=true

# ==============================================================================
# Quick Start Examples
# ==============================================================================
Expand Down
12 changes: 6 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
"dev": "nodemon index.js",
"lint": "eslint src index.js",
"test": "npm run test:unit && npm run test:performance",
"test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
"test:memory": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
"test:new-features": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js",
"test:performance": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js",
"test:benchmark": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-benchmark.js",
"test:quick": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js",
"test:unit": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
"test:memory": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
"test:new-features": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js",
"test:performance": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js",
"test:benchmark": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-benchmark.js",
"test:quick": "NODE_ENV=test DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js",
"test:all": "npm run test:unit && npm run test:performance && npm run test:benchmark"
},
"keywords": [
Expand Down
96 changes: 81 additions & 15 deletions src/api/middleware/logging.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,92 @@
return clone;
}

const loggingMiddleware = pinoHttp({
const baseLoggingMiddleware = pinoHttp({
logger,
customProps: (req) => ({
autoLogging: false, // Disable automatic logging so we can log manually with bodies
customProps: (req, res) => ({

Check failure on line 18 in src/api/middleware/logging.js

View workflow job for this annotation

GitHub Actions / Run Tests (23.x)

'res' is defined but never used

Check failure on line 18 in src/api/middleware/logging.js

View workflow job for this annotation

GitHub Actions / Run Tests (20.x)

'res' is defined but never used

Check failure on line 18 in src/api/middleware/logging.js

View workflow job for this annotation

GitHub Actions / Run Tests (22.x)

'res' is defined but never used

Check failure on line 18 in src/api/middleware/logging.js

View workflow job for this annotation

GitHub Actions / Run Tests (24.x)

'res' is defined but never used
sessionId: req.sessionId ?? null,
}),
customLogLevel: (req, res, err) => {
if (err || res.statusCode >= 500) return "error";
if (res.statusCode >= 400) return "warn";
return "info";
},
wrapSerializers: true,
serializers: {
req(req) {
return {
});

// Wrapper middleware to capture and log full request/response bodies
function loggingMiddleware(req, res, next) {
const startTime = Date.now();

// Log request with full body immediately
logger.info({
sessionId: req.sessionId ?? null,
req: {
method: req.method,
url: req.url,
headers: maskHeaders(req.headers),
},
requestBody: req.body, // Full request body without truncation
}, 'request started');

// Intercept res.write for streaming responses
const originalWrite = res.write;
const chunks = [];
res.write = function (chunk) {
if (chunk) {
chunks.push(Buffer.from(chunk));
}
return originalWrite.apply(this, arguments);
};

// Intercept res.send to capture the body
const originalSend = res.send;
res.send = function (body) {
res._capturedBody = body;

// Parse if it's a JSON string for better logging
if (typeof body === 'string') {
try {
res._capturedBody = JSON.parse(body);
} catch (e) {
res._capturedBody = body;
}
}

return originalSend.call(this, body);
};

// Log response when finished
res.on('finish', () => {
const responseTime = Date.now() - startTime;

// Capture streaming body if not already captured via send()
if (chunks.length > 0 && !res._capturedBody) {
const fullBody = Buffer.concat(chunks).toString('utf8');
res._capturedBody = {
type: 'stream',
contentType: res.getHeader('content-type'),
size: fullBody.length,
preview: fullBody.substring(0, 1000)
};
}

const logLevel = res.statusCode >= 500 ? 'error' : res.statusCode >= 400 ? 'warn' : 'info';

logger[logLevel]({
sessionId: req.sessionId ?? null,
req: {
method: req.method,
url: req.url,
headers: maskHeaders(req.headers),
};
},
},
});
},
res: {
statusCode: res.statusCode,
headers: res.getHeaders ? res.getHeaders() : res.headers,
},
requestBody: req.body, // Full request body without truncation
responseBody: res._capturedBody, // Full response body without truncation
responseTime,
}, 'request completed');
});

// Still call base middleware to set up req.log
baseLoggingMiddleware(req, res, next);
}

module.exports = loggingMiddleware;
18 changes: 16 additions & 2 deletions src/api/middleware/request-logging.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@ function requestLoggingMiddleware(req, res, next) {
// Add to response headers
res.setHeader("X-Request-ID", requestId);

// Log request start
// Log request start with full body
logger.info(
{
requestId,
method: req.method,
path: req.path || req.url,
query: req.query,
body: req.body, // Full request body without truncation
ip: req.ip || req.socket.remoteAddress,
userAgent: req.headers["user-agent"],
},
Expand All @@ -43,7 +44,18 @@ function requestLoggingMiddleware(req, res, next) {
res.send = function (body) {
const duration = Date.now() - startTime;

// Log request completion
// Parse body if it's a string
let responseBody = body;
if (typeof body === 'string') {
try {
responseBody = JSON.parse(body);
} catch (e) {
// Keep as string if not JSON
responseBody = body;
}
}

// Log request completion with full request and response bodies
logger.info(
{
requestId,
Expand All @@ -52,6 +64,8 @@ function requestLoggingMiddleware(req, res, next) {
status: res.statusCode,
duration,
contentLength: res.getHeader("content-length"),
requestBody: req.body, // Full request body for reference
responseBody, // Full response body without truncation
},
"Request completed"
);
Expand Down
77 changes: 76 additions & 1 deletion src/api/router.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const openaiRouter = require("./openai-router");
const providersRouter = require("./providers-handler");
const { getRoutingHeaders, getRoutingStats, analyzeComplexity } = require("../routing");
const { validateCwd } = require("../workspace");
const logger = require("../logger");

const router = express.Router();

Expand Down Expand Up @@ -121,6 +122,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
const wantsStream = Boolean(req.query?.stream === 'true' || req.body?.stream);
const hasTools = Array.isArray(req.body?.tools) && req.body.tools.length > 0;

logger.info({
sessionId: req.headers['x-claude-session-id'],
wantsStream,
hasTools,
willUseStreamingPath: wantsStream || hasTools
}, "=== REQUEST ROUTING DECISION ===");

// Analyze complexity for routing headers (Phase 3)
const complexity = analyzeComplexity(req.body);
const routingHeaders = getRoutingHeaders({
Expand Down Expand Up @@ -338,6 +346,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {

// Legacy streaming wrapper (for tool-based requests that requested streaming)
if (wantsStream && hasTools) {
logger.info({
sessionId: req.headers['x-claude-session-id'],
pathType: 'legacy_streaming_wrapper',
wantsStream,
hasTools
}, "=== USING LEGACY STREAMING WRAPPER (TOOL-BASED WITH STREAMING) ===");

metrics.recordStreamingStart();
res.set({
"Content-Type": "text/event-stream",
Expand All @@ -359,6 +374,13 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
// Use proper Anthropic SSE format
const msg = result.body;

logger.info({
sessionId: req.headers['x-claude-session-id'],
eventType: 'message_start',
streamingWithTools: true,
hasContent: !!(msg.content && msg.content.length > 0)
}, "=== SENDING SSE MESSAGE_START ===");

// 1. message_start
res.write(`event: message_start\n`);
res.write(`data: ${JSON.stringify({
Expand Down Expand Up @@ -419,9 +441,52 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {

res.write(`event: content_block_stop\n`);
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
} else if (block.type === "tool_result") {
// === TOOL_RESULT SSE STREAMING - ENTERED ===
logger.info({
blockIndex: i,
blockType: block.type,
toolUseId: block.tool_use_id,
contentType: typeof block.content,
contentLength: typeof block.content === 'string' ? block.content.length : JSON.stringify(block.content).length
}, "=== SSE: STREAMING TOOL_RESULT BLOCK - START ===");

// Stream tool_result blocks so CLI can display actual tool output
res.write(`event: content_block_start\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_start",
index: i,
content_block: { type: "tool_result", tool_use_id: block.tool_use_id, content: "" }
})}\n\n`);

// Stream the actual content
const content = typeof block.content === 'string'
? block.content
: JSON.stringify(block.content);

logger.info({
blockIndex: i,
contentLength: content.length,
contentPreview: content.substring(0, 200)
}, "=== SSE: STREAMING TOOL_RESULT CONTENT ===");

res.write(`event: content_block_delta\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_delta",
index: i,
delta: { type: "tool_result_delta", content: content }
})}\n\n`);

res.write(`event: content_block_stop\n`);
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);

// === TOOL_RESULT SSE STREAMING - COMPLETED ===
logger.info({
blockIndex: i,
toolUseId: block.tool_use_id
}, "=== SSE: STREAMING TOOL_RESULT BLOCK - END ===");
}
}

// 3. message_delta with stop_reason
res.write(`event: message_delta\n`);
res.write(`data: ${JSON.stringify({
Expand Down Expand Up @@ -454,6 +519,16 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
});
}


// DIAGNOSTIC: Log response being sent to client
logger.info({
status: result.status,
hasBody: !!result.body,
bodyKeys: result.body ? Object.keys(result.body) : [],
bodyType: typeof result.body,
contentLength: result.body ? JSON.stringify(result.body).length : 0
}, "=== SENDING RESPONSE TO CLIENT ===");

metrics.recordResponse(result.status);
res.status(result.status).send(result.body);
} catch (error) {
Expand Down
Loading
Loading