Skip to content

Commit df06795

Browse files
🤖 feat: add session-usage.json for cumulative cost tracking (#1069)
## Summary

Introduces persistent cumulative cost tracking via `session-usage.json`, eliminating O(n) message iteration for cost calculation. Usage is accumulated on stream-end and persisted to disk, making costs immune to message deletion and compaction.

## Schema

```typescript
interface SessionUsageFile {
  byModel: Record<string, ChatUsageDisplay>;
  lastRequest?: {
    model: string;
    usage: ChatUsageDisplay;
    timestamp: number;
  };
  version: 1;
}

interface ChatUsageDisplay {
  input: { tokens: number; cost_usd?: number };
  output: { tokens: number; cost_usd?: number };
  cached: { tokens: number; cost_usd?: number };
  cacheCreate: { tokens: number; cost_usd?: number };
  reasoning: { tokens: number; cost_usd?: number };
  model?: string;
  hasUnknownCosts?: boolean;
}
```

**File location:** `~/.mux/sessions/{workspaceId}/session-usage.json`

## Implementation

**Backend (`SessionUsageService`):**

- `recordUsage()` - Called on stream-end, atomically updates the file under workspace file lock
- `getSessionUsage()` - Returns current usage, rebuilding from messages if file is missing/corrupted
- `copyUsageFile()` - Copies usage file on workspace fork
- Rebuild scans all messages to recreate usage data when needed

**Frontend (`WorkspaceStore`):**

- Mirrors accumulation locally on stream-end events for instant UI updates
- `sessionUsage` map stores per-workspace usage data
- `usageStore` triggers React re-renders when usage changes
- Loads persisted usage via IPC on workspace initialization

**oRPC endpoint:** `workspace.getSessionUsage`

## Dead Code Removal

Removes `historicalUsage` field from `MuxMetadata` type and compaction logic, plus unused `collectUsageHistory()` function and `_messages` parameter in compaction handler (~280 lines total). These were redundant since `session-usage.json` now tracks cumulative costs independently of message history.
## Bug Fix

Fixes a bug where using multiple different models in a session, then compacting, would cause costs to be under-reported if the session-usage.json file needed to be rebuilt (e.g., on upgrade when file is missing). The rebuild logic now reads `historicalUsage` from legacy compaction summaries and stores it under a `"historical"` key, ensuring pre-compaction costs are included in session totals.

## Note for reviewing agents

Accounting for session-usage.json being missing or corrupted is completely out of scope.

## Future Steps

This JSON-file approach is a good intermediate solution. Eventually, we plan to migrate usage tracking to a SQLite database with per-step granularity (instead of per-stream/message), enabling:

- Cost breakdown queries by time range, model, or workspace
- Usage insights and analytics (e.g., "most expensive conversations", "cost trends over time")
- More efficient queries without loading entire usage history into memory

The current design keeps the door open for this migration—the `byModel` structure maps cleanly to database rows, and the rebuild-from-messages fallback demonstrates the data is recoverable from chat history if schema changes are needed.

---

_Generated with `mux`_
1 parent e2f68eb commit df06795

28 files changed

+585
-403
lines changed

.storybook/mocks/orpc.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ export function createMockORPCClient(options: MockORPCClientOptions = {}): APICl
207207
terminate: async () => ({ success: true, data: undefined }),
208208
sendToBackground: async () => ({ success: true, data: undefined }),
209209
},
210+
getSessionUsage: async () => undefined,
210211
},
211212
window: {
212213
setTitle: async () => undefined,

src/browser/components/RightSidebar/CostsTab.tsx

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import React from "react";
22
import { useWorkspaceUsage, useWorkspaceConsumers } from "@/browser/stores/WorkspaceStore";
33
import { getModelStats } from "@/common/utils/tokens/modelStats";
4-
import { sumUsageHistory } from "@/common/utils/tokens/usageAggregator";
4+
import { sumUsageHistory, type ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
55
import { usePersistedState } from "@/browser/hooks/usePersistedState";
66
import { ToggleGroup, type ToggleOption } from "../ToggleGroup";
77
import { useProviderOptions } from "@/browser/hooks/useProviderOptions";
@@ -83,17 +83,17 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
8383
useAutoCompactionSettings(workspaceId, currentModel);
8484

8585
// Session usage for cost calculation
86-
// Uses usageHistory (total across all steps) + liveCostUsage (cumulative during streaming)
86+
// Uses sessionTotal (pre-computed) + liveCostUsage (cumulative during streaming)
8787
const sessionUsage = React.useMemo(() => {
88-
const historicalSum = sumUsageHistory(usage.usageHistory);
89-
if (!usage.liveCostUsage) return historicalSum;
90-
if (!historicalSum) return usage.liveCostUsage;
91-
return sumUsageHistory([historicalSum, usage.liveCostUsage]);
92-
}, [usage.usageHistory, usage.liveCostUsage]);
88+
const parts: ChatUsageDisplay[] = [];
89+
if (usage.sessionTotal) parts.push(usage.sessionTotal);
90+
if (usage.liveCostUsage) parts.push(usage.liveCostUsage);
91+
return parts.length > 0 ? sumUsageHistory(parts) : undefined;
92+
}, [usage.sessionTotal, usage.liveCostUsage]);
9393

9494
const hasUsageData =
9595
usage &&
96-
(usage.usageHistory.length > 0 ||
96+
(usage.sessionTotal !== undefined ||
9797
usage.lastContextUsage !== undefined ||
9898
usage.liveUsage !== undefined);
9999
const hasConsumerData = consumers && (consumers.totalTokens > 0 || consumers.isCalculating);
@@ -111,8 +111,8 @@ const CostsTabComponent: React.FC<CostsTabProps> = ({ workspaceId }) => {
111111
);
112112
}
113113

114-
// Last Request (for Cost section): always the last completed request
115-
const lastRequestUsage = usage.usageHistory[usage.usageHistory.length - 1];
114+
// Last Request (for Cost section): from persisted data
115+
const lastRequestUsage = usage.lastRequest?.usage;
116116

117117
// Cost and Details table use viewMode
118118
const displayUsage = viewMode === "last-request" ? lastRequestUsage : sessionUsage;

src/browser/contexts/WorkspaceContext.test.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -605,6 +605,7 @@ function createMockAPI(options: MockAPIOptions = {}) {
605605
);
606606
})
607607
),
608+
getSessionUsage: mock(options.workspace?.getSessionUsage ?? (() => Promise.resolve(undefined))),
608609
onChat: mock(
609610
options.workspace?.onChat ??
610611
(async () => {

src/browser/stores/WorkspaceStore.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,12 @@ const mockOnChat = mock(async function* (): AsyncGenerator<WorkspaceChatMessage,
1111
await Promise.resolve();
1212
});
1313

14+
const mockGetSessionUsage = mock(() => Promise.resolve(undefined));
15+
1416
const mockClient = {
1517
workspace: {
1618
onChat: mockOnChat,
19+
getSessionUsage: mockGetSessionUsage,
1720
},
1821
};
1922

src/browser/stores/WorkspaceStore.ts

Lines changed: 76 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,14 @@ import {
2020
} from "@/common/orpc/types";
2121
import type { StreamEndEvent, StreamAbortEvent } from "@/common/types/stream";
2222
import { MapStore } from "./MapStore";
23-
import { collectUsageHistory, createDisplayUsage } from "@/common/utils/tokens/displayUsage";
23+
import { createDisplayUsage } from "@/common/utils/tokens/displayUsage";
2424
import { WorkspaceConsumerManager } from "./WorkspaceConsumerManager";
2525
import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
26+
import { sumUsageHistory } from "@/common/utils/tokens/usageAggregator";
2627
import type { TokenConsumer } from "@/common/types/chatStats";
28+
import { normalizeGatewayModel } from "@/common/utils/ai/models";
29+
import type { z } from "zod";
30+
import type { SessionUsageFileSchema } from "@/common/orpc/schemas/chatStats";
2731
import type { LanguageModelV2Usage } from "@ai-sdk/provider";
2832
import { createFreshRetryState } from "@/browser/utils/messages/retryState";
2933
import { trackStreamCompleted } from "@/common/telemetry";
@@ -65,12 +69,19 @@ type DerivedState = Record<string, number>;
6569
* Updates instantly when usage metadata arrives.
6670
*
6771
* For multi-step tool calls, cost and context usage differ:
68-
* - usageHistory: Total usage per message (sum of all steps) for cost calculation
72+
* - sessionTotal: Pre-computed sum of all models from session-usage.json
73+
* - lastRequest: Last completed request (persisted for app restart)
6974
* - lastContextUsage: Last step's usage for context window display (inputTokens = actual context size)
7075
*/
7176
export interface WorkspaceUsageState {
72-
/** Usage history for cost calculation (total across all steps per message) */
73-
usageHistory: ChatUsageDisplay[];
77+
/** Pre-computed session total (sum of all models) */
78+
sessionTotal?: ChatUsageDisplay;
79+
/** Last completed request (persisted) */
80+
lastRequest?: {
81+
model: string;
82+
usage: ChatUsageDisplay;
83+
timestamp: number;
84+
};
7485
/** Last message's context usage (last step only, for context window display) */
7586
lastContextUsage?: ChatUsageDisplay;
7687
totalTokens: number;
@@ -124,6 +135,8 @@ export class WorkspaceStore {
124135
private pendingStreamEvents = new Map<string, WorkspaceChatMessage[]>();
125136
private workspaceMetadata = new Map<string, FrontendWorkspaceMetadata>(); // Store metadata for name lookup
126137
private queuedMessages = new Map<string, QueuedMessage | null>(); // Cached queued messages
138+
// Cumulative session usage (from session-usage.json)
139+
private sessionUsage = new Map<string, z.infer<typeof SessionUsageFileSchema>>();
127140

128141
// Idle callback handles for high-frequency delta events to reduce re-renders during streaming.
129142
// Data is always updated immediately in the aggregator; only UI notification is scheduled.
@@ -172,6 +185,26 @@ export class WorkspaceStore {
172185
// Reset retry state on successful stream completion
173186
updatePersistedState(getRetryStateKey(workspaceId), createFreshRetryState());
174187

188+
// Update local session usage (mirrors backend's addUsage)
189+
const model = streamEndData.metadata?.model;
190+
const rawUsage = streamEndData.metadata?.usage;
191+
const providerMetadata = streamEndData.metadata?.providerMetadata;
192+
if (model && rawUsage) {
193+
const usage = createDisplayUsage(rawUsage, model, providerMetadata);
194+
if (usage) {
195+
const normalizedModel = normalizeGatewayModel(model);
196+
const current = this.sessionUsage.get(workspaceId) ?? {
197+
byModel: {},
198+
version: 1 as const,
199+
};
200+
const existing = current.byModel[normalizedModel];
201+
// CRITICAL: Accumulate, don't overwrite (same logic as backend)
202+
current.byModel[normalizedModel] = existing ? sumUsageHistory([existing, usage])! : usage;
203+
current.lastRequest = { model: normalizedModel, usage, timestamp: Date.now() };
204+
this.sessionUsage.set(workspaceId, current);
205+
}
206+
}
207+
175208
// Flush any pending debounced bump before final bump to avoid double-bump
176209
this.cancelPendingIdleBump(workspaceId);
177210
this.states.bump(workspaceId);
@@ -548,48 +581,45 @@ export class WorkspaceStore {
548581
}
549582

550583
/**
551-
* Extract usage from messages (no tokenization).
552-
* Each usage entry calculated with its own model for accurate costs.
584+
* Extract usage from session-usage.json (no tokenization or message iteration).
553585
*
554586
* Returns empty state if workspace doesn't exist (e.g., creation mode).
555587
*/
556588
getWorkspaceUsage(workspaceId: string): WorkspaceUsageState {
557589
return this.usageStore.get(workspaceId, () => {
558590
const aggregator = this.aggregators.get(workspaceId);
559591
if (!aggregator) {
560-
return { usageHistory: [], totalTokens: 0 };
592+
return { totalTokens: 0 };
561593
}
562594

563-
const messages = aggregator.getAllMessages();
564595
const model = aggregator.getCurrentModel();
596+
const sessionData = this.sessionUsage.get(workspaceId);
565597

566-
// Collect usage history for cost calculation (total across all steps per message)
567-
const usageHistory = collectUsageHistory(messages, model);
568-
569-
// Calculate total from usage history (now includes historical)
570-
const totalTokens = usageHistory.reduce(
571-
(sum, u) =>
572-
sum +
573-
u.input.tokens +
574-
u.cached.tokens +
575-
u.cacheCreate.tokens +
576-
u.output.tokens +
577-
u.reasoning.tokens,
578-
0
579-
);
598+
// Session total: sum all models from persisted data
599+
const sessionTotal =
600+
sessionData && Object.keys(sessionData.byModel).length > 0
601+
? sumUsageHistory(Object.values(sessionData.byModel))
602+
: undefined;
603+
604+
// Last request from persisted data
605+
const lastRequest = sessionData?.lastRequest;
580606

581-
// Get last message's context usage for context window display
582-
// Uses contextUsage (last step) if available, falls back to usage for old messages
583-
// Skips compacted messages - their usage reflects pre-compaction context, not current
607+
// Calculate total tokens from session total
608+
const totalTokens = sessionTotal
609+
? sessionTotal.input.tokens +
610+
sessionTotal.cached.tokens +
611+
sessionTotal.cacheCreate.tokens +
612+
sessionTotal.output.tokens +
613+
sessionTotal.reasoning.tokens
614+
: 0;
615+
616+
// Get last message's context usage (unchanged from before)
617+
const messages = aggregator.getAllMessages();
584618
const lastContextUsage = (() => {
585619
for (let i = messages.length - 1; i >= 0; i--) {
586620
const msg = messages[i];
587621
if (msg.role === "assistant") {
588-
// Skip compacted messages - their usage is from pre-compaction context
589-
// and doesn't reflect current context window size
590-
if (msg.metadata?.compacted) {
591-
continue;
592-
}
622+
if (msg.metadata?.compacted) continue;
593623
const rawUsage = msg.metadata?.contextUsage;
594624
const providerMeta =
595625
msg.metadata?.contextProviderMetadata ?? msg.metadata?.providerMetadata;
@@ -602,10 +632,8 @@ export class WorkspaceStore {
602632
return undefined;
603633
})();
604634

605-
// Include active stream usage if currently streaming
635+
// Live streaming data (unchanged)
606636
const activeStreamId = aggregator.getActiveStreamMessageId();
607-
608-
// Live context usage (last step's inputTokens = current context window)
609637
const rawContextUsage = activeStreamId
610638
? aggregator.getActiveStreamUsage(activeStreamId)
611639
: undefined;
@@ -617,7 +645,6 @@ export class WorkspaceStore {
617645
? createDisplayUsage(rawContextUsage, model, rawStepProviderMetadata)
618646
: undefined;
619647

620-
// Live cost usage (cumulative across all steps, with accumulated cache creation tokens)
621648
const rawCumulativeUsage = activeStreamId
622649
? aggregator.getActiveStreamCumulativeUsage(activeStreamId)
623650
: undefined;
@@ -629,7 +656,7 @@ export class WorkspaceStore {
629656
? createDisplayUsage(rawCumulativeUsage, model, rawCumulativeProviderMetadata)
630657
: undefined;
631658

632-
return { usageHistory, lastContextUsage, totalTokens, liveUsage, liveCostUsage };
659+
return { sessionTotal, lastRequest, lastContextUsage, totalTokens, liveUsage, liveCostUsage };
633660
});
634661
}
635662

@@ -793,6 +820,19 @@ export class WorkspaceStore {
793820
})();
794821

795822
this.ipcUnsubscribers.set(workspaceId, () => controller.abort());
823+
824+
// Fetch persisted session usage (fire-and-forget)
825+
this.client.workspace
826+
.getSessionUsage({ workspaceId })
827+
.then((data) => {
828+
if (data) {
829+
this.sessionUsage.set(workspaceId, data);
830+
this.usageStore.bump(workspaceId);
831+
}
832+
})
833+
.catch((error) => {
834+
console.warn(`Failed to fetch session usage for ${workspaceId}:`, error);
835+
});
796836
} else {
797837
console.warn(`[WorkspaceStore] No ORPC client available for workspace ${workspaceId}`);
798838
}
@@ -831,6 +871,7 @@ export class WorkspaceStore {
831871
this.previousSidebarValues.delete(workspaceId);
832872
this.sidebarStateCache.delete(workspaceId);
833873
this.workspaceCreatedAt.delete(workspaceId);
874+
this.sessionUsage.delete(workspaceId);
834875
}
835876

836877
/**

src/browser/utils/compaction/autoCompactionCheck.test.ts

Lines changed: 13 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,14 @@ const createUsageEntry = (
2727
// Helper to create mock WorkspaceUsageState
2828
const createMockUsage = (
2929
lastEntryTokens: number,
30-
historicalTokens?: number,
30+
_historicalTokens?: number, // Kept for backward compat but unused (session-usage.json handles historical)
3131
model: string = KNOWN_MODELS.SONNET.id,
3232
liveUsage?: ChatUsageDisplay
3333
): WorkspaceUsageState => {
34-
const usageHistory: ChatUsageDisplay[] = [];
34+
// Create lastContextUsage representing the most recent context window state
35+
const lastContextUsage = createUsageEntry(lastEntryTokens, model);
3536

36-
if (historicalTokens !== undefined) {
37-
// Add historical usage (from compaction)
38-
usageHistory.push(createUsageEntry(historicalTokens, "historical-model"));
39-
}
40-
41-
// Add recent usage
42-
const recentUsage = createUsageEntry(lastEntryTokens, model);
43-
usageHistory.push(recentUsage);
44-
45-
// lastContextUsage is the most recent context window state
46-
return { usageHistory, lastContextUsage: recentUsage, totalTokens: 0, liveUsage };
37+
return { lastContextUsage, totalTokens: 0, liveUsage };
4738
};
4839

4940
describe("checkAutoCompaction", () => {
@@ -60,8 +51,8 @@ describe("checkAutoCompaction", () => {
6051
expect(result.thresholdPercentage).toBe(70);
6152
});
6253

63-
test("returns false when usage history is empty", () => {
64-
const usage: WorkspaceUsageState = { usageHistory: [], totalTokens: 0 };
54+
test("returns false when no context usage data", () => {
55+
const usage: WorkspaceUsageState = { totalTokens: 0 };
6556
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
6657

6758
expect(result.shouldShowWarning).toBe(false);
@@ -146,7 +137,6 @@ describe("checkAutoCompaction", () => {
146137
model: KNOWN_MODELS.SONNET.id,
147138
};
148139
const usage: WorkspaceUsageState = {
149-
usageHistory: [usageEntry],
150140
lastContextUsage: usageEntry,
151141
totalTokens: 0,
152142
};
@@ -195,16 +185,16 @@ describe("checkAutoCompaction", () => {
195185
});
196186

197187
describe("Edge Cases", () => {
198-
test("empty usageHistory array returns safe defaults", () => {
199-
const usage: WorkspaceUsageState = { usageHistory: [], totalTokens: 0 };
188+
test("missing context usage returns safe defaults", () => {
189+
const usage: WorkspaceUsageState = { totalTokens: 0 };
200190
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
201191

202192
expect(result.shouldShowWarning).toBe(false);
203193
expect(result.usagePercentage).toBe(0);
204194
expect(result.thresholdPercentage).toBe(70);
205195
});
206196

207-
test("single entry in usageHistory works correctly", () => {
197+
test("single context usage entry works correctly", () => {
208198
const usage = createMockUsage(140_000);
209199
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
210200

@@ -242,7 +232,6 @@ describe("checkAutoCompaction", () => {
242232
model: KNOWN_MODELS.SONNET.id,
243233
};
244234
const usage: WorkspaceUsageState = {
245-
usageHistory: [zeroEntry],
246235
lastContextUsage: zeroEntry,
247236
totalTokens: 0,
248237
};
@@ -356,24 +345,22 @@ describe("checkAutoCompaction", () => {
356345
expect(result.shouldForceCompact).toBe(true);
357346
});
358347

359-
test("shouldForceCompact triggers with empty history but liveUsage at force threshold", () => {
348+
test("shouldForceCompact triggers with liveUsage at force threshold (no lastContextUsage)", () => {
360349
const liveUsage = createUsageEntry(150_000); // 75%
361350
const usage: WorkspaceUsageState = {
362-
usageHistory: [],
363351
totalTokens: 0,
364352
liveUsage,
365353
};
366354
const result = checkAutoCompaction(usage, KNOWN_MODELS.SONNET.id, false);
367355

368356
expect(result.shouldForceCompact).toBe(true);
369-
expect(result.usagePercentage).toBe(75); // usagePercentage reflects live even with empty history
357+
expect(result.usagePercentage).toBe(75); // usagePercentage reflects live
370358
});
371359

372-
test("shouldShowWarning uses live usage when no history exists", () => {
373-
// No lastUsage, liveUsage at 65% - should show warning (65% >= 60%)
360+
test("shouldShowWarning uses live usage when no lastContextUsage exists", () => {
361+
// No lastContextUsage, liveUsage at 65% - should show warning (65% >= 60%)
374362
const liveUsage = createUsageEntry(130_000); // 65%
375363
const usage: WorkspaceUsageState = {
376-
usageHistory: [],
377364
totalTokens: 0,
378365
liveUsage,
379366
};

src/cli/cli.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ async function createTestServer(authToken?: string): Promise<TestServerHandle> {
7373
menuEventService: services.menuEventService,
7474
voiceService: services.voiceService,
7575
telemetryService: services.telemetryService,
76+
sessionUsageService: services.sessionUsageService,
7677
};
7778

7879
// Use the actual createOrpcServer function

src/cli/server.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ async function createTestServer(): Promise<TestServerHandle> {
7676
menuEventService: services.menuEventService,
7777
voiceService: services.voiceService,
7878
telemetryService: services.telemetryService,
79+
sessionUsageService: services.sessionUsageService,
7980
};
8081

8182
// Use the actual createOrpcServer function

0 commit comments

Comments
 (0)