From af7270f58e2f6a01d9d9723a232275ebe4508eb1 Mon Sep 17 00:00:00 2001
From: Ammar
Date: Fri, 12 Dec 2025 09:42:27 -0600
Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=A4=96=20refactor:=20deduplicate=20Op?=
 =?UTF-8?q?enAI=20service=5Ftier=20constants?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Create OPENAI_SERVICE_TIERS constant array and OpenAIServiceTier type
- Add isValidServiceTier type guard for validation
- Add OPENAI_DEFAULT_SERVICE_TIER constant (auto)
- Replace all duplicated enum definitions with shared constant
- Add TODO for cost calculation adjustments based on service tier

Service tiers have different pricing:
- flex: ~50% cheaper (Batch API rates)
- priority: premium pricing
- default/auto: standard pricing

_Generated with mux_
---
 .../Settings/sections/ProvidersSection.tsx | 27 +++++++++----------
 src/common/constants/providers.ts          | 20 ++++++++++++++
 src/common/orpc/schemas/api.ts             |  3 ++-
 src/common/orpc/schemas/providerOptions.ts |  3 ++-
 src/common/utils/ai/providerOptions.ts     |  3 ++-
 src/common/utils/tokens/displayUsage.ts    |  6 +++++
 src/node/services/aiService.ts             |  6 ++---
 src/node/services/providerService.ts       | 13 +++-------
 8 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/src/browser/components/Settings/sections/ProvidersSection.tsx b/src/browser/components/Settings/sections/ProvidersSection.tsx
index 1361b6ee8c..1766ff194e 100644
--- a/src/browser/components/Settings/sections/ProvidersSection.tsx
+++ b/src/browser/components/Settings/sections/ProvidersSection.tsx
@@ -1,7 +1,12 @@
 import React, { useState, useCallback } from "react";
 import { ChevronDown, ChevronRight, Check, X, Eye, EyeOff } from "lucide-react";
 import { createEditKeyHandler } from "@/browser/utils/ui/keybinds";
-import { SUPPORTED_PROVIDERS } from "@/common/constants/providers";
+import {
+  SUPPORTED_PROVIDERS,
+  OPENAI_SERVICE_TIERS,
+  OPENAI_DEFAULT_SERVICE_TIER,
+  isValidServiceTier,
+} from "@/common/constants/providers";
 import type { ProviderName } from "@/common/constants/providers";
 import { ProviderWithIcon } from "@/browser/components/ProviderIcon";
 import { useAPI } from "@/browser/contexts/API";
@@ -389,17 +394,10 @@ export function ProvidersSection() {
diff --git a/src/common/constants/providers.ts b/src/common/constants/providers.ts
index d7f9eacb54..a12176b0e5 100644
--- a/src/common/constants/providers.ts
+++ b/src/common/constants/providers.ts
@@ -85,6 +85,26 @@ export const PROVIDER_DEFINITIONS = {
 /**
  * Union type of all supported provider names
  */
+/**
+ * OpenAI service tier options for API requests.
+ * - auto: Let OpenAI select the appropriate tier (the default)
+ * - default: Standard processing
+ * - flex: 50% cheaper, higher latency (o3, o4-mini, gpt-5)
+ * - priority: Low-latency responses, premium pricing
+ */
+export const OPENAI_SERVICE_TIERS = ["auto", "default", "flex", "priority"] as const;
+export type OpenAIServiceTier = (typeof OPENAI_SERVICE_TIERS)[number];
+
+/** Default service tier for OpenAI requests */
+export const OPENAI_DEFAULT_SERVICE_TIER: OpenAIServiceTier = "auto";
+
+/**
+ * Type guard to check if a string is a valid OpenAI service tier
+ */
+export function isValidServiceTier(tier: unknown): tier is OpenAIServiceTier {
+  return typeof tier === "string" && OPENAI_SERVICE_TIERS.includes(tier as OpenAIServiceTier);
+}
+
 export type ProviderName = keyof typeof PROVIDER_DEFINITIONS;
 
 /**
diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts
index 46c44da95e..f5e83eb875 100644
--- a/src/common/orpc/schemas/api.ts
+++ b/src/common/orpc/schemas/api.ts
@@ -1,5 +1,6 @@
 import { eventIterator } from "@orpc/server";
 import { z } from "zod";
+import { OPENAI_SERVICE_TIERS } from "@/common/constants/providers";
 import { ChatStatsSchema, SessionUsageFileSchema } from "./chatStats";
 import { SendMessageErrorSchema } from "./errors";
 import { BranchListResultSchema, ImagePartSchema, MuxMessageSchema } from "./message";
@@ -71,7 +72,7 @@ export const ProviderConfigInfoSchema = z.object({
   baseUrl: z.string().optional(),
   models: z.array(z.string()).optional(),
   /** OpenAI-specific fields */
-  serviceTier: z.enum(["auto", "default", "flex", "priority"]).optional(),
+  serviceTier: z.enum(OPENAI_SERVICE_TIERS).optional(),
   /** AWS-specific fields (only present for bedrock provider) */
   aws: AWSCredentialStatusSchema.optional(),
   /** Mux Gateway-specific fields */
diff --git a/src/common/orpc/schemas/providerOptions.ts b/src/common/orpc/schemas/providerOptions.ts
index c58c4eda22..c24f3056c4 100644
--- a/src/common/orpc/schemas/providerOptions.ts
+++ b/src/common/orpc/schemas/providerOptions.ts
@@ -1,4 +1,5 @@
 import { z } from "zod";
+import { OPENAI_SERVICE_TIERS } from "@/common/constants/providers";
 
 export const MuxProviderOptionsSchema = z.object({
   anthropic: z
@@ -10,7 +11,7 @@ export const MuxProviderOptionsSchema = z.object({
     .optional(),
   openai: z
     .object({
-      serviceTier: z.enum(["auto", "default", "flex", "priority"]).optional().meta({
+      serviceTier: z.enum(OPENAI_SERVICE_TIERS).optional().meta({
        description:
          "OpenAI service tier: priority (low-latency), flex (50% cheaper, higher latency), auto/default (standard)",
       }),
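
For reference, a minimal sketch of how the shared constant, type, and guard from this
patch compose (standalone illustration, not code from the series; `resolveServiceTier`
and `rawTier` are hypothetical names, and the guard is renamed to
isValidOpenAIServiceTier in patch 3):

    import {
      OPENAI_SERVICE_TIERS,
      OPENAI_DEFAULT_SERVICE_TIER,
      isValidServiceTier,
      type OpenAIServiceTier,
    } from "@/common/constants/providers";

    // Narrow an untrusted config value to the OpenAIServiceTier union,
    // falling back to the shared default ("auto") when it is not one of
    // OPENAI_SERVICE_TIERS.
    function resolveServiceTier(rawTier: unknown): OpenAIServiceTier {
      return isValidServiceTier(rawTier) ? rawTier : OPENAI_DEFAULT_SERVICE_TIER;
    }

    resolveServiceTier("flex");  // "flex"
    resolveServiceTier("batch"); // "auto" (not a valid tier)
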
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
index 6193c9d041..515939ca46 100644
--- a/src/common/utils/ai/providerOptions.ts
+++ b/src/common/utils/ai/providerOptions.ts
@@ -18,6 +18,7 @@ import {
   OPENROUTER_REASONING_EFFORT,
 } from "@/common/types/thinking";
 import { log } from "@/node/services/log";
+import { OPENAI_DEFAULT_SERVICE_TIER } from "@/common/constants/providers";
 import type { MuxMessage } from "@/common/types/message";
 import { enforceThinkingPolicy } from "@/browser/utils/thinking/policy";
 import { normalizeGatewayModel } from "./models";
@@ -217,7 +218,7 @@ export function buildProviderOptions(
     disableAutoTruncation,
   });
 
-  const serviceTier = muxProviderOptions?.openai?.serviceTier ?? "auto";
+  const serviceTier = muxProviderOptions?.openai?.serviceTier ?? OPENAI_DEFAULT_SERVICE_TIER;
 
   const options: ProviderOptions = {
     openai: {
diff --git a/src/common/utils/tokens/displayUsage.ts b/src/common/utils/tokens/displayUsage.ts
index f936d67926..cc0dfe476a 100644
--- a/src/common/utils/tokens/displayUsage.ts
+++ b/src/common/utils/tokens/displayUsage.ts
@@ -60,6 +60,12 @@ export function createDisplayUsage(
   // Get model stats for cost calculation
   const modelStats = getModelStats(model);
 
+  // TODO: Adjust costs based on OpenAI service_tier from providerMetadata.openai.serviceTier
+  // - flex: ~50% cheaper (Batch API rates)
+  // - priority: premium pricing (~1.5x for some models)
+  // - default/auto: standard pricing
+  // The actual tier used is in the API response, not the requested tier.
+
   // Calculate costs based on model stats (undefined if model unknown)
   let inputCost: number | undefined;
   let cachedCost: number | undefined;
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index 373346c32c..9cf90d0afc 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -12,6 +12,7 @@ import type { WorkspaceMetadata } from "@/common/types/workspace";
 import {
   PROVIDER_REGISTRY,
   PROVIDER_DEFINITIONS,
+  isValidServiceTier,
   type ProviderName,
 } from "@/common/constants/providers";
 
@@ -502,11 +503,10 @@ export class AIService extends EventEmitter {
     }
 
     // Extract serviceTier from config to pass through to buildProviderOptions
-    const configServiceTier = providerConfig.serviceTier as string | undefined;
-    if (configServiceTier && muxProviderOptions) {
+    if (isValidServiceTier(providerConfig.serviceTier) && muxProviderOptions) {
       muxProviderOptions.openai = {
         ...muxProviderOptions.openai,
-        serviceTier: configServiceTier as "auto" | "default" | "flex" | "priority",
+        serviceTier: providerConfig.serviceTier,
       };
     }
 
diff --git a/src/node/services/providerService.ts b/src/node/services/providerService.ts
index ba75aa3533..d80dcdaa95 100644
--- a/src/node/services/providerService.ts
+++ b/src/node/services/providerService.ts
@@ -1,6 +1,6 @@
 import { EventEmitter } from "events";
 import type { Config } from "@/node/config";
-import { SUPPORTED_PROVIDERS } from "@/common/constants/providers";
+import { SUPPORTED_PROVIDERS, isValidServiceTier } from "@/common/constants/providers";
 import type { Result } from "@/common/types/result";
 import type {
   AWSCredentialStatus,
@@ -65,15 +65,8 @@ export class ProviderService {
     };
 
     // OpenAI-specific fields
-    const serviceTier = config.serviceTier;
-    if (
-      provider === "openai" &&
-      (serviceTier === "auto" ||
-        serviceTier === "default" ||
-        serviceTier === "flex" ||
-        serviceTier === "priority")
-    ) {
-      providerInfo.serviceTier = serviceTier;
+    if (provider === "openai" && isValidServiceTier(config.serviceTier)) {
+      providerInfo.serviceTier = config.serviceTier;
     }
 
     // AWS/Bedrock-specific fields

From e88cb84b361cd19491698949a1dd677395687474 Mon Sep 17 00:00:00 2001
From: Ammar
Date: Fri, 12 Dec 2025 09:54:47 -0600
Subject: [PATCH 2/4] feat: implement service tier cost calculations

- Add tier-specific cost fields to ModelStats (flex, priority)
- Extract pricing data from models.json (already has flex/priority rates)
- Apply tier-adjusted pricing based on serviceTier from OpenAI response
- Add tests for service tier cost adjustments
---
 src/common/utils/tokens/displayUsage.test.ts | 86 ++++++++++++++++++++
 src/common/utils/tokens/displayUsage.ts      | 60 ++++++++++++--
 src/common/utils/tokens/modelStats.ts        | 39 +++++++++
 3 files changed, 176 insertions(+), 9 deletions(-)

diff --git a/src/common/utils/tokens/displayUsage.test.ts b/src/common/utils/tokens/displayUsage.test.ts
index a11015938c..29ce77f633 100644
--- a/src/common/utils/tokens/displayUsage.test.ts
+++ b/src/common/utils/tokens/displayUsage.test.ts
@@ -195,4 +195,90 @@ describe("createDisplayUsage", () => {
       expect(result!.cacheCreate.tokens).toBe(1500);
     });
   });
+
+  describe("OpenAI service tier cost adjustments", () => {
+    // gpt-5 has tier-specific pricing in models.json:
+    // - standard: input $1.25/M, output $10/M
+    // - flex: input $0.625/M, output $5/M (~50% cheaper)
+    // - priority: input $2.50/M, output $20/M (~2x)
+    const usage: LanguageModelV2Usage = {
+      inputTokens: 1000000, // 1M tokens for easy math
+      outputTokens: 100000, // 100K tokens
+      totalTokens: 1100000,
+    };
+
+    test("applies standard pricing when serviceTier is undefined", () => {
+      const result = createDisplayUsage(usage, "openai:gpt-5");
+
+      expect(result).toBeDefined();
+      // Standard: $1.25/M input = $1.25 for 1M tokens
+      expect(result!.input.cost_usd).toBeCloseTo(1.25, 2);
+      // Standard: $10/M output = $1.00 for 100K tokens
+      expect(result!.output.cost_usd).toBeCloseTo(1.0, 2);
+    });
+
+    test("applies standard pricing when serviceTier is 'default'", () => {
+      const result = createDisplayUsage(usage, "openai:gpt-5", {
+        openai: { serviceTier: "default" },
+      });
+
+      expect(result).toBeDefined();
+      expect(result!.input.cost_usd).toBeCloseTo(1.25, 2);
+      expect(result!.output.cost_usd).toBeCloseTo(1.0, 2);
+    });
+
+    test("applies flex pricing when serviceTier is 'flex'", () => {
+      const result = createDisplayUsage(usage, "openai:gpt-5", {
+        openai: { serviceTier: "flex" },
+      });
+
+      expect(result).toBeDefined();
+      // Flex: $0.625/M input = $0.625 for 1M tokens
+      expect(result!.input.cost_usd).toBeCloseTo(0.625, 3);
+      // Flex: $5/M output = $0.50 for 100K tokens
+      expect(result!.output.cost_usd).toBeCloseTo(0.5, 2);
+    });
+
+    test("applies priority pricing when serviceTier is 'priority'", () => {
+      const result = createDisplayUsage(usage, "openai:gpt-5", {
+        openai: { serviceTier: "priority" },
+      });
+
+      expect(result).toBeDefined();
+      // Priority: $2.50/M input = $2.50 for 1M tokens
+      expect(result!.input.cost_usd).toBeCloseTo(2.5, 2);
+      // Priority: $20/M output = $2.00 for 100K tokens
+      expect(result!.output.cost_usd).toBeCloseTo(2.0, 2);
+    });
+
+    test("ignores serviceTier for non-OpenAI models", () => {
+      // Even if serviceTier is present, non-OpenAI models should use standard pricing
+      const result = createDisplayUsage(usage, "anthropic:claude-sonnet-4-5", {
+        openai: { serviceTier: "flex" }, // Should be ignored
+      });
+
+      expect(result).toBeDefined();
+      // Anthropic pricing shouldn't change based on OpenAI serviceTier
+      // Just verify tokens are correct (pricing varies by model)
+      expect(result!.input.tokens).toBe(1000000);
+      expect(result!.output.tokens).toBe(100000);
+    });
+
+    test("applies flex pricing to cached tokens", () => {
+      const usageWithCache: LanguageModelV2Usage = {
+        inputTokens: 1000000, // Includes cached
+        outputTokens: 100000,
+        totalTokens: 1100000,
+        cachedInputTokens: 500000, // 500K cached
+      };
+
+      const result = createDisplayUsage(usageWithCache, "openai:gpt-5", {
+        openai: { serviceTier: "flex" },
+      });
+
+      expect(result).toBeDefined();
+      // Flex cache: $0.0625/M = $0.03125 for 500K tokens
+      expect(result!.cached.cost_usd).toBeCloseTo(0.03125, 4);
+    });
+  });
 });
diff --git a/src/common/utils/tokens/displayUsage.ts b/src/common/utils/tokens/displayUsage.ts
index cc0dfe476a..511897e3f7 100644
--- a/src/common/utils/tokens/displayUsage.ts
+++ b/src/common/utils/tokens/displayUsage.ts
@@ -6,10 +6,49 @@
  */
 
 import type { LanguageModelV2Usage } from "@ai-sdk/provider";
+import type { ModelStats } from "./modelStats";
 import { getModelStats } from "./modelStats";
 import type { ChatUsageDisplay } from "./usageAggregator";
 import { normalizeGatewayModel } from "../ai/models";
 
+/**
+ * Get tier-adjusted costs from model stats based on OpenAI service tier.
+ * Falls back to standard pricing if tier-specific costs aren't available.
+ */
+function getTierCosts(
+  modelStats: ModelStats,
+  serviceTier: string | undefined
+): {
+  inputCost: number;
+  outputCost: number;
+  cacheReadCost: number;
+} {
+  const standardCosts = {
+    inputCost: modelStats.input_cost_per_token,
+    outputCost: modelStats.output_cost_per_token,
+    cacheReadCost: modelStats.cache_read_input_token_cost ?? 0,
+  };
+
+  if (serviceTier === "flex") {
+    return {
+      inputCost: modelStats.input_cost_per_token_flex ?? standardCosts.inputCost,
+      outputCost: modelStats.output_cost_per_token_flex ?? standardCosts.outputCost,
+      cacheReadCost: modelStats.cache_read_input_token_cost_flex ?? standardCosts.cacheReadCost,
+    };
+  }
+
+  if (serviceTier === "priority") {
+    return {
+      inputCost: modelStats.input_cost_per_token_priority ?? standardCosts.inputCost,
+      outputCost: modelStats.output_cost_per_token_priority ?? standardCosts.outputCost,
+      cacheReadCost: modelStats.cache_read_input_token_cost_priority ?? standardCosts.cacheReadCost,
+    };
+  }
+
+  // "default", "auto", or undefined → standard pricing
+  return standardCosts;
+}
+
 /**
  * Create a display-friendly usage object from AI SDK usage
  *
@@ -60,11 +99,11 @@ export function createDisplayUsage(
   // Get model stats for cost calculation
   const modelStats = getModelStats(model);
 
-  // TODO: Adjust costs based on OpenAI service_tier from providerMetadata.openai.serviceTier
-  // - flex: ~50% cheaper (Batch API rates)
-  // - priority: premium pricing (~1.5x for some models)
-  // - default/auto: standard pricing
-  // The actual tier used is in the API response, not the requested tier.
+  // Extract OpenAI service tier from response metadata (actual tier used, not requested)
+  // AI SDK returns serviceTier in providerMetadata.openai.serviceTier
+  const serviceTier = isOpenAI
+    ? (providerMetadata?.openai as { serviceTier?: string } | undefined)?.serviceTier
+    : undefined;
 
   // Calculate costs based on model stats (undefined if model unknown)
   let inputCost: number | undefined;
   let cachedCost: number | undefined;
@@ -74,11 +113,14 @@
   let reasoningCost: number | undefined;
 
   if (modelStats) {
-    inputCost = inputTokens * modelStats.input_cost_per_token;
-    cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0);
+    // Get tier-adjusted costs (flex ~50% cheaper, priority ~2x)
+    const tierCosts = getTierCosts(modelStats, serviceTier);
+
+    inputCost = inputTokens * tierCosts.inputCost;
+    cachedCost = cachedTokens * tierCosts.cacheReadCost;
     cacheCreateCost = cacheCreateTokens * (modelStats.cache_creation_input_token_cost ?? 0);
-    outputCost = outputWithoutReasoning * modelStats.output_cost_per_token;
-    reasoningCost = reasoningTokens * modelStats.output_cost_per_token;
+    outputCost = outputWithoutReasoning * tierCosts.outputCost;
+    reasoningCost = reasoningTokens * tierCosts.outputCost;
   }
 
   return {
diff --git a/src/common/utils/tokens/modelStats.ts b/src/common/utils/tokens/modelStats.ts
index e676ba94c5..1381a7b9e0 100644
--- a/src/common/utils/tokens/modelStats.ts
+++ b/src/common/utils/tokens/modelStats.ts
@@ -9,6 +9,13 @@ export interface ModelStats {
   output_cost_per_token: number;
   cache_creation_input_token_cost?: number;
   cache_read_input_token_cost?: number;
+  // OpenAI service tier-specific costs (flex ~50% cheaper, priority ~2x)
+  input_cost_per_token_flex?: number;
+  output_cost_per_token_flex?: number;
+  cache_read_input_token_cost_flex?: number;
+  input_cost_per_token_priority?: number;
+  output_cost_per_token_priority?: number;
+  cache_read_input_token_cost_priority?: number;
 }
 
 interface RawModelData {
@@ -18,6 +25,13 @@
   output_cost_per_token?: number;
   cache_creation_input_token_cost?: number;
   cache_read_input_token_cost?: number;
+  // OpenAI service tier-specific costs
+  input_cost_per_token_flex?: number;
+  output_cost_per_token_flex?: number;
+  cache_read_input_token_cost_flex?: number;
+  input_cost_per_token_priority?: number;
+  output_cost_per_token_priority?: number;
+  cache_read_input_token_cost_priority?: number;
   [key: string]: unknown;
 }
 
@@ -52,6 +66,31 @@ function extractModelStats(data: RawModelData): ModelStats {
       typeof data.cache_read_input_token_cost === "number"
         ? data.cache_read_input_token_cost
         : undefined,
+    // OpenAI service tier-specific costs
+    input_cost_per_token_flex:
+      typeof data.input_cost_per_token_flex === "number"
+        ? data.input_cost_per_token_flex
+        : undefined,
+    output_cost_per_token_flex:
+      typeof data.output_cost_per_token_flex === "number"
+        ? data.output_cost_per_token_flex
+        : undefined,
+    cache_read_input_token_cost_flex:
+      typeof data.cache_read_input_token_cost_flex === "number"
+        ? data.cache_read_input_token_cost_flex
+        : undefined,
+    input_cost_per_token_priority:
+      typeof data.input_cost_per_token_priority === "number"
+        ? data.input_cost_per_token_priority
+        : undefined,
+    output_cost_per_token_priority:
+      typeof data.output_cost_per_token_priority === "number"
+        ? data.output_cost_per_token_priority
+        : undefined,
+    cache_read_input_token_cost_priority:
+      typeof data.cache_read_input_token_cost_priority === "number"
+        ? data.cache_read_input_token_cost_priority
+        : undefined,
   };
   /* eslint-enable @typescript-eslint/non-nullable-type-assertion-style */
 }

From db85e343b702e99cbf4f19f31aa53db52a37ac29 Mon Sep 17 00:00:00 2001
From: Ammar
Date: Fri, 12 Dec 2025 10:18:30 -0600
Subject: [PATCH 3/4] refactor: use multipliers for service tier costs

Address review feedback:
- Rename isValidServiceTier to isValidOpenAIServiceTier (namespacing)
- Replace verbose tier-specific cost fields with standard multipliers
- Add OPENAI_SERVICE_TIER_COST_MULTIPLIERS constant (flex: 0.5x, priority: 2x)
---
 .../Settings/sections/ProvidersSection.tsx |  4 +-
 src/common/constants/providers.ts          | 15 ++++-
 src/common/utils/tokens/displayUsage.ts    | 56 ++++++-------------
 src/common/utils/tokens/modelStats.ts      | 39 -------------
 src/node/services/aiService.ts             |  4 +-
 src/node/services/providerService.ts       |  4 +-
 6 files changed, 36 insertions(+), 86 deletions(-)

diff --git a/src/browser/components/Settings/sections/ProvidersSection.tsx b/src/browser/components/Settings/sections/ProvidersSection.tsx
index 1766ff194e..e8147a04b4 100644
--- a/src/browser/components/Settings/sections/ProvidersSection.tsx
+++ b/src/browser/components/Settings/sections/ProvidersSection.tsx
@@ -5,7 +5,7 @@ import {
   SUPPORTED_PROVIDERS,
   OPENAI_SERVICE_TIERS,
   OPENAI_DEFAULT_SERVICE_TIER,
-  isValidServiceTier,
+  isValidOpenAIServiceTier,
 } from "@/common/constants/providers";
 import type { ProviderName } from "@/common/constants/providers";
 import { ProviderWithIcon } from "@/browser/components/ProviderIcon";
@@ -397,7 +397,7 @@ export function ProvidersSection() {
           value={config?.openai?.serviceTier ?? OPENAI_DEFAULT_SERVICE_TIER}
           onValueChange={(next) => {
             if (!api) return;
-            if (!isValidServiceTier(next)) return;
+            if (!isValidOpenAIServiceTier(next)) return;
             updateOptimistically("openai", { serviceTier: next });
             void api.providers.setProviderConfig({
diff --git a/src/common/constants/providers.ts b/src/common/constants/providers.ts
index a12176b0e5..936007badb 100644
--- a/src/common/constants/providers.ts
+++ b/src/common/constants/providers.ts
@@ -101,10 +101,23 @@ export const OPENAI_DEFAULT_SERVICE_TIER: OpenAIServiceTier = "auto";
 /**
  * Type guard to check if a string is a valid OpenAI service tier
  */
-export function isValidServiceTier(tier: unknown): tier is OpenAIServiceTier {
+export function isValidOpenAIServiceTier(tier: unknown): tier is OpenAIServiceTier {
   return typeof tier === "string" && OPENAI_SERVICE_TIERS.includes(tier as OpenAIServiceTier);
 }
 
+/**
+ * Cost multipliers for OpenAI service tiers relative to standard pricing.
+ * - flex: 50% cheaper (0.5x)
+ * - priority: 2x premium
+ * - default/auto: standard pricing (1x)
+ */
+export const OPENAI_SERVICE_TIER_COST_MULTIPLIERS: Record<OpenAIServiceTier, number> = {
+  flex: 0.5,
+  priority: 2.0,
+  default: 1.0,
+  auto: 1.0,
+};
+
 export type ProviderName = keyof typeof PROVIDER_DEFINITIONS;
 
 /**
diff --git a/src/common/utils/tokens/displayUsage.ts b/src/common/utils/tokens/displayUsage.ts
index 511897e3f7..6c2be19ccf 100644
--- a/src/common/utils/tokens/displayUsage.ts
+++ b/src/common/utils/tokens/displayUsage.ts
@@ -6,47 +6,23 @@
  */
 
 import type { LanguageModelV2Usage } from "@ai-sdk/provider";
-import type { ModelStats } from "./modelStats";
 import { getModelStats } from "./modelStats";
 import type { ChatUsageDisplay } from "./usageAggregator";
 import { normalizeGatewayModel } from "../ai/models";
+import {
+  OPENAI_SERVICE_TIER_COST_MULTIPLIERS,
+  isValidOpenAIServiceTier,
+} from "@/common/constants/providers";
 
 /**
- * Get tier-adjusted costs from model stats based on OpenAI service tier.
- * Falls back to standard pricing if tier-specific costs aren't available.
+ * Get cost multiplier for OpenAI service tier.
+ * Returns 1.0 for unknown tiers (standard pricing).
  */
-function getTierCosts(
-  modelStats: ModelStats,
-  serviceTier: string | undefined
-): {
-  inputCost: number;
-  outputCost: number;
-  cacheReadCost: number;
-} {
-  const standardCosts = {
-    inputCost: modelStats.input_cost_per_token,
-    outputCost: modelStats.output_cost_per_token,
-    cacheReadCost: modelStats.cache_read_input_token_cost ?? 0,
-  };
-
-  if (serviceTier === "flex") {
-    return {
-      inputCost: modelStats.input_cost_per_token_flex ?? standardCosts.inputCost,
-      outputCost: modelStats.output_cost_per_token_flex ?? standardCosts.outputCost,
-      cacheReadCost: modelStats.cache_read_input_token_cost_flex ?? standardCosts.cacheReadCost,
-    };
+function getServiceTierMultiplier(serviceTier: string | undefined): number {
+  if (serviceTier && isValidOpenAIServiceTier(serviceTier)) {
+    return OPENAI_SERVICE_TIER_COST_MULTIPLIERS[serviceTier];
   }
-
-  if (serviceTier === "priority") {
-    return {
-      inputCost: modelStats.input_cost_per_token_priority ?? standardCosts.inputCost,
-      outputCost: modelStats.output_cost_per_token_priority ?? standardCosts.outputCost,
-      cacheReadCost: modelStats.cache_read_input_token_cost_priority ?? standardCosts.cacheReadCost,
-    };
-  }
-
-  // "default", "auto", or undefined → standard pricing
-  return standardCosts;
+  return 1.0;
 }
 
 /**
@@ -113,14 +89,14 @@ export function createDisplayUsage(
   let reasoningCost: number | undefined;
 
   if (modelStats) {
-    // Get tier-adjusted costs (flex ~50% cheaper, priority ~2x)
-    const tierCosts = getTierCosts(modelStats, serviceTier);
+    // Get tier multiplier (flex ~50% cheaper, priority ~2x)
+    const tierMultiplier = getServiceTierMultiplier(serviceTier);
 
-    inputCost = inputTokens * tierCosts.inputCost;
-    cachedCost = cachedTokens * tierCosts.cacheReadCost;
+    inputCost = inputTokens * modelStats.input_cost_per_token * tierMultiplier;
+    cachedCost = cachedTokens * (modelStats.cache_read_input_token_cost ?? 0) * tierMultiplier;
     cacheCreateCost = cacheCreateTokens * (modelStats.cache_creation_input_token_cost ?? 0);
-    outputCost = outputWithoutReasoning * tierCosts.outputCost;
-    reasoningCost = reasoningTokens * tierCosts.outputCost;
+    outputCost = outputWithoutReasoning * modelStats.output_cost_per_token * tierMultiplier;
+    reasoningCost = reasoningTokens * modelStats.output_cost_per_token * tierMultiplier;
   }
 
   return {
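
For reference, a worked example of the multiplier arithmetic introduced above
(standalone sketch, not code from this series), using the gpt-5 rates quoted in the
displayUsage tests:

    import { OPENAI_SERVICE_TIER_COST_MULTIPLIERS } from "@/common/constants/providers";

    // gpt-5 standard input rate: $1.25 per 1M tokens => 0.00000125 USD/token
    const inputCostPerToken = 1.25 / 1_000_000;

    // flex halves the standard rate, priority doubles it:
    const flexCost =
      1_000_000 * inputCostPerToken * OPENAI_SERVICE_TIER_COST_MULTIPLIERS.flex;
    // => $0.625 for 1M input tokens (matches the "applies flex pricing" test)

    const priorityCost =
      1_000_000 * inputCostPerToken * OPENAI_SERVICE_TIER_COST_MULTIPLIERS.priority;
    // => $2.50 for 1M input tokens (matches the "applies priority pricing" test)
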
diff --git a/src/common/utils/tokens/modelStats.ts b/src/common/utils/tokens/modelStats.ts
index 1381a7b9e0..e676ba94c5 100644
--- a/src/common/utils/tokens/modelStats.ts
+++ b/src/common/utils/tokens/modelStats.ts
@@ -9,13 +9,6 @@ export interface ModelStats {
   output_cost_per_token: number;
   cache_creation_input_token_cost?: number;
   cache_read_input_token_cost?: number;
-  // OpenAI service tier-specific costs (flex ~50% cheaper, priority ~2x)
-  input_cost_per_token_flex?: number;
-  output_cost_per_token_flex?: number;
-  cache_read_input_token_cost_flex?: number;
-  input_cost_per_token_priority?: number;
-  output_cost_per_token_priority?: number;
-  cache_read_input_token_cost_priority?: number;
 }
 
 interface RawModelData {
@@ -25,13 +18,6 @@
   output_cost_per_token?: number;
   cache_creation_input_token_cost?: number;
   cache_read_input_token_cost?: number;
-  // OpenAI service tier-specific costs
-  input_cost_per_token_flex?: number;
-  output_cost_per_token_flex?: number;
-  cache_read_input_token_cost_flex?: number;
-  input_cost_per_token_priority?: number;
-  output_cost_per_token_priority?: number;
-  cache_read_input_token_cost_priority?: number;
   [key: string]: unknown;
 }
 
@@ -66,31 +52,6 @@ function extractModelStats(data: RawModelData): ModelStats {
       typeof data.cache_read_input_token_cost === "number"
         ? data.cache_read_input_token_cost
         : undefined,
-    // OpenAI service tier-specific costs
-    input_cost_per_token_flex:
-      typeof data.input_cost_per_token_flex === "number"
-        ? data.input_cost_per_token_flex
-        : undefined,
-    output_cost_per_token_flex:
-      typeof data.output_cost_per_token_flex === "number"
-        ? data.output_cost_per_token_flex
-        : undefined,
-    cache_read_input_token_cost_flex:
-      typeof data.cache_read_input_token_cost_flex === "number"
-        ? data.cache_read_input_token_cost_flex
-        : undefined,
-    input_cost_per_token_priority:
-      typeof data.input_cost_per_token_priority === "number"
-        ? data.input_cost_per_token_priority
-        : undefined,
-    output_cost_per_token_priority:
-      typeof data.output_cost_per_token_priority === "number"
-        ? data.output_cost_per_token_priority
-        : undefined,
-    cache_read_input_token_cost_priority:
-      typeof data.cache_read_input_token_cost_priority === "number"
-        ? data.cache_read_input_token_cost_priority
-        : undefined,
   };
   /* eslint-enable @typescript-eslint/non-nullable-type-assertion-style */
 }
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index 9cf90d0afc..e6c452380d 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -12,7 +12,7 @@ import type { WorkspaceMetadata } from "@/common/types/workspace";
 import {
   PROVIDER_REGISTRY,
   PROVIDER_DEFINITIONS,
-  isValidServiceTier,
+  isValidOpenAIServiceTier,
   type ProviderName,
 } from "@/common/constants/providers";
 
@@ -503,7 +503,7 @@ export class AIService extends EventEmitter {
     }
 
     // Extract serviceTier from config to pass through to buildProviderOptions
-    if (isValidServiceTier(providerConfig.serviceTier) && muxProviderOptions) {
+    if (isValidOpenAIServiceTier(providerConfig.serviceTier) && muxProviderOptions) {
       muxProviderOptions.openai = {
         ...muxProviderOptions.openai,
         serviceTier: providerConfig.serviceTier,
diff --git a/src/node/services/providerService.ts b/src/node/services/providerService.ts
index d80dcdaa95..4f2f776447 100644
--- a/src/node/services/providerService.ts
+++ b/src/node/services/providerService.ts
@@ -1,6 +1,6 @@
 import { EventEmitter } from "events";
 import type { Config } from "@/node/config";
-import { SUPPORTED_PROVIDERS, isValidServiceTier } from "@/common/constants/providers";
+import { SUPPORTED_PROVIDERS, isValidOpenAIServiceTier } from "@/common/constants/providers";
 import type { Result } from "@/common/types/result";
 import type {
   AWSCredentialStatus,
@@ -65,7 +65,7 @@ export class ProviderService {
     };
 
     // OpenAI-specific fields
-    if (provider === "openai" && isValidServiceTier(config.serviceTier)) {
+    if (provider === "openai" && isValidOpenAIServiceTier(config.serviceTier)) {
       providerInfo.serviceTier = config.serviceTier;
     }
 

From bb06b69d6719e9432b7073f895c627e28d60ddc7 Mon Sep 17 00:00:00 2001
From: Ammar
Date: Fri, 12 Dec 2025 10:40:03 -0600
Subject: [PATCH 4/4] =?UTF-8?q?=F0=9F=A4=96=20fix:=20preserve=20compaction?=
 =?UTF-8?q?=20usage=20for=20cost=20persistence?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/common/orpc/schemas/message.ts          |  1 +
 src/common/types/message.ts                 |  8 ++++
 src/node/services/agentSession.ts           |  4 ++
 src/node/services/compactionHandler.test.ts | 48 ++++++++++++++++++++-
 src/node/services/compactionHandler.ts      | 28 +++++++++---
 src/node/services/serviceContainer.ts       |  3 +-
 src/node/services/workspaceService.test.ts  |  6 ++-
 src/node/services/workspaceService.ts       |  5 ++-
 8 files changed, 92 insertions(+), 11 deletions(-)

diff --git a/src/common/orpc/schemas/message.ts b/src/common/orpc/schemas/message.ts
index 2f9689409a..6b886d6ff7 100644
--- a/src/common/orpc/schemas/message.ts
+++ b/src/common/orpc/schemas/message.ts
@@ -74,6 +74,7 @@ export const MuxMessageSchema = z.object({
   historySequence: z.number().optional(),
   timestamp: z.number().optional(),
   model: z.string().optional(),
+  historicalUsage: z.any().optional(),
   usage: z.any().optional(),
   contextUsage: z.any().optional(),
   providerMetadata: z.record(z.string(), z.unknown()).optional(),
diff --git a/src/common/types/message.ts b/src/common/types/message.ts
index f8af4dd71c..e14ffc52f0 100644
--- a/src/common/types/message.ts
+++ b/src/common/types/message.ts
@@ -1,4 +1,5 @@
 import type { UIMessage } from "ai";
+import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
 import type { LanguageModelV2Usage } from "@ai-sdk/provider";
 import type { StreamErrorType } from "./errors";
 import type { ToolPolicy } from "@/common/utils/tools/toolPolicy";
@@ -101,6 +102,13 @@ export interface MuxMetadata {
   usage?: LanguageModelV2Usage; // Last step's usage only (for context window display - inputTokens = current context size)
   contextUsage?: LanguageModelV2Usage;
+  /**
+   * Snapshot of cumulative costs/tokens from before a compaction.
+   *
+   * This is only set on compaction summary messages so we can rebuild session usage
+   * from chat.jsonl if session-usage.json is missing/corrupted.
+   */
+  historicalUsage?: ChatUsageDisplay;
   // Aggregated provider metadata across all steps (for cost calculation)
   providerMetadata?: Record<string, unknown>;
   // Last step's provider metadata (for context window cache display)
diff --git a/src/node/services/agentSession.ts b/src/node/services/agentSession.ts
index 57dffc0fbc..4d51a9996e 100644
--- a/src/node/services/agentSession.ts
+++ b/src/node/services/agentSession.ts
@@ -28,6 +28,7 @@ import { prepareUserMessageForSend } from "@/common/types/message";
 import { createRuntime } from "@/node/runtime/runtimeFactory";
 import { MessageQueue } from "./messageQueue";
 import type { StreamEndEvent } from "@/common/types/stream";
+import type { SessionUsageService } from "./sessionUsageService";
 import { CompactionHandler } from "./compactionHandler";
 import type { BackgroundProcessManager } from "./backgroundProcessManager";
 import { computeDiff } from "@/node/utils/diff";
@@ -85,6 +86,7 @@ interface AgentSessionOptions {
   config: Config;
   historyService: HistoryService;
   partialService: PartialService;
+  sessionUsageService?: SessionUsageService;
   aiService: AIService;
   initStateManager: InitStateManager;
   backgroundProcessManager: BackgroundProcessManager;
@@ -134,6 +136,7 @@ export class AgentSession {
       workspaceId,
       config,
      historyService,
+      sessionUsageService,
       partialService,
       aiService,
       initStateManager,
@@ -156,6 +159,7 @@
 
     this.compactionHandler = new CompactionHandler({
       workspaceId: this.workspaceId,
+      sessionUsageService,
       historyService: this.historyService,
       partialService: this.partialService,
       emitter: this.emitter,
diff --git a/src/node/services/compactionHandler.test.ts b/src/node/services/compactionHandler.test.ts
index db2b0201d6..628144c9d5 100644
--- a/src/node/services/compactionHandler.test.ts
+++ b/src/node/services/compactionHandler.test.ts
@@ -3,6 +3,8 @@ import { CompactionHandler } from "./compactionHandler";
 import type { HistoryService } from "./historyService";
 import type { PartialService } from "./partialService";
 import type { EventEmitter } from "events";
+import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
+import type { SessionUsageService } from "./sessionUsageService";
 import { createMuxMessage, type MuxMessage } from "@/common/types/message";
 import type { StreamEndEvent } from "@/common/types/stream";
 import { Ok, Err, type Result } from "@/common/types/result";
@@ -102,6 +104,14 @@ const createStreamEndEvent = (
 });
 
 // DRY helper to set up successful compaction scenario
+const createChatUsage = (input: number, output: number): ChatUsageDisplay => ({
+  input: { tokens: input },
+  cached: { tokens: 0 },
+  cacheCreate: { tokens: 0 },
+  output: { tokens: output },
+  reasoning: { tokens: 0 },
+});
+
 const setupSuccessfulCompaction = (
   mockHistoryService: ReturnType,
   messages: MuxMessage[] = [createCompactionRequest()],
@@ -289,15 +299,49 @@
     });
     expect(summaryEvent).toBeDefined();
     const sevt = summaryEvent?.data.message as MuxMessage;
-    // providerMetadata is omitted to avoid inflating context with pre-compaction cacheCreationInputTokens
     expect(sevt.metadata).toMatchObject({
       model: "claude-3-5-sonnet-20241022",
       usage,
       duration: 2000,
       systemMessageTokens: 100,
       compacted: true,
+      providerMetadata: { anthropic: { cacheCreationInputTokens: 50000 } },
     });
+  });
+
+  it("should store historicalUsage snapshot on compaction summary when available", async () => {
+    const compactionReq = createCompactionRequest();
+    setupSuccessfulCompaction(mockHistoryService, [compactionReq]);
+
+    const sessionUsage = {
+      byModel: {
+        "openai:gpt-5": createChatUsage(100, 50),
+        "anthropic:claude-sonnet-4-5": createChatUsage(200, 75),
+      },
+      version: 1 as const,
+    };
+
+    const mockSessionUsageService: Pick<SessionUsageService, "getSessionUsage"> = {
+      getSessionUsage: mock(() => Promise.resolve(sessionUsage)),
+    };
+
+    handler = new CompactionHandler({
+      workspaceId,
+      historyService: mockHistoryService as unknown as HistoryService,
+      partialService: mockPartialService as unknown as PartialService,
+      emitter: mockEmitter,
+      sessionUsageService: mockSessionUsageService as unknown as SessionUsageService,
+    });
+
+    const event = createStreamEndEvent("Summary");
+    await handler.handleCompletion(event);
+
+    const appendedMsg = mockHistoryService.appendToHistory.mock.calls[0][1] as MuxMessage;
+    expect(appendedMsg.metadata?.historicalUsage).toBeDefined();
+    expect(appendedMsg.metadata?.historicalUsage).toMatchObject({
+      input: { tokens: 300 },
+      output: { tokens: 125 },
+    });
-    expect(sevt.metadata?.providerMetadata).toBeUndefined();
   });
 
   it("should emit stream-end event to frontend", async () => {
diff --git a/src/node/services/compactionHandler.ts b/src/node/services/compactionHandler.ts
index dfdd02d345..d9ae3d2079 100644
--- a/src/node/services/compactionHandler.ts
+++ b/src/node/services/compactionHandler.ts
@@ -5,6 +5,8 @@ import type { StreamEndEvent } from "@/common/types/stream";
 import type { WorkspaceChatMessage, DeleteMessage } from "@/common/orpc/types";
 import type { Result } from "@/common/types/result";
 import { Ok, Err } from "@/common/types/result";
+import type { SessionUsageService } from "./sessionUsageService";
+import { sumUsageHistory } from "@/common/utils/tokens/usageAggregator";
 import type { LanguageModelV2Usage } from "@ai-sdk/provider";
 import { createMuxMessage, type MuxMessage } from "@/common/types/message";
 
@@ -16,6 +18,7 @@ import {
 
 interface CompactionHandlerOptions {
   workspaceId: string;
+  sessionUsageService?: SessionUsageService;
   historyService: HistoryService;
   partialService: PartialService;
   emitter: EventEmitter;
@@ -31,6 +34,7 @@ interface CompactionHandlerOptions {
  */
 export class CompactionHandler {
   private readonly workspaceId: string;
+  private readonly sessionUsageService?: SessionUsageService;
   private readonly historyService: HistoryService;
   private readonly partialService: PartialService;
   private readonly emitter: EventEmitter;
@@ -42,6 +46,7 @@ export class CompactionHandler {
   private cachedFileDiffs: FileEditDiff[] = [];
 
   constructor(options: CompactionHandlerOptions) {
+    this.sessionUsageService = options.sessionUsageService;
    this.workspaceId = options.workspaceId;
     this.historyService = options.historyService;
     this.partialService = options.partialService;
@@ -134,6 +139,7 @@
       model: string;
       usage?: LanguageModelV2Usage;
       duration?: number;
+      contextProviderMetadata?: Record<string, unknown>;
       providerMetadata?: Record<string, unknown>;
       systemMessageTokens?: number;
     },
@@ -152,6 +158,17 @@
     }
 
     // Extract diffs BEFORE clearing history (they'll be gone after clear)
+
+    // Snapshot cumulative usage BEFORE clearing history.
+    // This preserves pre-compaction costs if session-usage.json is missing/corrupted
+    // and needs to be rebuilt from chat.jsonl.
+    const historicalUsage = await (async () => {
+      if (!this.sessionUsageService) return undefined;
+      const sessionUsage = await this.sessionUsageService.getSessionUsage(this.workspaceId);
+      if (!sessionUsage) return undefined;
+      const values = Object.values(sessionUsage.byModel);
+      return values.length > 0 ? sumUsageHistory(values) : undefined;
+    })();
     this.cachedFileDiffs = extractEditedFileDiffs(messages);
 
     // Clear entire history and get deleted sequences
@@ -162,12 +179,8 @@
     const deletedSequences = clearResult.data;
 
     // Create summary message with metadata.
-    // We omit providerMetadata because it contains cacheCreationInputTokens from the
-    // pre-compaction context, which inflates context usage display.
-    // Note: We no longer store historicalUsage here. Cumulative costs are tracked in
-    // session-usage.json, which is updated on every stream-end. If that file is deleted
-    // or corrupted, pre-compaction costs are lost - this is acceptable since manual
-    // file deletion is out of scope for data recovery.
+    // - providerMetadata: needed for accurate cost reconstruction (cacheCreate tokens, serviceTier, etc.)
+    // - historicalUsage: snapshot of cumulative pre-compaction costs/tokens for rebuildFromMessages()
     const summaryMessage = createMuxMessage(
       `summary-${Date.now()}-${Math.random().toString(36).substring(2, 11)}`,
       "assistant",
@@ -176,6 +189,9 @@
       timestamp: Date.now(),
       compacted: true,
       model: metadata.model,
+      historicalUsage,
+      providerMetadata: metadata.providerMetadata,
+      contextProviderMetadata: metadata.contextProviderMetadata,
       usage: metadata.usage,
       duration: metadata.duration,
       systemMessageTokens: metadata.systemMessageTokens,
diff --git a/src/node/services/serviceContainer.ts b/src/node/services/serviceContainer.ts
index d3927ed7a5..0688f48a8e 100644
--- a/src/node/services/serviceContainer.ts
+++ b/src/node/services/serviceContainer.ts
@@ -87,7 +87,8 @@ export class ServiceContainer {
       this.aiService,
       this.initStateManager,
       this.extensionMetadata,
-      this.backgroundProcessManager
+      this.backgroundProcessManager,
+      this.sessionUsageService
     );
     this.workspaceService.setMCPServerManager(this.mcpServerManager);
     this.providerService = new ProviderService(config);
diff --git a/src/node/services/workspaceService.test.ts b/src/node/services/workspaceService.test.ts
index 49cda4476f..da5850227e 100644
--- a/src/node/services/workspaceService.test.ts
+++ b/src/node/services/workspaceService.test.ts
@@ -6,6 +6,7 @@ import type { PartialService } from "./partialService";
 import type { AIService } from "./aiService";
 import type { InitStateManager } from "./initStateManager";
 import type { ExtensionMetadataService } from "./ExtensionMetadataService";
+import type { SessionUsageService } from "./sessionUsageService";
 import type { BackgroundProcessManager } from "./backgroundProcessManager";
 
 // Helper to access private renamingWorkspaces set
@@ -47,6 +48,8 @@ describe("WorkspaceService rename lock", () => {
   const mockInitStateManager: Partial<InitStateManager> = {};
 
   const mockExtensionMetadataService: Partial<ExtensionMetadataService> = {};
+
+  const mockSessionUsageService: Partial<SessionUsageService> = {};
   const mockBackgroundProcessManager: Partial<BackgroundProcessManager> = {
     cleanup: mock(() => Promise.resolve()),
   };
@@ -58,7 +61,8 @@ describe("WorkspaceService rename lock", () => {
       mockAIService,
       mockInitStateManager as InitStateManager,
       mockExtensionMetadataService as ExtensionMetadataService,
-      mockBackgroundProcessManager as BackgroundProcessManager
+      mockBackgroundProcessManager as BackgroundProcessManager,
+      mockSessionUsageService as SessionUsageService
     );
   });
diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts
index b87d0175c9..bd67fac692 100644
--- a/src/node/services/workspaceService.ts
+++ b/src/node/services/workspaceService.ts
@@ -9,6 +9,7 @@ import { log } from "@/node/services/log";
 import { AgentSession } from "@/node/services/agentSession";
 import type { HistoryService } from "@/node/services/historyService";
 import type { PartialService } from "@/node/services/partialService";
+import type { SessionUsageService } from "@/node/services/sessionUsageService";
 import type { AIService } from "@/node/services/aiService";
 import type { InitStateManager } from "@/node/services/initStateManager";
 import type { ExtensionMetadataService } from "@/node/services/ExtensionMetadataService";
@@ -100,7 +101,8 @@ export class WorkspaceService extends EventEmitter {
     private readonly aiService: AIService,
     private readonly initStateManager: InitStateManager,
     private readonly extensionMetadata: ExtensionMetadataService,
-    private readonly backgroundProcessManager: BackgroundProcessManager
+    private readonly backgroundProcessManager: BackgroundProcessManager,
+    private readonly sessionUsageService: SessionUsageService
   ) {
     super();
     this.setupMetadataListeners();
@@ -238,6 +240,7 @@
       workspaceId: trimmed,
       config: this.config,
       historyService: this.historyService,
+      sessionUsageService: this.sessionUsageService,
       partialService: this.partialService,
       aiService: this.aiService,
       initStateManager: this.initStateManager,
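
For reference, a minimal sketch of the recovery path patch 4 enables (standalone
illustration, not code from this series; the real rebuildFromMessages() lives in the
session usage service and is only referenced, not shown, here):

    import { sumUsageHistory } from "@/common/utils/tokens/usageAggregator";
    import type { ChatUsageDisplay } from "@/common/utils/tokens/usageAggregator";
    import type { MuxMessage } from "@/common/types/message";

    // Rebuild cumulative usage from chat.jsonl messages alone. A compaction
    // summary's historicalUsage snapshot stands in for all the messages that
    // were deleted when history was cleared, so pre-compaction costs survive
    // even if session-usage.json is missing or corrupted.
    function rebuildFromMessages(messages: MuxMessage[]): ChatUsageDisplay | undefined {
      const parts: ChatUsageDisplay[] = [];
      for (const msg of messages) {
        if (msg.metadata?.historicalUsage) parts.push(msg.metadata.historicalUsage);
        // ...plus each message's own usage converted via createDisplayUsage(),
        // elided in this sketch.
      }
      return parts.length > 0 ? sumUsageHistory(parts) : undefined;
    }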