From f024896359fd73544ea270552fbecdd37124a905 Mon Sep 17 00:00:00 2001
From: noorbhatia
Date: Thu, 29 Jan 2026 13:35:35 +0530
Subject: [PATCH 1/2] Implement prewarm for MLXLanguageModel

---
 .../Models/MLXLanguageModel.swift | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift
index 8f44b31..fc840e2 100644
--- a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift
+++ b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift
@@ -357,6 +357,51 @@ import Foundation
 
         return LanguageModelSession.ResponseStream(stream: stream)
     }
+
+    /// Prewarms the model for the given session and optional prompt prefix.
+    public func prewarm(
+        for session: LanguageModelSession,
+        promptPrefix: Prompt?
+    ) {
+        let modelId = self.modelId
+        let hub = self.hub
+        let directory = self.directory
+
+        let instructions = session.instructions?.description
+        let tools = session.tools
+
+        Task {
+
+            let context = try await loadContext(modelId: modelId, hub: hub, directory: directory)
+
+            // Build chat history similar to respond() to prime the cache effectively
+            var chat: [MLXLMCommon.Chat.Message] = []
+
+            // Add system instructions if present
+            if let instructions, !instructions.isEmpty {
+                chat.append(.init(role: .system, content: instructions))
+            }
+
+            // Add prompt prefix or minimal user message
+            let promptText = promptPrefix?.description ?? "."
+            chat.append(.init(role: .user, content: promptText))
+
+            // Convert tools to MLX format
+            let toolSpecs: [ToolSpec]? =
+                tools.isEmpty
+                ? nil
+                : tools.map { convertToolToMLXSpec($0) }
+
+            let userInput = MLXLMCommon.UserInput(
+                chat: chat,
+                processing: .init(resize: .init(width: 512, height: 512)),
+                tools: toolSpecs
+            )
+
+            // Prepare input - triggers tokenization and processor initialization
+            _ = try await context.processor.prepare(input: userInput)
+        }
+    }
 }
 
 // MARK: - Options Mapping

From cef7d5512bd3dc4a294ce0f620ae3d1bab6dd7bb Mon Sep 17 00:00:00 2001
From: noorbhatia
Date: Mon, 2 Feb 2026 17:00:29 +0530
Subject: [PATCH 2/2] Reduce prewarm to just loadContext

---
 .../Models/MLXLanguageModel.swift | 37 +++----------------
 1 file changed, 5 insertions(+), 32 deletions(-)

diff --git a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift
index fc840e2..72a5a88 100644
--- a/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift
+++ b/Sources/AnyLanguageModel/Models/MLXLanguageModel.swift
@@ -358,7 +358,7 @@ import Foundation
         return LanguageModelSession.ResponseStream(stream: stream)
     }
 
-    /// Prewarms the model for the given session and optional prompt prefix.
+    /// Prewarms the model
     public func prewarm(
         for session: LanguageModelSession,
         promptPrefix: Prompt?
@@ -367,39 +367,12 @@ import Foundation
         let hub = self.hub
         let directory = self.directory
 
-        let instructions = session.instructions?.description
-        let tools = session.tools
-
         Task {
-
-            let context = try await loadContext(modelId: modelId, hub: hub, directory: directory)
-
-            // Build chat history similar to respond() to prime the cache effectively
-            var chat: [MLXLMCommon.Chat.Message] = []
-
-            // Add system instructions if present
-            if let instructions, !instructions.isEmpty {
-                chat.append(.init(role: .system, content: instructions))
+            do {
+                _ = try await loadContext(modelId: modelId, hub: hub, directory: directory)
+            } catch {
+                // Ignore errors during prewarm
             }
-
-            // Add prompt prefix or minimal user message
-            let promptText = promptPrefix?.description ?? "."
-            chat.append(.init(role: .user, content: promptText))
-
-            // Convert tools to MLX format
-            let toolSpecs: [ToolSpec]? =
-                tools.isEmpty
-                ? nil
-                : tools.map { convertToolToMLXSpec($0) }
-
-            let userInput = MLXLMCommon.UserInput(
-                chat: chat,
-                processing: .init(resize: .init(width: 512, height: 512)),
-                tools: toolSpecs
-            )
-
-            // Prepare input - triggers tokenization and processor initialization
-            _ = try await context.processor.prepare(input: userInput)
         }
     }
 }
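
For reference, a minimal caller-side sketch of the prewarm API these patches introduce. The MLXLanguageModel(modelId:) and LanguageModelSession(model:) initializers and the model identifier shown below are assumptions about the surrounding AnyLanguageModel API, not part of this diff; only prewarm(for:promptPrefix:) comes from the patches above.

    import AnyLanguageModel

    // Assumed initializers; the model identifier is illustrative.
    let model = MLXLanguageModel(modelId: "mlx-community/Llama-3.2-1B-Instruct-4bit")
    let session = LanguageModelSession(model: model)

    // Kicks off weight loading in a background Task so the first respond()
    // call does not pay the full model-load cost; errors are swallowed inside
    // prewarm, so there is nothing to handle at the call site.
    model.prewarm(for: session, promptPrefix: nil)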