From ad1b045e12b7d8bb3fd77db26a902c689fded494 Mon Sep 17 00:00:00 2001 From: link2026 Date: Mon, 25 May 2026 16:03:04 +0800 Subject: [PATCH] =?UTF-8?q?feat(ai):=20LLMSession=20=E6=8E=A5=20MLX-Swift,?= =?UTF-8?q?=E8=B7=91=20Qwen3-1.7B=20=E6=B5=81=E5=BC=8F=E7=94=9F=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 按 W2 plan Task 6 + docs/superpowers/notes/2026-05-25-mlx-api-corrections.md 落地 LLM 推理底座: - actor LLMSession 包装 MLXLLM.ModelContainer - load(folderURL:) 用 ModelConfiguration(directory:) + LLMModelFactory.shared.loadContainer - generate(prompt:maxTokens:) 返回 AsyncThrowingStream - 内部 container.perform { (context: ModelContext) in ... } 拿到模型上下文 - UserInput → processor.prepare → MLXLMCommon.generate(顶层函数, AsyncStream) - Generation switch 穷举 3 个 case(chunk / info / toolCall) - maxTokens 通过 GenerateParameters 传递,温度 0.6 topP 0.9 - 取消传播:continuation.onTermination 同步 task.cancel() - 每 chunk yield 时计算 tok/s decodeRate API 基线:mlx-swift-examples tag 2.29.1, commit 9bff95ca。 需用户手动: 1. Xcode 把 LLMSession.swift 拖入 体己 target (AI group) 2. ⌘B 验证 AIRuntime 不再报 "Cannot find LLMSession" 3. 把 ~/tiji-models/Qwen3-1.7B-4bit/ 拷到模拟器沙盒 Application Support/Models/ 4. Task 7 (DebugAIRunner) 才能跑通 Co-Authored-By: Claude Opus 4.7 (1M context) --- 体己/AI/LLMSession.swift | 78 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 体己/AI/LLMSession.swift diff --git a/体己/AI/LLMSession.swift b/体己/AI/LLMSession.swift new file mode 100644 index 0000000..20b6dd5 --- /dev/null +++ b/体己/AI/LLMSession.swift @@ -0,0 +1,78 @@ +import Foundation +import MLX +import MLXLLM +import MLXLMCommon + +/// 封装 MLX 语言模型的流式生成,actor 保证单线程访问。 +/// 基于 mlx-swift-examples 2.29.1(commit 9bff95ca)的 API。 +actor LLMSession { + let container: ModelContainer + + init(container: ModelContainer) { + self.container = container + } + + /// 从本地目录加载模型(包含 config.json + weights + tokenizer)。 + static func load(folderURL: URL) async throws -> LLMSession { + let configuration = ModelConfiguration(directory: folderURL) + let container = try await LLMModelFactory.shared.loadContainer( + configuration: configuration + ) + return LLMSession(container: container) + } + + /// 流式生成。返回的 AsyncThrowingStream 被取消时,内部 Task 也会取消。 + /// - Parameters: + /// - prompt: 原始 prompt 文本(经 processor 转 LMInput) + /// - maxTokens: 最大 token 数,由 GenerateParameters 控制 + func generate(prompt: String, maxTokens: Int) -> AsyncThrowingStream { + AsyncThrowingStream { continuation in + let task = Task { + do { + let parameters = GenerateParameters( + maxTokens: maxTokens, + temperature: Float(0.6), + topP: Float(0.9) + ) + + try await container.perform { (context: ModelContext) in + let userInput = UserInput(prompt: prompt) + let lmInput = try await context.processor.prepare(input: userInput) + + let start = Date() + var produced = 0 + + for await event in try MLXLMCommon.generate( + input: lmInput, + parameters: parameters, + context: context + ) { + if Task.isCancelled { break } + + switch event { + case .chunk(let text): + produced += 1 + let elapsed = Date().timeIntervalSince(start) + let rate = elapsed > 0 ? Double(produced) / elapsed : 0 + continuation.yield(TokenChunk(text: text, decodeRate: rate)) + + case .info: + // 生成完成统计,是流的最后一个事件 + break + + case .toolCall: + // 纯文本生成不会触发,switch 穷举 + break + } + } + MLX.GPU.synchronize() + } + continuation.finish() + } catch { + continuation.finish(throwing: error) + } + } + continuation.onTermination = { _ in task.cancel() } + } + } +}