From 193e47842596f0d38db821c142ddd78c947a27d2 Mon Sep 17 00:00:00 2001 From: link2026 Date: Mon, 25 May 2026 15:53:54 +0800 Subject: [PATCH] =?UTF-8?q?docs:=20=E8=AE=B0=E5=BD=95=20MLX-Swift-Examples?= =?UTF-8?q?=202.29.1=20=E7=9C=9F=E5=AE=9E=20API=20=E4=B8=8E=20plan=20?= =?UTF-8?q?=E8=8D=89=E7=A8=BF=E7=9A=84=E5=81=8F=E5=B7=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit W2 plan Task 6 写的 LLMSession 草稿在多处与真实 API 不符: - container.perform 的 context 是具体 ModelContext struct - MLXLMCommon.generate 是顶层函数,只 try 不 await,返回 AsyncStream 非 Throwing - Generation 有第三个 case .toolCall,switch 必须穷举 - GenerateParameters 需要 maxTokens,且 temperature/topP 是 Float - 取消传播需 continuation.onTermination = { _ in task.cancel() } 本笔记含完整修正版 LLMSession.swift,Task 6 implementer 必用此为准。 参考:mlx-swift-examples tag 2.29.1,commit 9bff95ca。 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../notes/2026-05-25-mlx-api-corrections.md | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 docs/superpowers/notes/2026-05-25-mlx-api-corrections.md diff --git a/docs/superpowers/notes/2026-05-25-mlx-api-corrections.md b/docs/superpowers/notes/2026-05-25-mlx-api-corrections.md new file mode 100644 index 0000000..fc88d3e --- /dev/null +++ b/docs/superpowers/notes/2026-05-25-mlx-api-corrections.md @@ -0,0 +1,113 @@ +# MLX-Swift-Examples API 核对(2026-05-25) + +研究产出来源:`https://github.com/ml-explore/mlx-swift-examples` tag `2.29.1`,commit `9bff95ca5f0b9e8c021acc4d71a2bbe4a7441631`。 + +W2 plan Task 6 的 LLMSession 草稿与真实 API 有多处偏差,**Task 6 必须用本文修正版,不要回头读 plan 里的草稿**。 + +## 关键修正 + +| 项 | 草稿 | 真实 API | +|---|---|---| +| `ModelConfiguration(directory:)` | ✓ | ✓ 一致 | +| `LLMModelFactory.shared.loadContainer(configuration:)` | ✓ | ✓ 一致(`hub` / `progressHandler` 有默认值) | +| `container.perform { context in ... 
}` | 未类型化 | context 是 `ModelContext` struct(具体类型);`processor: any UserInputProcessor` | +| `MLXLMCommon.generate(...)` 调用语义 | `try MLXLMCommon.generate(...)` 后内部 `for await` | **同上,只需 `try`(无 `await`)**;**返回 `AsyncStream<Generation>`(非 throwing)** | +| `Generation` 枚举 case | 只列了 `.chunk(String)` 和 `.info(...)` | **还有 `.toolCall(ToolCall)`,switch 必须穷举** | +| `GenerateParameters` | 只传 `temperature / topP`,`maxTokens` 在草稿用 `produced >= maxTokens break` 控制 | **`maxTokens` 必须传 GenerateParameters**;`temperature` / `topP` 是 `Float` 不是 `Double` | +| 取消 | 草稿没处理 | **必须** `continuation.onTermination = { _ in task.cancel() }` | +| `UserInput` 构造 | `LMInput.init(prompt:)` | `UserInput(prompt: prompt)` → `context.processor.prepare(input: userInput)` → `LMInput` | + +## 修正版 LLMSession.swift(Task 6 直接抄) + +```swift +import Foundation +import MLX +import MLXLLM +import MLXLMCommon + +actor LLMSession { + let container: ModelContainer + + init(container: ModelContainer) { + self.container = container + } + + static func load(folderURL: URL) async throws -> LLMSession { + let configuration = ModelConfiguration(directory: folderURL) + let container = try await LLMModelFactory.shared.loadContainer( + configuration: configuration + ) + return LLMSession(container: container) + } + + /// 流式生成。返回的 AsyncThrowingStream 取消时,内部 Task 也会取消。 + func generate(prompt: String, maxTokens: Int) -> AsyncThrowingStream<TokenChunk, Error> { + AsyncThrowingStream { continuation in + let task = Task { + do { + let parameters = GenerateParameters( + maxTokens: maxTokens, + temperature: Float(0.6), + topP: Float(0.9) + ) + + try await container.perform { (context: ModelContext) in + let userInput = UserInput(prompt: prompt) + let lmInput = try await context.processor.prepare(input: userInput) + + let start = Date() + var produced = 0 + + for await event in try MLXLMCommon.generate( + input: lmInput, + parameters: parameters, + context: context + ) { + if Task.isCancelled { break } + + switch event { + case .chunk(let text): + produced += 
1 + let elapsed = Date().timeIntervalSince(start) + let rate = elapsed > 0 ? Double(produced) / elapsed : 0 + continuation.yield(TokenChunk(text: text, decodeRate: rate)) + + case .info: + // 生成完成统计,是流的最后一个事件 + break + + case .toolCall: + // 纯文本生成不会触发,switch 穷举 + break + } + } + MLX.GPU.synchronize() + } + continuation.finish() + } catch { + continuation.finish(throwing: error) + } + } + continuation.onTermination = { _ in task.cancel() } + } + } +} +``` + +## 与 AIRuntime 的对接 + +`AIRuntime.swift`(W2-T5 提交的 `4dcd951` + `e7cdb45`)已经预设: + +```swift +let session = try await LLMSession.load(folderURL: ModelStore.shared.localURL(for: .llm)) +let stream = await session.generate(prompt: prompt, maxTokens: maxTokens) +``` + +签名匹配,Task 6 不改 AIRuntime。 + +## 真实模型 HF 仓库名 + +- LLM: `mlx-community/Qwen3-1.7B-4bit`(沙盒目录:`Qwen3-1.7B-4bit`) +- VL: `mlx-community/Qwen2.5-VL-3B-Instruct-4bit`(沙盒目录:`Qwen2.5-VL-3B-Instruct-4bit`) + +注:plan 文档 Task 6 里写的是带 "MLX-" 中缀的旧名,**已弃用**。ModelKind rawValue 已在 commit `771b28e` 修正。