feat(ai): add AIRuntime actor skeleton + TokenChunk

按 W2 plan Task 5 落地推理串行化骨架: - TokenChunk: Sendable struct (text + decodeRate tok/s) - AIRuntime: actor 单例 - Status: notReady / loading / ready / error(msg) - prepare() async throws: 幂等加载,失败回滚 status - generate(prompt:maxTokens:) -> AsyncThrowingStream: 流式输出跨 actor 边界用 snapshot 模式捕获 self.status/llmSession - lastDecodeRate: 给 UI 顶部条 / Live Activity 取 - AIRuntimeError: LocalizedError, 三种 case WIP: Build will fail until Task 6 lands LLMSession (intentional). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 15:30:47 +08:00
parent d40cb7d1e0
commit 4dcd951821
2 changed files with 97 additions and 0 deletions
--- a/体己/AI/AIRuntime.swift
+++ b/体己/AI/AIRuntime.swift
@@ -0,0 +1,91 @@
+import Foundation
+
+/// Failures reported by `AIRuntime`, each with a user-facing description.
+enum AIRuntimeError: Error, LocalizedError {
+    /// The model is not available for use.
+    case notReady
+    /// Loading the model failed; the payload describes the underlying error.
+    case modelLoadFailed(String)
+    /// Generation failed; the payload describes the underlying error.
+    case inferenceFailed(String)
+
+    /// Localized message for this error (consumed through `LocalizedError`).
+    var errorDescription: String? {
+        let message: String
+        switch self {
+        case .notReady:
+            message = "AI 模型尚未准备好"
+        case let .modelLoadFailed(reason):
+            message = "模型加载失败:\(reason)"
+        case let .inferenceFailed(reason):
+            message = "推理失败:\(reason)"
+        }
+        return message
+    }
+}
+
+/// Actor singleton that loads the LLM session and exposes streaming generation.
+actor AIRuntime {
+    /// Process-wide instance; the initializer is private.
+    static let shared = AIRuntime()
+
+    /// Lifecycle of the model held by the runtime.
+    enum Status: Sendable, Equatable {
+        /// Initial state: no load has been started.
+        case notReady
+        /// A load is currently in flight.
+        case loading
+        /// The session is loaded and `generate` can be used.
+        case ready
+        /// The last `prepare()` attempt failed; the payload describes why.
+        case error(String)
+    }
+
+    private(set) var status: Status = .notReady
+    /// Most recent positive decode rate seen on a generated chunk; 0 until one arrives.
+    private(set) var lastDecodeRate: Double = 0
+
+    private var llmSession: LLMSession?
+    /// In-flight load shared by every concurrent `prepare()` caller; `nil` when idle.
+    private var loadTask: Task<Void, Error>?
+
+    private init() {}
+
+    /// Loads the model if it is not loaded yet.
+    ///
+    /// Returns immediately when the runtime is already `.ready`. When a load is
+    /// already in flight, this call waits for that load and shares its outcome
+    /// instead of returning early, so a successful return always means the
+    /// session is available. After a failed attempt, a later call retries.
+    ///
+    /// - Throws: `AIRuntimeError.notReady` when `ModelStore` reports the LLM is
+    ///   not ready; `AIRuntimeError.modelLoadFailed` when loading fails.
+    func prepare() async throws {
+        if status == .ready { return }
+
+        // Actors are re-entrant across `await`: a second caller can arrive while
+        // the first load is suspended. Join that load rather than reporting
+        // success before the session exists.
+        if let inFlight = loadTask {
+            try await inFlight.value
+            return
+        }
+
+        guard ModelStore.shared.isReady(.llm) else {
+            status = .error("LLM 模型未就绪")
+            throw AIRuntimeError.notReady
+        }
+
+        status = .loading
+        let task = Task<Void, Error> { try await self.loadSession() }
+        loadTask = task
+        try await task.value
+    }
+
+    /// Performs the actual load and records the outcome in `status`.
+    private func loadSession() async throws {
+        // Clear the shared task on every exit so a failed load can be retried.
+        defer { loadTask = nil }
+        do {
+            let session = try await LLMSession.load(
+                folderURL: ModelStore.shared.localURL(for: .llm)
+            )
+            llmSession = session
+            status = .ready
+        } catch {
+            status = .error("\(error)")
+            throw AIRuntimeError.modelLoadFailed("\(error)")
+        }
+    }
+
+    /// Streams generated chunks for `prompt`. Call `prepare()` first.
+    ///
+    /// The stream is created synchronously; the generation itself runs in a
+    /// background task that is cancelled when the stream terminates (consumer
+    /// finished, dropped the stream, or was cancelled).
+    ///
+    /// - Parameters:
+    ///   - prompt: Text passed to the session's `generate`.
+    ///   - maxTokens: Limit forwarded to the session's `generate`.
+    /// - Returns: A stream that fails with `AIRuntimeError.notReady` when the
+    ///   runtime was not ready at call time, or with
+    ///   `AIRuntimeError.inferenceFailed` when generation throws.
+    func generate(prompt: String, maxTokens: Int = 256) -> AsyncThrowingStream<TokenChunk, Error> {
+        // Snapshot actor state while still isolated; the worker task below must
+        // not read `status` / `llmSession` directly.
+        let snapshotStatus = status
+        let snapshotSession = llmSession
+
+        return AsyncThrowingStream { continuation in
+            guard snapshotStatus == .ready, let session = snapshotSession else {
+                continuation.finish(throwing: AIRuntimeError.notReady)
+                return
+            }
+
+            let worker = Task { [weak self] in
+                do {
+                    // `session.generate` is awaited, as in the session's own API.
+                    let stream = await session.generate(prompt: prompt, maxTokens: maxTokens)
+                    for try await chunk in stream {
+                        // Stop promptly once the consumer has gone away.
+                        try Task.checkCancellation()
+                        await self?.recordRate(chunk.decodeRate)
+                        continuation.yield(chunk)
+                    }
+                    continuation.finish()
+                } catch {
+                    continuation.finish(throwing: AIRuntimeError.inferenceFailed("\(error)"))
+                }
+            }
+
+            // Without this, inference would keep running after the consumer
+            // stops iterating.
+            continuation.onTermination = { _ in worker.cancel() }
+        }
+    }
+
+    /// Remembers `rate` as the latest decode rate; non-positive values are ignored.
+    private func recordRate(_ rate: Double) {
+        if rate > 0 { lastDecodeRate = rate }
+    }
+}
--- a/体己/AI/TokenChunk.swift
+++ b/体己/AI/TokenChunk.swift
@@ -0,0 +1,6 @@
+import Foundation
+
+/// A piece of generated text paired with a throughput estimate.
+struct TokenChunk: Sendable {
+    /// Text carried by this chunk.
+    let text: String
+
+    /// Estimated decode speed, in tokens per second.
+    let decodeRate: Double
+}