feat(ai): add AIRuntime actor skeleton + TokenChunk

按 W2 plan Task 5 落地推理串行化骨架:
- TokenChunk: Sendable struct (text + decodeRate tok/s)
- AIRuntime: actor 单例
  - Status: notReady / loading / ready / error(msg)
  - prepare() async throws: 幂等加载,失败回滚 status
  - generate(prompt:maxTokens:) -> AsyncThrowingStream: 流式输出
    跨 actor 边界用 snapshot 模式捕获 self.status/llmSession
  - lastDecodeRate: 给 UI 顶部条 / Live Activity 取
- AIRuntimeError: LocalizedError, 三种 case

WIP: Build will fail until Task 6 lands LLMSession (intentional).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
link2026
2026-05-25 15:30:47 +08:00
parent d40cb7d1e0
commit 4dcd951821
2 changed files with 97 additions and 0 deletions

91
体己/AI/AIRuntime.swift Normal file
View File

@@ -0,0 +1,91 @@
import Foundation
/// Failures reported by the AI runtime, each carrying a user-facing description.
enum AIRuntimeError: Error, LocalizedError {
    /// The model has not been loaded, or is not available to load.
    case notReady
    /// Loading the model failed; the payload is the underlying reason.
    case modelLoadFailed(String)
    /// Token generation failed; the payload is the underlying reason.
    case inferenceFailed(String)

    /// Localized, human-readable message for this error.
    var errorDescription: String? {
        switch self {
        case .notReady:
            return "AI 模型尚未准备好"
        case let .modelLoadFailed(reason):
            return "模型加载失败:\(reason)"
        case let .inferenceFailed(reason):
            return "推理失败:\(reason)"
        }
    }
}
/// Actor that owns the LLM session and exposes loading and streaming generation.
///
/// Call `prepare()` before `generate(prompt:maxTokens:)`; generation never loads
/// the model itself and reports `AIRuntimeError.notReady` when no session is ready.
actor AIRuntime {
    /// Shared instance. The initializer is private, so this is the only instance.
    static let shared = AIRuntime()

    /// Lifecycle of the underlying LLM session.
    enum Status: Sendable, Equatable {
        case notReady
        case loading
        case ready
        /// Loading failed; the payload is a human-readable reason.
        case error(String)
    }

    /// Current lifecycle state; mutated only by `prepare()`.
    private(set) var status: Status = .notReady
    /// Most recent positive decode rate seen on a generated chunk.
    private(set) var lastDecodeRate: Double = 0
    /// Loaded session; set only after a successful `prepare()`.
    private var llmSession: LLMSession?

    private init() {}

    /// Loads the LLM session if it is not already loaded.
    ///
    /// Returns immediately when `status` is `.ready` or `.loading`.
    /// NOTE: a caller that arrives while another load is in flight does NOT wait
    /// for that load to finish; it must re-check `status` before generating.
    ///
    /// - Throws: `AIRuntimeError.notReady` when the model store reports the LLM
    ///   is not ready; `AIRuntimeError.modelLoadFailed` when loading throws.
    ///   In both cases `status` is left as `.error`.
    func prepare() async throws {
        switch status {
        case .ready: return
        case .loading: return
        case .error, .notReady: break
        }
        guard ModelStore.shared.isReady(.llm) else {
            status = .error("LLM 模型未就绪")
            throw AIRuntimeError.notReady
        }
        status = .loading
        do {
            let session = try await LLMSession.load(
                folderURL: ModelStore.shared.localURL(for: .llm)
            )
            llmSession = session
            status = .ready
        } catch {
            status = .error("\(error)")
            throw AIRuntimeError.modelLoadFailed("\(error)")
        }
    }

    /// Streams generated chunks for `prompt`.
    ///
    /// The caller must have awaited `prepare()` first. If the runtime is not ready,
    /// the returned stream finishes immediately with `AIRuntimeError.notReady`.
    /// Errors thrown while generating are wrapped in `AIRuntimeError.inferenceFailed`.
    /// When the consumer stops iterating or is cancelled, the producing task is cancelled.
    ///
    /// - Parameters:
    ///   - prompt: Text passed through to the session.
    ///   - maxTokens: Token limit passed through to the session.
    /// - Returns: A stream of `TokenChunk` values.
    func generate(prompt: String, maxTokens: Int = 256) -> AsyncThrowingStream<TokenChunk, Error> {
        // Actor state is read synchronously here so the producer task below never
        // has to hop back to the actor just to check readiness.
        guard status == .ready, let session = llmSession else {
            return AsyncThrowingStream { continuation in
                continuation.finish(throwing: AIRuntimeError.notReady)
            }
        }
        return AsyncThrowingStream { continuation in
            let producer = Task { [weak self] in
                do {
                    let stream = await session.generate(prompt: prompt, maxTokens: maxTokens)
                    for try await chunk in stream {
                        // Stop promptly once the consumer has gone away.
                        try Task.checkCancellation()
                        await self?.recordRate(chunk.decodeRate)
                        continuation.yield(chunk)
                    }
                    continuation.finish()
                } catch {
                    // A no-op if the stream has already terminated (e.g. after cancellation).
                    continuation.finish(throwing: AIRuntimeError.inferenceFailed("\(error)"))
                }
            }
            // Without this the unstructured task keeps running inference after the
            // consumer drops the stream.
            continuation.onTermination = { _ in producer.cancel() }
        }
    }

    /// Stores `rate` as the latest decode rate; non-positive values are ignored.
    private func recordRate(_ rate: Double) {
        if rate > 0 { lastDecodeRate = rate }
    }
}

View File

@@ -0,0 +1,6 @@
import Foundation
/// One piece of streamed generation output.
struct TokenChunk: Sendable {
    /// Text carried by this chunk.
    let text: String

    /// Decode rate in tokens per second.
    let decodeRate: Double
}