Files
kangkang/体己/AI/AIRuntime.swift
link2026 e7cdb45472 harden(ai): AIRuntime 去掉冗余 weak self,prepare loading 路径加注释
按 code quality review 反馈(2×P0):
- generate() 的 Task 闭包不再 [weak self];actor 单例 strong capture
  没有循环引用风险,且避免 Swift 5.10+ weak-on-actor 警告
- prepare() 的 case .loading: return 加注释说明这是有意设计,
  调用方需轮询或显示 loading UI(W3 引入 prepare 队列优化)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 15:33:51 +08:00

98 lines
3.1 KiB
Swift

import Foundation
enum AIRuntimeError: Error, LocalizedError {
case notReady
case modelLoadFailed(String)
case inferenceFailed(String)
var errorDescription: String? {
switch self {
case .notReady: return "AI 模型尚未准备好"
case .modelLoadFailed(let m): return "模型加载失败:\(m)"
case .inferenceFailed(let m): return "推理失败:\(m)"
}
}
}
actor AIRuntime {
static let shared = AIRuntime()
enum Status: Sendable, Equatable {
case notReady
case loading
case ready
case error(String)
}
private(set) var status: Status = .notReady
private(set) var lastDecodeRate: Double = 0
private var llmSession: LLMSession?
private init() {}
/// ,
func prepare() async throws {
switch status {
case .ready:
return
case .loading:
// ; prepare ,
// await prepare() status, / UI
// W3 prepare
return
case .error, .notReady:
break
}
guard ModelStore.shared.isReady(.llm) else {
status = .error("LLM 模型未就绪")
throw AIRuntimeError.notReady
}
status = .loading
do {
let session = try await LLMSession.load(
folderURL: ModelStore.shared.localURL(for: .llm)
)
self.llmSession = session
status = .ready
} catch {
status = .error("\(error)")
throw AIRuntimeError.modelLoadFailed("\(error)")
}
}
/// await prepare()
/// :, actor LLMSession await
func generate(prompt: String, maxTokens: Int = 256) -> AsyncThrowingStream<TokenChunk, Error> {
// actor ,Task 访 self.status / self.llmSession
let snapshotStatus = status
let snapshotSession = llmSession
return AsyncThrowingStream { continuation in
Task {
guard snapshotStatus == .ready, let session = snapshotSession else {
continuation.finish(throwing: AIRuntimeError.notReady)
return
}
do {
// session.generate actor , await
let stream = await session.generate(prompt: prompt, maxTokens: maxTokens)
for try await chunk in stream {
await self.recordRate(chunk.decodeRate)
continuation.yield(chunk)
}
continuation.finish()
} catch {
continuation.finish(throwing: AIRuntimeError.inferenceFailed("\(error)"))
}
}
}
}
private func recordRate(_ rate: Double) {
if rate > 0 { lastDecodeRate = rate }
}
}