Files
kangkang/康康/AI/AIRuntime.swift
link2026 1b01923c8e feat(capture): 统一报告捕获流程并集成视觉语言模型识别
- 替换 QuickCaptureFlow 和 ArchiveFlow 为 UnifiedCaptureFlow 统一流程
- 新增 VLSession 封装 Qwen2.5-VL 模型进行图像文本推理
- 实现 AIRuntime 中 VL 模型的准备和分析功能
- 添加 VLPrompts 定义体检化验单识别的 JSON 输出模板
- 创建 CaptureReviewForm 提供 VL 解析结果的可编辑表单界面
- 集成 VisionKit 文档扫描器支持真机多页文档扫描
- 为模拟器实现 PhotosPicker 回退方案选择已有照片
- 在 RootView 中统一使用 UnifiedCaptureFlow 处理快速和归档流程
- 添加 CustomMetricEditor 支持自定义监测指标的创建编辑删除
- 扩展 KangkangApp 模型配置以支持新数据类型
- 实现档案列表中症状结束功能通过时间线行点击触发
2026-05-26 11:18:00 +08:00

151 lines
4.9 KiB
Swift

import Foundation
/// Errors surfaced by `AIRuntime` to its callers.
///
/// - `notReady`: the requested model has not been loaded (or its files are missing).
/// - `modelLoadFailed`: loading a model threw; the payload is the underlying error's text.
/// - `inferenceFailed`: running a model threw; the payload is the underlying error's text.
enum AIRuntimeError: Error, LocalizedError {
    case notReady
    case modelLoadFailed(String)
    case inferenceFailed(String)

    /// User-facing description of the failure.
    var errorDescription: String? {
        let text: String
        switch self {
        case .notReady:
            text = "AI 模型尚未准备好"
        case let .modelLoadFailed(reason):
            text = "模型加载失败:\(reason)"
        case let .inferenceFailed(reason):
            text = "推理失败:\(reason)"
        }
        return text
    }
}
/// Actor that owns the on-device model sessions and exposes inference entry points.
///
/// It holds at most one `LLMSession` (text generation) and one `VLSession`
/// (image + prompt analysis), each with its own load `Status`.
actor AIRuntime {
    /// Shared instance; the initializer is private, so this is the only instance.
    static let shared = AIRuntime()

    /// Load state of a model session.
    enum Status: Sendable, Equatable {
        case notReady
        case loading
        case ready
        /// Loading failed; the payload is a human-readable reason.
        case error(String)
    }

    /// Load state of the LLM session.
    private(set) var status: Status = .notReady
    /// Load state of the VL session.
    private(set) var vlStatus: Status = .notReady
    /// Most recent positive decode rate reported by a generated chunk (0 until one arrives).
    private(set) var lastDecodeRate: Double = 0

    private var llmSession: LLMSession?
    private var vlSession: VLSession?

    private init() {}

    // MARK: - LLM

    /// Loads the LLM session if it is not already loaded.
    ///
    /// Returns immediately when the status is `.ready` or `.loading`.
    /// NOTE(review): in the `.loading` case this does NOT wait for the in-flight
    /// load to finish, so a caller that raced another `prepare()` must re-check
    /// `status` before calling `generate`. This appears to be a known limitation.
    ///
    /// - Throws: `AIRuntimeError.notReady` when `ModelStore` reports the LLM files
    ///   are not ready; `AIRuntimeError.modelLoadFailed` when `LLMSession.load` throws.
    func prepare() async throws {
        switch status {
        case .ready:
            return
        case .loading:
            // Another call is already loading; do not start a second load.
            return
        case .error, .notReady:
            break
        }

        guard ModelStore.shared.isReady(.llm) else {
            status = .error("LLM 模型未就绪")
            throw AIRuntimeError.notReady
        }

        // Set before the first suspension point so re-entrant calls see `.loading`.
        status = .loading
        do {
            let session = try await LLMSession.load(
                folderURL: ModelStore.shared.localURL(for: .llm)
            )
            self.llmSession = session
            status = .ready
        } catch {
            status = .error("\(error)")
            throw AIRuntimeError.modelLoadFailed("\(error)")
        }
    }

    /// Streams generated chunks for `prompt`. Call `prepare()` first.
    ///
    /// The readiness check uses the status and session as they were when this
    /// method was called. If the LLM was not ready at that moment, the stream
    /// finishes by throwing `AIRuntimeError.notReady`. Errors thrown during
    /// generation are reported as `AIRuntimeError.inferenceFailed`.
    ///
    /// When the consumer stops iterating, or its task is cancelled, the task
    /// driving generation is cancelled as well.
    ///
    /// - Parameters:
    ///   - prompt: Text passed through to `LLMSession.generate`.
    ///   - maxTokens: Token budget passed through to `LLMSession.generate`.
    /// - Returns: A stream of `TokenChunk` values.
    func generate(prompt: String, maxTokens: Int = 256) -> AsyncThrowingStream<TokenChunk, Error> {
        // Snapshot actor state synchronously, at call time.
        let snapshotStatus = status
        let snapshotSession = llmSession

        return AsyncThrowingStream { continuation in
            let producer = Task {
                guard snapshotStatus == .ready, let session = snapshotSession else {
                    continuation.finish(throwing: AIRuntimeError.notReady)
                    return
                }
                do {
                    let stream = await session.generate(prompt: prompt, maxTokens: maxTokens)
                    for try await chunk in stream {
                        // The only source of cancellation is `onTermination` below,
                        // so the outer stream is already finished at this point.
                        if Task.isCancelled { break }
                        self.recordRate(chunk.decodeRate)
                        continuation.yield(chunk)
                    }
                    continuation.finish()
                } catch {
                    continuation.finish(throwing: AIRuntimeError.inferenceFailed("\(error)"))
                }
            }
            // Without this the producer would keep decoding up to `maxTokens`
            // after the consumer has gone away.
            continuation.onTermination = { _ in
                producer.cancel()
            }
        }
    }

    /// Remembers the latest decode rate, ignoring non-positive values.
    private func recordRate(_ rate: Double) {
        if rate > 0 { lastDecodeRate = rate }
    }

    // MARK: - VL

    /// Loads the VL session if it is not already loaded.
    ///
    /// Returns immediately when `vlStatus` is `.ready` or `.loading`; as with
    /// `prepare()`, the `.loading` case does not wait for the in-flight load.
    ///
    /// - Throws: `AIRuntimeError.notReady` when `ModelStore` reports the VL files
    ///   are not ready; `AIRuntimeError.modelLoadFailed` when `VLSession.load` throws.
    func prepareVL() async throws {
        switch vlStatus {
        case .ready, .loading:
            return
        case .error, .notReady:
            break
        }

        guard ModelStore.shared.isReady(.vl) else {
            vlStatus = .error("VL 模型未就绪")
            throw AIRuntimeError.notReady
        }

        // Set before the first suspension point so re-entrant calls see `.loading`.
        vlStatus = .loading
        do {
            let session = try await VLSession.load(
                folderURL: ModelStore.shared.localURL(for: .vl)
            )
            self.vlSession = session
            vlStatus = .ready
        } catch {
            vlStatus = .error("\(error)")
            throw AIRuntimeError.modelLoadFailed("\(error)")
        }
    }

    /// Runs the VL model over the given images with `prompt` and returns its raw text output.
    ///
    /// NOTE(review): the output is presumably the JSON described by
    /// `VLPrompts.reportExtraction` — confirm against the caller; this method
    /// does not parse or validate it.
    /// NOTE(review): this method suspends at the `await` below, so the actor by
    /// itself does not prevent an LLM `generate` from running at the same time.
    ///
    /// - Parameters:
    ///   - imageURLs: Image locations passed through to `VLSession.analyze`.
    ///   - prompt: Instruction text passed through to `VLSession.analyze`.
    ///   - maxTokens: Token budget passed through to `VLSession.analyze`.
    /// - Returns: The string produced by `VLSession.analyze`.
    /// - Throws: `AIRuntimeError.notReady` when the VL session is not loaded;
    ///   `AIRuntimeError.inferenceFailed` when analysis throws.
    func analyzeReport(imageURLs: [URL],
                       prompt: String,
                       maxTokens: Int = 512) async throws -> String {
        guard vlStatus == .ready, let session = vlSession else {
            throw AIRuntimeError.notReady
        }
        do {
            return try await session.analyze(
                imageURLs: imageURLs,
                prompt: prompt,
                maxTokens: maxTokens
            )
        } catch {
            throw AIRuntimeError.inferenceFailed("\(error)")
        }
    }
}