feat(capture): 统一报告捕获流程并集成视觉语言模型识别

- 替换 QuickCaptureFlow 和 ArchiveFlow 为 UnifiedCaptureFlow 统一流程
- 新增 VLSession 封装 Qwen2.5-VL 模型进行图像文本推理
- 实现 AIRuntime 中 VL 模型的准备和分析功能
- 添加 VLPrompts 定义体检化验单识别的 JSON 输出模板
- 创建 CaptureReviewForm 提供 VL 解析结果的可编辑表单界面
- 集成 VisionKit 文档扫描器支持真机多页文档扫描
- 为模拟器实现 PhotosPicker 回退方案选择已有照片
- 在 RootView 中统一使用 UnifiedCaptureFlow 处理快速和归档流程
- 添加 CustomMetricEditor 支持自定义监测指标的创建编辑删除
- 扩展 KangkangApp 模型配置以支持新数据类型
- 实现档案列表中症状结束功能通过时间线行点击触发
This commit is contained in:
link2026
2026-05-26 11:18:00 +08:00
parent 39edc25dc1
commit 1b01923c8e
27 changed files with 3128 additions and 29 deletions

View File

@@ -25,9 +25,11 @@ actor AIRuntime {
}
private(set) var status: Status = .notReady
private(set) var vlStatus: Status = .notReady
private(set) var lastDecodeRate: Double = 0
private var llmSession: LLMSession?
private var vlSession: VLSession?
private init() {}
@@ -96,4 +98,53 @@ actor AIRuntime {
private func recordRate(_ rate: Double) {
if rate > 0 { lastDecodeRate = rate }
}
// MARK: - VL
/// VL , load
func prepareVL() async throws {
switch vlStatus {
case .ready, .loading:
return
case .error, .notReady:
break
}
guard ModelStore.shared.isReady(.vl) else {
vlStatus = .error("VL 模型未就绪")
throw AIRuntimeError.notReady
}
vlStatus = .loading
do {
let session = try await VLSession.load(
folderURL: ModelStore.shared.localURL(for: .vl)
)
self.vlSession = session
vlStatus = .ready
} catch {
vlStatus = .error("\(error)")
throw AIRuntimeError.modelLoadFailed("\(error)")
}
}
/// JSON ( VLPrompts.reportExtraction )
/// + 退(§3.2)
/// AIRuntime actor, LLM.generate() , OOM
func analyzeReport(imageURLs: [URL],
prompt: String,
maxTokens: Int = 512) async throws -> String {
guard vlStatus == .ready, let session = vlSession else {
throw AIRuntimeError.notReady
}
do {
return try await session.analyze(
imageURLs: imageURLs,
prompt: prompt,
maxTokens: maxTokens
)
} catch {
throw AIRuntimeError.inferenceFailed("\(error)")
}
}
}

View File

@@ -0,0 +1,71 @@
import Foundation
/// VL (Qwen2.5-VL) / prompt
/// : JSON,markdown
/// CaptureService 退(§3.2 退线)
enum VLPrompts {
/// JSON ( prompt ):
/// ```
/// {
/// "title": "", // , ""
/// "type": "checkup|lab|imaging|prescription|other",
/// "report_date": "YYYY-MM-DD", // ()
/// "institution": "XX ", //
/// "page_count": 1,
/// "summary": "", //
/// "indicators": [
/// {
/// "name": "",
/// "value": "3.84",
/// "unit": "mmol/L",
/// "range": "< 3.40",
/// "status": "high|low|normal"
/// }
/// ]
/// }
/// ```
/// `kind` UI indicators A2() B3()
static let reportExtraction: String = #"""
你是一个医学体检报告识别助手。请只输出一段合法 JSON,不要解释、不要 markdown 围栏、不要任何前后缀文字。
JSON schema(严格):
{
"title": string,
"type": "checkup" | "lab" | "imaging" | "prescription" | "other",
"report_date": "YYYY-MM-DD",
"institution": string,
"page_count": number,
"summary": string,
"indicators": [
{
"name": string,
"value": string,
"unit": string,
"range": string,
"status": "high" | "low" | "normal"
}
]
}
规则:
- status 根据 value 与 range 自己判断:value > range 上限 → "high",< 下限 → "low",否则 → "normal"
- range 字段保留原文(如 "< 3.40""3.9 - 6.1""0 - 5"),不要解析成区间对象。
- 无法识别的字段填空字符串(institution / summary)或合理默认值(report_date 用今天)。
- 不要发明指标。看不清的整行跳过。
- 化验单一般 type = "lab",体检套餐 = "checkup"
示例 1(化验单 · 单项):
输入: 一张化验单照片,只能看清「低密度脂蛋白 3.84 mmol/L 参考 <3.40」
输出:
{"title":"","type":"lab","report_date":"2026-05-25","institution":"","page_count":1,"summary":"","indicators":[{"name":"","value":"3.84","unit":"mmol/L","range":"< 3.40","status":"high"}]}
示例 2(体检 · 多项):
输入: 一份春季体检,3 项可读
输出:
{"title":"","type":"checkup","report_date":"2026-04-12","institution":"","page_count":1,"summary":"","indicators":[{"name":"","value":"3.84","unit":"mmol/L","range":"< 3.40","status":"high"},{"name":"","value":"32","unit":"U/L","range":"9 - 50","status":"normal"},{"name":"","value":"5.2","unit":"mmol/L","range":"3.9 - 6.1","status":"normal"}]}
现在请识别图片并输出 JSON:
"""#
}

72
康康/AI/VLSession.swift Normal file
View File

@@ -0,0 +1,72 @@
import Foundation
import MLX
import MLXVLM
import MLXLMCommon
/// MLX VL (Qwen2.5-VL)
/// LLMSession actor , AIRuntime
actor VLSession {
let container: ModelContainer
init(container: ModelContainer) {
self.container = container
}
private static func withDeviceOverride<R>(
_ body: () async throws -> R
) async rethrows -> R {
#if targetEnvironment(simulator)
return try await Device.withDefaultDevice(.cpu, body)
#else
return try await body()
#endif
}
/// VL ( config.json + weights + tokenizer + processor)
static func load(folderURL: URL) async throws -> VLSession {
let configuration = ModelConfiguration(directory: folderURL)
let container = try await withDeviceOverride {
try await VLMModelFactory.shared.loadContainer(
configuration: configuration
)
}
return VLSession(container: container)
}
/// ( token )
/// VL JSON , JSON UI
/// - Parameters:
/// - imageURLs: file:// URL, FileVault
/// - prompt: (VLPrompts.reportExtraction)
/// - maxTokens: 512(JSON 200-400)
func analyze(imageURLs: [URL],
prompt: String,
maxTokens: Int = 512) async throws -> String {
try await Self.withDeviceOverride {
try await container.perform { (context: ModelContext) in
let images = imageURLs.map { UserInput.Image.url($0) }
let userInput = UserInput(prompt: prompt, images: images)
let lmInput = try await context.processor.prepare(input: userInput)
let parameters = GenerateParameters(
maxTokens: maxTokens,
temperature: Float(0.2), // JSON ,
topP: Float(0.9)
)
var collected = ""
for await event in try MLXLMCommon.generate(
input: lmInput,
parameters: parameters,
context: context
) {
if Task.isCancelled { break }
if case .chunk(let text) = event {
collected.append(text)
}
}
return collected
}
}
}
}