feat(capture): 统一报告捕获流程并集成视觉语言模型识别

- 替换 QuickCaptureFlow 和 ArchiveFlow 为 UnifiedCaptureFlow 统一流程
- 新增 VLSession 封装 Qwen2.5-VL 模型进行图像文本推理
- 实现 AIRuntime 中 VL 模型的准备和分析功能
- 添加 VLPrompts 定义体检化验单识别的 JSON 输出模板
- 创建 CaptureReviewForm 提供 VL 解析结果的可编辑表单界面
- 集成 VisionKit 文档扫描器支持真机多页文档扫描
- 为模拟器实现 PhotosPicker 回退方案选择已有照片
- 在 RootView 中统一使用 UnifiedCaptureFlow 处理快速和归档流程
- 添加 CustomMetricEditor 支持自定义监测指标的创建编辑删除
- 扩展 KangkangApp 模型配置以支持新数据类型
- 实现档案列表中症状结束功能通过时间线行点击触发
2026-05-26 11:18:00 +08:00
parent 39edc25dc1
commit 1b01923c8e
27 changed files with 3128 additions and 29 deletions
--- a/康康/AI/AIRuntime.swift
+++ b/康康/AI/AIRuntime.swift
@@ -25,9 +25,11 @@ actor AIRuntime {
    }

    private(set) var status: Status = .notReady
+    private(set) var vlStatus: Status = .notReady
    private(set) var lastDecodeRate: Double = 0

    private var llmSession: LLMSession?
+    private var vlSession: VLSession?

    private init() {}

@@ -96,4 +98,53 @@ actor AIRuntime {
    private func recordRate(_ rate: Double) {
        if rate > 0 { lastDecodeRate = rate }
    }
+
+    // MARK: - VL
+
+    /// In-flight VL load; re-entrant `prepareVL()` callers await it instead of returning early.
+    private var vlLoadTask: Task<Void, Error>?
+
+    /// Loads the VL model. Idempotent: returns at once when ready and joins a load already in flight.
+    /// - Throws: `AIRuntimeError.notReady` if the model files are not ready, `.modelLoadFailed` if loading fails.
+    func prepareVL() async throws {
+        if let inFlight = vlLoadTask { return try await inFlight.value }
+        if case .ready = vlStatus { return }
+        guard ModelStore.shared.isReady(.vl) else {
+            vlStatus = .error("VL 模型未就绪")
+            throw AIRuntimeError.notReady
+        }
+        vlStatus = .loading
+        let task = Task<Void, Error> {
+            do {
+                self.vlSession = try await VLSession.load(folderURL: ModelStore.shared.localURL(for: .vl))
+                self.vlStatus = .ready
+            } catch {
+                self.vlStatus = .error("\(error)")
+                throw AIRuntimeError.modelLoadFailed("\(error)")
+            }
+        }
+        vlLoadTask = task
+        defer { vlLoadTask = nil }  // only the caller that started the load clears the handle
+        try await task.value
+    }
+
+    /// Runs the loaded VL model on `imageURLs` with `prompt` and returns its raw text output
+    /// (JSON when guided by VLPrompts.reportExtraction); callers own parsing and failure fallback (§3.2).
+    /// NOTE(review): actor methods are re-entrant across `await`, so overlap with LLM generation may be possible — confirm.
+    func analyzeReport(imageURLs: [URL],
+                       prompt: String,
+                       maxTokens: Int = 512) async throws -> String {
+        // Both a live session and the ready flag are required before inference.
+        guard let session = vlSession, vlStatus == .ready else {
+            throw AIRuntimeError.notReady
+        }
+        let output: String
+        do {
+            output = try await session.analyze(imageURLs: imageURLs, prompt: prompt, maxTokens: maxTokens)
+        } catch {
+            // Every session failure is surfaced as one runtime error type.
+            throw AIRuntimeError.inferenceFailed("\(error)")
+        }
+        return output
+    }
 }
--- a/康康/AI/Prompts/VLPrompts.swift
+++ b/康康/AI/Prompts/VLPrompts.swift
@@ -0,0 +1,71 @@
+import Foundation
+
+/// Prompt templates for checkup / lab-sheet recognition with the VL model (Qwen2.5-VL).
+/// Output contract: strict JSON, with no explanatory text, markdown fences, or prefix/suffix.
+/// On parse failure CaptureService falls back to manual entry (§3.2 failure-fallback rule).
+enum VLPrompts {
+
+    /// Field definitions of the output JSON (spelled out inside the prompt to teach the model):
+    /// ```
+    /// {
+    ///   "title": "春季年度体检",        // report heading; "拍摄识别" if absent
+    ///   "type": "checkup|lab|imaging|prescription|other",
+    ///   "report_date": "YYYY-MM-DD",   // today if absent — NOTE(review): the prompt never supplies today's date
+    ///   "institution": "XX 医院",       // may be an empty string
+    ///   "page_count": 1,
+    ///   "summary": "整体趋势短句",       // short overall-trend sentence; may be an empty string
+    ///   "indicators": [
+    ///     {
+    ///       "name": "低密度脂蛋白",
+    ///       "value": "3.84",
+    ///       "unit": "mmol/L",
+    ///       "range": "< 3.40",
+    ///       "status": "high|low|normal"
+    ///     }
+    ///   ]
+    /// }
+    /// ```
+    /// The `kind` field is omitted — the UI picks A2 (single item) or B3 (multiple items) from the indicator count.
+
+    static let reportExtraction: String = #"""
+你是一个医学体检报告识别助手。请只输出一段合法 JSON,不要解释、不要 markdown 围栏、不要任何前后缀文字。
+
+JSON schema(严格):
+{
+  "title": string,
+  "type": "checkup" | "lab" | "imaging" | "prescription" | "other",
+  "report_date": "YYYY-MM-DD",
+  "institution": string,
+  "page_count": number,
+  "summary": string,
+  "indicators": [
+    {
+      "name": string,
+      "value": string,
+      "unit": string,
+      "range": string,
+      "status": "high" | "low" | "normal"
+    }
+  ]
+}
+
+规则:
+- status 根据 value 与 range 自己判断:value > range 上限 → "high",< 下限 → "low",否则 → "normal"。
+- range 字段保留原文(如 "< 3.40"、"3.9 - 6.1"、"0 - 5"),不要解析成区间对象。
+- 无法识别的字段填空字符串(institution / summary)或合理默认值(report_date 用今天)。
+- 不要发明指标。看不清的整行跳过。
+- 化验单一般 type = "lab",体检套餐 = "checkup"。
+
+示例 1(化验单 · 单项):
+输入: 一张化验单照片,只能看清「低密度脂蛋白 3.84 mmol/L 参考 <3.40」
+输出:
+{"title":"低密度脂蛋白单项","type":"lab","report_date":"2026-05-25","institution":"","page_count":1,"summary":"","indicators":[{"name":"低密度脂蛋白","value":"3.84","unit":"mmol/L","range":"< 3.40","status":"high"}]}
+
+示例 2(体检 · 多项):
+输入: 一份春季体检,3 项可读
+输出:
+{"title":"春季年度体检","type":"checkup","report_date":"2026-04-12","institution":"协和医院","page_count":1,"summary":"血脂偏高、其他正常","indicators":[{"name":"低密度脂蛋白","value":"3.84","unit":"mmol/L","range":"< 3.40","status":"high"},{"name":"谷丙转氨酶","value":"32","unit":"U/L","range":"9 - 50","status":"normal"},{"name":"空腹血糖","value":"5.2","unit":"mmol/L","range":"3.9 - 6.1","status":"normal"}]}
+
+现在请识别图片并输出 JSON:
+"""#
+}
--- a/康康/AI/VLSession.swift
+++ b/康康/AI/VLSession.swift
@@ -0,0 +1,72 @@
+import Foundation
+import MLX
+import MLXVLM
+import MLXLMCommon
+
+/// Wraps an MLX vision-language model (Qwen2.5-VL) for image → text inference.
+/// Actor-isolated like LLMSession; upstream AIRuntime is expected to coordinate calls (NOTE(review): confirm).
+actor VLSession {
+    let container: ModelContainer
+
+    init(container: ModelContainer) {
+        self.container = container
+    }
+
+    /// Runs `body` with the MLX default device forced to CPU on the simulator; elsewhere runs it unchanged.
+    private static func withDeviceOverride<R>(
+        _ body: () async throws -> R
+    ) async rethrows -> R {
+        #if targetEnvironment(simulator)
+        return try await Device.withDefaultDevice(.cpu, body)
+        #else
+        return try await body()
+        #endif
+    }
+
+    /// Loads a VL model from a local directory (config.json + weights + tokenizer + processor).
+    static func load(folderURL: URL) async throws -> VLSession {
+        let configuration = ModelConfiguration(directory: folderURL)
+        let container = try await withDeviceOverride {
+            try await VLMModelFactory.shared.loadContainer(configuration: configuration)
+        }
+        return VLSession(container: container)
+    }
+
+    /// Generates the whole response in one shot (no streaming), so callers never see half-finished JSON.
+    /// - Parameters:
+    ///   - imageURLs: local file:// URLs of the images (the original note says they come from FileVault)
+    ///   - prompt: text instruction (VLPrompts.reportExtraction)
+    ///   - maxTokens: generation cap, default 512 (the original note estimates the JSON at ≈ 200-400)
+    /// - Returns: the concatenation of every text chunk the model emitted.
+    /// - Throws: `CancellationError` if the task is cancelled mid-generation, or any preparation/generation error.
+    func analyze(imageURLs: [URL],
+                 prompt: String,
+                 maxTokens: Int = 512) async throws -> String {
+        try await Self.withDeviceOverride {
+            try await container.perform { (context: ModelContext) in
+                let images = imageURLs.map { UserInput.Image.url($0) }
+                let userInput = UserInput(prompt: prompt, images: images)
+                let lmInput = try await context.processor.prepare(input: userInput)
+
+                let parameters = GenerateParameters(
+                    maxTokens: maxTokens,
+                    temperature: Float(0.2),   // low temperature keeps the JSON output stable
+                    topP: Float(0.9)
+                )
+
+                var collected = ""
+                for await event in try MLXLMCommon.generate(
+                    input: lmInput,
+                    parameters: parameters,
+                    context: context
+                ) {
+                    try Task.checkCancellation()   // throw instead of returning truncated JSON as success
+                    if case .chunk(let text) = event {
+                        collected.append(text)
+                    }
+                }
+                return collected
+            }
+        }
+    }
+}