feat(capture): 统一报告捕获流程并集成视觉语言模型识别
- 替换 QuickCaptureFlow 和 ArchiveFlow 为 UnifiedCaptureFlow 统一流程 - 新增 VLSession 封装 Qwen2.5-VL 模型进行图像文本推理 - 实现 AIRuntime 中 VL 模型的准备和分析功能 - 添加 VLPrompts 定义体检化验单识别的 JSON 输出模板 - 创建 CaptureReviewForm 提供 VL 解析结果的可编辑表单界面 - 集成 VisionKit 文档扫描器支持真机多页文档扫描 - 为模拟器实现 PhotosPicker 回退方案选择已有照片 - 在 RootView 中统一使用 UnifiedCaptureFlow 处理快速和归档流程 - 添加 CustomMetricEditor 支持自定义监测指标的创建编辑删除 - 扩展 KangkangApp 模型配置以支持新数据类型 - 实现档案列表中症状结束功能通过时间线行点击触发
This commit is contained in:
@@ -25,9 +25,11 @@ actor AIRuntime {
|
||||
}
|
||||
|
||||
/// Runtime status, initially `.notReady`. NOTE(review): not written by any code visible in this chunk — presumably tracks the text LLM; confirm.
private(set) var status: Status = .notReady
|
||||
/// Load state of the vision-language (VL) model; written by `prepareVL()` and checked by `analyzeReport`.
private(set) var vlStatus: Status = .notReady
|
||||
/// Last positive rate handed to `recordRate(_:)`; initially 0. NOTE(review): the unit (presumably tokens per second) is not visible here — confirm.
private(set) var lastDecodeRate: Double = 0
|
||||
|
||||
/// Text LLM session. NOTE(review): assigned outside this chunk — presumably by the LLM prepare path; confirm.
private var llmSession: LLMSession?
|
||||
/// Vision-language session; set by `prepareVL()` after a successful load.
private var vlSession: VLSession?
|
||||
|
||||
/// Private so that instances can only be created from inside the type.
private init() {}
|
||||
|
||||
@@ -96,4 +98,53 @@ actor AIRuntime {
|
||||
/// Stores `rate` as the latest decode rate.
///
/// Samples that are not strictly positive are ignored, so the previously
/// recorded value is kept.
private func recordRate(_ rate: Double) {
    guard rate > 0 else { return }
    lastDecodeRate = rate
}
|
||||
|
||||
// MARK: - VL
|
||||
|
||||
/// Loads the VL model from its local folder if that has not been done yet.
///
/// Returns immediately when `vlStatus` is `.ready` or `.loading`; a load is
/// attempted from `.notReady` or `.error`.
///
/// NOTE(review): in the `.loading` case this returns without waiting for the
/// in-flight load, so the model may not be ready when the call returns —
/// confirm callers check `vlStatus` before calling `analyzeReport`.
///
/// - Throws: `AIRuntimeError.notReady` when `ModelStore` reports the VL model
///   files are not ready; `AIRuntimeError.modelLoadFailed` when loading fails.
///   In both cases `vlStatus` is set to `.error` first.
func prepareVL() async throws {
    switch vlStatus {
    case .notReady, .error:
        // Fall through to a (re)load attempt.
        break
    case .loading, .ready:
        return
    }

    if !ModelStore.shared.isReady(.vl) {
        vlStatus = .error("VL 模型未就绪")
        throw AIRuntimeError.notReady
    }

    // Publish the in-flight state before suspending on the load.
    vlStatus = .loading

    let loaded: VLSession
    do {
        loaded = try await VLSession.load(
            folderURL: ModelStore.shared.localURL(for: .vl)
        )
    } catch {
        let reason = "\(error)"
        vlStatus = .error(reason)
        throw AIRuntimeError.modelLoadFailed(reason)
    }

    vlSession = loaded
    vlStatus = .ready
}
|
||||
|
||||
/// Runs the VL model over the given images and returns its raw text output.
///
/// The output is expected to be a JSON string when `prompt` is
/// `VLPrompts.reportExtraction`; parsing it, and falling back when parsing
/// fails (§3.2), is the caller's responsibility.
///
/// NOTE(review): Swift actors are reentrant across `await`, so actor isolation
/// alone does not keep this call from overlapping with other `AIRuntime` work
/// (e.g. LLM generation) while the model is running. If the two models must
/// never run at the same time for memory reasons, that has to be enforced
/// explicitly — confirm.
///
/// - Parameters:
///   - imageURLs: Images to analyse; must not be empty.
///   - prompt: Text instruction passed to the model.
///   - maxTokens: Upper bound on generated tokens (default 512).
/// - Returns: The text generated by the model.
/// - Throws: `AIRuntimeError.notReady` when the VL model is not loaded;
///   `AIRuntimeError.inferenceFailed` when no image is supplied or the
///   session fails.
func analyzeReport(imageURLs: [URL],
                   prompt: String,
                   maxTokens: Int = 512) async throws -> String {
    guard vlStatus == .ready, let session = vlSession else {
        throw AIRuntimeError.notReady
    }

    // Without an image the model would answer from the prompt alone and could
    // echo the few-shot examples back as if they were real findings.
    guard !imageURLs.isEmpty else {
        throw AIRuntimeError.inferenceFailed("未提供待识别的图片")
    }

    do {
        return try await session.analyze(
            imageURLs: imageURLs,
            prompt: prompt,
            maxTokens: maxTokens
        )
    } catch {
        throw AIRuntimeError.inferenceFailed("\(error)")
    }
}
|
||||
}
|
||||
|
||||
71
康康/AI/Prompts/VLPrompts.swift
Normal file
71
康康/AI/Prompts/VLPrompts.swift
Normal file
@@ -0,0 +1,71 @@
|
||||
import Foundation
|
||||
|
||||
/// Prompt templates for the VL model (Qwen2.5-VL) used to recognise health check-up / lab report images.
|
||||
/// Output contract: strict JSON, with no explanatory text, markdown fences, or leading/trailing text.
|
||||
/// On parse failure, CaptureService falls back to manual entry (§3.2 failure-fallback red line).
|
||||
enum VLPrompts {
|
||||
|
||||
/// Field definitions of the output JSON (spelled out in the prompt to teach the model):
|
||||
/// ```
|
||||
/// {
|
||||
/// "title": "Spring annual check-up", // report heading; "拍摄识别" ("captured scan") when absent
|
||||
/// "type": "checkup|lab|imaging|prescription|other",
|
||||
/// "report_date": "YYYY-MM-DD", // report date (today when absent)
|
||||
/// "institution": "XX Hospital", // may be an empty string
|
||||
/// "page_count": 1,
|
||||
/// "summary": "short phrase on the overall trend", // may be an empty string
|
||||
/// "indicators": [
|
||||
/// {
|
||||
/// "name": "LDL cholesterol",
|
||||
/// "value": "3.84",
|
||||
/// "unit": "mmol/L",
|
||||
/// "range": "< 3.40",
|
||||
/// "status": "high|low|normal"
|
||||
/// }
|
||||
/// ]
|
||||
/// }
|
||||
/// ```
|
||||
/// The `kind` field is omitted — the UI chooses A2 (single item) or B3 (multiple items) from the number of indicators.
|
||||
|
||||
/// Report-extraction prompt: a JSON schema, the extraction rules and two worked examples whose values are in Chinese.
/// NOTE(review): the rules tell the model to default `report_date` to today, but this static template never
/// states the current date — confirm that callers supply or overwrite it.
static let reportExtraction: String = #"""
|
||||
你是一个医学体检报告识别助手。请只输出一段合法 JSON,不要解释、不要 markdown 围栏、不要任何前后缀文字。
|
||||
|
||||
JSON schema(严格):
|
||||
{
|
||||
"title": string,
|
||||
"type": "checkup" | "lab" | "imaging" | "prescription" | "other",
|
||||
"report_date": "YYYY-MM-DD",
|
||||
"institution": string,
|
||||
"page_count": number,
|
||||
"summary": string,
|
||||
"indicators": [
|
||||
{
|
||||
"name": string,
|
||||
"value": string,
|
||||
"unit": string,
|
||||
"range": string,
|
||||
"status": "high" | "low" | "normal"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
规则:
|
||||
- status 根据 value 与 range 自己判断:value > range 上限 → "high",< 下限 → "low",否则 → "normal"。
|
||||
- range 字段保留原文(如 "< 3.40"、"3.9 - 6.1"、"0 - 5"),不要解析成区间对象。
|
||||
- 无法识别的字段填空字符串(institution / summary)或合理默认值(report_date 用今天)。
|
||||
- 不要发明指标。看不清的整行跳过。
|
||||
- 化验单一般 type = "lab",体检套餐 = "checkup"。
|
||||
|
||||
示例 1(化验单 · 单项):
|
||||
输入: 一张化验单照片,只能看清「低密度脂蛋白 3.84 mmol/L 参考 <3.40」
|
||||
输出:
|
||||
{"title":"低密度脂蛋白单项","type":"lab","report_date":"2026-05-25","institution":"","page_count":1,"summary":"","indicators":[{"name":"低密度脂蛋白","value":"3.84","unit":"mmol/L","range":"< 3.40","status":"high"}]}
|
||||
|
||||
示例 2(体检 · 多项):
|
||||
输入: 一份春季体检,3 项可读
|
||||
输出:
|
||||
{"title":"春季年度体检","type":"checkup","report_date":"2026-04-12","institution":"协和医院","page_count":1,"summary":"血脂偏高、其他正常","indicators":[{"name":"低密度脂蛋白","value":"3.84","unit":"mmol/L","range":"< 3.40","status":"high"},{"name":"谷丙转氨酶","value":"32","unit":"U/L","range":"9 - 50","status":"normal"},{"name":"空腹血糖","value":"5.2","unit":"mmol/L","range":"3.9 - 6.1","status":"normal"}]}
|
||||
|
||||
现在请识别图片并输出 JSON:
|
||||
"""#
|
||||
}
|
||||
72
康康/AI/VLSession.swift
Normal file
72
康康/AI/VLSession.swift
Normal file
@@ -0,0 +1,72 @@
|
||||
import Foundation
|
||||
import MLX
|
||||
import MLXVLM
|
||||
import MLXLMCommon
|
||||
|
||||
/// Wraps an MLX vision-language model (Qwen2.5-VL) for image → text inference.
///
/// Declared as an actor; per the original author's note, ordering relative to
/// other model work is expected to be arranged upstream by `AIRuntime`.
actor VLSession {
    /// Loaded model container that every inference call runs through.
    let container: ModelContainer

    /// Creates a session around an already-loaded model container.
    init(container: ModelContainer) {
        self.container = container
    }

    /// Runs `body`, selecting the CPU as MLX's default device when built for
    /// the simulator; on other builds `body` is invoked directly.
    private static func withDeviceOverride<R>(
        _ body: () async throws -> R
    ) async rethrows -> R {
        #if targetEnvironment(simulator)
        return try await Device.withDefaultDevice(.cpu, body)
        #else
        return try await body()
        #endif
    }

    /// Loads the VL model from a local directory (config.json, weights,
    /// tokenizer and processor files).
    ///
    /// - Parameter folderURL: Directory containing the model files.
    /// - Returns: A session wrapping the loaded container.
    /// - Throws: Whatever `VLMModelFactory.loadContainer` throws.
    static func load(folderURL: URL) async throws -> VLSession {
        let configuration = ModelConfiguration(directory: folderURL)
        let container = try await withDeviceOverride {
            try await VLMModelFactory.shared.loadContainer(
                configuration: configuration
            )
        }
        return VLSession(container: container)
    }

    /// Generates the complete answer in one shot: all chunks are collected and
    /// the full string is returned at the end.
    ///
    /// The VL model is used for structured JSON extraction, so nothing is
    /// streamed to the caller and half-finished JSON never reaches the UI.
    /// For the same reason a cancelled task throws instead of returning the
    /// text collected so far.
    ///
    /// - Parameters:
    ///   - imageURLs: Local `file://` URLs of the images to analyse.
    ///   - prompt: Text instruction (e.g. `VLPrompts.reportExtraction`).
    ///   - maxTokens: Upper bound on generated tokens (default 512).
    /// - Returns: The concatenated text chunks produced by the model.
    /// - Throws: `CancellationError` when the surrounding task is cancelled;
    ///   otherwise errors from input preparation or generation.
    func analyze(imageURLs: [URL],
                 prompt: String,
                 maxTokens: Int = 512) async throws -> String {
        try await Self.withDeviceOverride {
            try await container.perform { (context: ModelContext) in
                // Skip image preprocessing if the caller has already given up.
                try Task.checkCancellation()

                let images = imageURLs.map { UserInput.Image.url($0) }
                let userInput = UserInput(prompt: prompt, images: images)
                let lmInput = try await context.processor.prepare(input: userInput)

                let parameters = GenerateParameters(
                    maxTokens: maxTokens,
                    temperature: Float(0.2), // low temperature keeps the JSON output stable
                    topP: Float(0.9)
                )

                var collected = ""
                for await event in try MLXLMCommon.generate(
                    input: lmInput,
                    parameters: parameters,
                    context: context
                ) {
                    // Throw rather than break: a truncated JSON document must
                    // not be returned as if generation had succeeded.
                    try Task.checkCancellation()
                    if case .chunk(let text) = event {
                        collected.append(text)
                    }
                }

                // The stream may simply end when the task is cancelled, so
                // check once more before treating the text as complete.
                try Task.checkCancellation()
                return collected
            }
        }
    }
}
|
||||
Reference in New Issue
Block a user