import Foundation
import UIKit
import SwiftData
/// Structured result of a vision-language (VL) parse, ready to be fed into SwiftData model construction.
/// Its fields resemble Indicator/Report but are deliberately decoupled from them, so that
/// changes to the prompt schema do not leak into the data layer.
struct ParsedReport: Sendable {
    /// One indicator row extracted from a report.
    struct ParsedIndicator: Sendable, Identifiable {
        /// Stable identity for editable `ForEach` lists; avoids mismatched inputs that
        /// index-based ids cause when rows are inserted or removed.
        let id = UUID()
        var name: String
        var value: String
        var unit: String
        var range: String
        var status: IndicatorStatus
        /// Page the indicator was read from, if the model reported a location.
        var sourcePageIndex: Int?
        /// Bounding box of the evidence on that page (x / y / width / height), if reported.
        var sourceBoxX: Double?
        var sourceBoxY: Double?
        var sourceBoxWidth: Double?
        var sourceBoxHeight: Double?
    }

    var title: String
    /// Raw value of a `ReportType` case.
    var typeRaw: String
    var reportDate: Date
    var institution: String
    var summary: String
    var pageCount: Int
    var indicators: [ParsedIndicator]

    /// True when not a single indicator was recognized. This is treated as a failure
    /// and the UI falls back to manual entry.
    var isEmpty: Bool {
        return indicators.isEmpty
    }

    /// Placeholder result with no content, handed to the UI on the failure fallback path.
    /// - Parameter date: Report date to stamp on the placeholder; defaults to now.
    static func empty(date: Date = .now) -> ParsedReport {
        return .init(
            title: "",
            typeRaw: ReportType.other.rawValue,
            reportDate: date,
            institution: "",
            summary: "",
            pageCount: 1,
            indicators: []
        )
    }
}
/// Errors thrown by `CaptureService`. The UI decides how to present each one
/// (fall back to the manual form vs. offer a retry).
enum CaptureError: Error, LocalizedError {
    /// The model could not be prepared.
    case modelNotReady
    /// Inference threw; the payload is a description of the underlying error.
    case inferenceFailed(String)
    /// The model output could not be turned into structured data; the payload says why.
    case parseFailed(String)

    /// Localized, user-facing description of the failure.
    var errorDescription: String? {
        switch self {
        case .modelNotReady:
            return String(appLoc: "VL 模型尚未就绪")
        case let .inferenceFailed(detail):
            return String(appLoc: "识别失败:\(detail)")
        case let .parseFailed(detail):
            return String(appLoc: "结构化失败:\(detail)")
        }
    }
}
/// `CaptureService` 是 actor 是因为它的方法会等 AIRuntime(也是 actor),
/// 但本身不持任何可变状态 —— 单例 stateless,纯粹是 §3.1 模块边界的"门面"。
actor CaptureService {
static let shared = CaptureService()
private init() {}
/// Runs the VL model over assets that are already written to the Vault and returns a structured `ParsedReport`.
///
/// Used for (per the original notes):
/// - the first recognition pass in UnifiedCaptureFlow (the UI writes the images first and then
///   calls this method, so the assets survive a failure or cancel and manual entry stays possible)
/// - the "re-recognize" button at the top of the entry form
/// - C2 "re-interpret" (W5)
///
/// Writing the result back to SwiftData is the caller's job on the MainActor, see
/// `Report.applyReanalyzed(_:in:)`. This method does not accept `@Model` types, to avoid
/// passing non-Sendable references across the actor boundary.
///
/// - Parameter assets: Assets previously saved to the Vault.
/// - Returns: The parsed report produced by `runVL(on:)`.
/// - Throws: Whatever `runVL(on:)` throws (`CaptureError`).
func reanalyze(assets: [FileVault.SavedAsset]) async throws -> ParsedReport {
    let parsed = try await runVL(on: assets)
    return parsed
}
/// OCR path of "capture and recognize": hands plain text produced by Vision OCR to the LLM
/// (Qwen3-1.7B per the original notes) so it can extract indicators as structured data.
///
/// No Report is created and no image is kept. On failure a `CaptureError` is thrown and the UI
/// falls back to manual entry (§3.2). The caller (MainActor) performs OCR first and passes the
/// text in, so OCR does not run inside this actor and no UIImage crosses the actor boundary.
///
/// - Parameter text: OCR text of the lab sheet.
/// - Returns: The indicators parsed from the model output (possibly empty).
/// - Throws: `CaptureError.modelNotReady` when preparation fails, `.inferenceFailed` when
///   generation throws, `.parseFailed` when the output cannot be parsed.
func recognizeIndicators(fromOCRText text: String) async throws -> [ParsedReport.ParsedIndicator] {
    // Load the LLM (per the original note this unloads VL first; the OOM gate is handled there).
    // Any preparation error is reported uniformly as "model not ready".
    do {
        try await AIRuntime.shared.prepare()
    } catch {
        throw CaptureError.modelNotReady
    }

    let rawOutput: String
    do {
        // A full lab sheet has a dozen or more rows, so the token budget is generous.
        // Per the original note, AIRuntime serializes LLM decoding against any VL decoding.
        let tokens = await AIRuntime.shared.generate(
            prompt: VLPrompts.indicatorsFromText(text),
            maxTokens: 2048
        )
        var buffer = ""
        for try await piece in tokens {
            buffer += piece.text
        }
        rawOutput = buffer
    } catch {
        throw CaptureError.inferenceFailed("\(error)")
    }

    // Qwen3 may emit a reasoning ("think") block; strip it before looking for JSON.
    let stripped = CaptureService.stripThink(rawOutput)

    #if DEBUG
    // Forensics: log the raw output (which may contain an unclosed think block) and the stripped
    // text, to find the root cause of "empty / invalid JSON".
    // NSLog is used instead of print because, per the original note, stdout is taken over by
    // Xcode's lldb and idevicesyslog cannot capture it.
    NSLog("KKDBG-VL RAW LLM output (%d chars):\n%@\n--- end RAW ---", rawOutput.count, rawOutput)
    NSLog("KKDBG-VL cleaned (%d chars):\n%@\n--- end cleaned ---", stripped.count, stripped)
    #endif

    do {
        return try CaptureService.parseIndicatorsJSON(stripped)
    } catch CaptureError.parseFailed(let reason) {
        // Surface characteristics of the actual model output on screen for on-site diagnosis:
        // raw length, stripped length and a short prefix.
        let preview = stripped.isEmpty ? "(strip 后为空)" : String(stripped.prefix(60))
        throw CaptureError.parseFailed(
            "\(reason)〔raw \(rawOutput.count)字/clean \(stripped.count)字·前缀:\(preview)〕"
        )
    } catch {
        throw CaptureError.parseFailed("\(error)")
    }
}
/// Strips Qwen3 reasoning ("think") markup from model output, then trims leading whitespace.
///
/// Three shapes are handled, in this order:
/// 1. paired blocks (open tag … close tag) are removed entirely, repeatedly;
/// 2. an open tag that is never closed: everything from the tag onward is dropped;
/// 3. an orphaned close tag: everything up to and including the tag is dropped.
///
/// Per the original note this mirrors `HealthExportService.stripThinkBlocks`; a separate
/// `nonisolated` copy lives here because this type is a non-MainActor actor and a
/// cross-isolation call is to be avoided.
///
/// - Parameter raw: Raw model output.
/// - Returns: The text with think markup removed and no leading whitespace. Trailing
///   whitespace is left as is.
nonisolated static func stripThink(_ raw: String) -> String {
    // The tags are spelled with scalar escapes so that markup-stripping tooling cannot
    // swallow the literals (and the code between them) from this source file.
    let openTag = "\u{3C}think\u{3E}"
    let closeTag = "\u{3C}/think\u{3E}"

    var s = raw
    // 1. Remove every properly paired block.
    while let openR = s.range(of: openTag),
          let closeR = s.range(of: closeTag, range: openR.upperBound..<s.endIndex) {
        s.removeSubrange(openR.lowerBound..<closeR.upperBound)
    }
    // 2. Unclosed open tag: the reasoning never finished, drop it and everything after.
    if let openR = s.range(of: openTag) {
        s = String(s[..<openR.lowerBound])
    }
    // 3. Orphaned close tag: keep only what follows it.
    if let closeR = s.range(of: closeTag) {
        s = String(s[closeR.upperBound...])
    }
    // Trim leading whitespace and newlines.
    while let first = s.first, first.isWhitespace {
        s.removeFirst()
    }
    return s
}
/// The pure "VL inference + JSON parsing" stage. The assets must already be written to the Vault.
///
/// - Parameter assets: Saved assets whose `relativePath` is resolved against the Vault root.
/// - Returns: The report parsed from the model output; `pageCount` defaults to `assets.count`.
/// - Throws: `CaptureError.modelNotReady` when VL preparation fails, `.inferenceFailed` when
///   the analysis call throws, `.parseFailed` when the output cannot be parsed.
private func runVL(on assets: [FileVault.SavedAsset]) async throws -> ParsedReport {
    do {
        try await AIRuntime.shared.prepareVL()
    } catch {
        throw CaptureError.modelNotReady
    }

    // Resolve each asset to an absolute file URL inside the Vault.
    var imageURLs: [URL] = []
    for asset in assets {
        imageURLs.append(FileVault.shared.rootURL.appendingPathComponent(asset.relativePath))
    }

    let modelOutput: String
    do {
        modelOutput = try await AIRuntime.shared.analyzeReport(
            imageURLs: imageURLs,
            prompt: VLPrompts.reportExtraction()
        )
    } catch {
        throw CaptureError.inferenceFailed("\(error)")
    }

    do {
        return try CaptureService.parseReportJSON(modelOutput, pageCount: assets.count)
    } catch CaptureError.parseFailed(let reason) {
        // Already the right error type: pass it through unchanged.
        throw CaptureError.parseFailed(reason)
    } catch {
        // Anything else is normalized to a parse failure.
        throw CaptureError.parseFailed("\(error)")
    }
}
// MARK: - JSON parse(static + 纯函数 → 方便单测)
/// Extracts the first balanced JSON value from VL output and parses it into a `ParsedReport`.
///
/// Tolerances:
/// - markdown code fences are removed
/// - non-JSON text before and after the value is dropped
/// - missing fields get default values
///
/// Parsing zero indicators still counts as success, but `ParsedReport.isEmpty` is then true
/// and the UI takes the "manual entry" branch.
///
/// - Parameters:
///   - raw: Raw model output.
///   - pageCount: Number of pages supplied to the model; used when the JSON has no
///     `page_count` and as a lower bound when it does.
/// - Returns: The parsed report.
/// - Throws: `CaptureError.parseFailed` when the text is not valid JSON or its root is
///   neither an object nor an array of objects.
static func parseReportJSON(_ raw: String, pageCount: Int = 1) throws -> ParsedReport {
    // `extractBalancedJSON` is used rather than the object-only `extractJSONObject`: with many
    // rows the VL model occasionally emits a bare array [{...},{...}]. Matching only objects
    // would pair from the first `{`, keep just the first indicator and silently drop the rest.
    // A top-level array is therefore treated as the indicators list.
    let candidate = repairJSON(extractBalancedJSON(from: raw))
    guard let payload = candidate.data(using: .utf8) else {
        throw CaptureError.parseFailed("非 UTF-8 输出")
    }

    let decoded: Any
    do {
        decoded = try JSONSerialization.jsonObject(with: payload, options: [.fragmentsAllowed])
    } catch {
        throw CaptureError.parseFailed("JSON 不合法:\(error.localizedDescription)")
    }

    let root: [String: Any]
    switch decoded {
    case let object as [String: Any]:
        root = object
    case let rows as [[String: Any]]:
        root = ["indicators": rows]
    default:
        throw CaptureError.parseFailed("根节点既不是对象也不是数组")
    }

    let trimmedTitle = (root["title"] as? String)?.trimmingCharacters(in: .whitespaces) ?? ""
    let declaredPages = (root["page_count"] as? Int) ?? pageCount
    let rows = arrayValue(root, keys: ["indicators", "indicator", "items", "指标", "指标列表", "项目"])

    return ParsedReport(
        // An empty title falls back to a localized default.
        title: trimmedTitle.isEmpty ? String(appLoc: "拍摄识别") : trimmedTitle,
        typeRaw: parseReportType(root["type"] as? String),
        reportDate: parseDate(root["report_date"] as? String) ?? .now,
        institution: (root["institution"] as? String) ?? "",
        summary: (root["summary"] as? String) ?? "",
        // Never report fewer pages than were actually supplied.
        pageCount: max(declaredPages, pageCount),
        indicators: rows.compactMap { parseIndicator($0) }
    )
}
/// Partial-recognition parse: the model is expected to output `{"indicators":[...]}` and only
/// the indicators array is extracted.
///
/// Built on `extractBalancedJSON`, `repairJSON` and `parseIndicator`. When no indicator can be
/// parsed an empty array is returned (no throw) and the UI takes the "nothing read, add manually"
/// branch. Only invalid JSON, or an unsupported root, throws.
///
/// - Parameter raw: Model output, already stripped of reasoning markup by the caller.
/// - Returns: Parsed indicators, possibly empty.
/// - Throws: `CaptureError.parseFailed` when the text is not valid JSON or its root is
///   neither an object nor an array of objects.
static func parseIndicatorsJSON(_ raw: String) throws -> [ParsedReport.ParsedIndicator] {
    let candidate = repairJSON(extractBalancedJSON(from: raw))
    guard let payload = candidate.data(using: .utf8) else {
        throw CaptureError.parseFailed("非 UTF-8 输出")
    }

    let decoded: Any
    do {
        decoded = try JSONSerialization.jsonObject(with: payload, options: [.fragmentsAllowed])
    } catch {
        throw CaptureError.parseFailed("JSON 不合法:\(error.localizedDescription)")
    }

    // Two shapes are accepted: {"indicators":[...]} or a bare [...]
    // (the model occasionally omits the outer key).
    let rows: [[String: Any]]
    switch decoded {
    case let object as [String: Any]:
        rows = arrayValue(object, keys: ["indicators", "indicator", "items", "指标", "指标列表", "项目"])
    case let list as [[String: Any]]:
        rows = list
    default:
        throw CaptureError.parseFailed("根节点既不是对象也不是数组")
    }

    var parsed: [ParsedReport.ParsedIndicator] = []
    for row in rows {
        if let indicator = parseIndicator(row) {
            parsed.append(indicator)
        }
    }
    return parsed
}
/// Extracts the first balanced `{...}` from a string, tolerating markdown fences and
/// surrounding noise.
///
/// Braces inside JSON string literals (including escaped quotes) are ignored while matching.
///
/// - Parameter raw: Raw model output.
/// - Returns: The first balanced object as a substring copy. When there is no `{` or the
///   braces never balance, the trimmed, fence-stripped input is returned so that the
///   subsequent `JSONSerialization` call reports the error.
static func extractJSONObject(from raw: String) -> String {
    var s = raw.trimmingCharacters(in: .whitespacesAndNewlines)
    // Strip a markdown fence.
    if s.hasPrefix("```") {
        // Drop the first line (```json or ```).
        if let firstNewline = s.firstIndex(of: "\n") {
            s = String(s[s.index(after: firstNewline)...])
        }
        // Drop the trailing ```. A fence at the very start is the opening fence of a
        // single-line block; cutting there would throw the whole payload away.
        if let endRange = s.range(of: "```", options: .backwards),
           endRange.lowerBound != s.startIndex {
            s = String(s[..<endRange.lowerBound])
        }
    }

    guard let start = s.firstIndex(of: "{") else { return s }

    var depth = 0
    var inString = false
    var escaped = false
    for i in s[start...].indices {
        let ch = s[i]
        if inString {
            // Inside a string literal only an unescaped quote is significant.
            if escaped {
                escaped = false
            } else if ch == "\\" {
                escaped = true
            } else if ch == "\"" {
                inString = false
            }
            continue
        }
        switch ch {
        case "\"":
            inString = true
        case "{":
            depth += 1
        case "}":
            depth -= 1
            if depth == 0 {
                return String(s[start...i])
            }
        default:
            break
        }
    }
    // Never balanced: let the JSON parser produce the error.
    return s
}

/// Repairs two common defects in model-generated JSON before parsing:
/// curly double quotes (U+201C / U+201D) are replaced by ASCII quotes, and a trailing comma
/// directly before `}` or `]` (optionally separated by whitespace) is removed.
///
/// The replacements are textual, so they also apply inside string values.
///
/// - Parameter s: Candidate JSON text.
/// - Returns: The repaired text.
static func repairJSON(_ s: String) -> String {
    var t = s
    t = t.replacingOccurrences(of: "\u{201C}", with: "\"") // left curly double quote
    t = t.replacingOccurrences(of: "\u{201D}", with: "\"") // right curly double quote
    if let re = try? NSRegularExpression(pattern: ",\\s*([}\\]])") {
        t = re.stringByReplacingMatches(
            in: t, range: NSRange(t.startIndex..., in: t), withTemplate: "$1")
    }
    return t
}
/// Extracts the first balanced JSON value, `{...}` or `[...]`, whichever opens first.
///
/// Used because the model may output either `{"indicators":[...]}` or a bare `[...]`.
/// Brackets inside JSON string literals (including escaped quotes) are ignored while matching.
///
/// - Parameter raw: Raw model output.
/// - Returns: The first balanced value as a substring copy. When no opener exists or the
///   brackets never balance, the trimmed, fence-stripped input is returned so that the
///   subsequent `JSONSerialization` call reports the error.
static func extractBalancedJSON(from raw: String) -> String {
    var s = raw.trimmingCharacters(in: .whitespacesAndNewlines)
    if s.hasPrefix("```") {
        // Drop the opening fence line (```json or ```).
        if let firstNewline = s.firstIndex(of: "\n") {
            s = String(s[s.index(after: firstNewline)...])
        }
        // Drop the trailing ```. A fence at the very start is the opening fence of a
        // single-line block; cutting there would throw the whole payload away.
        if let endRange = s.range(of: "```", options: .backwards),
           endRange.lowerBound != s.startIndex {
            s = String(s[..<endRange.lowerBound])
        }
    }

    guard let start = s.firstIndex(where: { $0 == "{" || $0 == "[" }) else { return s }

    var depth = 0
    var inString = false
    var escaped = false
    for i in s[start...].indices {
        let ch = s[i]
        if inString {
            // Inside a string literal only an unescaped quote is significant.
            if escaped {
                escaped = false
            } else if ch == "\\" {
                escaped = true
            } else if ch == "\"" {
                inString = false
            }
            continue
        }
        switch ch {
        case "\"":
            inString = true
        case "{", "[":
            depth += 1
        case "}", "]":
            depth -= 1
            if depth == 0 {
                return String(s[start...i])
            }
        default:
            break
        }
    }
    // Never balanced: let the JSON parser produce the error.
    return s
}

/// Maps the model's free-text report type to a known `ReportType` raw value.
///
/// - Parameter raw: The `type` field from the model output, if any.
/// - Returns: The raw value of the `ReportType` whose raw value equals the lowercased
///   input, or `ReportType.other.rawValue` when the input is nil or unknown.
private static func parseReportType(_ raw: String?) -> String {
    guard let raw = raw?.lowercased() else { return ReportType.other.rawValue }
    return ReportType(rawValue: raw)?.rawValue ?? ReportType.other.rawValue
}
/// Parses a report date emitted by the model, trying several common layouts in order.
///
/// Different sources produce different date formats. Each one is tried so that a parse
/// failure does not fall back to "today" in `parseReportJSON` (`?? .now`), which would
/// misplace the report when the archive groups by `reportDate` year (C1).
///
/// - Parameter raw: Date text, or nil.
/// - Returns: The first successfully parsed date, or nil when the input is nil, blank
///   or matches none of the layouts.
private static func parseDate(_ raw: String?) -> Date? {
    guard let text = raw?.trimmingCharacters(in: .whitespaces), !text.isEmpty else {
        return nil
    }

    let formatter = DateFormatter()
    formatter.locale = Locale(identifier: "en_US_POSIX")

    let layouts = [
        "yyyy-MM-dd", "yyyy/MM/dd", "yyyy.MM.dd",
        "yyyy年MM月dd日", "yyyy年M月d日", "yyyy年MM月", "yyyy-MM", "yyyy/MM",
    ]
    var index = 0
    while index < layouts.count {
        formatter.dateFormat = layouts[index]
        if let parsed = formatter.date(from: text) {
            return parsed
        }
        index += 1
    }
    return nil
}
/// Builds one `ParsedIndicator` from a JSON object, accepting several English and Chinese
/// spellings for each field.
///
/// - Parameter d: One element of the indicators array.
/// - Returns: The indicator, or nil when no non-blank name can be found. Missing value,
///   unit and range default to the empty string; the status is derived by
///   `parseIndicatorStatus`; the source location is set only when `parseEvidenceLocation`
///   accepts it.
private static func parseIndicator(_ d: [String: Any]) -> ParsedReport.ParsedIndicator? {
    let nameKeys = ["name", "item", "indicator", "test", "项目", "指标", "指标名", "指标名称", "检查项目", "检验项目"]
    guard let rawName = stringValue(d, keys: nameKeys) else { return nil }
    let name = rawName.trimmingCharacters(in: .whitespaces)
    if name.isEmpty { return nil }

    let value = stringValue(d, keys: ["value", "result", "reading", "结果", "数值", "检测值", "测定值"]) ?? ""
    let unit = stringValue(d, keys: ["unit", "单位"]) ?? ""
    let range = stringValue(d, keys: ["range", "reference", "reference_range", "ref", "参考", "参考值", "参考范围", "正常范围"]) ?? ""
    let status = parseIndicatorStatus(
        raw: stringValue(d, keys: ["status", "flag", "abnormal", "异常", "提示", "标记"]),
        value: value,
        range: range
    )
    let location = parseEvidenceLocation(d)

    return ParsedReport.ParsedIndicator(
        name: name,
        value: value,
        unit: unit,
        range: range,
        status: status,
        sourcePageIndex: location?.pageIndex,
        sourceBoxX: location?.box.x,
        sourceBoxY: location?.box.y,
        sourceBoxWidth: location?.box.width,
        sourceBoxHeight: location?.box.height
    )
}
/// Reads where on the source pages an indicator was found.
///
/// - Parameter d: One element of the indicators array.
/// - Returns: A zero-based page index (the JSON page number minus one) and a box, or nil
///   unless all of the following hold: a page number >= 1 is present; the box has exactly
///   four numbers (x, y, width, height); x and y are non-negative; width and height are
///   positive; and the box lies within the unit square (`x + width <= 1`, `y + height <= 1`).
private static func parseEvidenceLocation(_ d: [String: Any]) -> (pageIndex: Int, box: (x: Double, y: Double, width: Double, height: Double))? {
    guard let page = intValue(d, keys: ["source_page", "sourcePage", "page", "页码", "来源页码"]),
          page >= 1 else {
        return nil
    }
    guard let coords = numberArrayValue(d, keys: ["source_box", "sourceBox", "box", "bbox", "位置", "来源位置"]),
          coords.count == 4 else {
        return nil
    }

    let (x, y, width, height) = (coords[0], coords[1], coords[2], coords[3])
    let originIsValid = x >= 0 && y >= 0
    let sizeIsValid = width > 0 && height > 0
    let fitsUnitSquare = x + width <= 1 && y + height <= 1
    guard originIsValid, sizeIsValid, fitsUnitSquare else {
        return nil
    }
    return (pageIndex: page - 1, box: (x: x, y: y, width: width, height: height))
}
/// Returns the value of the first key in `keys` that holds a string or a number.
///
/// - Parameters:
///   - d: Dictionary to read from.
///   - keys: Candidate keys, tried in order.
/// - Returns: The string itself, or the number's `stringValue`; nil when no key holds
///   either. Keys holding any other type are skipped.
private static func stringValue(_ d: [String: Any], keys: [String]) -> String? {
    for key in keys {
        switch d[key] {
        case let text as String:
            return text
        case let number as NSNumber:
            return number.stringValue
        default:
            continue
        }
    }
    return nil
}
/// Returns the value of the first key in `keys` that can be read as an integer.
///
/// - Parameters:
///   - d: Dictionary to read from.
///   - keys: Candidate keys, tried in order.
/// - Returns: For each key, in this order: an exact `Int`; otherwise an `NSNumber`'s
///   `intValue`; otherwise a string that parses as `Int` after trimming whitespace and
///   newlines. nil when no key qualifies.
private static func intValue(_ d: [String: Any], keys: [String]) -> Int? {
    for key in keys {
        switch d[key] {
        case let exact as Int:
            return exact
        case let number as NSNumber:
            return number.intValue
        case let text as String:
            // An unparseable string does not end the search; the next key is tried.
            if let parsed = Int(text.trimmingCharacters(in: .whitespacesAndNewlines)) {
                return parsed
            }
        default:
            break
        }
    }
    return nil
}
/// Returns the value of the first key in `keys` that holds an array of numbers.
///
/// - Parameters:
///   - d: Dictionary to read from.
///   - keys: Candidate keys, tried in order.
/// - Returns: The array as `[Double]`. Elements may be doubles, `NSNumber`s or numeric
///   strings (trimmed of whitespace and newlines). A mixed array is accepted only when
///   every element converts. nil when no key qualifies.
private static func numberArrayValue(_ d: [String: Any], keys: [String]) -> [Double]? {
    for key in keys {
        let candidate = d[key]
        if let doubles = candidate as? [Double] {
            return doubles
        }
        if let boxed = candidate as? [NSNumber] {
            return boxed.map { $0.doubleValue }
        }
        guard let mixed = candidate as? [Any] else { continue }

        var converted: [Double] = []
        for element in mixed {
            if let value = element as? Double {
                converted.append(value)
            } else if let number = element as? NSNumber {
                converted.append(number.doubleValue)
            } else if let text = element as? String,
                      let value = Double(text.trimmingCharacters(in: .whitespacesAndNewlines)) {
                converted.append(value)
            }
        }
        // Accept only when nothing was dropped; otherwise try the next key.
        if converted.count == mixed.count {
            return converted
        }
    }
    return nil
}
/// Returns the value of the first key in `keys` that holds an array of JSON objects.
///
/// - Parameters:
///   - d: Dictionary to read from.
///   - keys: Candidate keys, tried in order.
/// - Returns: The array of objects, or an empty array when no key qualifies.
private static func arrayValue(_ d: [String: Any], keys: [String]) -> [[String: Any]] {
    let firstMatch = keys.lazy.compactMap { d[$0] as? [[String: Any]] }.first
    return firstMatch ?? []
}
/// Maps the model's status flag to an `IndicatorStatus`.
///
/// The flag is trimmed and lowercased, then compared against fixed lists of "high", "low"
/// and "normal" markers. When it matches none of them (including when it is nil or blank),
/// the status is inferred from `value` and `range`; if that is inconclusive the result
/// is `.normal`.
///
/// - Parameters:
///   - raw: Status flag from the model output, if any.
///   - value: The indicator's value text.
///   - range: The indicator's reference range text.
/// - Returns: The resolved status.
private static func parseIndicatorStatus(raw: String?, value: String, range: String) -> IndicatorStatus {
    let highMarks = ["high", "h", "hi", "above", "up", "↑", "⬆", "+", "偏高", "高", "增高", "升高", "偏高↑", "h↑"]
    let lowMarks = ["low", "l", "lo", "below", "down", "↓", "⬇", "-", "偏低", "低", "降低", "偏低↓", "l↓"]
    let normalMarks = ["normal", "n", "ok", "正常", "阴性", "无异常"]

    let token = (raw ?? "")
        .trimmingCharacters(in: .whitespacesAndNewlines)
        .lowercased()

    switch token {
    case _ where highMarks.contains(token):
        return .high
    case _ where lowMarks.contains(token):
        return .low
    case _ where normalMarks.contains(token):
        return .normal
    default:
        return inferStatus(value: value, range: range) ?? .normal
    }
}
/// Infers a status by comparing the first number in `value` with a textual reference range.
///
/// Range shapes, checked in this order:
/// 1. upper bound only (contains `<`, `≤` or fullwidth `<`): above the bound is `.high`,
///    otherwise `.normal`;
/// 2. lower bound only (contains `>`, `≥` or fullwidth `>`): below the bound is `.low`,
///    otherwise `.normal`;
/// 3. two numbers: treated as lower and upper bound.
///
/// - Parameters:
///   - value: The indicator's value text; its first number is used.
///   - range: The reference range text.
/// - Returns: The inferred status, or nil when `value` has no number, the range is blank,
///   or the range matches none of the shapes above.
private static func inferStatus(value: String, range: String) -> IndicatorStatus? {
    guard let v = firstNumber(in: value) else { return nil }

    // Normalize range separators to an ASCII hyphen: em dash, en dash, ASCII tilde,
    // fullwidth tilde (U+FF5E), wave dash (U+301C) and the character "至".
    let separators = ["—", "–", "~", "\u{FF5E}", "\u{301C}", "至"]
    var compact = range
    for separator in separators {
        compact = compact.replacingOccurrences(of: separator, with: "-")
    }
    compact = compact.trimmingCharacters(in: .whitespacesAndNewlines)
    guard !compact.isEmpty else { return nil }

    let bounds = numbers(in: compact)

    // Comparison markers, ASCII and fullwidth (U+FF1C / U+FF1E) forms.
    let upperMarks = ["<", "≤", "\u{FF1C}"]
    let lowerMarks = [">", "≥", "\u{FF1E}"]

    if upperMarks.contains(where: { compact.contains($0) }) {
        guard let upper = bounds.first else { return nil }
        return v > upper ? .high : .normal
    }
    if lowerMarks.contains(where: { compact.contains($0) }) {
        guard let lower = bounds.first else { return nil }
        return v < lower ? .low : .normal
    }

    guard bounds.count >= 2 else { return nil }
    let lower = bounds[0]
    var upper = bounds[1]
    // A compact range such as "3.5-5.5" can be tokenized as [3.5, -5.5] when the separator
    // hyphen is read as a minus sign. A negative upper bound below the lower bound can only
    // come from that, so the sign is dropped. Without this every in-range value is "high".
    if upper < lower, upper < 0 {
        upper = -upper
    }
    if v < lower { return .low }
    if v > upper { return .high }
    return .normal
}
/// Returns the first number found in `text`, or nil when it contains none.
private static func firstNumber(in text: String) -> Double? {
    let found = numbers(in: text)
    return found.first
}
/// Extracts every decimal number from `text`, in order of appearance.
///
/// A leading `-` is read as a sign only when it does not directly follow a digit. In
/// "3.5-5.5" the hyphen is a range separator, so the result is `[3.5, 5.5]` rather than
/// `[3.5, -5.5]`; "-1.5" and "-2--1" still yield negative numbers.
///
/// - Parameter text: Text to scan.
/// - Returns: The numbers found; empty when there are none or the pattern fails to compile.
private static func numbers(in text: String) -> [Double] {
    // (?<!\d) : the optional sign must not be glued to a preceding digit.
    let pattern = #"(?<!\d)-?\d+(?:\.\d+)?"#
    guard let regex = try? NSRegularExpression(pattern: pattern) else { return [] }
    let ns = text as NSString
    let fullRange = NSRange(location: 0, length: ns.length)
    return regex.matches(in: text, range: fullRange).compactMap { match in
        Double(ns.substring(with: match.range))
    }
}
}
// MARK: - Report ↔ CaptureService 桥接(MainActor 侧)
//
// CaptureService 是 actor,不能直接收 Report(@Model 非 Sendable)。
// C2「重新解读」UI 走这条路径:
// ```
// let assets = report.savedAssets
// let parsed = try await CaptureService.shared.reanalyze(assets: assets)
// report.applyReanalyzed(parsed, in: ctx)
// ```
extension Report {
    /// The report's associated assets converted to `FileVault.SavedAsset`, ready to be passed
    /// to `CaptureService.reanalyze(assets:)`.
    var savedAssets: [FileVault.SavedAsset] {
        assets.map { asset in
            FileVault.SavedAsset(relativePath: asset.relativePath, bytes: asset.bytes)
        }
    }

    /// Writes a VL re-recognition result back into this report.
    ///
    /// - indicators: all old ones are deleted and the new batch is inserted, linked to this
    ///   report (per the original note, cascade delete clears caches)
    /// - summary / institution: overwritten only when the new value is non-empty, so that an
    ///   empty result does not wipe a good one
    ///
    /// Must be called on the MainActor with the main SwiftData context. Errors from removing
    /// Vault files and from saving the context are ignored.
    ///
    /// - Parameters:
    ///   - parsed: The re-recognition result.
    ///   - ctx: The model context used for deletes, inserts and the final save.
    @MainActor
    func applyReanalyzed(_ parsed: ParsedReport, in ctx: ModelContext) {
        if !parsed.summary.isEmpty {
            summary = parsed.summary
        }
        if !parsed.institution.isEmpty {
            institution = parsed.institution
        }

        // Delete all old indicators. Per the original note, the relationship to an indicator's
        // own Asset (a partial snapshot image, if any) is nullify rather than cascade, so the
        // Vault file must be unlinked and the Asset record deleted by hand; otherwise an
        // orphaned image is left behind (violating the §6 privacy promise).
        // The reference implementation is TimelineEntryDetailView.deleteIndicator.
        for stale in indicators {
            if let attachment = stale.asset {
                try? FileVault.shared.remove(relativePath: attachment.relativePath)
                ctx.delete(attachment)
            }
            ctx.delete(stale)
        }
        indicators.removeAll()

        // Insert the new indicators, stamped with this report's date.
        for item in parsed.indicators {
            let fresh = Indicator(
                name: item.name,
                value: item.value,
                unit: item.unit,
                range: item.range,
                status: item.status,
                capturedAt: reportDate,
                report: self,
                source: .report,
                sourcePageIndex: item.sourcePageIndex,
                sourceBoxX: item.sourceBoxX,
                sourceBoxY: item.sourceBoxY,
                sourceBoxWidth: item.sourceBoxWidth,
                sourceBoxHeight: item.sourceBoxHeight
            )
            ctx.insert(fresh)
        }
        try? ctx.save()
    }
}