import Foundation
import UIKit
import ImageIO
import SwiftData

/// Structured result of a VL (vision-language) parse, ready to feed SwiftData model constructors.
///
/// Mirrors the `Indicator` / `Report` fields but stays decoupled from them, so that
/// prompt-schema changes do not leak into the data layer.
struct ParsedReport: Sendable {
    var title: String
    var typeRaw: String
    var reportDate: Date
    var institution: String
    var summary: String
    var pageCount: Int
    var indicators: [ParsedIndicator]

    /// One recognised indicator row, plus the optional location of its evidence on the page.
    struct ParsedIndicator: Sendable, Identifiable {
        // Stable identity for editable `ForEach` lists; using indices as ids would
        // mis-associate inputs when rows are inserted or removed.
        let id = UUID()
        var name: String
        var value: String
        var unit: String
        var range: String
        var status: IndicatorStatus
        var sourcePageIndex: Int?
        var sourceBoxX: Double?
        var sourceBoxY: Double?
        var sourceBoxWidth: Double?
        var sourceBoxHeight: Double?
    }

    /// Nothing recognised at all counts as a failure; the UI falls back to manual entry.
    var isEmpty: Bool { indicators.isEmpty }

    /// Placeholder empty result handed to the UI on the failure fallback path.
    static func empty(date: Date = .now) -> ParsedReport {
        ParsedReport(
            title: "",
            typeRaw: ReportType.other.rawValue,
            reportDate: date,
            institution: "",
            summary: "",
            pageCount: 1,
            indicators: []
        )
    }
}

/// Errors raised by `CaptureService`; the UI decides how to present them
/// (fallback form vs. retry).
enum CaptureError: Error, LocalizedError {
    case modelNotReady
    case inferenceFailed(String)
    case parseFailed(String)

    var errorDescription: String? {
        switch self {
        case .modelNotReady:
            return String(appLoc: "VL 模型尚未就绪")
        case .inferenceFailed(let m):
            return String(appLoc: "识别失败:\(m)")
        case .parseFailed(let m):
            return String(appLoc: "结构化失败:\(m)")
        }
    }
}

/// `CaptureService` is an actor because its methods await `AIRuntime` (also an actor),
/// but it holds no mutable state of its own: a stateless singleton that acts purely as
/// the facade of the §3.1 module boundary.
actor CaptureService {
    static let shared = CaptureService()

    private init() {}

    /// JSON keys under which the model may place the indicator list.
    private static let indicatorArrayKeys = ["indicators", "indicator", "items", "指标", "指标列表", "项目"]

    /// Runs VL over assets that are already written to the Vault and returns a structured
    /// `ParsedReport`.
    ///
    /// Used by:
    /// - the first recognition pass of UnifiedCaptureFlow (the UI writes the images first and
    ///   then calls this, so assets survive a failure/cancel and manual entry can proceed)
    /// - the "re-recognise" button at the top of the entry form
    /// - C2 "re-interpret" (W5)
    ///
    /// Writing back to SwiftData is the caller's (MainActor) job, see
    /// `Report.applyReanalyzed(_:in:)`. `@Model` types are deliberately not accepted here so
    /// that no non-Sendable reference crosses the actor boundary.
    ///
    /// - Throws: `CaptureError` (`modelNotReady`, `inferenceFailed`, `parseFailed`).
    func reanalyze(assets: [FileVault.SavedAsset]) async throws -> ParsedReport {
        try await runVL(on: assets)
    }

    /// "Photo recognition" OCR path: hands plain text produced by Vision OCR to the LLM
    /// (Qwen3-1.7B) to extract indicators as structured data.
    ///
    /// Creates no Report and keeps no image. On failure it throws `CaptureError` and the UI
    /// falls back to manual entry (§3.2). The caller (MainActor) performs OCR first and passes
    /// the text in: OCR does not need to enter the actor, and no `UIImage` crosses actors.
    ///
    /// - Parameter text: OCR text of the photographed report.
    /// - Returns: The parsed indicators; may be empty when the JSON is valid but has no rows.
    /// - Throws: `CaptureError.modelNotReady`, `.inferenceFailed` or `.parseFailed`.
    func recognizeIndicators(fromOCRText text: String) async throws -> [ParsedReport.ParsedIndicator] {
        do {
            // Load the LLM (unloads VL first; the OOM gate is handled inside AIRuntime).
            try await AIRuntime.shared.prepare()
        } catch {
            throw CaptureError.modelNotReady
        }

        let prompt = VLPrompts.indicatorsFromText(text)
        var collected = ""
        do {
            // A full lab sheet has a dozen or more rows, so give plenty of tokens. LLM decoding
            // and any VL decoding are serialised by the AIRuntime gate.
            let stream = await AIRuntime.shared.generate(prompt: prompt, maxTokens: 2048)
            for try await chunk in stream {
                collected += chunk.text
            }
        } catch {
            throw CaptureError.inferenceFailed("\(error)")
        }

        // Qwen3 may emit <think>…</think>; strip it before extracting the JSON.
        let cleaned = CaptureService.stripThink(collected)

        #if DEBUG
        // Forensics: raw output (possibly with an unclosed <think>) and the stripped text, to
        // locate the root cause of "empty / invalid JSON".
        // NSLog (unified logging) rather than print: stdout is taken over by Xcode's lldb and
        // idevicesyslog cannot capture it.
        NSLog("KKDBG-VL RAW LLM output (%d chars):\n%@\n--- end RAW ---", collected.count, collected)
        NSLog("KKDBG-VL cleaned (%d chars):\n%@\n--- end cleaned ---", cleaned.count, cleaned)
        #endif

        do {
            return try CaptureService.parseIndicatorsJSON(cleaned)
        } catch let CaptureError.parseFailed(msg) {
            // Surface traits of the model's actual output on screen for on-site diagnosis
            // (raw length / stripped length / prefix).
            let rawLen = collected.count
            let cleanLen = cleaned.count
            let preview = cleaned.isEmpty ? "(strip 后为空)" : String(cleaned.prefix(60))
            throw CaptureError.parseFailed("\(msg)〔raw \(rawLen)字/clean \(cleanLen)字·前缀:\(preview)〕")
        } catch {
            throw CaptureError.parseFailed("\(error)")
        }
    }

    /// Strips Qwen3 `<think>` markup (paired blocks, an unclosed opening tag, a stray closing
    /// tag) and then trims leading whitespace.
    ///
    /// Same logic as `HealthExportService.stripThinkBlocks`; this type is a non-MainActor
    /// actor, so it keeps its own `nonisolated` copy to avoid a cross-isolation call.
    ///
    /// NOTE(review): the tag literals and the loop body were missing from the reviewed copy of
    /// this file and are reconstructed from the description above — confirm against
    /// `HealthExportService.stripThinkBlocks`.
    nonisolated static func stripThink(_ raw: String) -> String {
        let openTag = "<think>"
        let closeTag = "</think>"
        var s = raw

        // Paired blocks: drop everything from the opening tag through its closing tag.
        while let openR = s.range(of: openTag),
              let closeR = s.range(of: closeTag, range: openR.upperBound..<s.endIndex) {
            s.removeSubrange(openR.lowerBound..<closeR.upperBound)
        }
        // Unclosed opening tag: everything after it is reasoning text, so cut it off.
        if let openR = s.range(of: openTag) {
            s = String(s[..<openR.lowerBound])
        }
        // Stray closing tag: keep only what follows it.
        if let closeR = s.range(of: closeTag) {
            s = String(s[closeR.upperBound...])
        }
        while let first = s.first, first.isWhitespace {
            s.removeFirst()
        }
        return s
    }

    /// Pure "VL inference + JSON parse" stage. `assets` must already be written to the Vault.
    private func runVL(on assets: [FileVault.SavedAsset]) async throws -> ParsedReport {
        do {
            try await AIRuntime.shared.prepareVL()
        } catch {
            throw CaptureError.modelNotReady
        }

        let urls = assets.map { FileVault.shared.rootURL.appendingPathComponent($0.relativePath) }

        // OCR reference (local Vision, <1s per page): acts as a digit "transcriber" for the 2B
        // multimodal model to reduce misreads of small print. Any failure silently degrades to
        // an empty string and never blocks the main recognition flow (§3.2).
        let ocr = await Self.ocrReference(for: urls)

        let raw: String
        do {
            raw = try await AIRuntime.shared.analyzeReport(
                imageURLs: urls,
                prompt: VLPrompts.reportExtraction(ocrText: ocr)
            )
        } catch {
            throw CaptureError.inferenceFailed("\(error)")
        }

        do {
            return try CaptureService.parseReportJSON(raw, pageCount: assets.count)
        } catch let CaptureError.parseFailed(msg) {
            throw CaptureError.parseFailed(msg)
        } catch {
            throw CaptureError.parseFailed("\(error)")
        }
    }

    /// OCRs the Vault report images page by page and joins them into a reference text.
    /// At most 4 pages; unreadable pages and pages with blank text are skipped, so the result
    /// may be "".
    ///
    /// Reads `CGImage` straight from ImageIO (not via `UIImage`) to avoid passing a
    /// non-Sendable reference across actors.
    private static func ocrReference(for urls: [URL]) async -> String {
        var pages: [String] = []
        for (idx, url) in urls.prefix(4).enumerated() {
            guard let src = CGImageSourceCreateWithURL(url as CFURL, nil),
                  let cg = CGImageSourceCreateImageAtIndex(src, 0, nil)
            else { continue }
            guard let text = try? await OCRService.recognizeText(in: cg),
                  !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
            else { continue }
            // Page headers are only added when the report has more than one page.
            pages.append(urls.count > 1 ? "【第 \(idx + 1) 页】\n\(text)" : text)
        }
        return pages.joined(separator: "\n")
    }

    // MARK: - JSON parse (static + pure functions → easy to unit-test)

    /// Extracts the first well-formed JSON value from the VL output and parses it.
    ///
    /// Tolerances:
    /// - strips a ```json``` markdown fence
    /// - strips non-JSON text before and after
    /// - fills defaults for missing fields
    ///
    /// Parsing zero indicators still counts as success, but `ParsedReport.isEmpty` is then
    /// true and the UI takes the "manual entry" branch.
    ///
    /// - Parameters:
    ///   - raw: Raw model output.
    ///   - pageCount: Number of pages supplied by the caller; the result never reports fewer.
    /// - Throws: `CaptureError.parseFailed` when the JSON is invalid or its root is neither an
    ///   object nor an array of objects.
    static func parseReportJSON(_ raw: String, pageCount: Int = 1) throws -> ParsedReport {
        // Uses extractBalancedJSON (not the object-only extractJSONObject): with many rows the
        // VL model occasionally emits a bare array [{...},{...}]. Object-only extraction would
        // pair from the first "{", keep only the first indicator and silently drop the rest —
        // data loss on the core feature. A top-level array is treated as the indicators list.
        let root = try decodeJSONRoot(from: raw)

        let dict: [String: Any]
        if let object = root as? [String: Any] {
            dict = object
        } else if let array = root as? [[String: Any]] {
            dict = ["indicators": array]
        } else {
            throw CaptureError.parseFailed("根节点既不是对象也不是数组")
        }

        let title = (dict["title"] as? String)?.trimmingCharacters(in: .whitespaces) ?? ""
        let typeRaw = parseReportType(dict["type"] as? String)
        let reportDate = parseDate(dict["report_date"] as? String) ?? .now
        let institution = (dict["institution"] as? String) ?? ""
        let summary = (dict["summary"] as? String) ?? ""
        let pages = (dict["page_count"] as? Int) ?? pageCount
        let indicators = arrayValue(dict, keys: indicatorArrayKeys).compactMap { parseIndicator($0) }

        return ParsedReport(
            title: title.isEmpty ? String(appLoc: "拍摄识别") : title,
            typeRaw: typeRaw,
            reportDate: reportDate,
            institution: institution,
            summary: summary,
            pageCount: max(pages, pageCount),
            indicators: indicators
        )
    }

    /// Partial-recognition parse: the model outputs `{"indicators":[...]}` (or a bare array)
    /// and only the indicators are extracted.
    ///
    /// Reuses `extractBalancedJSON` + `parseIndicator`. Finding no indicator returns an empty
    /// array (does not throw) and the UI takes the "nothing read, add manually" branch.
    ///
    /// - Throws: `CaptureError.parseFailed` only when the JSON itself is invalid or its root is
    ///   neither an object nor an array of objects.
    static func parseIndicatorsJSON(_ raw: String) throws -> [ParsedReport.ParsedIndicator] {
        let root = try decodeJSONRoot(from: raw)

        // Accept both shapes: {"indicators":[...]} or a direct [...] (the model occasionally
        // omits the outer key).
        let indicatorsRaw: [[String: Any]]
        if let dict = root as? [String: Any] {
            indicatorsRaw = arrayValue(dict, keys: indicatorArrayKeys)
        } else if let array = root as? [[String: Any]] {
            indicatorsRaw = array
        } else {
            throw CaptureError.parseFailed("根节点既不是对象也不是数组")
        }
        return indicatorsRaw.compactMap { parseIndicator($0) }
    }

    /// Shared front half of both parsers: extract → repair → `JSONSerialization`.
    private static func decodeJSONRoot(from raw: String) throws -> Any {
        let jsonString = repairJSON(extractBalancedJSON(from: raw))
        guard let data = jsonString.data(using: .utf8) else {
            throw CaptureError.parseFailed("非 UTF-8 输出")
        }
        do {
            return try JSONSerialization.jsonObject(with: data, options: [.fragmentsAllowed])
        } catch {
            throw CaptureError.parseFailed("JSON 不合法:\(error.localizedDescription)")
        }
    }

    /// Extracts the first balanced `{...}` from a string, coping with markdown fences and
    /// junk before/after it.
    ///
    /// On failure the (de-fenced) input is returned and the subsequent `JSONSerialization`
    /// call reports the error.
    ///
    /// NOTE(review): the scanning part of this body was missing from the reviewed copy of the
    /// file and is reconstructed from the description above — confirm against the original.
    static func extractJSONObject(from raw: String) -> String {
        let s = strippingMarkdownFence(raw)
        return firstBalancedValue(in: s, openers: ["{"]) ?? s
    }

    /// Repairs common near-JSON produced by the model: curly double quotes become straight
    /// quotes, and a trailing comma before `}` or `]` is removed.
    ///
    /// NOTE(review): the declaration line was missing from the reviewed copy; the signature is
    /// inferred from the call sites `repairJSON(extractBalancedJSON(from:))` and the body's use
    /// of `s`.
    static func repairJSON(_ s: String) -> String {
        var t = s
        t = t.replacingOccurrences(of: "\u{201C}", with: "\"") // “
        t = t.replacingOccurrences(of: "\u{201D}", with: "\"") // ”
        if let re = try? NSRegularExpression(pattern: ",\\s*([}\\]])") {
            t = re.stringByReplacingMatches(
                in: t,
                range: NSRange(t.startIndex..., in: t),
                withTemplate: "$1")
        }
        return t
    }

    /// Extracts the first balanced JSON value, `{...}` or `[...]`, whichever appears first.
    ///
    /// Used for partial recognition (the model may output `{"indicators":[...]}` or a bare
    /// `[...]`). On failure the de-fenced input is returned and the subsequent
    /// `JSONSerialization` call reports the error.
    ///
    /// NOTE(review): the scanning part of this body was missing from the reviewed copy of the
    /// file and is reconstructed from the description above — confirm against the original.
    static func extractBalancedJSON(from raw: String) -> String {
        let s = strippingMarkdownFence(raw)
        return firstBalancedValue(in: s, openers: ["{", "["]) ?? s
    }

    /// Trims surrounding whitespace and, when the text starts with a ``` fence, drops the
    /// opening fence line and everything from the last ``` onwards.
    private static func strippingMarkdownFence(_ raw: String) -> String {
        var s = raw.trimmingCharacters(in: .whitespacesAndNewlines)
        guard s.hasPrefix("```") else { return s }
        // `isNewline` rather than == "\n": "\r\n" is a single Character that never equals "\n".
        if let firstNewline = s.firstIndex(where: \.isNewline) {
            s = String(s[s.index(after: firstNewline)...])
        }
        if let endRange = s.range(of: "```", options: .backwards) {
            s = String(s[..<endRange.lowerBound])
        }
        return s
    }

    /// Returns the substring from the first character in `openers` through its matching
    /// closing bracket, or `nil` when there is no opener or the brackets never balance.
    /// Brackets inside double-quoted strings (with backslash escapes) are not counted.
    private static func firstBalancedValue(in s: String, openers: Set<Character>) -> String? {
        guard let start = s.firstIndex(where: { openers.contains($0) }) else { return nil }
        var depth = 0
        var inString = false
        var escaped = false
        var idx = start
        while idx < s.endIndex {
            let ch = s[idx]
            if inString {
                if escaped {
                    escaped = false
                } else if ch == "\\" {
                    escaped = true
                } else if ch == "\"" {
                    inString = false
                }
            } else {
                switch ch {
                case "\"":
                    inString = true
                case "{", "[":
                    depth += 1
                case "}", "]":
                    depth -= 1
                    if depth == 0 { return String(s[start...idx]) }
                default:
                    break
                }
            }
            idx = s.index(after: idx)
        }
        return nil
    }

    // MARK: - Field helpers

    /// Maps the model's report-type string onto a known `ReportType` raw value, falling back
    /// to `.other` for missing or unknown values.
    ///
    /// NOTE(review): the declaration line was missing from the reviewed copy; the access level
    /// is left at the default so that no existing caller can break.
    static func parseReportType(_ raw: String?) -> String {
        guard let raw = raw?.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() else {
            return ReportType.other.rawValue
        }
        return ReportType(rawValue: raw)?.rawValue ?? ReportType.other.rawValue
    }

    /// Parses a report date in any of the formats the model is known to emit; `nil` when the
    /// input is missing, blank or matches none of them.
    private static func parseDate(_ raw: String?) -> Date? {
        guard let s = raw?.trimmingCharacters(in: .whitespaces), !s.isEmpty else { return nil }
        let f = DateFormatter()
        f.locale = Locale(identifier: "en_US_POSIX")
        // Different sources make the VL model emit different date formats. Try each in turn so
        // that a parse failure does not fall back to "today" (the `?? .now` in parseReportJSON),
        // which would misplace the report when the archive groups by reportDate year (C1).
        let patterns = ["yyyy-MM-dd", "yyyy/MM/dd", "yyyy.MM.dd",
                        "yyyy年MM月dd日", "yyyy年M月d日", "yyyy年MM月",
                        "yyyy-MM", "yyyy/MM"]
        for p in patterns {
            f.dateFormat = p
            if let d = f.date(from: s) { return d }
        }
        return nil
    }

    /// Builds one indicator from a JSON object, accepting several English/Chinese key
    /// spellings. Returns `nil` when no non-empty name is present.
    private static func parseIndicator(_ d: [String: Any]) -> ParsedReport.ParsedIndicator? {
        guard let name = stringValue(d, keys: ["name", "item", "indicator", "test", "项目", "指标", "指标名", "指标名称", "检查项目", "检验项目"])?
                .trimmingCharacters(in: .whitespaces),
              !name.isEmpty
        else { return nil }

        let value = stringValue(d, keys: ["value", "result", "reading", "结果", "数值", "检测值", "测定值"]) ?? ""
        let unit = stringValue(d, keys: ["unit", "单位"]) ?? ""
        let range = stringValue(d, keys: ["range", "reference", "reference_range", "ref", "参考", "参考值", "参考范围", "正常范围"]) ?? ""
        let statusRaw = stringValue(d, keys: ["status", "flag", "abnormal", "异常", "提示", "标记"])
        let status = parseIndicatorStatus(raw: statusRaw, value: value, range: range)
        let evidence = parseEvidenceLocation(d)

        return .init(
            name: name,
            value: value,
            unit: unit,
            range: range,
            status: status,
            sourcePageIndex: evidence?.pageIndex,
            sourceBoxX: evidence?.box.x,
            sourceBoxY: evidence?.box.y,
            sourceBoxWidth: evidence?.box.width,
            sourceBoxHeight: evidence?.box.height
        )
    }

    /// Reads the optional evidence location: a 1-based page number and a 4-element box
    /// `[x, y, width, height]` that must lie within the unit square. Returns the page as a
    /// 0-based index, or `nil` when either part is missing or out of bounds.
    private static func parseEvidenceLocation(
        _ d: [String: Any]
    ) -> (pageIndex: Int, box: (x: Double, y: Double, width: Double, height: Double))? {
        guard let page = intValue(d, keys: ["source_page", "sourcePage", "page", "页码", "来源页码"]),
              page >= 1,
              let box = numberArrayValue(d, keys: ["source_box", "sourceBox", "box", "bbox", "位置", "来源位置"]),
              box.count == 4
        else { return nil }

        let x = box[0]
        let y = box[1]
        let width = box[2]
        let height = box[3]
        guard x >= 0, y >= 0, width > 0, height > 0,
              x + width <= 1, y + height <= 1
        else { return nil }

        return (page - 1, (x, y, width, height))
    }

    /// First value found under `keys` that is a string, or a number rendered as a string.
    private static func stringValue(_ d: [String: Any], keys: [String]) -> String? {
        for key in keys {
            if let s = d[key] as? String { return s }
            if let n = d[key] as? NSNumber { return n.stringValue }
        }
        return nil
    }

    /// First value found under `keys` that is an integer, a number, or a string holding an
    /// integer.
    private static func intValue(_ d: [String: Any], keys: [String]) -> Int? {
        for key in keys {
            if let i = d[key] as? Int { return i }
            if let n = d[key] as? NSNumber { return n.intValue }
            if let s = d[key] as? String,
               let i = Int(s.trimmingCharacters(in: .whitespacesAndNewlines)) {
                return i
            }
        }
        return nil
    }

    /// First value found under `keys` that is an array whose every element is numeric (or a
    /// numeric string). An array with any non-numeric element is skipped.
    private static func numberArrayValue(_ d: [String: Any], keys: [String]) -> [Double]? {
        for key in keys {
            if let arr = d[key] as? [Double] { return arr }
            if let arr = d[key] as? [NSNumber] { return arr.map(\.doubleValue) }
            if let arr = d[key] as? [Any] {
                let values = arr.compactMap { item -> Double? in
                    if let d = item as? Double { return d }
                    if let n = item as? NSNumber { return n.doubleValue }
                    if let s = item as? String {
                        return Double(s.trimmingCharacters(in: .whitespacesAndNewlines))
                    }
                    return nil
                }
                // Only accept the array when nothing was dropped by compactMap.
                if values.count == arr.count { return values }
            }
        }
        return nil
    }

    /// First value found under `keys` that is an array of JSON objects; `[]` when none is.
    private static func arrayValue(_ d: [String: Any], keys: [String]) -> [[String: Any]] {
        for key in keys {
            if let arr = d[key] as? [[String: Any]] { return arr }
        }
        return []
    }

    /// Maps the model's status flag onto `IndicatorStatus`. An unrecognised or missing flag is
    /// inferred from value vs. reference range, and defaults to `.normal` when that fails too.
    private static func parseIndicatorStatus(raw: String?, value: String, range: String) -> IndicatorStatus {
        let normalized = raw?
            .trimmingCharacters(in: .whitespacesAndNewlines)
            .lowercased() ?? ""

        if ["high", "h", "hi", "above", "up", "↑", "⬆", "+", "偏高", "高", "增高", "升高", "偏高↑", "h↑"].contains(normalized) {
            return .high
        }
        if ["low", "l", "lo", "below", "down", "↓", "⬇", "-", "偏低", "低", "降低", "偏低↓", "l↓"].contains(normalized) {
            return .low
        }
        if ["normal", "n", "ok", "正常", "阴性", "无异常"].contains(normalized) {
            return .normal
        }
        return inferStatus(value: value, range: range) ?? .normal
    }

    /// Infers a status by comparing the first number in `value` with the reference `range`.
    ///
    /// Supports upper bounds (`<` / `≤`), lower bounds (`>` / `≥`) and `lower-upper` intervals
    /// written with various dash/tilde separators. Returns `nil` when either side has no
    /// usable number.
    private static func inferStatus(value: String, range: String) -> IndicatorStatus? {
        guard let v = firstNumber(in: value) else { return nil }

        // Normalise every interval separator to an ASCII hyphen.
        let compact = range
            .replacingOccurrences(of: "—", with: "-")
            .replacingOccurrences(of: "–", with: "-")
            .replacingOccurrences(of: "~", with: "-")
            .replacingOccurrences(of: "~", with: "-")
            .replacingOccurrences(of: "至", with: "-")
            .trimmingCharacters(in: .whitespacesAndNewlines)
        guard !compact.isEmpty else { return nil }

        let numbers = numbers(in: compact)

        if compact.contains("<") || compact.contains("≤") || compact.contains("<") {
            guard let upper = numbers.first else { return nil }
            return v > upper ? .high : .normal
        }
        if compact.contains(">") || compact.contains("≥") || compact.contains(">") {
            guard let lower = numbers.first else { return nil }
            return v < lower ? .low : .normal
        }
        if numbers.count >= 2 {
            let lower = numbers[0]
            let upper = numbers[1]
            if v < lower { return .low }
            if v > upper { return .high }
            return .normal
        }
        return nil
    }

    /// First decimal number found in `text`, if any.
    private static func firstNumber(in text: String) -> Double? {
        numbers(in: text).first
    }

    /// All decimal numbers in `text`, in order of appearance.
    ///
    /// A "-" counts as a minus sign only when it does not directly follow a digit. Without
    /// that guard the separator in an unspaced interval such as "3.5-5.5" was read as a sign,
    /// giving [3.5, -5.5], so every in-range value compared greater than the "upper bound"
    /// and was flagged high.
    private static func numbers(in text: String) -> [Double] {
        let pattern = #"(?:(?<!\d)-)?\d+(?:\.\d+)?"#
        guard let regex = try? NSRegularExpression(pattern: pattern) else { return [] }
        let ns = text as NSString
        let range = NSRange(location: 0, length: ns.length)
        return regex.matches(in: text, range: range).compactMap {
            Double(ns.substring(with: $0.range))
        }
    }
}

// MARK: - Report ↔ CaptureService bridge (MainActor side)
//
// CaptureService is an actor and cannot take a Report directly (@Model is not Sendable).
// The C2 "re-interpret" UI goes through this path:
// ```
// let assets = report.savedAssets
// let parsed = try await CaptureService.shared.reanalyze(assets: assets)
// report.applyReanalyzed(parsed, in: ctx)
// ```
extension Report {
    /// The report's linked assets converted to `SavedAsset`, ready for
    /// `CaptureService.reanalyze`.
    var savedAssets: [FileVault.SavedAsset] {
        assets.map { .init(relativePath: $0.relativePath, bytes: $0.bytes) }
    }

    /// Writes a VL re-recognition result back into this Report.
    ///
    /// - indicators: all old ones are deleted and the new batch is inserted with the
    ///   relationship maintained (cascade delete clears caches)
    /// - summary / institution: overwritten only when non-empty, so that an empty value does
    ///   not wipe out a good earlier result
    ///
    /// Must be called on the MainActor with the main SwiftData context. Save and file-removal
    /// failures are ignored (best effort).
    @MainActor
    func applyReanalyzed(_ parsed: ParsedReport, in ctx: ModelContext) {
        if !parsed.summary.isEmpty { self.summary = parsed.summary }
        if !parsed.institution.isEmpty { self.institution = parsed.institution }

        // Delete all old indicators. The Asset each may carry (a partial snapshot image) has a
        // nullify, not cascade, relationship, so the Vault file must be unlinked and the Asset
        // record deleted by hand; otherwise orphan images are left behind (violates the §6
        // privacy commitment). See TimelineEntryDetailView.deleteIndicator for the reference
        // implementation.
        for old in indicators {
            if let asset = old.asset {
                try? FileVault.shared.remove(relativePath: asset.relativePath)
                ctx.delete(asset)
            }
            ctx.delete(old)
        }
        indicators.removeAll()

        // Insert the new indicators.
        for p in parsed.indicators {
            let i = Indicator(
                name: p.name,
                value: p.value,
                unit: p.unit,
                range: p.range,
                status: p.status,
                capturedAt: reportDate,
                report: self,
                source: .report,
                sourcePageIndex: p.sourcePageIndex,
                sourceBoxX: p.sourceBoxX,
                sourceBoxY: p.sourceBoxY,
                sourceBoxWidth: p.sourceBoxWidth,
                sourceBoxHeight: p.sourceBoxHeight
            )
            ctx.insert(i)
        }
        try? ctx.save()
    }
}