Files
kangkang/康康/Services/CaptureService.swift
link2026 9d856fcfc4 ```
feat(AI): 集成MNN推理引擎替换MLX作为主AI运行时

- 引入MNN(alibaba) + Arm SME2 + CPU作为主AI运行时,支持A19/iPhone17的
  SME2和A17的NEON加速
- 添加MLX Swift作为兜底GPU推理方案,实现双后端切换机制
- 使用单一Qwen3.5-2B多模态模型(1.2GB),替代原有的LLM+VL分离架构
- 实现InferenceEngine.current引擎选择逻辑,真机默认MNN,模拟器回退MLX
- 更新AIAgent架构,通过MNNLLMBridge(ObjC++) → MNNBackend进行推理
- 修改队列机制防止并发推理导致OOM,使用信号量闸门控制显存占用
- 更新文档中的技术栈说明、模块边界和周次交付计划
```
2026-06-15 09:24:59 +08:00

660 lines
29 KiB
Swift
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import Foundation
import UIKit
import ImageIO
import SwiftData
/// VL (, SwiftData )
/// Indicator/Report prompt schema
struct ParsedReport: Sendable {
var title: String
var typeRaw: String
var reportDate: Date
var institution: String
var summary: String
var pageCount: Int
var indicators: [ParsedIndicator]
struct ParsedIndicator: Sendable, Identifiable {
// : ForEach , indices id
let id = UUID()
var name: String
var value: String
var unit: String
var range: String
var status: IndicatorStatus
var sourcePageIndex: Int?
var sourceBoxX: Double?
var sourceBoxY: Double?
var sourceBoxWidth: Double?
var sourceBoxHeight: Double?
}
/// = ,UI 退
var isEmpty: Bool { indicators.isEmpty }
/// ,退 UI
/// nonisolated: MainActor , CaptureService(actor) extractReportMeta
/// actor , nonisolated (Swift 6)
nonisolated static func empty(date: Date = .now) -> ParsedReport {
ParsedReport(
title: "",
typeRaw: ReportType.other.rawValue,
reportDate: date,
institution: "",
summary: "",
pageCount: 1,
indicators: []
)
}
}
/// CaptureService UI (退 vs )
enum CaptureError: Error, LocalizedError {
case modelNotReady
case inferenceFailed(String)
case parseFailed(String)
var errorDescription: String? {
switch self {
case .modelNotReady: return String(appLoc: "VL 模型尚未就绪")
case .inferenceFailed(let m): return String(appLoc: "识别失败:\(m)")
case .parseFailed(let m): return String(appLoc: "结构化失败:\(m)")
}
}
}
/// `CaptureService` actor AIRuntime( actor),
/// stateless, §3.1 ""
actor CaptureService {
static let shared = CaptureService()
private init() {}
/// Vault Asset VL, ParsedReport
/// :
/// - UnifiedCaptureFlow (UI ,/ assets )
/// -
/// - C2(W5)
/// SwiftData (MainActor), `Report.applyReanalyzed(_:in:)`
/// @Model , Sendable actor
func reanalyze(assets: [FileVault.SavedAsset]) async throws -> ParsedReport {
try await runVL(on: assets)
}
/// meta :**,**
/// 2B OOM(jetsam = ),
/// :Vision OCR(,<1s/) LLM {title,type,date,institution}(~50 token)
/// :OCR / / ( meta, recognized:false),(§3.2)
/// indicators
func extractReportMeta(assets: [FileVault.SavedAsset]) async -> (meta: ParsedReport, recognized: Bool) {
let urls = assets.map { FileVault.shared.rootURL.appendingPathComponent($0.relativePath) }
let ocr = await Self.ocrReference(for: urls)
guard !ocr.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
return (.empty(), false)
}
do {
try await AIRuntime.shared.prepare() // LLM();OOM VL
} catch {
return (.empty(), false)
}
var collected = ""
do {
// meta ,256 token , 2048
let stream = await AIRuntime.shared.generate(prompt: VLPrompts.reportMetaFromText(ocr),
maxTokens: 256)
for try await chunk in stream { collected += chunk.text }
} catch {
return (.empty(), false)
}
let cleaned = CaptureService.stripThink(collected)
guard var parsed = try? CaptureService.parseReportJSON(cleaned, pageCount: assets.count) else {
return (.empty(), false)
}
// meta + ,
parsed.indicators = []
return (parsed, true)
}
/// OCR : Vision OCR LLM(Qwen3-1.7B)
/// Report; `CaptureError`,UI 退(§3.2)
/// (MainActor) OCR,OCR actor, UIImage actor
func recognizeIndicators(fromOCRText text: String) async throws -> [ParsedReport.ParsedIndicator] {
do {
try await AIRuntime.shared.prepare() // LLM( VL,OOM )
} catch {
throw CaptureError.modelNotReady
}
let prompt = VLPrompts.indicatorsFromText(text)
var collected = ""
do {
// , token;LLM VL AIRuntime
let stream = await AIRuntime.shared.generate(prompt: prompt, maxTokens: 2048)
for try await chunk in stream {
collected += chunk.text
}
} catch {
throw CaptureError.inferenceFailed("\(error)")
}
// Qwen3 <think></think>, JSON
let cleaned = CaptureService.stripThink(collected)
#if DEBUG
// :( <think>)+ strip ,/ JSON
// NSLog() print(stdout Xcode lldb ,idevicesyslog )
NSLog("KKDBG-VL RAW LLM output (%d chars):\n%@\n--- end RAW ---", collected.count, collected)
NSLog("KKDBG-VL cleaned (%d chars):\n%@\n--- end cleaned ---", cleaned.count, cleaned)
#endif
do {
return try CaptureService.parseIndicatorsJSON(cleaned)
} catch let CaptureError.parseFailed(msg) {
// ,便( / strip / )
let rawLen = collected.count
let cleanLen = cleaned.count
let preview = cleaned.isEmpty ? "(strip 后为空)" : String(cleaned.prefix(60))
throw CaptureError.parseFailed("\(msg)raw \(rawLen)字/clean \(cleanLen)字·前缀:\(preview)")
} catch {
throw CaptureError.parseFailed("\(error)")
}
}
/// Qwen3 <think></think>( / / ), trim
/// HealthExportService.stripThinkBlocks , MainActor actor, nonisolated
nonisolated static func stripThink(_ raw: String) -> String {
var s = raw
while let openR = s.range(of: "<think>"),
let closeR = s.range(of: "</think>", range: openR.upperBound..<s.endIndex) {
s.removeSubrange(openR.lowerBound..<closeR.upperBound)
}
if let openR = s.range(of: "<think>") { s = String(s[..<openR.lowerBound]) }
if let closeR = s.range(of: "</think>") { s = String(s[closeR.upperBound...]) }
while let first = s.first, first.isWhitespace { s.removeFirst() }
return s
}
/// VL + JSON assets Vault
private func runVL(on assets: [FileVault.SavedAsset]) async throws -> ParsedReport {
do {
try await AIRuntime.shared.prepareVL()
} catch {
throw CaptureError.modelNotReady
}
let urls = assets.map { FileVault.shared.rootURL.appendingPathComponent($0.relativePath) }
// OCR (Vision ,<1s/): 2B ,
// 退,(§3.2)
let ocr = await Self.ocrReference(for: urls)
let raw: String
do {
raw = try await AIRuntime.shared.analyzeReport(
imageURLs: urls,
prompt: VLPrompts.reportExtraction(ocrText: ocr)
)
} catch {
throw CaptureError.inferenceFailed("\(error)")
}
do {
return try CaptureService.parseReportJSON(raw, pageCount: assets.count)
} catch let CaptureError.parseFailed(msg) {
throw CaptureError.parseFailed(msg)
} catch {
throw CaptureError.parseFailed("\(error)")
}
}
/// Vault OCR 4 ;/ ""
/// ImageIO CGImage( UIImage, actor Sendable )
private static func ocrReference(for urls: [URL]) async -> String {
var pages: [String] = []
for (idx, url) in urls.prefix(4).enumerated() {
guard let src = CGImageSourceCreateWithURL(url as CFURL, nil) else { continue }
// OCR : 4000px 48MB, VL ,
// jetsam 3000px Vision,;
// VL ,OCR ,
let thumbOptions: [CFString: Any] = [
kCGImageSourceCreateThumbnailFromImageAlways: true,
kCGImageSourceCreateThumbnailWithTransform: true,
kCGImageSourceShouldCacheImmediately: true,
kCGImageSourceThumbnailMaxPixelSize: 3000
]
guard let cg = CGImageSourceCreateThumbnailAtIndex(src, 0, thumbOptions as CFDictionary) else { continue }
guard let text = try? await OCRService.recognizeText(in: cg),
!text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { continue }
pages.append(urls.count > 1 ? "【第 \(idx + 1) 页】\n\(text)" : text)
}
return pages.joined(separator: "\n")
}
// MARK: - JSON parse(static + 便)
/// VL JSON
/// :
/// - ```json``` markdown
/// - JSON
/// -
/// indicator , ParsedReport.isEmpty = true,
/// UI
static func parseReportJSON(_ raw: String, pageCount: Int = 1) throws -> ParsedReport {
// extractBalancedJSON( {} extractJSONObject):VL
// [{...},{...}], { , indicator
// indicators
let jsonString = repairJSON(extractBalancedJSON(from: raw))
guard let data = jsonString.data(using: .utf8) else {
throw CaptureError.parseFailed("非 UTF-8 输出")
}
let obj: Any
do {
obj = try JSONSerialization.jsonObject(with: data, options: [.fragmentsAllowed])
} catch {
throw CaptureError.parseFailed("JSON 不合法:\(error.localizedDescription)")
}
let dict: [String: Any]
if let d = obj as? [String: Any] {
dict = d
} else if let arr = obj as? [[String: Any]] {
dict = ["indicators": arr]
} else {
throw CaptureError.parseFailed("根节点既不是对象也不是数组")
}
let title = (dict["title"] as? String)?.trimmingCharacters(in: .whitespaces) ?? ""
let typeRaw = parseReportType(dict["type"] as? String)
let reportDate = parseDate(dict["report_date"] as? String) ?? .now
let institution = (dict["institution"] as? String) ?? ""
let summary = (dict["summary"] as? String) ?? ""
let pages = (dict["page_count"] as? Int) ?? pageCount
let indicatorsRaw = arrayValue(dict, keys: ["indicators", "indicator", "items", "指标", "指标列表", "项目"])
let indicators: [ParsedReport.ParsedIndicator] = indicatorsRaw.compactMap {
parseIndicator($0)
}
return ParsedReport(
title: title.isEmpty ? String(appLoc: "拍摄识别") : title,
typeRaw: typeRaw,
reportDate: reportDate,
institution: institution,
summary: summary,
pageCount: max(pages, pageCount),
indicators: indicators
)
}
/// :VL `{"indicators":[...]}`, indicators
/// `extractJSONObject` + `parseIndicator` indicator (),
/// UI ,JSON `parseFailed`
static func parseIndicatorsJSON(_ raw: String) throws -> [ParsedReport.ParsedIndicator] {
let jsonString = repairJSON(extractBalancedJSON(from: raw))
guard let data = jsonString.data(using: .utf8) else {
throw CaptureError.parseFailed("非 UTF-8 输出")
}
let obj: Any
do {
obj = try JSONSerialization.jsonObject(with: data, options: [.fragmentsAllowed])
} catch {
throw CaptureError.parseFailed("JSON 不合法:\(error.localizedDescription)")
}
// :{"indicators":[...]} [...]( key)
let indicatorsRaw: [[String: Any]]
if let dict = obj as? [String: Any] {
indicatorsRaw = arrayValue(dict, keys: ["indicators", "indicator", "items", "指标", "指标列表", "项目"])
} else if let arr = obj as? [[String: Any]] {
indicatorsRaw = arr
} else {
throw CaptureError.parseFailed("根节点既不是对象也不是数组")
}
return indicatorsRaw.compactMap { parseIndicator($0) }
}
/// {...} markdown
/// ( JSONSerialization )
static func extractJSONObject(from raw: String) -> String {
var s = raw.trimmingCharacters(in: .whitespacesAndNewlines)
// markdown
if s.hasPrefix("```") {
// ```json ```
if let firstNewline = s.firstIndex(of: "\n") {
s = String(s[s.index(after: firstNewline)...])
}
// ```
if let endRange = s.range(of: "```", options: .backwards) {
s = String(s[..<endRange.lowerBound])
}
s = s.trimmingCharacters(in: .whitespacesAndNewlines)
}
// {, }
guard let start = s.firstIndex(of: "{") else { return s }
var depth = 0
var inString = false
var escape = false
var idx = start
while idx < s.endIndex {
let ch = s[idx]
if escape { escape = false }
else if ch == "\\" { escape = true }
else if ch == "\"" { inString.toggle() }
else if !inString {
if ch == "{" { depth += 1 }
else if ch == "}" {
depth -= 1
if depth == 0 {
return String(s[start...idx])
}
}
}
idx = s.index(after: idx)
}
return String(s[start...])
}
/// (2B) JSON , JSONSerialization :
/// - "( key/value )
/// - /(`,}` / `,]` `}` / `]`)
/// ;,
static func repairJSON(_ s: String) -> String {
var t = s
t = t.replacingOccurrences(of: "\u{201C}", with: "\"") //
t = t.replacingOccurrences(of: "\u{201D}", with: "\"") //
if let re = try? NSRegularExpression(pattern: ",\\s*([}\\]])") {
t = re.stringByReplacingMatches(
in: t, range: NSRange(t.startIndex..., in: t), withTemplate: "$1")
}
return t
}
/// JSON ,`{...}` `[...]`
/// ( `{"indicators":[...]}` `[...]`)
/// ( JSONSerialization )
static func extractBalancedJSON(from raw: String) -> String {
var s = raw.trimmingCharacters(in: .whitespacesAndNewlines)
if s.hasPrefix("```") {
if let firstNewline = s.firstIndex(of: "\n") {
s = String(s[s.index(after: firstNewline)...])
}
if let endRange = s.range(of: "```", options: .backwards) {
s = String(s[..<endRange.lowerBound])
}
s = s.trimmingCharacters(in: .whitespacesAndNewlines)
}
let firstBrace = s.firstIndex(of: "{")
let firstBracket = s.firstIndex(of: "[")
let start: String.Index
let open: Character
let close: Character
switch (firstBrace, firstBracket) {
case let (b?, k?):
if b < k { start = b; open = "{"; close = "}" }
else { start = k; open = "["; close = "]" }
case let (b?, nil): start = b; open = "{"; close = "}"
case let (nil, k?): start = k; open = "["; close = "]"
default: return s
}
var depth = 0
var inString = false
var escape = false
var idx = start
while idx < s.endIndex {
let ch = s[idx]
if escape { escape = false }
else if ch == "\\" { escape = true }
else if ch == "\"" { inString.toggle() }
else if !inString {
if ch == open { depth += 1 }
else if ch == close {
depth -= 1
if depth == 0 { return String(s[start...idx]) }
}
}
idx = s.index(after: idx)
}
return String(s[start...])
}
private static func parseReportType(_ raw: String?) -> String {
guard let raw = raw?.lowercased() else { return ReportType.other.rawValue }
return ReportType(rawValue: raw)?.rawValue ?? ReportType.other.rawValue
}
private static func parseDate(_ raw: String?) -> Date? {
guard let s = raw?.trimmingCharacters(in: .whitespaces), !s.isEmpty else { return nil }
let f = DateFormatter()
f.locale = Locale(identifier: "en_US_POSIX")
// VL ;,退(parseReportJSON
// ?? .now) reportDate (C1)
let patterns = ["yyyy-MM-dd", "yyyy/MM/dd", "yyyy.MM.dd",
"yyyy年MM月dd日", "yyyy年M月d日", "yyyy年MM月", "yyyy-MM", "yyyy/MM"]
for p in patterns {
f.dateFormat = p
if let d = f.date(from: s) { return d }
}
return nil
}
private static func parseIndicator(_ d: [String: Any]) -> ParsedReport.ParsedIndicator? {
guard let name = stringValue(d, keys: ["name", "item", "indicator", "test", "项目", "指标", "指标名", "指标名称", "检查项目", "检验项目"])?.trimmingCharacters(in: .whitespaces),
!name.isEmpty else { return nil }
let value: String
if let v = stringValue(d, keys: ["value", "result", "reading", "结果", "数值", "检测值", "测定值"]) { value = v }
else { value = "" }
let unit = stringValue(d, keys: ["unit", "单位"]) ?? ""
let range = stringValue(d, keys: ["range", "reference", "reference_range", "ref", "参考", "参考值", "参考范围", "正常范围"]) ?? ""
let statusRaw = stringValue(d, keys: ["status", "flag", "abnormal", "异常", "提示", "标记"])
let status = parseIndicatorStatus(raw: statusRaw, value: value, range: range)
let evidence = parseEvidenceLocation(d)
return .init(
name: name,
value: value,
unit: unit,
range: range,
status: status,
sourcePageIndex: evidence?.pageIndex,
sourceBoxX: evidence?.box.x,
sourceBoxY: evidence?.box.y,
sourceBoxWidth: evidence?.box.width,
sourceBoxHeight: evidence?.box.height
)
}
private static func parseEvidenceLocation(_ d: [String: Any]) -> (pageIndex: Int, box: (x: Double, y: Double, width: Double, height: Double))? {
guard let page = intValue(d, keys: ["source_page", "sourcePage", "page", "页码", "来源页码"]),
page >= 1,
let box = numberArrayValue(d, keys: ["source_box", "sourceBox", "box", "bbox", "位置", "来源位置"]),
box.count == 4 else {
return nil
}
let x = box[0]
let y = box[1]
let width = box[2]
let height = box[3]
guard x >= 0, y >= 0, width > 0, height > 0, x + width <= 1, y + height <= 1 else {
return nil
}
return (page - 1, (x, y, width, height))
}
private static func stringValue(_ d: [String: Any], keys: [String]) -> String? {
for key in keys {
if let s = d[key] as? String {
return s
}
if let n = d[key] as? NSNumber {
return n.stringValue
}
}
return nil
}
private static func intValue(_ d: [String: Any], keys: [String]) -> Int? {
for key in keys {
if let i = d[key] as? Int {
return i
}
if let n = d[key] as? NSNumber {
return n.intValue
}
if let s = d[key] as? String, let i = Int(s.trimmingCharacters(in: .whitespacesAndNewlines)) {
return i
}
}
return nil
}
private static func numberArrayValue(_ d: [String: Any], keys: [String]) -> [Double]? {
for key in keys {
if let arr = d[key] as? [Double] {
return arr
}
if let arr = d[key] as? [NSNumber] {
return arr.map(\.doubleValue)
}
if let arr = d[key] as? [Any] {
let values = arr.compactMap { item -> Double? in
if let d = item as? Double { return d }
if let n = item as? NSNumber { return n.doubleValue }
if let s = item as? String { return Double(s.trimmingCharacters(in: .whitespacesAndNewlines)) }
return nil
}
if values.count == arr.count {
return values
}
}
}
return nil
}
private static func arrayValue(_ d: [String: Any], keys: [String]) -> [[String: Any]] {
for key in keys {
if let arr = d[key] as? [[String: Any]] {
return arr
}
}
return []
}
private static func parseIndicatorStatus(raw: String?, value: String, range: String) -> IndicatorStatus {
let normalized = raw?
.trimmingCharacters(in: .whitespacesAndNewlines)
.lowercased() ?? ""
if ["high", "h", "hi", "above", "up", "", "", "+", "偏高", "", "增高", "升高", "偏高↑", "h↑"].contains(normalized) {
return .high
}
if ["low", "l", "lo", "below", "down", "", "", "-", "偏低", "", "降低", "偏低↓", "l↓"].contains(normalized) {
return .low
}
if ["normal", "n", "ok", "正常", "阴性", "无异常"].contains(normalized) {
return .normal
}
return inferStatus(value: value, range: range) ?? .normal
}
private static func inferStatus(value: String, range: String) -> IndicatorStatus? {
guard let v = firstNumber(in: value) else { return nil }
let compact = range
.replacingOccurrences(of: "", with: "-")
.replacingOccurrences(of: "", with: "-")
.replacingOccurrences(of: "", with: "-")
.replacingOccurrences(of: "~", with: "-")
.replacingOccurrences(of: "", with: "-")
.trimmingCharacters(in: .whitespacesAndNewlines)
guard !compact.isEmpty else { return nil }
let numbers = numbers(in: compact)
if compact.contains("<") || compact.contains("") || compact.contains("") {
guard let upper = numbers.first else { return nil }
return v > upper ? .high : .normal
}
if compact.contains(">") || compact.contains("") || compact.contains("") {
guard let lower = numbers.first else { return nil }
return v < lower ? .low : .normal
}
if numbers.count >= 2 {
let lower = numbers[0]
let upper = numbers[1]
if v < lower { return .low }
if v > upper { return .high }
return .normal
}
return nil
}
private static func firstNumber(in text: String) -> Double? {
numbers(in: text).first
}
private static func numbers(in text: String) -> [Double] {
let pattern = #"-?\d+(?:\.\d+)?"#
guard let regex = try? NSRegularExpression(pattern: pattern) else { return [] }
let ns = text as NSString
let range = NSRange(location: 0, length: ns.length)
return regex.matches(in: text, range: range).compactMap {
Double(ns.substring(with: $0.range))
}
}
}
// MARK: - Report CaptureService (MainActor )
//
// CaptureService actor, Report(@Model Sendable)
// C2UI :
// ```
// let assets = report.savedAssets
// let parsed = try await CaptureService.shared.reanalyze(assets: assets)
// report.applyReanalyzed(parsed, in: ctx)
// ```
extension Report {
/// Asset SavedAsset, CaptureService.reanalyze
var savedAssets: [FileVault.SavedAsset] {
assets.map { .init(relativePath: $0.relativePath, bytes: $0.bytes) }
}
/// VL Report
/// - indicators:,(cascade delete )
/// - summary / institution:,
/// MainActor / SwiftData
@MainActor
func applyReanalyzed(_ parsed: ParsedReport, in ctx: ModelContext) {
if !parsed.summary.isEmpty {
self.summary = parsed.summary
}
if !parsed.institution.isEmpty {
self.institution = parsed.institution
}
// indicators Asset() nullify cascade,
// unlink Vault + Asset ,( §6 )
// TimelineEntryDetailView.deleteIndicator
for old in indicators {
if let asset = old.asset {
try? FileVault.shared.remove(relativePath: asset.relativePath)
ctx.delete(asset)
}
ctx.delete(old)
}
indicators.removeAll()
// indicators
for p in parsed.indicators {
let i = Indicator(
name: p.name,
value: p.value,
unit: p.unit,
range: p.range,
status: p.status,
capturedAt: reportDate,
report: self,
source: .report,
sourcePageIndex: p.sourcePageIndex,
sourceBoxX: p.sourceBoxX,
sourceBoxY: p.sourceBoxY,
sourceBoxWidth: p.sourceBoxWidth,
sourceBoxHeight: p.sourceBoxHeight
)
ctx.insert(i)
}
try? ctx.save()
}
}