feat(quick): 异常项快拍改为局部小框 + VL 识别

将「异常项快拍」从复用整页报告归档流程,改造成独立的局部识别路径:
小框拍局部 → Qwen-VL 只抽 indicators → 用户确认逐项编辑 → 存成独立
Indicator(不建 Report、不留原图,与「记录指标」统一落库)。

- RegionCameraView: AVFoundation 实时预览 + 居中小框,快门后按
  metadataOutputRectConverted 裁剪到框内区域;含裁剪纯函数与权限态。
- VLPrompts.regionExtraction(): 局部识别 prompt,严格 JSON 只要 indicators。
- CaptureService.recognizeRegion(): 临时文件推理后即删,不写 Vault;
  新增 parseIndicatorsJSON / extractBalancedJSON 解析容错。
- QuickRegionConfirmView: 异常项高亮置顶、默认勾选,可编辑/增删/选纳入。
- QuickRegionCaptureFlow: 状态机 idle→analyzing→confirm,30s 超时回退手动。
- RootView: .quick 路由改指向新流程(.archive 仍走 UnifiedCaptureFlow)。
- 删除 5 个无引用的旧 mockup(A1/A2/A3/SmartFramer/QuickCaptureFlow)。

模拟器无相机退化为相册整图;小框裁剪坐标需真机验证。
设计见 docs/superpowers/specs/2026-05-31-abnormal-quick-capture-design.md

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
link2026
2026-05-31 17:12:36 +08:00
parent da6223e051
commit adb589af16
12 changed files with 1163 additions and 625 deletions

View File

@@ -70,6 +70,45 @@ actor CaptureService {
try await runVL(on: assets)
}
/// Quick region capture: runs the VL model on one cropped image (JPEG data) and
/// returns only the recognised indicators. No report object is built here.
///
/// The image is written to a uniquely named file under `NSTemporaryDirectory()`
/// with `.completeFileProtection`, and a `defer` removes that file on every exit
/// path after the write succeeds. Nothing is persisted by this method; it only
/// returns the parsed indicators to the caller.
///
/// - Parameter imageData: Encoded image bytes of the cropped region.
/// - Returns: The indicators parsed from the model output (may be empty).
/// - Throws: `CaptureError.modelNotReady` if preparing the VL model fails,
///   `CaptureError.inferenceFailed` if the temp-file write or the inference call fails,
///   `CaptureError.parseFailed` if the model output cannot be parsed.
func recognizeRegion(imageData: Data) async throws -> [ParsedReport.ParsedIndicator] {
    // 1. Make sure the VL model is usable; any preparation error is collapsed
    //    into a single `.modelNotReady`.
    do {
        try await AIRuntime.shared.prepareVL()
    } catch {
        throw CaptureError.modelNotReady
    }

    // 2. The runtime takes image URLs, so stage the bytes in a throwaway file.
    let scratchDirectory = URL(fileURLWithPath: NSTemporaryDirectory())
    let scratchURL = scratchDirectory.appendingPathComponent("region-\(UUID().uuidString).jpg")
    do {
        try imageData.write(to: scratchURL, options: [.completeFileProtection, .atomic])
    } catch let writeFailure {
        throw CaptureError.inferenceFailed("临时图片写入失败:\(writeFailure.localizedDescription)")
    }
    // Registered only after a successful write; best-effort removal on every later exit.
    defer { try? FileManager.default.removeItem(at: scratchURL) }

    // 3. Run inference with the region-extraction prompt.
    let modelOutput: String
    do {
        modelOutput = try await AIRuntime.shared.analyzeReport(
            imageURLs: [scratchURL],
            prompt: VLPrompts.regionExtraction()
        )
    } catch let inferenceFailure {
        throw CaptureError.inferenceFailed("\(inferenceFailure)")
    }

    // 4. Parse. A `.parseFailed` passes through unchanged; anything else is
    //    wrapped so callers only ever see `CaptureError` from this step.
    do {
        return try CaptureService.parseIndicatorsJSON(modelOutput)
    } catch let CaptureError.parseFailed(detail) {
        throw CaptureError.parseFailed(detail)
    } catch let unexpected {
        throw CaptureError.parseFailed("\(unexpected)")
    }
}
/// Runs VL inference + JSON parsing over `assets` (presumably already saved to the Vault — confirm against callers).
private func runVL(on assets: [FileVault.SavedAsset]) async throws -> ParsedReport {
do {
@@ -143,6 +182,32 @@ actor CaptureService {
)
}
/// Parses the VL output of a region capture into indicators.
///
/// Accepted root shapes are `{"indicators":[...]}` and a bare `[...]` array
/// (the model sometimes omits the wrapping key). The JSON text is first isolated
/// with `extractBalancedJSON(from:)`, then each element is handed to `parseIndicator`.
///
/// Tolerance is per element: an array entry that is not a JSON object (for example
/// `null` or a stray string), or one for which `parseIndicator` yields `nil`, is
/// skipped without affecting the other entries. A missing or non-array
/// `"indicators"` value yields an empty result.
///
/// - Parameter raw: Raw text returned by the model.
/// - Returns: The successfully parsed indicators, in source order (may be empty).
/// - Throws: `CaptureError.parseFailed` when the text is not valid JSON or the root
///   is neither an object nor an array.
static func parseIndicatorsJSON(_ raw: String) throws -> [ParsedReport.ParsedIndicator] {
    let jsonString = extractBalancedJSON(from: raw)
    guard let data = jsonString.data(using: .utf8) else {
        throw CaptureError.parseFailed("非 UTF-8 输出")
    }

    let obj: Any
    do {
        obj = try JSONSerialization.jsonObject(with: data, options: [.fragmentsAllowed])
    } catch {
        throw CaptureError.parseFailed("JSON 不合法:\(error.localizedDescription)")
    }

    // Cast to `[Any]` rather than `[[String: Any]]`: the latter fails for the whole
    // array as soon as one element is not an object, which would discard every
    // valid indicator (or misreport an array root as "neither object nor array").
    let candidates: [Any]
    if let dict = obj as? [String: Any] {
        candidates = (dict["indicators"] as? [Any]) ?? []
    } else if let arr = obj as? [Any] {
        candidates = arr
    } else {
        throw CaptureError.parseFailed("根节点既不是对象也不是数组")
    }

    return candidates
        .compactMap { $0 as? [String: Any] }
        .compactMap { parseIndicator($0) }
}
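/// Extracts the `{...}` object text from raw model output, presumably stripping any markdown code fence first.
/// (Does not appear to validate the JSON itself; that is left to JSONSerialization — confirm against the body.)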
static func extractJSONObject(from raw: String) -> String {
@@ -186,6 +251,56 @@ actor CaptureService {
return String(s[start...])
}
/// Isolates the first balanced JSON value, either `{...}` or `[...]`, from raw model output.
///
/// Handles both `{"indicators":[...]}` and a bare `[...]`. A leading markdown code
/// fence (text starting with three backticks) is removed first: everything up to
/// the first newline is dropped, then everything from the last fence onward.
/// Whichever of `{` / `[` occurs first decides the bracket pair that is balanced;
/// brackets inside double-quoted strings are ignored and a backslash skips the
/// following character.
///
/// No JSON validation happens here (that is left to JSONSerialization). If no
/// opener exists the trimmed text is returned unchanged; if the opener is never
/// balanced, the text from the opener to the end is returned.
///
/// - Parameter raw: Raw text returned by the model.
/// - Returns: The extracted candidate JSON substring.
static func extractBalancedJSON(from raw: String) -> String {
    var text = raw.trimmingCharacters(in: .whitespacesAndNewlines)

    // Peel off a surrounding markdown fence, if the output starts with one.
    if text.hasPrefix("```") {
        if let lineBreak = text.firstIndex(of: "\n") {
            text = String(text[text.index(after: lineBreak)...])
        }
        if let closingFence = text.range(of: "```", options: .backwards) {
            text = String(text[..<closingFence.lowerBound])
        }
        text = text.trimmingCharacters(in: .whitespacesAndNewlines)
    }

    // The earliest `{` or `[` determines which bracket pair we balance.
    guard let begin = text.firstIndex(where: { $0 == "{" || $0 == "[" }) else {
        return text
    }
    let opener = text[begin]
    let closer: Character = (opener == "{") ? "}" : "]"

    var nesting = 0
    var insideString = false
    var skipNext = false

    for position in text[begin...].indices {
        // A character following a backslash is consumed without interpretation.
        if skipNext {
            skipNext = false
            continue
        }
        let symbol = text[position]
        switch symbol {
        case "\\":
            skipNext = true
        case "\"":
            insideString.toggle()
        case opener where !insideString:
            nesting += 1
        case closer where !insideString:
            nesting -= 1
            if nesting == 0 {
                return String(text[begin...position])
            }
        default:
            break
        }
    }

    // Never balanced: hand back everything from the opener onward.
    return String(text[begin...])
}
private static func parseReportType(_ raw: String?) -> String {
guard let raw = raw?.lowercased() else { return ReportType.other.rawValue }
return ReportType(rawValue: raw)?.rawValue ?? ReportType.other.rawValue