feat(Capture): 报告识别注入 Vision OCR 参考文本,提升 2B 多模态数字准确率

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
link2026
2026-06-10 07:12:48 +08:00
parent 0dd60d6021
commit 77139f5e32
3 changed files with 80 additions and 4 deletions

View File

@@ -1,5 +1,6 @@
import Foundation
import UIKit
import ImageIO
import SwiftData
/// VL (, SwiftData )
@@ -142,11 +143,14 @@ actor CaptureService {
throw CaptureError.modelNotReady
}
let urls = assets.map { FileVault.shared.rootURL.appendingPathComponent($0.relativePath) }
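// OCR reference text (Vision, <1s per image): gives the 2B multimodal model a textual reference for the digits it reads.
// Best-effort: if OCR fails or finds nothing, the reference degrades to an empty string and extraction proceeds from the images alone (§3.2).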
let ocr = await Self.ocrReference(for: urls)
let raw: String
do {
raw = try await AIRuntime.shared.analyzeReport(
imageURLs: urls,
prompt: VLPrompts.reportExtraction()
prompt: VLPrompts.reportExtraction(ocrText: ocr)
)
} catch {
throw CaptureError.inferenceFailed("\(error)")
@@ -160,6 +164,20 @@ actor CaptureService {
}
}
/// Best-effort OCR pass over the report images, producing reference text for the VL prompt.
///
/// Only the first four URLs are processed. Each image is decoded through ImageIO straight
/// into a `CGImage` and handed to `OCRService.recognizeText(in:)`; `UIImage` is not used here
/// (NOTE(review): the original comment appears to cite actor/Sendable concerns — confirm).
/// A page is skipped when it cannot be decoded, when recognition throws, or when the
/// recognized text is blank after trimming whitespace and newlines.
///
/// - Parameter urls: File URLs of the report images, in page order.
/// - Returns: The text of every usable page joined by `"\n"`. When more than one URL was
///   supplied, each page is prefixed with a `【第 N 页】` header, where N is the page's 1-based
///   position in `urls` (skipped pages keep their number). Returns `""` if no page yielded text.
private static func ocrReference(for urls: [URL]) async -> String {
    // Decided once: headers are driven by how many URLs were passed in, not by how many succeed.
    let needsPageHeaders = urls.count > 1
    var sections: [String] = []

    for (offset, imageURL) in urls.prefix(4).enumerated() {
        guard
            let source = CGImageSourceCreateWithURL(imageURL as CFURL, nil),
            let image = CGImageSourceCreateImageAtIndex(source, 0, nil),
            let recognized = try? await OCRService.recognizeText(in: image)
        else { continue }

        // Whitespace-only output counts as "nothing recognized" for this page.
        if recognized.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { continue }

        if needsPageHeaders {
            sections.append("【第 \(offset + 1) 页】\n\(recognized)")
        } else {
            sections.append(recognized)
        }
    }

    return sections.joined(separator: "\n")
}
// MARK: - JSON parse(static + 便)
/// VL JSON