feat(Capture): 报告识别注入 Vision OCR 参考文本,提升 2B 多模态数字准确率

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
link2026
2026-06-10 07:12:48 +08:00
parent 0dd60d6021
commit 77139f5e32
3 changed files with 80 additions and 4 deletions

View File

@@ -31,12 +31,38 @@ nonisolated enum VLPrompts {
/// VL "", few-shot ,
/// prompt,退
static func reportExtraction(today: Date = .now) -> String {
/// ocrText Vision OCR Vision
/// 2B ;
static func reportExtraction(today: Date = .now, ocrText: String = "") -> String {
let f = DateFormatter()
f.locale = Locale(identifier: "en_US_POSIX")
f.dateFormat = "yyyy-MM-dd"
let todayStr = f.string(from: today)
return reportExtractionTemplate.replacingOccurrences(of: "{{TODAY}}", with: todayStr)
let ocrSection: String
if ocrText.isEmpty {
ocrSection = ""
} else {
ocrSection = """
OCR 参考文本(系统对同一报告做文字识别的结果,可能有错字、串行或漏行;版面与表格结构以图片为准,但数值、小数点以 OCR 文字更可靠):
\(clipOCR(ocrText))
"""
}
return reportExtractionTemplate
.replacingOccurrences(of: "{{TODAY}}", with: todayStr)
.replacingOccurrences(of: "{{OCR_SECTION}}", with: ocrSection)
}
/// OCR : prompt (2B )
static func clipOCR(_ text: String, limit: Int = 1800) -> String {
guard text.count > limit else { return text }
let clipped = String(text.prefix(limit))
if let lastNewline = clipped.lastIndex(of: "\n") {
return String(clipped[..<lastNewline]) + "\n(后续内容过长已截断)"
}
return clipped + "\n(后续内容过长已截断)"
}
private static let reportExtractionTemplate: String = #"""
@@ -84,7 +110,7 @@ JSON schema(严格):
输入: 一份春季体检,3 项可读
输出:
{"title":"","type":"checkup","report_date":"2026-04-12","institution":"","page_count":1,"summary":"","indicators":[{"name":"","value":"3.84","unit":"mmol/L","range":"< 3.40","status":"high","source_page":1,"source_box":[0.12,0.31,0.76,0.07]},{"name":"","value":"32","unit":"U/L","range":"9 - 50","status":"normal","source_page":1,"source_box":[0.12,0.39,0.76,0.07]},{"name":"","value":"5.2","unit":"mmol/L","range":"3.9 - 6.1","status":"normal","source_page":1,"source_box":[0.12,0.47,0.76,0.07]}]}
{{OCR_SECTION}}
现在请识别图片并输出 JSON:
"""#