From 77139f5e32bc7c1101312f63b394c6ba3823660d Mon Sep 17 00:00:00 2001 From: link2026 Date: Wed, 10 Jun 2026 07:12:48 +0800 Subject: [PATCH] =?UTF-8?q?feat(Capture):=20=E6=8A=A5=E5=91=8A=E8=AF=86?= =?UTF-8?q?=E5=88=AB=E6=B3=A8=E5=85=A5=20Vision=20OCR=20=E5=8F=82=E8=80=83?= =?UTF-8?q?=E6=96=87=E6=9C=AC,=E6=8F=90=E5=8D=87=202B=20=E5=A4=9A=E6=A8=A1?= =?UTF-8?q?=E6=80=81=E6=95=B0=E5=AD=97=E5=87=86=E7=A1=AE=E7=8E=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Fable 5 --- 康康/AI/Prompts/VLPrompts.swift | 32 +++++++++++++++++++++++++++--- 康康/Services/CaptureService.swift | 20 ++++++++++++++++++- 康康Tests/VLPromptsOCRTests.swift | 32 ++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 康康Tests/VLPromptsOCRTests.swift diff --git a/康康/AI/Prompts/VLPrompts.swift b/康康/AI/Prompts/VLPrompts.swift index 20d0801..43a2dcc 100644 --- a/康康/AI/Prompts/VLPrompts.swift +++ b/康康/AI/Prompts/VLPrompts.swift @@ -31,12 +31,38 @@ nonisolated enum VLPrompts { /// VL 模型不知"今天"是哪天,且 few-shot 示例里写死了日期, /// 必须把当天日期显式注入 prompt,模型在无报告日期时才会用对正确的回退值。 - static func reportExtraction(today: Date = .now) -> String { + /// ocrText 非空时把 Vision OCR 的结果作为参考文本注入 —— Vision 抄数字比 + /// 2B 多模态读密集小字稳;版面与表格结构仍以图片为准。 + static func reportExtraction(today: Date = .now, ocrText: String = "") -> String { let f = DateFormatter() f.locale = Locale(identifier: "en_US_POSIX") f.dateFormat = "yyyy-MM-dd" let todayStr = f.string(from: today) - return reportExtractionTemplate.replacingOccurrences(of: "{{TODAY}}", with: todayStr) + let ocrSection: String + if ocrText.isEmpty { + ocrSection = "" + } else { + ocrSection = """ + + + OCR 参考文本(系统对同一报告做文字识别的结果,可能有错字、串行或漏行;版面与表格结构以图片为准,但数值、小数点以 OCR 文字更可靠): + \(clipOCR(ocrText)) + + """ + } + return reportExtractionTemplate + .replacingOccurrences(of: "{{TODAY}}", with: todayStr) + .replacingOccurrences(of: "{{OCR_SECTION}}", with: ocrSection) + } + + /// OCR 文本截断:限制进入 prompt 的体量(2B 模型上下文有限)。截到最后一个完整行。 + static func clipOCR(_ text: String, limit: Int = 1800) -> String { + guard text.count > limit else { return text } + let clipped = String(text.prefix(limit)) + if let lastNewline = clipped.lastIndex(of: "\n") { + return String(clipped[.. String { + var pages: [String] = [] + for (idx, url) in urls.prefix(4).enumerated() { + guard let src = CGImageSourceCreateWithURL(url as CFURL, nil), + let cg = CGImageSourceCreateImageAtIndex(src, 0, nil) else { continue } + guard let text = try? await OCRService.recognizeText(in: cg), + !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { continue } + pages.append(urls.count > 1 ? "【第 \(idx + 1) 页】\n\(text)" : text) + } + return pages.joined(separator: "\n") + } + // MARK: - JSON parse(static + 纯函数 → 方便单测) /// 从 VL 输出里抠出第一段合法 JSON 对象并解析。 diff --git a/康康Tests/VLPromptsOCRTests.swift b/康康Tests/VLPromptsOCRTests.swift new file mode 100644 index 0000000..c5a37a8 --- /dev/null +++ b/康康Tests/VLPromptsOCRTests.swift @@ -0,0 +1,32 @@ +import Testing +@testable import 康康 + +struct VLPromptsOCRTests { + + @Test func emptyOCRKeepsPromptClean() { + let p = VLPrompts.reportExtraction(ocrText: "") + #expect(!p.contains("OCR 参考文本")) + #expect(!p.contains("{{OCR_SECTION}}")) + #expect(p.contains("现在请识别图片并输出 JSON")) + } + + @Test func ocrTextIsInjectedBeforeFinalInstruction() { + let p = VLPrompts.reportExtraction(ocrText: "尿酸 486 208-428 μmol/L") + #expect(p.contains("OCR 参考文本")) + #expect(p.contains("尿酸 486")) + let ocrPos = p.range(of: "尿酸 486")!.lowerBound + let endPos = p.range(of: "现在请识别图片并输出 JSON")!.lowerBound + #expect(ocrPos < endPos) + } + + @Test func clipKeepsShortTextIntact() { + #expect(VLPrompts.clipOCR("短文本") == "短文本") + } + + @Test func clipCutsAtLineBoundary() { + let long = Array(repeating: "指标行 1.23 mmol/L", count: 400).joined(separator: "\n") + let clipped = VLPrompts.clipOCR(long, limit: 200) + #expect(clipped.count < 260) + #expect(clipped.hasSuffix("(后续内容过长已截断)")) + } +}