diff --git a/康康/AI/Prompts/VLPrompts.swift b/康康/AI/Prompts/VLPrompts.swift index 20d0801..43a2dcc 100644 --- a/康康/AI/Prompts/VLPrompts.swift +++ b/康康/AI/Prompts/VLPrompts.swift @@ -31,12 +31,38 @@ nonisolated enum VLPrompts { /// VL 模型不知"今天"是哪天,且 few-shot 示例里写死了日期, /// 必须把当天日期显式注入 prompt,模型在无报告日期时才会用对正确的回退值。 - static func reportExtraction(today: Date = .now) -> String { + /// ocrText 非空时把 Vision OCR 的结果作为参考文本注入 —— Vision 抄数字比 + /// 2B 多模态读密集小字稳;版面与表格结构仍以图片为准。 + static func reportExtraction(today: Date = .now, ocrText: String = "") -> String { let f = DateFormatter() f.locale = Locale(identifier: "en_US_POSIX") f.dateFormat = "yyyy-MM-dd" let todayStr = f.string(from: today) - return reportExtractionTemplate.replacingOccurrences(of: "{{TODAY}}", with: todayStr) + let ocrSection: String + if ocrText.isEmpty { + ocrSection = "" + } else { + ocrSection = """ + + + OCR 参考文本(系统对同一报告做文字识别的结果,可能有错字、串行或漏行;版面与表格结构以图片为准,但数值、小数点以 OCR 文字更可靠): + \(clipOCR(ocrText)) + + """ + } + return reportExtractionTemplate + .replacingOccurrences(of: "{{TODAY}}", with: todayStr) + .replacingOccurrences(of: "{{OCR_SECTION}}", with: ocrSection) + } + + /// OCR 文本截断:限制进入 prompt 的体量(2B 模型上下文有限)。截到最后一个完整行。 + static func clipOCR(_ text: String, limit: Int = 1800) -> String { + guard text.count > limit else { return text } + let clipped = String(text.prefix(limit)) + if let lastNewline = clipped.lastIndex(of: "\n") { + return String(clipped[.. String { + var pages: [String] = [] + for (idx, url) in urls.prefix(4).enumerated() { + guard let src = CGImageSourceCreateWithURL(url as CFURL, nil), + let cg = CGImageSourceCreateImageAtIndex(src, 0, nil) else { continue } + guard let text = try? await OCRService.recognizeText(in: cg), + !text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { continue } + pages.append(urls.count > 1 ? "【第 \(idx + 1) 页】\n\(text)" : text) + } + return pages.joined(separator: "\n") + } + // MARK: - JSON parse(static + 纯函数 → 方便单测) /// 从 VL 输出里抠出第一段合法 JSON 对象并解析。 diff --git a/康康Tests/VLPromptsOCRTests.swift b/康康Tests/VLPromptsOCRTests.swift new file mode 100644 index 0000000..c5a37a8 --- /dev/null +++ b/康康Tests/VLPromptsOCRTests.swift @@ -0,0 +1,32 @@ +import Testing +@testable import 康康 + +struct VLPromptsOCRTests { + + @Test func emptyOCRKeepsPromptClean() { + let p = VLPrompts.reportExtraction(ocrText: "") + #expect(!p.contains("OCR 参考文本")) + #expect(!p.contains("{{OCR_SECTION}}")) + #expect(p.contains("现在请识别图片并输出 JSON")) + } + + @Test func ocrTextIsInjectedBeforeFinalInstruction() { + let p = VLPrompts.reportExtraction(ocrText: "尿酸 486 208-428 μmol/L") + #expect(p.contains("OCR 参考文本")) + #expect(p.contains("尿酸 486")) + let ocrPos = p.range(of: "尿酸 486")!.lowerBound + let endPos = p.range(of: "现在请识别图片并输出 JSON")!.lowerBound + #expect(ocrPos < endPos) + } + + @Test func clipKeepsShortTextIntact() { + #expect(VLPrompts.clipOCR("短文本") == "短文本") + } + + @Test func clipCutsAtLineBoundary() { + let long = Array(repeating: "指标行 1.23 mmol/L", count: 400).joined(separator: "\n") + let clipped = VLPrompts.clipOCR(long, limit: 200) + #expect(clipped.count < 260) + #expect(clipped.hasSuffix("(后续内容过长已截断)")) + } +}