feat(AI): 统一多模态模型架构,整合文本和视觉推理路径
- 将文本生成和VL(图→文)功能合并到单一的Qwen3.5-4B多模态MNN模型
- 移除独立的Qwen3-VL-4B模型依赖,MLX VL改为使用.llm的多模态模型
- 更新ModelKind枚举,新增userFacing集合用于面向用户展示
- MNN后端现在同时支持文本和视觉任务,模拟器回退到MLX

refactor(models): 模型管理和界面调整以适应新的多模态架构
- 更新模型管理界面,只显示统一的Qwen3.5-4B(MNN)模型给用户
- 修改就绪状态检查逻辑,使用ModelKind.userFacing替代allCases
- 更新模型文件清单,从Qwen3.5-2B升级到Qwen3.5-4B-4bit
- 调整模型管理页面UI,突出MNN+SME2端侧加速功能

feat(camera): 添加拍照识别引擎切换功能
- 实现双路径拍照识别:Apple Vision OCR + 文本模型 和 Qwen3-VL直接识别
- 添加预处理逻辑,优化Qwen3-VL对窄长区域图片的识别效果
- 在模型管理页面添加拍照识别引擎选择组件
- 提供用户界面选项,在两种识别方式间切换

style(ui): 优化输入框样式和颜色主题一致性
- 为指标快速表单添加浅色主题偏好
- 统一所有文本输入框的颜色样式(theme)
- 创建EntryInputField组件,替换原有的单行输入+按钮模式
- 实现聊天框风格的条目输入,支持多行自适应和圆形发送按钮

fix(build): 修正Xcode项目配置中的重复框架搜索路径
- 清理project.pbxproj中重复的FRAMEWORK_SEARCH_PATHS配置
- 重新排列Swift桥接头文件配置确保正确引用
- 修复因路径配置重复导致的编译警告问题

test: 增加区域图片预处理和模型清单测试覆盖
- 添加RegionImageCropper.prepareForQwenVL的单元测试
- 验证宽而矮图片的放大和填充逻辑
- 更新ModelManifestTests中的字节数预期值以匹配新模型
- 修正OCRService中VNRecognizedTextObservation类型的处理
205 lines
8.4 KiB
Swift
205 lines
8.4 KiB
Swift
import SwiftUI
|
|
import SwiftData
|
|
import UIKit
|
|
|
|
/// Quick capture of abnormal indicators — the unified flow.
///
/// Single full-frame shot (device) / photo library (simulator) → manual region selection on the
/// still image → recognition of the selected region → review → every kept row is stored as a
/// standalone `Indicator`.
///
/// State machine:
/// ```
/// idle (camera / library) → adjust (select a region, recognition can be re-run) → confirm (review) → save → dismiss
///                                                                                 confirm → retake → idle
/// ```
/// A failed or timed-out recognition must never dead-end the flow: the user either stays in
/// `adjust` with a hint to move the frame and retry, or moves on to `confirm` and fills the rows
/// in by hand (§3.2 failure-fallback red line).
struct QuickRegionCaptureFlow: View {
    /// Recognition outcome handed back to `RegionAdjustView`: the editable rows plus an optional
    /// user-facing hint. A `nil` warning together with empty rows is what a cancelled run returns.
    private typealias RecognitionOutcome = (items: [QuickRegionItem], warning: String?)

    @Environment(\.modelContext) private var ctx

    /// Invoked whenever the flow should be dismissed (cancel, or after a successful save).
    let onClose: () -> Void

    /// Raw persisted value of the recognition engine preference.
    @AppStorage(QuickRegionRecognitionEngine.storageKey)
    private var recognitionEngineRaw = QuickRegionRecognitionEngine.defaultValue.rawValue

    @State private var phase: Phase = .idle

    /// Steps of the capture flow.
    enum Phase {
        /// Waiting for a photo (camera or photo picker).
        case idle
        /// A still image is available and the user is selecting the region to recognize.
        case adjust(image: UIImage)
        /// The user reviews/edits the recognized rows; `warning` is shown when present.
        case confirm(image: UIImage?, items: [QuickRegionItem], warning: String?)
    }

    var body: some View {
        content
            .background(Tj.Palette.sand.ignoresSafeArea())
    }

    /// The screen that belongs to the current `phase`.
    @ViewBuilder
    private var content: some View {
        switch phase {
        case .idle:
            captureEntry
                .ignoresSafeArea()

        case .adjust(let image):
            RegionAdjustView(
                image: image,
                recognize: { await recognizeRegion($0) },
                onProceed: { items in
                    phase = .confirm(image: image, items: items, warning: nil)
                },
                onRetake: { phase = .idle },
                onCancel: { onClose() }
            )
            .ignoresSafeArea()

        case .confirm(let image, let items, let warning):
            NavigationStack {
                QuickRegionConfirmView(
                    image: image,
                    items: items,
                    warning: warning,
                    onSave: { finalItems, capturedAt in
                        save(items: finalItems, capturedAt: capturedAt, image: image)
                    },
                    onCancel: { onClose() },
                    onRetake: { phase = .idle }
                )
                .navigationTitle(String(appLoc: "核对异常项"))
                .navigationBarTitleDisplayMode(.inline)
                .toolbar {
                    ToolbarItem(placement: .topBarLeading) {
                        // Routed through `String(appLoc:)` like every other user-facing
                        // string in this view.
                        Button(String(appLoc: "取消")) { onClose() }
                            .foregroundStyle(Tj.Palette.text)
                    }
                }
            }
        }
    }

    // MARK: - Entry: photo picker (simulator) / single-shot camera (everything else)

    @ViewBuilder
    private var captureEntry: some View {
        #if targetEnvironment(simulator)
        PhotoPickerSheet(
            onFinish: { handlePicked($0) },
            onCancel: onClose
        )
        #else
        SingleShotCameraView(
            onCapture: { phase = .adjust(image: $0) },
            onCancel: onClose
        )
        #endif
    }

    /// Handles the picker result: only the first image is used and moves the flow to `adjust`;
    /// an empty selection closes the flow.
    private func handlePicked(_ images: [UIImage]) {
        guard let first = images.first else {
            onClose()
            return
        }
        phase = .adjust(image: first)
    }

    // MARK: - Recognition (cropped region → rows)

    /// Runs recognition on an already cropped region image.
    ///
    /// Never throws: failures are reported through the returned `warning` so the caller can
    /// display them. The pipeline follows the persisted engine preference:
    /// - `.appleVision`: `OCRService` text recognition, then indicator extraction from that text.
    /// - `.qwenVL`: the prepared image is sent straight to `CaptureService.recognizeRegion`.
    ///
    /// - Parameter image: The cropped region.
    /// - Returns: The recognized rows and an optional hint; `([], nil)` when the task was cancelled.
    private func recognizeRegion(_ image: UIImage) async -> (items: [QuickRegionItem], warning: String?) {
        switch QuickRegionRecognitionEngine(storedValue: recognitionEngineRaw) {
        case .appleVision:
            return await recognizeWithAppleVision(image)
        case .qwenVL:
            return await recognizeWithQwenVL(image)
        }
    }

    /// OCR path: recognize text in the image, then extract indicators from the trimmed text.
    private func recognizeWithAppleVision(_ image: UIImage) async -> (items: [QuickRegionItem], warning: String?) {
        do {
            let rawText = try await OCRService.recognizeText(in: image)
            // Cancelled (e.g. timed out): the caller supplies the message.
            if Task.isCancelled { return ([], nil) }

            let trimmed = rawText.trimmingCharacters(in: .whitespacesAndNewlines)
            #if DEBUG
            print("🔤 [OCR · region] recognized text:\n\(trimmed)\n--- end OCR ---")
            #endif
            guard !trimmed.isEmpty else {
                return ([], String(appLoc: "没识别到文字,挪一下框再试"))
            }

            let parsed = try await CaptureService.shared.recognizeIndicators(fromOCRText: trimmed)
            if Task.isCancelled { return ([], nil) }
            return Self.outcome(for: parsed)
        } catch {
            let hint = Self.warning(for: error, modelNotReady: String(appLoc: "AI 模型未就绪,手动补充"))
            return ([], hint)
        }
    }

    /// Vision-language path: pre-process the region, encode it as JPEG and let the model
    /// extract the indicators directly from the image.
    private func recognizeWithQwenVL(_ image: UIImage) async -> (items: [QuickRegionItem], warning: String?) {
        let prepared = RegionImageCropper.prepareForQwenVL(image)
        guard let data = prepared.jpegData(compressionQuality: 0.95) else {
            return ([], String(appLoc: "图片编码失败,手动补充"))
        }
        #if DEBUG
        print("🖼️ [Qwen3-VL region] prepared image=\(Int(prepared.size.width))x\(Int(prepared.size.height)), bytes=\(data.count)")
        #endif

        do {
            let parsed = try await CaptureService.shared.recognizeRegion(imageData: data)
            if Task.isCancelled { return ([], nil) }
            return Self.outcome(for: parsed)
        } catch {
            let hint = Self.warning(
                for: error,
                modelNotReady: String(appLoc: "模型未就绪,请在模型管理下载或切回 Apple Vision")
            )
            return ([], hint)
        }
    }

    /// Turns parsed indicators into the recognition outcome, adding the "nothing found" hint
    /// when no row could be built.
    private static func outcome(for parsed: [ParsedReport.ParsedIndicator]) -> RecognitionOutcome {
        let items = buildItems(from: parsed)
        return (items, items.isEmpty ? String(appLoc: "没读出指标,挪一下框再试") : nil)
    }

    /// Maps a recognition error to the hint shown to the user.
    ///
    /// Shared by both engines so they cannot drift apart. Cancellation is checked first for
    /// every error kind: once the task is cancelled the caller provides its own message, so
    /// no hint is produced here.
    ///
    /// - Parameters:
    ///   - error: The error thrown by the recognition pipeline.
    ///   - modelNotReady: Engine-specific text for `CaptureError.modelNotReady`.
    /// - Returns: The hint, or `nil` when the surrounding task has been cancelled.
    private static func warning(for error: Error, modelNotReady: String) -> String? {
        if Task.isCancelled { return nil }
        switch error {
        case CaptureError.modelNotReady:
            return modelNotReady
        case CaptureError.parseFailed(let msg):
            return String(appLoc: "解析失败:\(msg)")
        case CaptureError.inferenceFailed(let msg):
            return String(appLoc: "识别失败:\(msg)")
        default:
            return String(appLoc: "未知错误:\(error.localizedDescription)")
        }
    }

    /// Converts parsed indicators into editable rows, all pre-selected. Rows whose status is
    /// not `.normal` come first; the original order is kept inside each group.
    private static func buildItems(from parsed: [ParsedReport.ParsedIndicator]) -> [QuickRegionItem] {
        let rows = parsed.map {
            QuickRegionItem(name: $0.name, value: $0.value, unit: $0.unit,
                            range: $0.range, status: $0.status, include: true)
        }
        // `filter` preserves order, so this is a stable two-way partition.
        let abnormal = rows.filter { $0.status != .normal }
        let normal = rows.filter { $0.status == .normal }
        return abnormal + normal
    }

    // MARK: - Save

    /// Stores every selected row that has a non-blank name and value as its own `Indicator`,
    /// then closes the flow.
    ///
    /// If persisting fails, the rows inserted by this call are removed from the context again
    /// (so a retry cannot duplicate them) and the flow returns to the review step with a
    /// warning instead of closing and silently losing the user's input.
    ///
    /// - Parameters:
    ///   - items: The rows as edited by the user.
    ///   - capturedAt: Timestamp applied to every stored indicator.
    ///   - image: The image shown in the review step, re-used if the flow has to return there.
    private func save(items: [QuickRegionItem], capturedAt: Date, image: UIImage?) {
        let indicators = items.compactMap { item -> Indicator? in
            guard item.include else { return nil }
            let name = item.name.trimmingCharacters(in: .whitespaces)
            let value = item.value.trimmingCharacters(in: .whitespaces)
            guard !name.isEmpty, !value.isEmpty else { return nil }
            return Indicator(
                name: name,
                value: value,
                unit: item.unit.trimmingCharacters(in: .whitespaces),
                range: item.range.trimmingCharacters(in: .whitespaces),
                status: item.status,
                capturedAt: capturedAt,
                source: .quickCapture
            )
        }
        indicators.forEach { ctx.insert($0) }

        do {
            try ctx.save()
            onClose()
        } catch {
            // Undo only what this call inserted; other pending changes stay untouched.
            indicators.forEach { ctx.delete($0) }
            phase = .confirm(
                image: image,
                items: items,
                warning: String(appLoc: "保存失败:\(error.localizedDescription)")
            )
        }
    }
}
|