```
feat(iOS): 更新MNN后端模型配置优化性能 将MNN主模型从Qwen3.5-4B(~2.64GiB)降级为Qwen3.5-2B(~1.1GiB),因为4B版本实测运行过慢,影响用户体验。iPhone17+/SME2设备使用2B模型,保留MLX兜底方案用于模拟器和备用场景,确保AI推理性能和存储效率的平衡。
```
This commit is contained in:
@@ -2,7 +2,7 @@ import SwiftUI
|
||||
import SwiftData
|
||||
import UIKit
|
||||
|
||||
/// 异常项快拍 · 统一流程。
|
||||
/// 指标速记 · 统一流程。
|
||||
/// 整幅单拍(真机)/ 相册(模拟器)→ 静态图手动框选 → 框内 OCR+LLM 抽指标 → 核对 → 存独立 Indicator。
|
||||
///
|
||||
/// 状态机:
|
||||
@@ -15,8 +15,6 @@ struct QuickRegionCaptureFlow: View {
|
||||
@Environment(\.modelContext) private var ctx
|
||||
let onClose: () -> Void
|
||||
|
||||
@AppStorage(QuickRegionRecognitionEngine.storageKey)
|
||||
private var recognitionEngineRaw = QuickRegionRecognitionEngine.defaultValue.rawValue
|
||||
@State private var phase: Phase = .idle
|
||||
|
||||
enum Phase {
|
||||
@@ -59,7 +57,7 @@ struct QuickRegionCaptureFlow: View {
|
||||
onCancel: { onClose() },
|
||||
onRetake: { phase = .idle }
|
||||
)
|
||||
.navigationTitle(String(appLoc: "核对异常项"))
|
||||
.navigationTitle(String(appLoc: "核对指标"))
|
||||
.navigationBarTitleDisplayMode(.inline)
|
||||
.toolbar {
|
||||
ToolbarItem(placement: .topBarLeading) {
|
||||
@@ -97,29 +95,18 @@ struct QuickRegionCaptureFlow: View {
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - 识别(框内子图 → OCR → LLM)
|
||||
// MARK: - 识别(框内子图 → Vision OCR → Qwen3 整理)
|
||||
|
||||
/// 对已裁好的框内子图跑识别。失败/超时返回提示文案,绝不抛出(由 RegionAdjustView 展示)。
|
||||
/// 链路由「我的 → 模型管理 → 拍照识别引擎」决定:
|
||||
/// - Apple Vision:Vision 端侧 OCR → Qwen3-1.7B 结构化抽指标
|
||||
/// - Qwen3-VL:局部图片 → Qwen3-VL 直接结构化抽指标
|
||||
/// 固定链路:Vision 端侧 OCR 出文字 → Qwen3 跑一次结构化整理抽指标。
|
||||
/// (旧的「大模型直读」VL 路径已移除:端侧看图慢且易卡,OCR→整理又快又准。)
|
||||
private func recognizeRegion(_ image: UIImage) async -> (items: [QuickRegionItem], warning: String?) {
|
||||
let engine = QuickRegionRecognitionEngine(storedValue: recognitionEngineRaw)
|
||||
switch engine {
|
||||
case .appleVision:
|
||||
return await recognizeWithAppleVision(image)
|
||||
case .qwenVL:
|
||||
return await recognizeWithQwenVL(image)
|
||||
}
|
||||
}
|
||||
|
||||
private func recognizeWithAppleVision(_ image: UIImage) async -> (items: [QuickRegionItem], warning: String?) {
|
||||
do {
|
||||
let text = try await OCRService.recognizeText(in: image)
|
||||
if Task.isCancelled { return ([], nil) } // 超时:文案由调用方给
|
||||
let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
#if DEBUG
|
||||
print("🔤 [OCR · region] recognized text:\n\(trimmed)\n--- end OCR ---")
|
||||
NSLog("KKDBG-OCR region text:\n%@\n--- end OCR ---", trimmed)
|
||||
#endif
|
||||
if trimmed.isEmpty {
|
||||
return ([], String(appLoc: "没识别到文字,挪一下框再试"))
|
||||
@@ -139,30 +126,6 @@ struct QuickRegionCaptureFlow: View {
|
||||
}
|
||||
}
|
||||
|
||||
private func recognizeWithQwenVL(_ image: UIImage) async -> (items: [QuickRegionItem], warning: String?) {
|
||||
let prepared = RegionImageCropper.prepareForQwenVL(image)
|
||||
guard let data = prepared.jpegData(compressionQuality: 0.95) else {
|
||||
return ([], String(appLoc: "图片编码失败,手动补充"))
|
||||
}
|
||||
#if DEBUG
|
||||
print("🖼️ [Qwen3-VL region] prepared image=\(Int(prepared.size.width))x\(Int(prepared.size.height)), bytes=\(data.count)")
|
||||
#endif
|
||||
do {
|
||||
let parsed = try await CaptureService.shared.recognizeRegion(imageData: data)
|
||||
if Task.isCancelled { return ([], nil) }
|
||||
let items = Self.buildItems(from: parsed)
|
||||
return (items, items.isEmpty ? String(appLoc: "没读出指标,挪一下框再试") : nil)
|
||||
} catch CaptureError.modelNotReady {
|
||||
return ([], String(appLoc: "模型未就绪,请在模型管理下载或切回 Apple Vision"))
|
||||
} catch let CaptureError.parseFailed(msg) {
|
||||
return ([], String(appLoc: "解析失败:\(msg)"))
|
||||
} catch let CaptureError.inferenceFailed(msg) {
|
||||
return ([], Task.isCancelled ? nil : String(appLoc: "识别失败:\(msg)"))
|
||||
} catch {
|
||||
return ([], Task.isCancelled ? nil : String(appLoc: "未知错误:\(error.localizedDescription)"))
|
||||
}
|
||||
}
|
||||
|
||||
/// LLM 结果 → 可编辑行,异常项(high/low)置顶、默认勾选。
|
||||
private static func buildItems(from parsed: [ParsedReport.ParsedIndicator]) -> [QuickRegionItem] {
|
||||
let mapped = parsed.map {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import SwiftUI
|
||||
import UIKit
|
||||
|
||||
/// 异常项快拍 · 确认页。VL 识别结果逐项可编辑 + 勾选纳入,确认后只存数值(不留图)。
|
||||
/// 指标速记 · 确认页。VL 识别结果逐项可编辑 + 勾选纳入,确认后只存数值(不留图)。
|
||||
/// 与「记录指标」自由输入落库一致 —— 每个勾选项 = 一条独立 Indicator。
|
||||
struct QuickRegionConfirmView: View {
|
||||
let image: UIImage?
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
import Foundation
|
||||
|
||||
enum QuickRegionRecognitionEngine: String, CaseIterable, Identifiable, Sendable {
|
||||
case appleVision
|
||||
case qwenVL
|
||||
|
||||
static let storageKey = "quickRegionRecognitionEngine"
|
||||
static let defaultValue: QuickRegionRecognitionEngine = .appleVision
|
||||
|
||||
var id: String { rawValue }
|
||||
|
||||
init(storedValue: String) {
|
||||
self = QuickRegionRecognitionEngine(rawValue: storedValue) ?? Self.defaultValue
|
||||
}
|
||||
|
||||
var title: String {
|
||||
switch self {
|
||||
case .appleVision: return String(appLoc: "Apple Vision")
|
||||
case .qwenVL: return String(appLoc: "大模型直读")
|
||||
}
|
||||
}
|
||||
|
||||
var detail: String {
|
||||
switch self {
|
||||
case .appleVision:
|
||||
return String(appLoc: "系统 OCR + 文本模型解析")
|
||||
case .qwenVL:
|
||||
return String(appLoc: "Qwen3.5-4B 多模态直接看图(MNN/MLX)")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,7 +2,7 @@ import SwiftUI
|
||||
import AVFoundation
|
||||
import UIKit
|
||||
|
||||
/// 异常项快拍 · 静态图框选识别。
|
||||
/// 指标速记 · 静态图框选识别。
|
||||
/// 拍/选一张后,在静态照片上手动拖动 + 缩放一个方框,点「识别」只对框内做 OCR+LLM。
|
||||
/// 可反复挪框重识别,满意后进入核对页;0 项也能进核对手动补(失败回退红线)。
|
||||
struct RegionAdjustView: View {
|
||||
|
||||
@@ -3,7 +3,7 @@ import AVFoundation
|
||||
import UIKit
|
||||
import Combine
|
||||
|
||||
/// 异常项快拍 · 整幅单拍相机。
|
||||
/// 指标速记 · 整幅单拍相机。
|
||||
/// 全屏实时预览 + 一个快门 → 返回**整幅** upright UIImage(不裁剪)。
|
||||
/// 拍完后由 `RegionAdjustView` 在静态图上手动框选识别区域。
|
||||
/// 只在真机可用(模拟器无相机,`QuickRegionCaptureFlow` 退化到 PhotoPicker)。
|
||||
@@ -60,7 +60,7 @@ struct SingleShotCameraView: View {
|
||||
|
||||
Spacer()
|
||||
|
||||
Text("拍一张含异常指标的照片 · 拍完再框选")
|
||||
Text("拍一张含目标指标的照片 · 拍完再框选")
|
||||
.font(.tjScaled( 13, weight: .medium))
|
||||
.foregroundStyle(.white)
|
||||
.padding(.horizontal, 12)
|
||||
@@ -97,7 +97,7 @@ struct SingleShotCameraView: View {
|
||||
Text("相机权限未开启")
|
||||
.font(.tjH2())
|
||||
.foregroundStyle(.white)
|
||||
Text("异常项快拍需要相机。去「设置 → 康康 → 相机」打开后再回来。")
|
||||
Text("指标速记需要相机。去「设置 → 康康 → 相机」打开后再回来。")
|
||||
.font(.tjScaled( 13))
|
||||
.foregroundStyle(.white.opacity(0.7))
|
||||
.multilineTextAlignment(.center)
|
||||
@@ -352,49 +352,6 @@ enum RegionImageCropper {
|
||||
guard rect.width >= 1, rect.height >= 1, let cropped = cg.cropping(to: rect) else { return up }
|
||||
return UIImage(cgImage: cropped, scale: up.scale, orientation: .up)
|
||||
}
|
||||
|
||||
/// Qwen3-VL 局部图预处理:宽而矮的小框直接喂 VL 时,processor 再缩放容易把小字压没。
|
||||
/// 这里只用于 Qwen3-VL 分支,Apple Vision OCR 保持吃原始裁剪图。
|
||||
static func prepareForQwenVL(_ image: UIImage,
|
||||
minimumShortEdge: CGFloat = 448,
|
||||
maximumLongEdge: CGFloat = 2400,
|
||||
padding: CGFloat = 64) -> UIImage {
|
||||
let up = image.normalizedUp()
|
||||
guard let cg = up.cgImage else { return up }
|
||||
|
||||
let sourceSize = CGSize(width: cg.width, height: cg.height)
|
||||
guard sourceSize.width > 0, sourceSize.height > 0 else { return up }
|
||||
|
||||
let short = min(sourceSize.width, sourceSize.height)
|
||||
let long = max(sourceSize.width, sourceSize.height)
|
||||
var scale = max(1, minimumShortEdge / short)
|
||||
if long * scale > maximumLongEdge {
|
||||
scale = maximumLongEdge / long
|
||||
}
|
||||
|
||||
let contentSize = CGSize(
|
||||
width: max(1, (sourceSize.width * scale).rounded()),
|
||||
height: max(1, (sourceSize.height * scale).rounded())
|
||||
)
|
||||
let canvasSize = CGSize(
|
||||
width: contentSize.width + padding * 2,
|
||||
height: contentSize.height + padding * 2
|
||||
)
|
||||
|
||||
let format = UIGraphicsImageRendererFormat.default()
|
||||
format.scale = 1
|
||||
format.opaque = true
|
||||
let renderer = UIGraphicsImageRenderer(size: canvasSize, format: format)
|
||||
return renderer.image { ctx in
|
||||
UIColor.white.setFill()
|
||||
ctx.fill(CGRect(origin: .zero, size: canvasSize))
|
||||
|
||||
UIImage(cgImage: cg, scale: 1, orientation: .up).draw(
|
||||
in: CGRect(x: padding, y: padding,
|
||||
width: contentSize.width, height: contentSize.height)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extension UIImage {
|
||||
|
||||
Reference in New Issue
Block a user