feat(AI): 统一多模态模型架构,整合文本和视觉推理路径 - 将文本生成和VL(图→文)功能合并到单一的Qwen3.5-4B多模态MNN模型 - 移除独立的Qwen3-VL-4B模型依赖,MLX VL改为使用.llm的多模态模型 - 更新ModelKind枚举,新增userFacing集合用于面向用户展示 - MNN后端现在同时支持文本和视觉任务,模拟器回退到MLX refactor(models): 模型管理和界面调整以适应新的多模态架构 - 更新模型管理界面,只显示统一的Qwen3.5-4B(MNN)模型给用户 - 修改就绪状态检查逻辑,使用ModelKind.userFacing替代allCases - 更新模型文件清单,从Qwen3.5-2B升级到Qwen3.5-4B-4bit - 调整模型管理页面UI,突出MNN+SME2端侧加速功能 feat(camera): 添加拍照识别引擎切换功能 - 实现双路径拍照识别:Apple Vision OCR + 文本模型 和 Qwen3-VL直接识别 - 添加预处理逻辑,优化Qwen3-VL对窄长区域图片的识别效果 - 在模型管理页面添加拍照识别引擎选择组件 - 提供用户界面选项,在两种识别方式间切换 style(ui): 优化输入框样式和颜色主题一致性 - 为指标快速表单添加浅色主题偏好 - 统一所有文本输入框的颜色样式(theme) - 创建EntryInputField组件,替换原有的单行输入+按钮模式 - 实现聊天框风格的条目输入,支持多行自适应和圆形发送按钮 fix(build): 修正Xcode项目配置中的重复框架搜索路径 - 清理project.pbxproj中重复的FRAMEWORK_SEARCH_PATHS配置 - 重新排列Swift桥接头文件配置确保正确引用 - 修复因路径配置重复导致的编译警告问题 test: 增加区域图片预处理和模型清单测试覆盖 - 添加RegionImageCropper.prepareForQwenVL的单元测试 - 验证宽而矮图片的放大和填充逻辑 - 更新ModelManifestTests中的字节数预期值以匹配新模型 - 修正OCRService中VNRecognizedTextObservation类型的处理 ```
410 lines
16 KiB
Swift
410 lines
16 KiB
Swift
import SwiftUI
|
|
import AVFoundation
|
|
import UIKit
|
|
import Combine
|
|
|
|
/// Quick abnormal-item capture: a full-frame, single-shot camera screen.
///
/// Shows a full-screen live preview with one shutter button and hands the
/// captured image to `onCapture`. The preview is created with
/// `cropsToBox: false`, so the photo is not cropped to the framing box.
///
/// Per the original author's notes, the region to recognise is selected
/// afterwards on the still image (`RegionAdjustView`), and this screen is only
/// usable on a real device (`QuickRegionCaptureFlow` is said to fall back to a
/// photo picker on the simulator). Neither collaborator is visible in this file.
struct SingleShotCameraView: View {
    /// Called with the captured image after a successful shot.
    let onCapture: (UIImage) -> Void
    /// Called when the user taps one of the cancel buttons.
    let onCancel: () -> Void

    @StateObject private var controller = RegionCameraController()
    @State private var authState: AuthState = .checking
    @State private var isCapturing = false
    @State private var flash = false

    /// Camera permission state; decides which sub-view `body` shows.
    enum AuthState { case checking, authorized, denied }

    var body: some View {
        ZStack {
            Color.black.ignoresSafeArea()

            switch authState {
            case .checking:
                ProgressView().tint(.white)
            case .denied:
                deniedView
            case .authorized:
                RegionCameraPreview(controller: controller, cropsToBox: false)
                    .ignoresSafeArea()
                controlsOverlay
            }

            // White overlay used as a shutter flash while a capture is in flight.
            if flash {
                Color.white.ignoresSafeArea().transition(.opacity)
            }
        }
        .task { await resolveAuth() }
    }

    /// Cancel button (top-left), hint text and shutter button (bottom).
    private var controlsOverlay: some View {
        VStack {
            HStack {
                Button {
                    onCancel()
                } label: {
                    Text("取消")
                        .font(.tjScaled(16, weight: .medium))
                        .foregroundStyle(.white)
                        .padding(.horizontal, 14)
                        .padding(.vertical, 8)
                        .background(Capsule().fill(.black.opacity(0.35)))
                }
                Spacer()
            }
            .padding(.horizontal, 18)
            .padding(.top, 8)

            Spacer()

            Text("拍一张含异常指标的照片 · 拍完再框选")
                .font(.tjScaled(13, weight: .medium))
                .foregroundStyle(.white)
                .padding(.horizontal, 12)
                .padding(.vertical, 6)
                .background(Capsule().fill(.black.opacity(0.4)))
                .padding(.bottom, 14)

            shutterButton
                .padding(.bottom, 36)
        }
    }

    /// Round shutter button; disabled and showing a spinner while capturing.
    private var shutterButton: some View {
        Button {
            capture()
        } label: {
            ZStack {
                Circle().fill(.white).frame(width: 72, height: 72)
                Circle().strokeBorder(.white.opacity(0.6), lineWidth: 3).frame(width: 84, height: 84)
                if isCapturing {
                    ProgressView().tint(.black)
                }
            }
        }
        .disabled(isCapturing)
        .accessibilityLabel("拍摄照片")
    }

    /// Explains that camera permission is missing and offers cancel / open-Settings actions.
    private var deniedView: some View {
        VStack(spacing: 16) {
            Image(systemName: "camera.fill")
                .font(.tjScaled(40))
                .foregroundStyle(.white.opacity(0.8))
            Text("相机权限未开启")
                .font(.tjH2())
                .foregroundStyle(.white)
            Text("异常项快拍需要相机。去「设置 → 康康 → 相机」打开后再回来。")
                .font(.tjScaled(13))
                .foregroundStyle(.white.opacity(0.7))
                .multilineTextAlignment(.center)
                .padding(.horizontal, 36)
            HStack(spacing: 12) {
                Button("取消") { onCancel() }
                    .font(.tjScaled(15))
                    .foregroundStyle(.white)
                    .padding(.horizontal, 18).padding(.vertical, 10)
                    .background(Capsule().strokeBorder(.white.opacity(0.5), lineWidth: 1))
                Button("去设置") {
                    if let url = URL(string: UIApplication.openSettingsURLString) {
                        UIApplication.shared.open(url)
                    }
                }
                .font(.tjScaled(15, weight: .semibold))
                .foregroundStyle(.black)
                .padding(.horizontal, 18).padding(.vertical, 10)
                .background(Capsule().fill(.white))
            }
        }
    }

    /// Triggers one capture through the controller.
    ///
    /// Re-entrant taps are ignored while a capture is pending. The flash and
    /// the busy flag are reset when the controller calls back; `onCapture` is
    /// only invoked when an image was actually produced.
    private func capture() {
        guard !isCapturing else { return }
        isCapturing = true
        withAnimation(.easeOut(duration: 0.08)) { flash = true }
        controller.capture { image in
            withAnimation(.easeIn(duration: 0.15)) { flash = false }
            isCapturing = false
            guard let image else { return }
            onCapture(image)
        }
    }

    /// Resolves the camera permission, prompting the user when it is undetermined.
    private func resolveAuth() async {
        switch AVCaptureDevice.authorizationStatus(for: .video) {
        case .authorized:
            authState = .authorized
        case .notDetermined:
            let granted = await AVCaptureDevice.requestAccess(for: .video)
            authState = granted ? .authorized : .denied
        case .denied, .restricted:
            authState = .denied
        @unknown default:
            // Statuses added by future SDKs are treated as "no access", and the
            // compiler will warn here so the new case gets reviewed explicitly.
            authState = .denied
        }
    }
}
|
|
|
|
// MARK: - AVFoundation 桥接
|
|
|
|
/// Shutter handle owned by the SwiftUI side.
///
/// Holds only a weak reference to the UIKit preview view that actually talks
/// to the camera, and forwards capture requests to it.
final class RegionCameraController: ObservableObject {
    /// The live preview view; `nil` until the representable has created it
    /// or after it has been torn down.
    weak var view: RegionPreviewUIView?

    /// Requests a single photo from the attached preview view.
    ///
    /// - Parameter completion: Receives the image, or `nil` immediately when
    ///   no preview view is attached.
    func capture(_ completion: @escaping (UIImage?) -> Void) {
        if let target = view {
            target.capture(completion: completion)
        } else {
            completion(nil)
        }
    }
}
|
|
|
|
/// SwiftUI wrapper around `RegionPreviewUIView`, the UIKit live camera preview.
struct RegionCameraPreview: UIViewRepresentable {
    /// Handle that gets pointed at the underlying UIKit view so SwiftUI code
    /// can trigger a capture.
    let controller: RegionCameraController

    /// Whether the captured photo is cropped to the centered framing box.
    /// The full-frame single-shot screen passes `false` (whole image returned).
    var cropsToBox: Bool = false

    /// Creates the preview view, applies the crop mode and attaches it to the controller.
    func makeUIView(context: Context) -> RegionPreviewUIView {
        let preview = RegionPreviewUIView()
        preview.cropsToBox = cropsToBox
        controller.view = preview
        return preview
    }

    /// Keeps the existing UIKit view in sync when SwiftUI re-evaluates this
    /// representable with new values; `makeUIView` only runs once.
    func updateUIView(_ uiView: RegionPreviewUIView, context: Context) {
        uiView.cropsToBox = cropsToBox
        if controller.view !== uiView {
            controller.view = uiView
        }
    }

    /// Stops the capture session when SwiftUI removes the view.
    static func dismantleUIView(_ uiView: RegionPreviewUIView, coordinator: ()) {
        uiView.stop()
    }
}
|
|
|
|
/// Live camera preview plus single-photo capture.
///
/// When `cropsToBox` is true the photo is cropped to the centered framing box
/// (`RegionFraming.box(in:)`); otherwise the whole upright image is returned.
final class RegionPreviewUIView: UIView, AVCapturePhotoCaptureDelegate {
    /// Crop mode, read when a captured photo is processed.
    var cropsToBox = false

    private let session = AVCaptureSession()
    private let output = AVCapturePhotoOutput()
    /// Serial queue for starting and stopping the session. Keeping both on one
    /// queue guarantees that a stop can never overtake a still-pending start.
    private let sessionQueue = DispatchQueue(label: "RegionPreviewUIView.session", qos: .userInitiated)
    private var previewLayer: AVCaptureVideoPreviewLayer?
    private var setupDone = false
    private var captureCompletion: ((UIImage?) -> Void)?

    /// Configures the camera once, the first time the view is attached to a window.
    override func didMoveToWindow() {
        super.didMoveToWindow()
        guard !setupDone, window != nil else { return }
        setupDone = true
        configure()
    }

    /// Builds the session (back wide-angle camera in, photo output out),
    /// installs the preview layer and starts the session off the main thread.
    ///
    /// If no camera input can be created the session is left unconfigured and
    /// never started, so `capture(completion:)` reports `nil`.
    private func configure() {
        session.beginConfiguration()
        session.sessionPreset = .photo
        guard let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back),
              let input = try? AVCaptureDeviceInput(device: device),
              session.canAddInput(input) else {
            session.commitConfiguration()
            return
        }
        session.addInput(input)
        if session.canAddOutput(output) { session.addOutput(output) }
        session.commitConfiguration()

        let preview = AVCaptureVideoPreviewLayer(session: session)
        preview.videoGravity = .resizeAspectFill
        preview.frame = bounds
        layer.addSublayer(preview)
        self.previewLayer = preview
        applyPortrait(preview.connection)

        // Started on the session queue rather than inline on the caller's thread.
        sessionQueue.async { [weak self] in
            self?.session.startRunning()
        }
    }

    /// Locks the connection to portrait via `videoRotationAngle` (iOS 17+),
    /// which avoids the deprecated `videoOrientation` API.
    private func applyPortrait(_ connection: AVCaptureConnection?) {
        guard let connection else { return }
        if connection.isVideoRotationAngleSupported(90) {
            connection.videoRotationAngle = 90
        }
    }

    /// Keeps the preview layer sized to the view.
    override func layoutSubviews() {
        super.layoutSubviews()
        previewLayer?.frame = bounds
    }

    /// Captures a single photo.
    ///
    /// - Parameter completion: Called with the resulting image, or with `nil`
    ///   when the session is not running, the photo output has no usable video
    ///   connection, or processing fails.
    func capture(completion: @escaping (UIImage?) -> Void) {
        // `configure()` tolerates a photo output that could not be attached, so
        // make sure a usable video connection exists before asking for a photo
        // instead of letting AVFoundation reject the request.
        guard session.isRunning,
              let connection = output.connection(with: .video),
              connection.isEnabled, connection.isActive else {
            completion(nil)
            return
        }
        captureCompletion = completion
        applyPortrait(connection)
        output.capturePhoto(with: AVCapturePhotoSettings(), delegate: self)
    }

    /// Stops the session if it is running.
    ///
    /// The check and the stop both run on the session queue, after any start
    /// that is still queued there. The session is captured directly so the
    /// stop still happens if the view is released in the meantime.
    func stop() {
        let session = self.session
        sessionQueue.async {
            if session.isRunning {
                session.stopRunning()
            }
        }
    }

    /// Photo delegate callback: decodes the photo, normalises its orientation
    /// and delivers either the whole image or the framing-box crop.
    func photoOutput(_ output: AVCapturePhotoOutput,
                     didFinishProcessingPhoto photo: AVCapturePhoto,
                     error: Error?) {
        let completion = captureCompletion
        captureCompletion = nil
        // Results are always handed over on the main queue, so callers can
        // update SwiftUI state no matter which queue this callback arrives on.
        let deliver: (UIImage?) -> Void = { result in
            DispatchQueue.main.async { completion?(result) }
        }
        guard error == nil,
              let data = photo.fileDataRepresentation(),
              let image = UIImage(data: data) else {
            deliver(nil)
            return
        }
        let upright = image.normalizedUp()
        // Full-frame mode: return the whole image as-is.
        guard cropsToBox, previewLayer != nil else {
            deliver(upright)
            return
        }
        // The crop needs the view's bounds, so it is computed on the main queue.
        DispatchQueue.main.async {
            let viewSize = self.bounds.size
            let box = RegionFraming.box(in: viewSize)
            let cropped = RegionImageCropper.crop(upright, box: box, viewSize: viewSize)
            completion?(cropped)
        }
    }
}
|
|
|
|
// MARK: - 小框几何(旧 fill 裁剪路径保留,供 cropsToBox 用)
|
|
|
|
enum RegionFraming {
    /// Framing box for a view of the given size: horizontally centered and
    /// sitting slightly above the vertical center.
    ///
    /// The box is 84% of the view width; its height is the smaller of 160 and
    /// 28% of the view height. It is shifted up by 6% of the view height.
    ///
    /// - Parameter size: Size of the containing view.
    /// - Returns: The box in the view's coordinates, or `.zero` when either
    ///   dimension is not strictly positive.
    static func box(in size: CGSize) -> CGRect {
        let isUsable = size.width > 0 && size.height > 0
        if !isUsable { return .zero }

        let boxSize = CGSize(width: size.width * 0.84,
                             height: min(160, size.height * 0.28))
        let origin = CGPoint(
            x: (size.width - boxSize.width) / 2,
            y: (size.height - boxSize.height) / 2 - size.height * 0.06
        )
        return CGRect(origin: origin, size: boxSize)
    }
}
|
|
|
|
// MARK: - 裁剪纯函数
|
|
|
|
enum RegionImageCropper {
    /// Clamps a candidate pixel rect to a photo of size `limit` and snaps it
    /// to integral coordinates. The origin is clamped first; the extent is
    /// then limited to what remains on each axis.
    private static func clampedIntegralRect(x: CGFloat, y: CGFloat,
                                            width: CGFloat, height: CGFloat,
                                            within limit: CGSize) -> CGRect {
        let originX = max(0, min(limit.width, x))
        let originY = max(0, min(limit.height, y))
        let extentW = max(0, min(limit.width - originX, width))
        let extentH = max(0, min(limit.height - originY, height))
        return CGRect(x: originX, y: originY, width: extentW, height: extentH).integral
    }

    /// Maps an on-screen box (view points) back to a pixel crop rect of an
    /// `.up` photo, assuming the preview fills `viewSize` with
    /// `.resizeAspectFill` and that photo and preview share source and
    /// portrait orientation.
    ///
    /// Parts of the box outside the photo are clamped away. Returns `.zero`
    /// when any input dimension is not strictly positive.
    static func cropRect(photoPixelSize p: CGSize, box: CGRect, in viewSize: CGSize) -> CGRect {
        let inputsValid = p.width > 0 && p.height > 0 && viewSize.width > 0 && viewSize.height > 0
        guard inputsValid else { return .zero }

        // Aspect-fill: the larger of the two ratios makes the photo cover the view.
        let fillScale = max(viewSize.width / p.width, viewSize.height / p.height)
        let displayedW = p.width * fillScale
        let displayedH = p.height * fillScale
        // Offset of the scaled photo relative to the view (zero or negative).
        let offsetX = (viewSize.width - displayedW) / 2
        let offsetY = (viewSize.height - displayedH) / 2

        return clampedIntegralRect(x: (box.minX - offsetX) / fillScale,
                                   y: (box.minY - offsetY) / fillScale,
                                   width: box.width / fillScale,
                                   height: box.height / fillScale,
                                   within: p)
    }

    /// Crops an `.up` photo to the on-screen box (aspect-fill path).
    /// Falls back to the original image when cropping is not possible.
    static func crop(_ image: UIImage, box: CGRect, viewSize: CGSize) -> UIImage {
        guard let source = image.cgImage else { return image }
        let pixelSize = CGSize(width: source.width, height: source.height)
        let target = cropRect(photoPixelSize: pixelSize, box: box, in: viewSize)
        guard target.width >= 1, target.height >= 1 else { return image }
        guard let piece = source.cropping(to: target) else { return image }
        return UIImage(cgImage: piece, scale: image.scale, orientation: .up)
    }

    /// Aspect-fit variant: the still image is shown with `.scaledToFit` inside
    /// `imageFrame` (view points, typically from
    /// `AVMakeRect(aspectRatio:insideRect:)`); maps the on-screen selection
    /// back to a pixel rect of the photo.
    ///
    /// The scale is derived from the frame width only, which relies on
    /// `imageFrame` having the photo's aspect ratio. Out-of-bounds parts are
    /// clamped. Returns `.zero` for non-positive input dimensions.
    static func cropRectAspectFit(photoPixelSize p: CGSize, box: CGRect, imageFrame f: CGRect) -> CGRect {
        let inputsValid = p.width > 0 && p.height > 0 && f.width > 0 && f.height > 0
        guard inputsValid else { return .zero }

        let fitScale = f.width / p.width
        guard fitScale > 0 else { return .zero }

        return clampedIntegralRect(x: (box.minX - f.minX) / fitScale,
                                   y: (box.minY - f.minY) / fitScale,
                                   width: box.width / fitScale,
                                   height: box.height / fitScale,
                                   within: p)
    }

    /// Crops the sub-image under a selection drawn on the aspect-fit still image.
    /// Returns the input image when no bitmap is available, and the
    /// orientation-normalised image when the crop rect is unusable.
    static func cropAspectFit(_ image: UIImage, box: CGRect, imageFrame: CGRect) -> UIImage {
        let upright = image.normalizedUp()
        guard let source = upright.cgImage else { return image }
        let pixelSize = CGSize(width: source.width, height: source.height)
        let target = cropRectAspectFit(photoPixelSize: pixelSize, box: box, imageFrame: imageFrame)
        guard target.width >= 1, target.height >= 1 else { return upright }
        guard let piece = source.cropping(to: target) else { return upright }
        return UIImage(cgImage: piece, scale: upright.scale, orientation: .up)
    }

    /// Pre-processing for region crops fed to Qwen3-VL.
    ///
    /// The original author notes that wide, short crops tend to lose small
    /// text when the model's processor rescales them, and that this is meant
    /// for the Qwen3-VL branch only (the Apple Vision OCR path keeps the raw crop).
    ///
    /// The image is scaled so its short edge reaches `minimumShortEdge` (never
    /// shrunk by that rule); if the long edge would then exceed
    /// `maximumLongEdge`, the scale is set so the long edge equals that limit.
    /// The result is drawn onto an opaque white canvas with `padding` on every
    /// side, rendered at scale 1.
    ///
    /// - Returns: The padded image, or the orientation-normalised input when
    ///   it has no usable bitmap.
    static func prepareForQwenVL(_ image: UIImage,
                                 minimumShortEdge: CGFloat = 448,
                                 maximumLongEdge: CGFloat = 2400,
                                 padding: CGFloat = 64) -> UIImage {
        let upright = image.normalizedUp()
        guard let bitmap = upright.cgImage else { return upright }

        let pixelSize = CGSize(width: bitmap.width, height: bitmap.height)
        guard pixelSize.width > 0, pixelSize.height > 0 else { return upright }

        let shortEdge = min(pixelSize.width, pixelSize.height)
        let longEdge = max(pixelSize.width, pixelSize.height)
        let upscale = max(1, minimumShortEdge / shortEdge)
        // The long-edge cap wins over the short-edge minimum.
        let factor = longEdge * upscale > maximumLongEdge ? maximumLongEdge / longEdge : upscale

        let contentSize = CGSize(width: max(1, (pixelSize.width * factor).rounded()),
                                 height: max(1, (pixelSize.height * factor).rounded()))
        let canvasSize = CGSize(width: contentSize.width + padding * 2,
                                height: contentSize.height + padding * 2)

        let rendererFormat = UIGraphicsImageRendererFormat.default()
        rendererFormat.scale = 1
        rendererFormat.opaque = true

        let contentRect = CGRect(origin: CGPoint(x: padding, y: padding), size: contentSize)
        return UIGraphicsImageRenderer(size: canvasSize, format: rendererFormat).image { context in
            UIColor.white.setFill()
            context.fill(CGRect(origin: .zero, size: canvasSize))

            UIImage(cgImage: bitmap, scale: 1, orientation: .up).draw(in: contentRect)
        }
    }
}
|
|
|
|
extension UIImage {
    /// Returns an image whose orientation is `.up`, with the original
    /// orientation baked into the pixels, so the backing `CGImage` can be
    /// cropped with rects expressed in the upright coordinate space.
    ///
    /// An image that is already `.up` is returned unchanged.
    func normalizedUp() -> UIImage {
        guard imageOrientation != .up else { return self }

        let rendererFormat = UIGraphicsImageRendererFormat.default()
        rendererFormat.scale = scale

        // Redraw at the same point size and scale; drawing applies the orientation.
        let canvas = CGRect(origin: .zero, size: size)
        return UIGraphicsImageRenderer(size: size, format: rendererFormat).image { _ in
            draw(in: canvas)
        }
    }
}
|