Files
kangkang/康康/AI/AIRuntime.swift
link2026 836f3d4234 ```
feat(AI): 统一多模态模型架构,整合文本和视觉推理路径

- 将文本生成和VL(图→文)功能合并到单一的Qwen3.5-4B多模态MNN模型
- 移除独立的Qwen3-VL-4B模型依赖,MLX VL改为使用.llm的多模态模型
- 更新ModelKind枚举,新增userFacing集合用于面向用户展示
- MNN后端现在同时支持文本和视觉任务,模拟器回退到MLX

refactor(models): 模型管理和界面调整以适应新的多模态架构

- 更新模型管理界面,只显示统一的Qwen3.5-4B(MNN)模型给用户
- 修改就绪状态检查逻辑,使用ModelKind.userFacing替代allCases
- 更新模型文件清单,从Qwen3.5-2B升级到Qwen3.5-4B-4bit
- 调整模型管理页面UI,突出MNN+SME2端侧加速功能

feat(camera): 添加拍照识别引擎切换功能

- 实现双路径拍照识别:Apple Vision OCR + 文本模型 和 Qwen3-VL直接识别
- 添加预处理逻辑,优化Qwen3-VL对窄长区域图片的识别效果
- 在模型管理页面添加拍照识别引擎选择组件
- 提供用户界面选项,在两种识别方式间切换

style(ui): 优化输入框样式和颜色主题一致性

- 为指标快速表单添加浅色主题偏好
- 统一所有文本输入框的颜色样式(theme)
- 创建EntryInputField组件,替换原有的单行输入+按钮模式
- 实现聊天框风格的条目输入,支持多行自适应和圆形发送按钮

fix(build): 修正Xcode项目配置中的重复框架搜索路径

- 清理project.pbxproj中重复的FRAMEWORK_SEARCH_PATHS配置
- 重新排列Swift桥接头文件配置确保正确引用
- 修复因路径配置重复导致的编译警告问题

test: 增加区域图片预处理和模型清单测试覆盖

- 添加RegionImageCropper.prepareForQwenVL的单元测试
- 验证宽而矮图片的放大和填充逻辑
- 更新ModelManifestTests中的字节数预期值以匹配新模型
- 修正OCRService中VNRecognizedTextObservation类型的处理
```
2026-06-08 23:25:31 +08:00

350 lines
14 KiB
Swift

import Foundation
import MLX
enum AIRuntimeError: Error, LocalizedError {
case notReady
case modelLoadFailed(String)
case inferenceFailed(String)
var errorDescription: String? {
switch self {
case .notReady: return String(appLoc: "AI 模型尚未准备好")
case .modelLoadFailed(let m): return String(appLoc: "模型加载失败:\(m)")
case .inferenceFailed(let m): return String(appLoc: "推理失败:\(m)")
}
}
}
actor AIRuntime {
static let shared = AIRuntime()
enum Status: Sendable, Equatable {
case notReady
case loading
case ready
case error(String)
}
private(set) var status: Status = .notReady
private(set) var vlStatus: Status = .notReady
private(set) var lastDecodeRate: Double = 0
private var llmSession: LLMSession?
private var vlSession: VLSession?
// MARK: - MNN (CPU/SME2,)
// .mnn , VL() Qwen3.5-4B MNN ()
// MNN,VL 退 MLX Qwen3-VL-4B
private let mnn = MNNBackend()
private(set) var mnnStatus: Status = .notReady
/// MNN (/ Models/Qwen3.5-4B-MNN)
nonisolated static var mnnModelFolder: URL {
ModelStore.shared.localURL(for: .mnnLLM)
}
// MARK: - (§3.1 OOM )
//
// actor , generate() Task;
// analyzeReport await actor,LLM VL,
// GPU App jetsam
//(MEMORY in-flight )
//
// actor (count = 1):( + )
// await acquireGate(), releaseGate()actor
// gateBusy / gateWaiters
private var gateBusy = false
private var gateWaiters: [CheckedContinuation<Void, Never>] = []
private func acquireGate() async {
if !gateBusy {
gateBusy = true
return
}
await withCheckedContinuation { (cont: CheckedContinuation<Void, Never>) in
gateWaiters.append(cont)
}
// releaseGate (gateBusy true)
}
private func releaseGate() {
if gateWaiters.isEmpty {
gateBusy = false
} else {
// ,gateBusy true,
let next = gateWaiters.removeFirst()
next.resume()
}
}
private init() {}
/// App : MLX GPU , reuse cache
/// App ( CPU, Metal abort)
/// increased-memory-limit entitlement + LLM/VL , jetsam OOM
nonisolated static func configureMLXMemory() {
#if !targetEnvironment(simulator)
// 256MB cache : 3GB MB
MLX.Memory.cacheLimit = 256 * 1024 * 1024
#endif
}
/// ,
/// :.mnn MNN(CPU/SME2);.mlx MLX(GPU)
func prepare() async throws {
// MNN MNN;( MLX, MNN )退 MLX,
// App (Phase 5)
let mnnReady = ModelStore.shared.isComplete(for: .mnnLLM)
if InferenceEngine.current == .mnn, mnnReady {
try await prepareMNN()
return
}
// MLX: MNN ()
await unloadMNN()
// ,
// return: ready, generate
// `guard status == .ready` ()
while status == .loading {
try await Task.sleep(nanoseconds: 80_000_000)
}
if status == .ready { return }
// isComplete() isReady( config.json):config.json ,
// isReady true safetensors ModelDownloadService
// ( isComplete)
guard ModelStore.shared.isComplete(for: .llm) else {
status = .error("LLM 模型未就绪")
throw AIRuntimeError.notReady
}
// :( VL ), VL + LLM,
// VL + LLM OOM
await acquireGate()
defer { releaseGate() }
// :, load
if status == .ready { return }
// OOM (§3.1):LLM(~1GB) VL(~3GB), App jetsam
unloadVL()
status = .loading
do {
let session = try await LLMSession.load(
folderURL: ModelStore.shared.localURL(for: .llm)
)
self.llmSession = session
status = .ready
} catch {
status = .error("\(error)")
throw AIRuntimeError.modelLoadFailed("\(error)")
}
}
/// MNN : MLX LLM/VL
private func prepareMNN() async throws {
while mnnStatus == .loading {
try await Task.sleep(nanoseconds: 80_000_000)
}
if mnnStatus == .ready { return }
let folder = Self.mnnModelFolder
guard ModelStore.shared.isComplete(for: .mnnLLM) else {
mnnStatus = .error("MNN 模型未就绪")
throw AIRuntimeError.notReady
}
await acquireGate()
defer { releaseGate() }
if mnnStatus == .ready { return }
// : MLX LLM/VL, MNN
unloadLLM()
unloadVL()
mnnStatus = .loading
do {
try await mnn.load(folderURL: folder)
mnnStatus = .ready
} catch {
mnnStatus = .error("\(error)")
throw AIRuntimeError.modelLoadFailed("\(error)")
}
}
/// MNN,
private func unloadMNN() async {
guard mnnStatus != .notReady else { return }
await mnn.unload()
mnnStatus = .notReady
MLX.Memory.clearCache()
}
/// await prepare()
/// :, actor LLMSession await
func generate(prompt: String, maxTokens: Int = 256) -> AsyncThrowingStream<TokenChunk, Error> {
if InferenceEngine.current == .mnn, mnnStatus == .ready {
return mnnGenerate(prompt: prompt, maxTokens: maxTokens)
}
// actor ,Task 访 self.status / self.llmSession
let snapshotStatus = status
let snapshotSession = llmSession
return AsyncThrowingStream { continuation in
let task = Task {
guard snapshotStatus == .ready, let session = snapshotSession else {
continuation.finish(throwing: AIRuntimeError.notReady)
return
}
// : LLM VL / ,
await self.acquireGate()
do {
// session.generate actor , await
let stream = await session.generate(prompt: prompt, maxTokens: maxTokens)
for try await chunk in stream {
// (UI)/, checkCancellation Task 退,
// session onTermination, MLX , GPU
try Task.checkCancellation()
// Task generate() , AIRuntime actor ;
// actor recordRate await
self.recordRate(chunk.decodeRate)
continuation.yield(chunk)
}
continuation.finish()
} catch {
continuation.finish(throwing: AIRuntimeError.inferenceFailed("\(error)"))
}
// / / (checkCancellation catch ),
// ,
self.releaseGate()
}
// / Task( LLMSession / HealthExportService )
continuation.onTermination = { _ in task.cancel() }
}
}
/// MNN(CPU/SME2) MLX :
private func mnnGenerate(prompt: String, maxTokens: Int) -> AsyncThrowingStream<TokenChunk, Error> {
let ready = (mnnStatus == .ready)
return AsyncThrowingStream { continuation in
let task = Task {
guard ready else {
continuation.finish(throwing: AIRuntimeError.notReady)
return
}
await self.acquireGate()
do {
let stream = await self.mnn.generate(prompt: prompt, maxTokens: maxTokens)
for try await chunk in stream {
try Task.checkCancellation()
self.recordRate(chunk.decodeRate)
continuation.yield(chunk)
}
continuation.finish()
} catch {
continuation.finish(throwing: AIRuntimeError.inferenceFailed("\(error)"))
}
self.releaseGate()
}
continuation.onTermination = { _ in task.cancel() }
}
}
private func recordRate(_ rate: Double) {
if rate > 0 { lastDecodeRate = rate }
}
// MARK: - VL
/// VL , load
func prepareVL() async throws {
// MNN :VL MNN (+), prepareMNN
if InferenceEngine.current == .mnn, ModelStore.shared.isComplete(for: .mnnLLM) {
try await prepareMNN()
return
}
while vlStatus == .loading {
try await Task.sleep(nanoseconds: 80_000_000)
}
if vlStatus == .ready { return }
// MLX VL .llm Qwen3.5-4B (VLMModelFactory qwen3_5 ),
// Qwen3-VL-4B isComplete ,
guard ModelStore.shared.isComplete(for: .llm) else {
vlStatus = .error("VL 模型未就绪")
throw AIRuntimeError.notReady
}
// :( LLM ), LLM + VL
// App 退
await acquireGate()
defer { releaseGate() }
if vlStatus == .ready { return }
// OOM (§3.1): VL(~3GB) LLM(~1GB), jetsam
unloadLLM()
await unloadMNN()
vlStatus = .loading
do {
let session = try await VLSession.load(
folderURL: ModelStore.shared.localURL(for: .llm)
)
self.vlSession = session
vlStatus = .ready
} catch {
vlStatus = .error("\(error)")
throw AIRuntimeError.modelLoadFailed("\(error)")
}
}
// MARK: - (OOM )
/// LLM, ModelContainer MLX
/// :(prepareVL ), LLM ,
private func unloadLLM() {
guard llmSession != nil else { return }
llmSession = nil
status = .notReady
MLX.Memory.clearCache()
}
/// VL, ModelContainer MLX
private func unloadVL() {
guard vlSession != nil else { return }
vlSession = nil
vlStatus = .notReady
MLX.Memory.clearCache()
}
/// JSON ( VLPrompts.reportExtraction )
/// + 退(§3.2)
/// LLM.generate() , OOM
func analyzeReport(imageURLs: [URL],
prompt: String,
maxTokens: Int = 512) async throws -> String {
// MNN : MNN
if InferenceEngine.current == .mnn, mnnStatus == .ready {
await acquireGate()
defer { releaseGate() }
do {
return try await mnn.analyze(imageURLs: imageURLs, prompt: prompt, maxTokens: maxTokens)
} catch {
throw AIRuntimeError.inferenceFailed("\(error)")
}
}
guard vlStatus == .ready, let session = vlSession else {
throw AIRuntimeError.notReady
}
await acquireGate()
defer { releaseGate() }
do {
return try await session.analyze(
imageURLs: imageURLs,
prompt: prompt,
maxTokens: maxTokens
)
} catch {
throw AIRuntimeError.inferenceFailed("\(error)")
}
}
}