feat(AI): 集成MNN推理引擎替换MLX作为主AI运行时

- 引入MNN(alibaba) + Arm SME2 + CPU作为主AI运行时,支持A19/iPhone17的
  SME2和A17的NEON加速
- 添加MLX Swift作为兜底GPU推理方案,实现双后端切换机制
- 使用单一Qwen3.5-2B多模态模型(1.2GB),替代原有的LLM+VL分离架构
- 实现InferenceEngine.current引擎选择逻辑,真机默认MNN,模拟器回退MLX
- 更新AIAgent架构,通过MNNLLMBridge(ObjC++) → MNNBackend进行推理
- 修改队列机制防止并发推理导致OOM,使用信号量闸门控制显存占用
- 更新文档中的技术栈说明、模块边界和周次交付计划
```
This commit is contained in:
link2026
2026-06-15 09:24:59 +08:00
parent 6c6a950140
commit 9d856fcfc4
37 changed files with 2605 additions and 430 deletions

View File

@@ -33,7 +33,9 @@ struct ParsedReport: Sendable {
var isEmpty: Bool { indicators.isEmpty }
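/// An empty report, used as the fallback value when recognition fails (presumably backing the manual-entry UI — confirm).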
static func empty(date: Date = .now) -> ParsedReport {
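/// nonisolated: this type appears to default to MainActor isolation, but CaptureService (an actor) calls this
/// synchronously from extractReportMeta, so it must be nonisolated to be callable from that actor without `await` (Swift 6).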
nonisolated static func empty(date: Date = .now) -> ParsedReport {
ParsedReport(
title: "",
typeRaw: ReportType.other.rawValue,
@@ -78,6 +80,40 @@ actor CaptureService {
try await runVL(on: assets)
}
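/// Extracts report meta only: **OCR text in, a small JSON object out**.
/// Running the 2B model on this path risks OOM (a jetsam kill), so the work is kept lightweight.
/// Flow: Vision OCR (fast, reportedly <1s per image) → LLM extracts {title,type,date,institution} (~50 tokens).
/// Failure: empty OCR text, a model prepare/generate error, or unparsable output all return an empty meta with `recognized: false` so the caller can degrade (§3.2).
/// `indicators` is always cleared in the returned meta.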
func extractReportMeta(assets: [FileVault.SavedAsset]) async -> (meta: ParsedReport, recognized: Bool) {
    // Every failure path yields the same result: a blank meta flagged as not recognized.
    func unrecognized() -> (meta: ParsedReport, recognized: Bool) {
        (meta: ParsedReport.empty(), recognized: false)
    }

    // Resolve each saved asset to its file URL under the vault root, then OCR the pages.
    let pageURLs = assets.map { asset in
        FileVault.shared.rootURL.appendingPathComponent(asset.relativePath)
    }
    let recognizedText = await Self.ocrReference(for: pageURLs)
    if recognizedText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
        // Nothing legible came out of OCR; skip the model entirely.
        return unrecognized()
    }

    var rawOutput = ""
    do {
        // Make sure the runtime is ready before asking it to generate.
        try await AIRuntime.shared.prepare()
        // The meta JSON is short, so the generation budget is capped at 256 tokens.
        let tokenStream = await AIRuntime.shared.generate(
            prompt: VLPrompts.reportMetaFromText(recognizedText),
            maxTokens: 256
        )
        for try await piece in tokenStream {
            rawOutput += piece.text
        }
    } catch {
        // Preparation and streaming failures are handled identically.
        return unrecognized()
    }

    // Run the output through stripThink, then parse it as report JSON.
    let jsonText = Self.stripThink(rawOutput)
    guard let decoded = try? Self.parseReportJSON(jsonText, pageCount: assets.count) else {
        return unrecognized()
    }

    // This path reports meta only: discard any indicators the parser produced.
    var metaOnly = decoded
    metaOnly.indicators = []
    return (meta: metaOnly, recognized: true)
}
/// OCR : Vision OCR LLM(Qwen3-1.7B)
/// Report; `CaptureError`,UI 退(§3.2)
/// (MainActor) OCR,OCR actor, UIImage actor
@@ -169,8 +205,17 @@ actor CaptureService {
private static func ocrReference(for urls: [URL]) async -> String {
var pages: [String] = []
for (idx, url) in urls.prefix(4).enumerated() {
guard let src = CGImageSourceCreateWithURL(url as CFURL, nil),
let cg = CGImageSourceCreateImageAtIndex(src, 0, nil) else { continue }
guard let src = CGImageSourceCreateWithURL(url as CFURL, nil) else { continue }
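// Downsample before OCR: a ~4000px original decodes to roughly 48MB which, on top of the VL model's memory,
// risks a jetsam kill. Cap the longest side at 3000px before handing the image to Vision.
// NOTE(review): reconstructed from a garbled comment — presumably only the OCR input is downsampled and OCR accuracy is unaffected; confirm.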
let thumbOptions: [CFString: Any] = [
kCGImageSourceCreateThumbnailFromImageAlways: true,
kCGImageSourceCreateThumbnailWithTransform: true,
kCGImageSourceShouldCacheImmediately: true,
kCGImageSourceThumbnailMaxPixelSize: 3000
]
guard let cg = CGImageSourceCreateThumbnailAtIndex(src, 0, thumbOptions as CFDictionary) else { continue }
guard let text = try? await OCRService.recognizeText(in: cg),
!text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { continue }
pages.append(urls.count > 1 ? "【第 \(idx + 1) 页】\n\(text)" : text)