feat(iOS): 更新MNN后端模型配置优化性能

将MNN主模型从Qwen3.5-4B(~2.64GiB)降级为Qwen3.5-2B(~1.1GiB),因为4B版本
实测运行过慢,影响用户体验。iPhone17+/SME2设备使用2B模型,保留MLX
兜底方案用于模拟器和备用场景,确保AI推理性能和存储效率的平衡。
This commit is contained in:
link2026
2026-06-09 22:20:07 +08:00
parent ca5a3fa38b
commit b79ae54b7b
40 changed files with 1327 additions and 452 deletions

View File

@@ -33,11 +33,11 @@ actor AIRuntime {
private var vlSession: VLSession?
// MARK: - MNN (CPU/SME2,)
// .mnn , VL() Qwen3.5-4B MNN ()
// .mnn , VL() Qwen3.5-2B MNN ()
// MNN,VL 退 MLX Qwen3-VL-4B
private let mnn = MNNBackend()
private(set) var mnnStatus: Status = .notReady
/// MNN (/ Models/Qwen3.5-4B-MNN)
/// MNN (/ Models/Qwen3.5-2B-MNN)
nonisolated static var mnnModelFolder: URL {
    // Look the folder up through the shared ModelStore using its `.mnnLLM` entry,
    // so the location is defined in one place rather than hard-coded here.
    let store = ModelStore.shared
    return store.localURL(for: .mnnLLM)
}
@@ -266,7 +266,7 @@ actor AIRuntime {
}
if vlStatus == .ready { return }
// MLX VL .llm Qwen3.5-4B (VLMModelFactory qwen3_5 ),
// MLX VL .llm Qwen3.5-2B (VLMModelFactory qwen3_5 ),
// Qwen3-VL-4B isComplete ,
guard ModelStore.shared.isComplete(for: .llm) else {
vlStatus = .error("VL 模型未就绪")
@@ -274,7 +274,7 @@ actor AIRuntime {
}
// :( LLM ), LLM + VL
// App 退
// App 退
await acquireGate()
defer { releaseGate() }
if vlStatus == .ready { return }