feat(iOS): 更新 MNN 后端模型配置以优化性能

将 MNN 主模型从 Qwen3.5-4B(约 2.64 GiB)降级为 Qwen3.5-2B(约 1.1 GiB),因为 4B 版本
实测运行过慢,影响用户体验。iPhone17+/SME2 设备使用 2B 模型,同时保留 MLX
兜底方案用于模拟器和备用场景,以兼顾 AI 推理性能与存储效率。
This commit is contained in:
link2026
2026-06-09 22:20:07 +08:00
parent ca5a3fa38b
commit b79ae54b7b
40 changed files with 1327 additions and 452 deletions

View File

@@ -2,19 +2,19 @@ import Foundation
nonisolated enum ModelKind: String, CaseIterable {
/// Models/ / CDN
/// Qwen3.5-4B,:
/// - mnnLLM:MNN(CPU/SME2,)+,taobao-mnn ,
/// - llm:MLX(GPU),Qwen3.5-4B-4bit (, qwen3_5)
/// Qwen3.5-2B,:
/// - mnnLLM:MNN(CPU/SME2,)+,taobao-mnn iPhone17+(A19/SME2),
/// - llm:MLX(GPU),Qwen3.5-2B-4bit (, qwen3_5)
/// - vl:(MLX VL .llm ), switch,/
case llm = "Qwen3.5-4B-4bit"
case llm = "Qwen3.5-2B-4bit"
case vl = "Qwen3-VL-4B-Instruct-4bit"
case mnnLLM = "Qwen3.5-4B-MNN"
case mnnLLM = "Qwen3.5-2B-MNN"
var displayName: String {
switch self {
case .llm: return "Qwen3.5-4B (MLX)"
case .llm: return "Qwen3.5-2B (MLX)"
case .vl: return "Qwen3-VL-4B"
case .mnnLLM: return "Qwen3.5-4B (MNN/SME2)"
case .mnnLLM: return "Qwen3.5-2B (MNN/SME2)"
}
}
@@ -25,7 +25,7 @@ nonisolated enum ModelKind: String, CaseIterable {
var sentinelFilename: String { "config.json" }
/// : / /
/// Qwen3.5-4B(MNN,+,)
/// Qwen3.5-2B(MNN,+,iPhone17+ )
/// MLX .llm/.vl ,(),
/// · ,
static let userFacing: [ModelKind] = [.mnnLLM]