feat(iOS): 更新MNN后端模型配置优化性能

将MNN主模型从Qwen3.5-4B(~2.64GiB)降级为Qwen3.5-2B(~1.1GiB),因为4B版本
实测运行过慢,影响用户体验。iPhone17+/SME2设备使用2B模型,保留MLX
兜底方案用于模拟器和备用场景,确保AI推理性能和存储效率的平衡。
This commit is contained in:
link2026
2026-06-09 22:20:07 +08:00
parent ca5a3fa38b
commit b79ae54b7b
40 changed files with 1327 additions and 452 deletions

View File

@@ -28,7 +28,7 @@ final class HealthExport {
var inferredLabelCN: String?
// demo
-/// tag, "Qwen3.5-2B-4bit"
+/// tag, "Qwen3.5-2B-MNN"(iPhone17+ ) "Qwen3.5-2B-4bit"(MLX )
var modelTag: String
/// tok/s, demo #6 Live Activity
var decodeRate: Double
@@ -44,7 +44,7 @@ final class HealthExport {
inferredTimeToDate: Date? = nil,
inferredIntent: String? = nil,
inferredLabelCN: String? = nil,
-modelTag: String = "Qwen3.5-2B-4bit",
+modelTag: String = "Qwen3.5-2B-MNN",
decodeRate: Double = 0) {
self.prompt = prompt
self.content = content