```

feat(iOS): 更新MNN后端模型配置以优化性能。将MNN主模型从Qwen3.5-4B(~2.64GiB)降级为Qwen3.5-2B(~1.1GiB)，因为4B版本实测运行过慢，影响用户体验。iPhone17+/SME2设备使用2B模型，并保留MLX兜底方案用于模拟器和备用场景，确保AI推理性能和存储效率的平衡。
```
2026-06-09 22:20:07 +08:00
parent ca5a3fa38b
commit b79ae54b7b
40 changed files with 1327 additions and 452 deletions
--- a/康康/AI/InferenceEngine.swift
+++ b/康康/AI/InferenceEngine.swift
@@ -26,16 +26,52 @@ nonisolated enum InferenceEngine: String, CaseIterable, Sendable {

    private static let key = "kk.inferenceEngine"

-    /// 当前选择。无效/不可用时回退到 .mlx(保证总有可用引擎)。真机默认 .mnn。
+    /// The concrete engine actually used for this call, resolved from `preference` (which may be .auto).
+    /// Per the original note, consumers such as AIRuntime / MeView should read only this; it yields .mnn or .mlx.
+    /// After resolving, availability is checked once more: if the resolved engine is unavailable, .mlx is returned.
    static var current: InferenceEngine {
-        get {
-            let raw = UserDefaults.standard.string(forKey: key)
-            let chosen = raw.flatMap(InferenceEngine.init(rawValue:)) ?? .mnn
-            return chosen.isAvailable ? chosen : .mlx
-        }
-        set { UserDefaults.standard.set(newValue.rawValue, forKey: key) }
+        let resolved = preference.resolved
+        return resolved.isAvailable ? resolved : .mlx
    }

    /// 运行时探测:CPU 是否支持 SME2(A19/iPhone17+)。用于 UI 展示加速状态。
    static var cpuSupportsSME2: Bool { MNNLLMBridge.cpuSupportsSME2() }
+
+    // MARK: - User preference (auto / mnn / mlx)
+
+    /// The user's engine choice (from the settings screen, per the original note). A missing or unrecognized stored value reads as .auto.
+    /// Stored under the same UserDefaults key the concrete engine used before, so previously written "mnn"/"mlx" values still parse.
+    static var preference: EnginePreference {
+        get {
+            let raw = UserDefaults.standard.string(forKey: key)
+            return raw.flatMap(EnginePreference.init(rawValue:)) ?? .auto
+        }
+        set { UserDefaults.standard.set(newValue.rawValue, forKey: key) }
+    }
+}
+
+/// The user's inference-engine preference: the concrete engines plus an extra .auto option.
+/// - auto: picks automatically for this device. Per the original note, MNN is preferred on real hardware
+///         (the evaluated path, incl. SME2/NEON); when MNN is unavailable (e.g. simulator) it falls back to MLX.
+nonisolated enum EnginePreference: String, CaseIterable, Sendable {
+    case auto
+    case mnn
+    case mlx
+
+    var displayName: String {
+        switch self {
+        case .auto: return "自动"
+        case .mnn:  return InferenceEngine.mnn.displayName
+        case .mlx:  return InferenceEngine.mlx.displayName
+        }
+    }
+
+    /// Maps the preference to a concrete engine. Explicit .mnn/.mlx are returned without an availability check (that fallback is left to `InferenceEngine.current`); .auto returns .mnn if it is available, otherwise .mlx.
+    var resolved: InferenceEngine {
+        switch self {
+        case .mnn:  return .mnn
+        case .mlx:  return .mlx
+        case .auto: return InferenceEngine.mnn.isAvailable ? .mnn : .mlx
+        }
+    }
 }