Files
kangkang/康康/AI/InferenceEngine.swift
link2026 b79ae54b7b ```
feat(iOS): 更新MNN后端模型配置优化性能

将MNN主模型从Qwen3.5-4B(~2.64GiB)降级为Qwen3.5-2B(~1.1GiB),因为4B版本
实测运行过慢,影响用户体验。iPhone17+/SME2设备使用2B模型,保留MLX
兜底方案用于模拟器和备用场景,确保AI推理性能和存储效率的平衡。
```
2026-06-09 22:20:07 +08:00

78 lines
2.7 KiB
Swift

import Foundation
/// Inference backend used to run the on-device model.
///
/// - mnn: Qwen on MNN, executed on the CPU (SME2).
/// - mlx: Qwen on MLX, executed on the Metal GPU.
nonisolated enum InferenceEngine: String, CaseIterable, Sendable {
    case mnn, mlx

    /// Short human-readable label for this backend.
    var displayName: String {
        switch self {
        case .mlx: "MLX · GPU"
        case .mnn: "MNN · CPU/SME2"
        }
    }

    /// Whether this backend can be used.
    ///
    /// MLX always reports `true`; MNN defers to `MNNLLMBridge.isAvailable()`.
    /// NOTE(review): the exact conditions under which MNN is unavailable live in
    /// `MNNLLMBridge` and are not visible here — confirm there.
    var isAvailable: Bool {
        switch self {
        case .mnn: MNNLLMBridge.isAvailable()
        case .mlx: true
        }
    }

    // MARK: - Persistence (UserDefaults)

    /// UserDefaults key under which the engine preference string is stored.
    private static let key = "kk.inferenceEngine"

    /// The engine that should actually be used.
    ///
    /// Resolves the stored preference to a concrete backend, then substitutes
    /// `.mlx` when that backend reports itself unavailable. The result is always
    /// either `.mnn` or `.mlx`.
    /// NOTE(review): the original comment names AIRuntime / MeView as consumers —
    /// verify against the call sites.
    static var current: InferenceEngine {
        let candidate = preference.resolved
        guard candidate.isAvailable else { return .mlx }
        return candidate
    }

    /// Whether the CPU supports SME2, as reported by `MNNLLMBridge.cpuSupportsSME2()`.
    ///
    /// NOTE(review): the original comment mentions A19 / iPhone 17+ and UI display;
    /// presumably this is for informational UI only — confirm with callers.
    static var cpuSupportsSME2: Bool {
        MNNLLMBridge.cpuSupportsSME2()
    }

    // MARK: - User preference (auto / mnn / mlx)

    /// The user's stored engine preference.
    ///
    /// Reading returns `.auto` when nothing is stored under the key or when the
    /// stored string is not a valid `EnginePreference` raw value. Stored values
    /// of "mnn" / "mlx" map to the preference with the same raw value.
    /// Writing stores the preference's raw value under the same key.
    static var preference: EnginePreference {
        get {
            guard
                let stored = UserDefaults.standard.string(forKey: key),
                let parsed = EnginePreference(rawValue: stored)
            else {
                return .auto
            }
            return parsed
        }
        set {
            UserDefaults.standard.set(newValue.rawValue, forKey: key)
        }
    }
}
/// The user's engine choice, which may be left to the app via `.auto`.
///
/// - auto: use MNN when it reports itself available, otherwise MLX.
/// - mnn / mlx: pin the corresponding `InferenceEngine`.
nonisolated enum EnginePreference: String, CaseIterable, Sendable {
    case auto, mnn, mlx

    /// Label for this preference; pinned choices reuse the engine's own label.
    var displayName: String {
        switch self {
        case .mnn: InferenceEngine.mnn.displayName
        case .mlx: InferenceEngine.mlx.displayName
        case .auto: "自动"
        }
    }

    /// The concrete engine this preference maps to.
    ///
    /// A pinned preference maps straight to its engine without checking
    /// availability; only `.auto` consults `InferenceEngine.mnn.isAvailable`.
    /// `InferenceEngine.current` applies the availability fallback on top of
    /// this value.
    var resolved: InferenceEngine {
        switch self {
        case .auto:
            guard InferenceEngine.mnn.isAvailable else { return .mlx }
            return .mnn
        case .mnn:
            return .mnn
        case .mlx:
            return .mlx
        }
    }
}