feat(iOS): 更新 MNN 后端模型配置以优化性能。将 MNN 主模型从 Qwen3.5-4B(~2.64GiB)降级为 Qwen3.5-2B(~1.1GiB),因为 4B 版本实测运行过慢,影响用户体验。iPhone 17+/SME2 设备使用 2B 模型,保留 MLX 兜底方案用于模拟器和备用场景,确保 AI 推理性能和存储效率的平衡。
154 lines
5.7 KiB
Swift
154 lines
5.7 KiB
Swift
import SwiftUI
|
|
|
|
/// Inference-engine settings screen: switches between MNN (CPU/SME2, the evaluated path)
/// and MLX (GPU, fallback), and shows the SME2 detection status.
///
/// Switching only changes the persisted choice; the next AI call (prepare/generate)
/// loads with the newly selected engine.
struct InferenceSettingsView: View {
    /// Raw value of the persisted engine preference, stored under `kk.inferenceEngine`.
    @AppStorage("kk.inferenceEngine") private var engineRaw = EnginePreference.auto.rawValue

    /// The persisted preference decoded from `engineRaw`; `.auto` when the raw value is unknown.
    private var selected: EnginePreference {
        guard let stored = EnginePreference(rawValue: engineRaw) else { return .auto }
        return stored
    }

    var body: some View {
        ScrollView {
            VStack(spacing: 12) {
                titleBar

                ForEach(EnginePreference.allCases, id: \.self) { engineRow($0) }

                sme2Card
                noteCard
            }
            .padding(.horizontal, 16)
            .padding(.vertical, 20)
        }
        .background(Tj.Palette.sand.ignoresSafeArea())
    }

    /// Leading-aligned screen title.
    // NOTE(review): this literal goes through `Text`'s localized-key initializer while the
    // row subtitles use `String(appLoc:)` — confirm both resolve through the same tables.
    private var titleBar: some View {
        HStack {
            Text("推理引擎")
                .font(.tjTitle())
                .foregroundStyle(Tj.Palette.text)
            Spacer()
        }
        .padding(.top, 4)
        .padding(.bottom, 6)
    }

    /// Builds one tappable card for `engine`.
    ///
    /// The row is dimmed and disabled when the engine is unavailable, and shows a
    /// checkmark when it matches the persisted selection. Tapping an available row
    /// writes its raw value to `engineRaw`.
    private func engineRow(_ engine: EnginePreference) -> some View {
        let selectable = isAvailable(engine)
        let isCurrent = engine == selected
        let badgeFill = isCurrent ? Tj.Palette.amber.opacity(0.25) : Tj.Palette.sand2
        let badgeTint = isCurrent ? Tj.Palette.ink : Tj.Palette.text2

        return Button(action: {
            // Kept alongside `.disabled` below, mirroring the original double check.
            if selectable {
                engineRaw = engine.rawValue
            }
        }, label: {
            HStack(spacing: 12) {
                ZStack {
                    Circle().fill(badgeFill)
                    Image(systemName: iconName(engine))
                        .font(.tjScaled(18))
                        .foregroundStyle(badgeTint)
                }
                .frame(width: 44, height: 44)

                VStack(alignment: .leading, spacing: 2) {
                    Text(engine.displayName)
                        .font(.tjScaled(15, weight: .semibold))
                        .foregroundStyle(Tj.Palette.text)
                    Text(subtitle(engine, available: selectable))
                        .font(.tjScaled(12))
                        .foregroundStyle(Tj.Palette.text3)
                        .lineLimit(2)
                }

                Spacer()

                if isCurrent {
                    Image(systemName: "checkmark.circle.fill")
                        .font(.tjScaled(18))
                        .foregroundStyle(Tj.Palette.leaf)
                }
            }
            .padding(14)
            .tjCard()
            .opacity(selectable ? 1 : 0.45)
        })
        .buttonStyle(.plain)
        .disabled(!selectable)
    }

    /// `.auto` is always available; a concrete engine reports its own availability.
    private func isAvailable(_ engine: EnginePreference) -> Bool {
        switch engine {
        case .auto: true
        case .mnn: InferenceEngine.mnn.isAvailable
        case .mlx: InferenceEngine.mlx.isAvailable
        }
    }

    /// SF Symbol name used for the badge of each engine preference.
    private func iconName(_ engine: EnginePreference) -> String {
        switch engine {
        case .auto: "wand.and.stars"
        case .mnn: "cpu.fill"
        case .mlx: "bolt.fill"
        }
    }

    /// Secondary line shown under the engine name.
    ///
    /// - Parameters:
    ///   - engine: The preference the row represents.
    ///   - available: Whether that preference can be selected; only consulted for `.mnn`.
    /// - Returns: A localized description of what choosing `engine` means on this device.
    private func subtitle(_ engine: EnginePreference, available: Bool) -> String {
        switch engine {
        case .auto:
            // Show the engine that auto-resolution actually picked, so the user can see
            // what this device ended up with.
            guard engine.resolved == .mnn else {
                return String(appLoc: "按本机配置选择 · 当前 MLX(MNN 不可用)")
            }
            if InferenceEngine.cpuSupportsSME2 {
                return String(appLoc: "按本机配置选择 · 当前 MNN + SME2")
            }
            return String(appLoc: "按本机配置选择 · 当前 MNN(NEON)")

        case .mnn:
            guard available else {
                return String(appLoc: "本设备/模拟器不可用,自动回退 MLX")
            }
            if InferenceEngine.cpuSupportsSME2 {
                return String(appLoc: "端侧 CPU + SME2 加速 · 挑战赛考核路径")
            }
            return String(appLoc: "端侧 CPU(本机无 SME2,NEON 回退)")

        case .mlx:
            return String(appLoc: "Metal GPU · 兜底 / 对照")
        }
    }

    /// Card reporting the value of `InferenceEngine.cpuSupportsSME2` for this device.
    private var sme2Card: some View {
        let hasSME2 = InferenceEngine.cpuSupportsSME2
        let badgeFill = hasSME2 ? Tj.Palette.leafSoft : Tj.Palette.sand2
        let badgeTint = hasSME2 ? Tj.Palette.ink : Tj.Palette.text2
        let badgeSymbol = hasSME2 ? "checkmark.seal.fill" : "minus.circle"
        let statusText = hasSME2
            ? String(appLoc: "本设备支持,MNN 已启用 SME2 加速")
            : String(appLoc: "本设备不支持(需 A19/iPhone 17+)")

        return HStack(spacing: 12) {
            ZStack {
                Circle().fill(badgeFill)
                Image(systemName: badgeSymbol)
                    .font(.tjScaled(18))
                    .foregroundStyle(badgeTint)
            }
            .frame(width: 44, height: 44)

            VStack(alignment: .leading, spacing: 2) {
                Text("Arm SME2")
                    .font(.tjScaled(15, weight: .medium))
                    .foregroundStyle(Tj.Palette.text)
                Text(statusText)
                    .font(.tjScaled(12))
                    .foregroundStyle(Tj.Palette.text3)
            }

            Spacer()
        }
        .padding(14)
        .tjCard()
    }

    /// Explanatory footnote card shown at the bottom of the list.
    // NOTE(review): literal `Text` here vs `String(appLoc:)` elsewhere — confirm this
    // string is localized the same way as the row subtitles.
    private var noteCard: some View {
        Text("MNN 在端侧 CPU 上以 Arm SME2 指令集加速 Qwen 推理(本地、不上云)。切换后下一次 AI 调用生效。")
            .font(.tjScaled(12))
            .foregroundStyle(Tj.Palette.text3)
            .frame(maxWidth: .infinity, alignment: .leading)
            .padding(14)
            .tjCard()
    }
}
|
|
|
|
// Xcode canvas preview that renders the inference settings screen.
#Preview { InferenceSettingsView() }
|