feat(AI): 双后端路由 MNN/MLX,AIRuntime 按引擎分发(Phase 3 核心)
- InferenceEngine:引擎枚举(.mnn 默认 / .mlx 兜底)+ UserDefaults 持久化 + 可用性/SME2 运行时探测(经 MNNLLMBridge) - MNNBackend:actor 封装 MNNLLMBridge 文本流式生成,detached 线程跑同步 response、按 UTF-8 边界 yield TokenChunk,串行化交给 AIRuntime 闸门 - AIRuntime:prepare/generate 按引擎分发;.mnn 且模型就绪→MNN,否则回退 MLX (过渡期 App 始终可用);prepareVL/单模型常驻时互卸 MNN↔MLX 释放内存 公有 API 不变,各 Service 零改动 模拟器 BUILD SUCCEEDED,0 error。引擎切换 UI + SME2 指示留待 Phase 5。 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
41
康康/AI/InferenceEngine.swift
Normal file
41
康康/AI/InferenceEngine.swift
Normal file
@@ -0,0 +1,41 @@
|
||||
import Foundation
|
||||
|
||||
/// Selects which on-device inference engine the app runs.
///
/// - `mnn`: Qwen + MNN + SME2 (CPU). This is the path evaluated by the challenge
///   and the default on real devices.
/// - `mlx`: Qwen + MLX (Metal GPU). Fallback / comparison baseline; the only
///   engine usable on the simulator.
nonisolated enum InferenceEngine: String, CaseIterable, Sendable {
    case mnn, mlx

    /// Human-readable label for this engine.
    var displayName: String {
        switch self {
        case .mnn: "MNN · CPU/SME2"
        case .mlx: "MLX · GPU"
        }
    }

    /// Whether this engine can be used in the current build / on the current device.
    ///
    /// MLX is always reported as available; MNN defers to `MNNLLMBridge.isAvailable()`.
    /// Per the original note, MNN only ships real kernels in the device slice, so the
    /// simulator ends up falling back to MLX.
    var isAvailable: Bool {
        switch self {
        case .mnn: MNNLLMBridge.isAvailable()
        case .mlx: true
        }
    }

    // MARK: - Persistence (UserDefaults; original note: safe to use across actors)

    /// UserDefaults key under which the selected engine's raw value is stored.
    private static let storageKey = "kk.inferenceEngine"

    /// The currently selected engine.
    ///
    /// Reading: a missing or unrecognised stored value is treated as `.mnn`; if the
    /// resulting engine is not available, `.mlx` is returned instead so that a usable
    /// engine is always produced.
    /// Writing: stores the engine's raw value in `UserDefaults.standard` without
    /// checking availability.
    static var current: InferenceEngine {
        get {
            let stored = UserDefaults.standard.string(forKey: storageKey)
            let preferred = stored.flatMap { InferenceEngine(rawValue: $0) } ?? .mnn
            guard preferred.isAvailable else { return .mlx }
            return preferred
        }
        set {
            UserDefaults.standard.set(newValue.rawValue, forKey: storageKey)
        }
    }

    /// Runtime probe for SME2 support on the CPU, as reported by
    /// `MNNLLMBridge.cpuSupportsSME2()`. The original note associates SME2 with
    /// A19 / iPhone 17 and later, and intends this for showing acceleration status in the UI.
    static var cpuSupportsSME2: Bool {
        return MNNLLMBridge.cpuSupportsSME2()
    }
}
|
||||
Reference in New Issue
Block a user