import Foundation

/// Result of a single performance self-test run.
///
/// Results are archived per backend label so they can be shown side by side in the
/// "MNN·SME2 vs MLX·GPU" comparison (§12, selling points 2/6).
struct BenchmarkResult: Codable, Equatable {
    /// Label of the backend that produced this result; also the key used in the archive.
    var backendLabel: String
    /// Prompt token count reported by the runtime statistics (0 when no statistics were available).
    var promptTokens: Int
    /// Generated token count reported by the runtime statistics (0 when no statistics were available).
    var genTokens: Int
    /// Prefill rate reported by the runtime statistics (0 when no statistics were available).
    var prefillTokensPerSecond: Double
    /// Decode rate reported by the runtime statistics (0 when no statistics were available).
    var decodeTokensPerSecond: Double
    /// Elapsed seconds between requesting the stream and receiving its last chunk.
    var totalSeconds: Double
    /// Moment at which the result was recorded.
    var date: Date
}

/// Performance self-test service: runs a fixed prompt, collects the normalized statistics
/// from `AIRuntime`, and stores them in `UserDefaults` keyed by backend label.
///
/// The UI (`ModelSelfTestView`) reaches `AIRuntime` only through this service (§3.1).
@MainActor
struct BenchmarkService {
    static let shared = BenchmarkService()

    private init() {}

    /// `UserDefaults` key under which the whole archive is stored as JSON data.
    nonisolated static let storeKey = "kk.benchmark.results"

    /// Fixed test prompt: keeping it constant is what makes results comparable
    /// across devices and engines.
    static let fixedPrompt = "用中文一句话介绍肝功能里 ALT 这个指标。"

    // String stand-ins for the non-finite doubles that strict JSON cannot represent.
    // The encoder and the decoder must agree on them.
    nonisolated private static let positiveInfinityToken = "inf"
    nonisolated private static let negativeInfinityToken = "-inf"
    nonisolated private static let nanToken = "nan"

    /// Runs one self-test and archives its result.
    ///
    /// - Parameter onToken: Called for every streamed chunk with the chunk's text and
    ///   decode rate, so the UI can display the output as it arrives.
    /// - Returns: The result that was archived for the active backend.
    /// - Throws: Whatever `AIRuntime.prepare()` or the generation stream throws; in that
    ///   case nothing is archived.
    func run(onToken: @escaping @MainActor (String, Double) -> Void) async throws -> BenchmarkResult {
        try await AIRuntime.shared.prepare()

        // Measure with system uptime rather than by subtracting two `Date()` values:
        // a wall-clock adjustment during the run would skew (or even negate) the duration.
        let startUptime = ProcessInfo.processInfo.systemUptime
        let stream = await AIRuntime.shared.generate(prompt: Self.fixedPrompt, maxTokens: 128)
        for try await chunk in stream {
            onToken(chunk.text, chunk.decodeRate)
        }
        let total = ProcessInfo.processInfo.systemUptime - startUptime

        let label = await AIRuntime.shared.activeBackendLabel
        let stats = await AIRuntime.shared.lastGenerateStats

        // Missing statistics fall back to zeros so a result is still recorded.
        let result = BenchmarkResult(
            backendLabel: label,
            promptTokens: stats?.promptTokens ?? 0,
            genTokens: stats?.genTokens ?? 0,
            prefillTokensPerSecond: stats?.prefillTokensPerSecond ?? 0,
            decodeTokensPerSecond: stats?.decodeTokensPerSecond ?? 0,
            totalSeconds: total,
            date: .now
        )
        Self.save(result)
        return result
    }

    // MARK: - Archive (static helpers covered by unit tests; nonisolated because they only touch UserDefaults and need no main thread)

    /// Stores `result` in the archive, replacing any earlier result with the same backend label.
    ///
    /// Best effort: if encoding fails, the archive is left untouched.
    ///
    /// - Parameters:
    ///   - result: The result to archive.
    ///   - defaults: The store to write to; injectable for tests.
    nonisolated static func save(_ result: BenchmarkResult, defaults: UserDefaults = .standard) {
        var all = load(defaults: defaults)
        all[result.backendLabel] = result
        guard let data = try? makeEncoder().encode(all) else { return }
        defaults.set(data, forKey: storeKey)
    }

    /// Loads the archive of results keyed by backend label.
    ///
    /// - Parameter defaults: The store to read from; injectable for tests.
    /// - Returns: The archived results, or an empty dictionary when nothing is stored
    ///   or the stored data cannot be decoded.
    nonisolated static func load(defaults: UserDefaults = .standard) -> [String: BenchmarkResult] {
        guard let data = defaults.data(forKey: storeKey),
              let all = try? makeDecoder().decode([String: BenchmarkResult].self, from: data)
        else {
            return [:]
        }
        return all
    }

    /// Builds the archive encoder. A default `JSONEncoder` throws on infinity/NaN, which
    /// would make `save` silently drop a result whose rate is not finite.
    nonisolated private static func makeEncoder() -> JSONEncoder {
        let encoder = JSONEncoder()
        encoder.nonConformingFloatEncodingStrategy = .convertToString(
            positiveInfinity: positiveInfinityToken,
            negativeInfinity: negativeInfinityToken,
            nan: nanToken
        )
        return encoder
    }

    /// Builds the archive decoder, mirroring `makeEncoder()`. Archives that contain only
    /// finite numbers decode exactly as they would with a default `JSONDecoder`.
    nonisolated private static func makeDecoder() -> JSONDecoder {
        let decoder = JSONDecoder()
        decoder.nonConformingFloatDecodingStrategy = .convertFromString(
            positiveInfinity: positiveInfinityToken,
            negativeInfinity: negativeInfinityToken,
            nan: nanToken
        )
        return decoder
    }
}