feat(Me): 性能自检卡 — 后端标识 + prefill/decode 实测 + 引擎对比存档

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
link2026
2026-06-10 06:42:59 +08:00
parent 8494e51823
commit a65c63947b
4 changed files with 260 additions and 65 deletions

View File

@@ -28,13 +28,13 @@ struct ModelManagementView: View {
actionButtons actionButtons
.padding(.top, 4) .padding(.top, 4)
if service.states[.mnnLLM]?.phase == .ready { if service.states[.mnnLLM]?.phase == .ready || service.states[.llm]?.phase == .ready {
NavigationLink { NavigationLink {
ModelSelfTestView() ModelSelfTestView()
} label: { } label: {
HStack(spacing: 6) { HStack(spacing: 6) {
Image(systemName: "play.circle") Image(systemName: "gauge.with.needle")
Text("运行推理自检") Text("性能自检")
} }
.frame(maxWidth: .infinity) .frame(maxWidth: .infinity)
} }

View File

@@ -1,11 +1,13 @@
import SwiftUI import SwiftUI
/// : LLM prompt, + tok/s /// : prompt,(MNN·SME2 / MNN·NEON / MLX·GPU)
/// · , /// prefill / decode , (§12 2/6)
struct ModelSelfTestView: View { struct ModelSelfTestView: View {
@State private var output = "" @State private var output = ""
@State private var phase: Phase = .idle @State private var phase: Phase = .idle
@State private var rate: Double = 0 @State private var rate: Double = 0
@State private var lastResult: BenchmarkResult?
@State private var history: [String: BenchmarkResult] = [:]
private enum Phase: Equatable { private enum Phase: Equatable {
case idle, loading, running, done, failed(String) case idle, loading, running, done, failed(String)
@@ -21,8 +23,6 @@ struct ModelSelfTestView: View {
} }
} }
private let prompt = "用中文一句话介绍肝功能里 ALT 这个指标。"
private var isBusy: Bool { phase == .loading || phase == .running } private var isBusy: Bool { phase == .loading || phase == .running }
private var statusColor: Color { private var statusColor: Color {
@@ -34,79 +34,162 @@ struct ModelSelfTestView: View {
} }
var body: some View { var body: some View {
VStack(alignment: .leading, spacing: 16) { ScrollView {
VStack(alignment: .leading, spacing: 6) { VStack(alignment: .leading, spacing: 16) {
Text("测试 PROMPT") promptCard
.font(.tjScaled( 11, weight: .semibold))
.tracking(0.5)
.foregroundStyle(Tj.Palette.text3)
Text(prompt)
.font(.tjScaled( 14))
.foregroundStyle(Tj.Palette.text)
}
.padding(14)
.frame(maxWidth: .infinity, alignment: .leading)
.tjCard()
HStack {
Text(phase.label)
.font(.tjScaled( 13, weight: .medium))
.foregroundStyle(statusColor)
.lineLimit(1)
Spacer()
if rate > 0 {
Text(String(format: "%.1f tok/s", rate))
.font(.tjScaled( 12, design: .monospaced))
.foregroundStyle(Tj.Palette.text3)
}
}
Button {
Task { await run() }
} label: {
Text(isBusy ? "运行中…" : "运行性能自检").frame(maxWidth: .infinity)
}
.buttonStyle(TjPrimaryButton())
.disabled(isBusy)
if isBusy { AIFlowBar() }
if let r = lastResult { statsCard(r) }
outputCard
if !history.isEmpty { historyCard }
}
.padding(16)
}
.background(Tj.Palette.sand.ignoresSafeArea())
.navigationTitle("性能自检")
.navigationBarTitleDisplayMode(.inline)
.onAppear { history = BenchmarkService.load() }
}
private var promptCard: some View {
VStack(alignment: .leading, spacing: 6) {
Text("测试 PROMPT")
.font(.tjScaled( 11, weight: .semibold))
.tracking(0.5)
.foregroundStyle(Tj.Palette.text3)
Text(BenchmarkService.fixedPrompt)
.font(.tjScaled( 14))
.foregroundStyle(Tj.Palette.text)
}
.padding(14)
.frame(maxWidth: .infinity, alignment: .leading)
.tjCard()
}
private func statsCard(_ r: BenchmarkResult) -> some View {
VStack(alignment: .leading, spacing: 10) {
HStack { HStack {
Text(phase.label) Text("本次结果")
.font(.tjScaled( 13, weight: .medium)) .font(.tjScaled( 12, weight: .semibold))
.foregroundStyle(statusColor) .foregroundStyle(Tj.Palette.text2)
.lineLimit(1)
Spacer() Spacer()
if rate > 0 { TjBadge(text: r.backendLabel, style: .leaf)
Text(String(format: "%.1f tok/s", rate)) }
.font(.tjScaled( 12, design: .monospaced)) HStack(spacing: 0) {
.foregroundStyle(Tj.Palette.text3) metric(String(appLoc: "读入"), r.prefillTokensPerSecond > 0
? String(format: "%.0f tok/s", r.prefillTokensPerSecond) : "")
metric(String(appLoc: "生成"), String(format: "%.1f tok/s", r.decodeTokensPerSecond))
metric(String(appLoc: "总耗时"), String(format: "%.1fs", r.totalSeconds))
}
Text(String(appLoc: "prompt \(r.promptTokens) tok · 生成 \(r.genTokens) tok · 100% 本地"))
.font(.tjScaled( 10, design: .monospaced))
.foregroundStyle(Tj.Palette.text3)
}
.padding(14)
.frame(maxWidth: .infinity, alignment: .leading)
.tjCard()
}
private func metric(_ label: String, _ value: String) -> some View {
VStack(spacing: 3) {
Text(value)
.font(.tjScaled( 15, weight: .semibold, design: .monospaced))
.foregroundStyle(Tj.Palette.text)
Text(label)
.font(.tjScaled( 10))
.foregroundStyle(Tj.Palette.text3)
}
.frame(maxWidth: .infinity)
}
private var outputCard: some View {
ScrollView {
Text(output.isEmpty ? "(暂无输出)" : output)
.font(.system(.footnote, design: .monospaced))
.foregroundStyle(Tj.Palette.text)
.frame(maxWidth: .infinity, alignment: .leading)
.textSelection(.enabled)
.padding(12)
}
.frame(maxHeight: 220)
.background(
RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous)
.fill(Tj.Palette.paper)
)
.overlay(
RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous)
.strokeBorder(Tj.Palette.lineSoft, lineWidth: 1)
)
}
private var historyCard: some View {
VStack(alignment: .leading, spacing: 10) {
Text("各引擎实测对比")
.font(.tjScaled( 12, weight: .semibold))
.foregroundStyle(Tj.Palette.text2)
ForEach(history.keys.sorted(), id: \.self) { key in
if let r = history[key] {
HStack {
Text(key)
.font(.tjScaled( 12, weight: .medium))
.foregroundStyle(Tj.Palette.text)
Spacer()
Text(String(format: String(appLoc: "生成 %.1f tok/s"), r.decodeTokensPerSecond))
.font(.tjScaled( 12, design: .monospaced))
.foregroundStyle(Tj.Palette.leaf)
Text(r.date.formatted(.dateTime.month().day()))
.font(.tjScaled( 10))
.foregroundStyle(Tj.Palette.text3)
}
} }
} }
Text("在「我的 · 推理引擎」切换引擎后再跑一次,即可对比 SME2 与 GPU。")
Button { .font(.tjScaled( 10))
Task { await run() } .foregroundStyle(Tj.Palette.text3)
} label: {
Text(isBusy ? "运行中…" : "运行推理自检").frame(maxWidth: .infinity)
}
.buttonStyle(TjPrimaryButton())
.disabled(isBusy)
ScrollView {
Text(output.isEmpty ? "(暂无输出)" : output)
.font(.system(.footnote, design: .monospaced))
.foregroundStyle(Tj.Palette.text)
.frame(maxWidth: .infinity, alignment: .leading)
.textSelection(.enabled)
.padding(12)
}
.frame(maxHeight: 280)
.background(
RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous)
.fill(Tj.Palette.paper)
)
.overlay(
RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous)
.strokeBorder(Tj.Palette.lineSoft, lineWidth: 1)
)
Spacer()
} }
.padding(16) .padding(14)
.background(Tj.Palette.sand.ignoresSafeArea()) .frame(maxWidth: .infinity, alignment: .leading)
.navigationTitle("推理自检") .tjCard()
.navigationBarTitleDisplayMode(.inline)
} }
@MainActor @MainActor
private func run() async { private func run() async {
output = "" output = ""
rate = 0 rate = 0
lastResult = nil
phase = .loading phase = .loading
do { do {
try await AIRuntime.shared.prepare() let result = try await BenchmarkService.shared.run { piece, r in
phase = .running output += piece
for try await chunk in await AIRuntime.shared.generate(prompt: prompt, maxTokens: 200) { if r > 0 { rate = r }
output += chunk.text if phase == .loading { phase = .running }
rate = chunk.decodeRate
} }
lastResult = result
history = BenchmarkService.load()
phase = .done phase = .done
} catch { } catch {
phase = .failed(error.localizedDescription) phase = .failed(error.localizedDescription)

View File

@@ -0,0 +1,67 @@
import Foundation
/// One benchmark measurement for a single inference backend, stored so that
/// engines such as MNN·SME2 and MLX·GPU can be compared (§12 2/6).
struct BenchmarkResult: Codable, Equatable {
/// Display label of the backend that produced this result; results are persisted keyed by this label.
var backendLabel: String
/// Number of prompt tokens reported by the runtime for the run (0 when no stats were available).
var promptTokens: Int
/// Number of generated tokens reported by the runtime for the run (0 when no stats were available).
var genTokens: Int
/// Prefill (prompt read-in) throughput in tokens per second (0 when no stats were available).
var prefillTokensPerSecond: Double
/// Decode (generation) throughput in tokens per second (0 when no stats were available).
var decodeTokensPerSecond: Double
/// Elapsed time of the generation run, in seconds.
var totalSeconds: Double
/// When the result was recorded.
var date: Date
}
/// Runs the fixed benchmark prompt through `AIRuntime`, gathers the statistics the
/// runtime reports, and stores the latest result per backend in `UserDefaults`.
/// The UI (`ModelSelfTestView`) goes through this type rather than calling
/// `AIRuntime` itself (design note §3.1).
@MainActor
struct BenchmarkService {
    static let shared = BenchmarkService()
    private init() {}

    /// `UserDefaults` key holding the JSON-encoded `[backendLabel: BenchmarkResult]` map.
    nonisolated static let storeKey = "kk.benchmark.results"

    /// Fixed prompt, so that runs on different backends / dates stay comparable.
    static let fixedPrompt = "用中文一句话介绍肝功能里 ALT 这个指标。"

    /// Runs one benchmark pass and persists its result.
    ///
    /// - Parameter onToken: Called on the main actor for every streamed chunk with the
    ///   chunk text and the decode rate reported alongside it, so the UI can update live.
    /// - Returns: The measurement for the currently active backend. Token counts and
    ///   throughput fall back to 0 when the runtime exposes no stats for the run.
    /// - Throws: Whatever `AIRuntime.prepare()` or the generation stream throws.
    func run(onToken: @escaping @MainActor (String, Double) -> Void) async throws -> BenchmarkResult {
        try await AIRuntime.shared.prepare()

        // Time the run with a monotonic clock: wall-clock `Date()` differences can be
        // skewed (or even go negative) if the system clock is adjusted mid-benchmark.
        // Model preparation above is deliberately excluded from the measurement.
        let start = ProcessInfo.processInfo.systemUptime
        let stream = await AIRuntime.shared.generate(prompt: Self.fixedPrompt, maxTokens: 128)
        for try await chunk in stream {
            onToken(chunk.text, chunk.decodeRate)
        }
        let total = ProcessInfo.processInfo.systemUptime - start

        let label = await AIRuntime.shared.activeBackendLabel
        let stats = await AIRuntime.shared.lastGenerateStats
        let result = BenchmarkResult(
            backendLabel: label,
            promptTokens: stats?.promptTokens ?? 0,
            genTokens: stats?.genTokens ?? 0,
            prefillTokensPerSecond: stats?.prefillTokensPerSecond ?? 0,
            decodeTokensPerSecond: stats?.decodeTokensPerSecond ?? 0,
            totalSeconds: total,
            date: .now
        )
        Self.save(result)
        return result
    }

    // MARK: - Persistence (nonisolated: UserDefaults is thread-safe, so these may run off the main actor)

    /// Stores `result` under its `backendLabel`, replacing any earlier result for that backend.
    ///
    /// Persistence is best-effort: if encoding fails, the stored data is left unchanged.
    /// - Parameter defaults: Store to write to; injectable so tests can use an isolated suite.
    nonisolated static func save(_ result: BenchmarkResult, defaults: UserDefaults = .standard) {
        var all = load(defaults: defaults)
        all[result.backendLabel] = result
        if let data = try? JSONEncoder().encode(all) {
            defaults.set(data, forKey: storeKey)
        }
    }

    /// Loads every stored result, keyed by backend label.
    ///
    /// - Parameter defaults: Store to read from; injectable so tests can use an isolated suite.
    /// - Returns: The stored map, or an empty dictionary when nothing is stored or the data cannot be decoded.
    nonisolated static func load(defaults: UserDefaults = .standard) -> [String: BenchmarkResult] {
        guard let data = defaults.data(forKey: storeKey),
              let all = try? JSONDecoder().decode([String: BenchmarkResult].self, from: data) else {
            return [:]
        }
        return all
    }
}

View File

@@ -0,0 +1,45 @@
import Testing
import Foundation
@testable import
/// Persistence tests for `BenchmarkService.save` / `BenchmarkService.load`.
struct BenchmarkStoreTests {
    /// Returns an emptied `UserDefaults` suite dedicated to one test, so that tests
    /// Swift Testing may run in parallel do not share stored state.
    ///
    /// - Parameter name: Suffix that makes the suite name unique per test.
    /// - Throws: A recorded test failure (instead of a crash) if the suite cannot be created.
    private func freshDefaults(_ name: String) throws -> UserDefaults {
        let suite = "test.kk.benchmark.\(name)"
        let defaults = try #require(UserDefaults(suiteName: suite))
        // Clear anything left behind by a previous test run.
        defaults.removePersistentDomain(forName: suite)
        return defaults
    }

    /// Results for different backends are stored side by side.
    @Test func savesAndLoadsPerBackend() throws {
        let d = try freshDefaults("savesAndLoads")
        let mnn = BenchmarkResult(backendLabel: "MNN · SME2", promptTokens: 30, genTokens: 80,
                                  prefillTokensPerSecond: 120, decodeTokensPerSecond: 25,
                                  totalSeconds: 4.2, date: .now)
        let mlx = BenchmarkResult(backendLabel: "MLX · GPU", promptTokens: 30, genTokens: 80,
                                  prefillTokensPerSecond: 300, decodeTokensPerSecond: 40,
                                  totalSeconds: 2.5, date: .now)
        BenchmarkService.save(mnn, defaults: d)
        BenchmarkService.save(mlx, defaults: d)

        let all = BenchmarkService.load(defaults: d)
        #expect(all.count == 2)
        #expect(all["MNN · SME2"]?.decodeTokensPerSecond == 25)
        // Also check the second backend, so a save that clobbers other keys is caught.
        #expect(all["MLX · GPU"]?.decodeTokensPerSecond == 40)
    }

    /// Saving again for the same backend replaces the earlier result.
    @Test func overwritesSameBackend() throws {
        let d = try freshDefaults("overwrites")
        let old = BenchmarkResult(backendLabel: "MLX · GPU", promptTokens: 1, genTokens: 1,
                                  prefillTokensPerSecond: 1, decodeTokensPerSecond: 1,
                                  totalSeconds: 1, date: .now)
        var new = old
        new.decodeTokensPerSecond = 99
        BenchmarkService.save(old, defaults: d)
        BenchmarkService.save(new, defaults: d)
        #expect(BenchmarkService.load(defaults: d)["MLX · GPU"]?.decodeTokensPerSecond == 99)
    }

    /// Loading from a store with no saved data yields an empty dictionary.
    @Test func loadOnEmptyReturnsEmpty() throws {
        #expect(BenchmarkService.load(defaults: try freshDefaults("loadEmpty")).isEmpty)
    }
}