feat(Me): 性能自检卡 — 后端标识 + prefill/decode 实测 + 引擎对比存档
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -28,13 +28,13 @@ struct ModelManagementView: View {
|
||||
actionButtons
|
||||
.padding(.top, 4)
|
||||
|
||||
if service.states[.mnnLLM]?.phase == .ready {
|
||||
if service.states[.mnnLLM]?.phase == .ready || service.states[.llm]?.phase == .ready {
|
||||
NavigationLink {
|
||||
ModelSelfTestView()
|
||||
} label: {
|
||||
HStack(spacing: 6) {
|
||||
Image(systemName: "play.circle")
|
||||
Text("运行推理自检")
|
||||
Image(systemName: "gauge.with.needle")
|
||||
Text("性能自检")
|
||||
}
|
||||
.frame(maxWidth: .infinity)
|
||||
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
import SwiftUI
|
||||
|
||||
/// 模型推理自检:加载 LLM 跑一段固定 prompt,流式显示输出 + tok/s。
|
||||
/// 模型就绪后从「我的 · 模型管理」进入,用于现场快速验证本地推理是否正常。
|
||||
/// 性能自检:跑固定 prompt,展示当前后端(MNN·SME2 / MNN·NEON / MLX·GPU)的
|
||||
/// prefill / decode 实测速度,并按后端存档对比 —— 挑战赛考核点的可见证据(§12 卖点 2/6)。
|
||||
struct ModelSelfTestView: View {
|
||||
@State private var output = ""
|
||||
@State private var phase: Phase = .idle
|
||||
@State private var rate: Double = 0
|
||||
@State private var lastResult: BenchmarkResult?
|
||||
@State private var history: [String: BenchmarkResult] = [:]
|
||||
|
||||
private enum Phase: Equatable {
|
||||
case idle, loading, running, done, failed(String)
|
||||
@@ -21,8 +23,6 @@ struct ModelSelfTestView: View {
|
||||
}
|
||||
}
|
||||
|
||||
private let prompt = "用中文一句话介绍肝功能里 ALT 这个指标。"
|
||||
|
||||
private var isBusy: Bool { phase == .loading || phase == .running }
|
||||
|
||||
private var statusColor: Color {
|
||||
@@ -34,79 +34,162 @@ struct ModelSelfTestView: View {
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
VStack(alignment: .leading, spacing: 16) {
|
||||
VStack(alignment: .leading, spacing: 6) {
|
||||
Text("测试 PROMPT")
|
||||
.font(.tjScaled( 11, weight: .semibold))
|
||||
.tracking(0.5)
|
||||
.foregroundStyle(Tj.Palette.text3)
|
||||
Text(prompt)
|
||||
.font(.tjScaled( 14))
|
||||
.foregroundStyle(Tj.Palette.text)
|
||||
}
|
||||
.padding(14)
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
.tjCard()
|
||||
ScrollView {
|
||||
VStack(alignment: .leading, spacing: 16) {
|
||||
promptCard
|
||||
|
||||
HStack {
|
||||
Text(phase.label)
|
||||
.font(.tjScaled( 13, weight: .medium))
|
||||
.foregroundStyle(statusColor)
|
||||
.lineLimit(1)
|
||||
Spacer()
|
||||
if rate > 0 {
|
||||
Text(String(format: "%.1f tok/s", rate))
|
||||
.font(.tjScaled( 12, design: .monospaced))
|
||||
.foregroundStyle(Tj.Palette.text3)
|
||||
}
|
||||
}
|
||||
|
||||
Button {
|
||||
Task { await run() }
|
||||
} label: {
|
||||
Text(isBusy ? "运行中…" : "运行性能自检").frame(maxWidth: .infinity)
|
||||
}
|
||||
.buttonStyle(TjPrimaryButton())
|
||||
.disabled(isBusy)
|
||||
|
||||
if isBusy { AIFlowBar() }
|
||||
|
||||
if let r = lastResult { statsCard(r) }
|
||||
|
||||
outputCard
|
||||
|
||||
if !history.isEmpty { historyCard }
|
||||
}
|
||||
.padding(16)
|
||||
}
|
||||
.background(Tj.Palette.sand.ignoresSafeArea())
|
||||
.navigationTitle("性能自检")
|
||||
.navigationBarTitleDisplayMode(.inline)
|
||||
.onAppear { history = BenchmarkService.load() }
|
||||
}
|
||||
|
||||
private var promptCard: some View {
|
||||
VStack(alignment: .leading, spacing: 6) {
|
||||
Text("测试 PROMPT")
|
||||
.font(.tjScaled( 11, weight: .semibold))
|
||||
.tracking(0.5)
|
||||
.foregroundStyle(Tj.Palette.text3)
|
||||
Text(BenchmarkService.fixedPrompt)
|
||||
.font(.tjScaled( 14))
|
||||
.foregroundStyle(Tj.Palette.text)
|
||||
}
|
||||
.padding(14)
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
.tjCard()
|
||||
}
|
||||
|
||||
private func statsCard(_ r: BenchmarkResult) -> some View {
|
||||
VStack(alignment: .leading, spacing: 10) {
|
||||
HStack {
|
||||
Text(phase.label)
|
||||
.font(.tjScaled( 13, weight: .medium))
|
||||
.foregroundStyle(statusColor)
|
||||
.lineLimit(1)
|
||||
Text("本次结果")
|
||||
.font(.tjScaled( 12, weight: .semibold))
|
||||
.foregroundStyle(Tj.Palette.text2)
|
||||
Spacer()
|
||||
if rate > 0 {
|
||||
Text(String(format: "%.1f tok/s", rate))
|
||||
.font(.tjScaled( 12, design: .monospaced))
|
||||
.foregroundStyle(Tj.Palette.text3)
|
||||
TjBadge(text: r.backendLabel, style: .leaf)
|
||||
}
|
||||
HStack(spacing: 0) {
|
||||
metric(String(appLoc: "读入"), r.prefillTokensPerSecond > 0
|
||||
? String(format: "%.0f tok/s", r.prefillTokensPerSecond) : "—")
|
||||
metric(String(appLoc: "生成"), String(format: "%.1f tok/s", r.decodeTokensPerSecond))
|
||||
metric(String(appLoc: "总耗时"), String(format: "%.1fs", r.totalSeconds))
|
||||
}
|
||||
Text(String(appLoc: "prompt \(r.promptTokens) tok · 生成 \(r.genTokens) tok · 100% 本地"))
|
||||
.font(.tjScaled( 10, design: .monospaced))
|
||||
.foregroundStyle(Tj.Palette.text3)
|
||||
}
|
||||
.padding(14)
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
.tjCard()
|
||||
}
|
||||
|
||||
private func metric(_ label: String, _ value: String) -> some View {
|
||||
VStack(spacing: 3) {
|
||||
Text(value)
|
||||
.font(.tjScaled( 15, weight: .semibold, design: .monospaced))
|
||||
.foregroundStyle(Tj.Palette.text)
|
||||
Text(label)
|
||||
.font(.tjScaled( 10))
|
||||
.foregroundStyle(Tj.Palette.text3)
|
||||
}
|
||||
.frame(maxWidth: .infinity)
|
||||
}
|
||||
|
||||
private var outputCard: some View {
|
||||
ScrollView {
|
||||
Text(output.isEmpty ? "(暂无输出)" : output)
|
||||
.font(.system(.footnote, design: .monospaced))
|
||||
.foregroundStyle(Tj.Palette.text)
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
.textSelection(.enabled)
|
||||
.padding(12)
|
||||
}
|
||||
.frame(maxHeight: 220)
|
||||
.background(
|
||||
RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous)
|
||||
.fill(Tj.Palette.paper)
|
||||
)
|
||||
.overlay(
|
||||
RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous)
|
||||
.strokeBorder(Tj.Palette.lineSoft, lineWidth: 1)
|
||||
)
|
||||
}
|
||||
|
||||
private var historyCard: some View {
|
||||
VStack(alignment: .leading, spacing: 10) {
|
||||
Text("各引擎实测对比")
|
||||
.font(.tjScaled( 12, weight: .semibold))
|
||||
.foregroundStyle(Tj.Palette.text2)
|
||||
ForEach(history.keys.sorted(), id: \.self) { key in
|
||||
if let r = history[key] {
|
||||
HStack {
|
||||
Text(key)
|
||||
.font(.tjScaled( 12, weight: .medium))
|
||||
.foregroundStyle(Tj.Palette.text)
|
||||
Spacer()
|
||||
Text(String(format: String(appLoc: "生成 %.1f tok/s"), r.decodeTokensPerSecond))
|
||||
.font(.tjScaled( 12, design: .monospaced))
|
||||
.foregroundStyle(Tj.Palette.leaf)
|
||||
Text(r.date.formatted(.dateTime.month().day()))
|
||||
.font(.tjScaled( 10))
|
||||
.foregroundStyle(Tj.Palette.text3)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Button {
|
||||
Task { await run() }
|
||||
} label: {
|
||||
Text(isBusy ? "运行中…" : "运行推理自检").frame(maxWidth: .infinity)
|
||||
}
|
||||
.buttonStyle(TjPrimaryButton())
|
||||
.disabled(isBusy)
|
||||
|
||||
ScrollView {
|
||||
Text(output.isEmpty ? "(暂无输出)" : output)
|
||||
.font(.system(.footnote, design: .monospaced))
|
||||
.foregroundStyle(Tj.Palette.text)
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
.textSelection(.enabled)
|
||||
.padding(12)
|
||||
}
|
||||
.frame(maxHeight: 280)
|
||||
.background(
|
||||
RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous)
|
||||
.fill(Tj.Palette.paper)
|
||||
)
|
||||
.overlay(
|
||||
RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous)
|
||||
.strokeBorder(Tj.Palette.lineSoft, lineWidth: 1)
|
||||
)
|
||||
|
||||
Spacer()
|
||||
Text("在「我的 · 推理引擎」切换引擎后再跑一次,即可对比 SME2 与 GPU。")
|
||||
.font(.tjScaled( 10))
|
||||
.foregroundStyle(Tj.Palette.text3)
|
||||
}
|
||||
.padding(16)
|
||||
.background(Tj.Palette.sand.ignoresSafeArea())
|
||||
.navigationTitle("推理自检")
|
||||
.navigationBarTitleDisplayMode(.inline)
|
||||
.padding(14)
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
.tjCard()
|
||||
}
|
||||
|
||||
@MainActor
|
||||
private func run() async {
|
||||
output = ""
|
||||
rate = 0
|
||||
lastResult = nil
|
||||
phase = .loading
|
||||
do {
|
||||
try await AIRuntime.shared.prepare()
|
||||
phase = .running
|
||||
for try await chunk in await AIRuntime.shared.generate(prompt: prompt, maxTokens: 200) {
|
||||
output += chunk.text
|
||||
rate = chunk.decodeRate
|
||||
let result = try await BenchmarkService.shared.run { piece, r in
|
||||
output += piece
|
||||
if r > 0 { rate = r }
|
||||
if phase == .loading { phase = .running }
|
||||
}
|
||||
lastResult = result
|
||||
history = BenchmarkService.load()
|
||||
phase = .done
|
||||
} catch {
|
||||
phase = .failed(error.localizedDescription)
|
||||
|
||||
67
康康/Services/BenchmarkService.swift
Normal file
67
康康/Services/BenchmarkService.swift
Normal file
@@ -0,0 +1,67 @@
|
||||
import Foundation
|
||||
|
||||
/// 单次性能自检结果。按后端标签归档,供「MNN·SME2 vs MLX·GPU」对比展示(§12 卖点 2/6)。
|
||||
struct BenchmarkResult: Codable, Equatable {
|
||||
var backendLabel: String
|
||||
var promptTokens: Int
|
||||
var genTokens: Int
|
||||
var prefillTokensPerSecond: Double
|
||||
var decodeTokensPerSecond: Double
|
||||
var totalSeconds: Double
|
||||
var date: Date
|
||||
}
|
||||
|
||||
/// 性能自检服务:跑固定 prompt,取 AIRuntime 的归一统计,按后端标签存 UserDefaults。
|
||||
/// UI(ModelSelfTestView)只经本服务调 AIRuntime(§3.1)。
|
||||
@MainActor
|
||||
struct BenchmarkService {
|
||||
static let shared = BenchmarkService()
|
||||
private init() {}
|
||||
|
||||
nonisolated static let storeKey = "kk.benchmark.results"
|
||||
|
||||
/// 固定测试 prompt:跨设备/引擎可比的前提。
|
||||
static let fixedPrompt = "用中文一句话介绍肝功能里 ALT 这个指标。"
|
||||
|
||||
/// 跑一次自检。onToken 把流式输出交给 UI 展示。
|
||||
func run(onToken: @escaping @MainActor (String, Double) -> Void) async throws -> BenchmarkResult {
|
||||
try await AIRuntime.shared.prepare()
|
||||
let start = Date()
|
||||
let stream = await AIRuntime.shared.generate(prompt: Self.fixedPrompt, maxTokens: 128)
|
||||
for try await chunk in stream {
|
||||
onToken(chunk.text, chunk.decodeRate)
|
||||
}
|
||||
let total = Date().timeIntervalSince(start)
|
||||
let label = await AIRuntime.shared.activeBackendLabel
|
||||
let stats = await AIRuntime.shared.lastGenerateStats
|
||||
let result = BenchmarkResult(
|
||||
backendLabel: label,
|
||||
promptTokens: stats?.promptTokens ?? 0,
|
||||
genTokens: stats?.genTokens ?? 0,
|
||||
prefillTokensPerSecond: stats?.prefillTokensPerSecond ?? 0,
|
||||
decodeTokensPerSecond: stats?.decodeTokensPerSecond ?? 0,
|
||||
totalSeconds: total,
|
||||
date: .now
|
||||
)
|
||||
Self.save(result)
|
||||
return result
|
||||
}
|
||||
|
||||
// MARK: - 存档(静态纯函数,单测覆盖;nonisolated:纯 UserDefaults 操作,无需主线程)
|
||||
|
||||
nonisolated static func save(_ result: BenchmarkResult, defaults: UserDefaults = .standard) {
|
||||
var all = load(defaults: defaults)
|
||||
all[result.backendLabel] = result
|
||||
if let data = try? JSONEncoder().encode(all) {
|
||||
defaults.set(data, forKey: storeKey)
|
||||
}
|
||||
}
|
||||
|
||||
nonisolated static func load(defaults: UserDefaults = .standard) -> [String: BenchmarkResult] {
|
||||
guard let data = defaults.data(forKey: storeKey),
|
||||
let all = try? JSONDecoder().decode([String: BenchmarkResult].self, from: data) else {
|
||||
return [:]
|
||||
}
|
||||
return all
|
||||
}
|
||||
}
|
||||
45
康康Tests/BenchmarkStoreTests.swift
Normal file
45
康康Tests/BenchmarkStoreTests.swift
Normal file
@@ -0,0 +1,45 @@
|
||||
import Testing
|
||||
import Foundation
|
||||
@testable import 康康
|
||||
|
||||
struct BenchmarkStoreTests {
|
||||
|
||||
/// 每个用例独立 suite,避免 Swift Testing 并行执行时互相清空数据。
|
||||
private func freshDefaults(_ name: String) -> UserDefaults {
|
||||
let suite = "test.kk.benchmark.\(name)"
|
||||
let d = UserDefaults(suiteName: suite)!
|
||||
d.removePersistentDomain(forName: suite)
|
||||
return d
|
||||
}
|
||||
|
||||
@Test func savesAndLoadsPerBackend() {
|
||||
let d = freshDefaults("savesAndLoads")
|
||||
let mnn = BenchmarkResult(backendLabel: "MNN · SME2", promptTokens: 30, genTokens: 80,
|
||||
prefillTokensPerSecond: 120, decodeTokensPerSecond: 25,
|
||||
totalSeconds: 4.2, date: .now)
|
||||
let mlx = BenchmarkResult(backendLabel: "MLX · GPU", promptTokens: 30, genTokens: 80,
|
||||
prefillTokensPerSecond: 300, decodeTokensPerSecond: 40,
|
||||
totalSeconds: 2.5, date: .now)
|
||||
BenchmarkService.save(mnn, defaults: d)
|
||||
BenchmarkService.save(mlx, defaults: d)
|
||||
let all = BenchmarkService.load(defaults: d)
|
||||
#expect(all.count == 2)
|
||||
#expect(all["MNN · SME2"]?.decodeTokensPerSecond == 25)
|
||||
}
|
||||
|
||||
@Test func overwritesSameBackend() {
|
||||
let d = freshDefaults("overwrites")
|
||||
let old = BenchmarkResult(backendLabel: "MLX · GPU", promptTokens: 1, genTokens: 1,
|
||||
prefillTokensPerSecond: 1, decodeTokensPerSecond: 1,
|
||||
totalSeconds: 1, date: .now)
|
||||
var new = old
|
||||
new.decodeTokensPerSecond = 99
|
||||
BenchmarkService.save(old, defaults: d)
|
||||
BenchmarkService.save(new, defaults: d)
|
||||
#expect(BenchmarkService.load(defaults: d)["MLX · GPU"]?.decodeTokensPerSecond == 99)
|
||||
}
|
||||
|
||||
@Test func loadOnEmptyReturnsEmpty() {
|
||||
#expect(BenchmarkService.load(defaults: freshDefaults("loadEmpty")).isEmpty)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user