From a65c63947b452294b0b2d88652989f675512e9ab Mon Sep 17 00:00:00 2001 From: link2026 Date: Wed, 10 Jun 2026 06:42:59 +0800 Subject: [PATCH] =?UTF-8?q?feat(Me):=20=E6=80=A7=E8=83=BD=E8=87=AA?= =?UTF-8?q?=E6=A3=80=E5=8D=A1=20=E2=80=94=20=E5=90=8E=E7=AB=AF=E6=A0=87?= =?UTF-8?q?=E8=AF=86=20+=20prefill/decode=20=E5=AE=9E=E6=B5=8B=20+=20?= =?UTF-8?q?=E5=BC=95=E6=93=8E=E5=AF=B9=E6=AF=94=E5=AD=98=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Fable 5 --- 康康/Features/Me/ModelManagementView.swift | 6 +- 康康/Features/Me/ModelSelfTestView.swift | 207 +++++++++++++++------ 康康/Services/BenchmarkService.swift | 67 +++++++ 康康Tests/BenchmarkStoreTests.swift | 45 +++++ 4 files changed, 260 insertions(+), 65 deletions(-) create mode 100644 康康/Services/BenchmarkService.swift create mode 100644 康康Tests/BenchmarkStoreTests.swift diff --git a/康康/Features/Me/ModelManagementView.swift b/康康/Features/Me/ModelManagementView.swift index e681b2d..54953db 100644 --- a/康康/Features/Me/ModelManagementView.swift +++ b/康康/Features/Me/ModelManagementView.swift @@ -28,13 +28,13 @@ struct ModelManagementView: View { actionButtons .padding(.top, 4) - if service.states[.mnnLLM]?.phase == .ready { + if service.states[.mnnLLM]?.phase == .ready || service.states[.llm]?.phase == .ready { NavigationLink { ModelSelfTestView() } label: { HStack(spacing: 6) { - Image(systemName: "play.circle") - Text("运行推理自检") + Image(systemName: "gauge.with.needle") + Text("性能自检") } .frame(maxWidth: .infinity) } diff --git a/康康/Features/Me/ModelSelfTestView.swift b/康康/Features/Me/ModelSelfTestView.swift index 45dbdb1..b9a429c 100644 --- a/康康/Features/Me/ModelSelfTestView.swift +++ b/康康/Features/Me/ModelSelfTestView.swift @@ -1,11 +1,13 @@ import SwiftUI -/// 模型推理自检:加载 LLM 跑一段固定 prompt,流式显示输出 + tok/s。 -/// 模型就绪后从「我的 · 模型管理」进入,用于现场快速验证本地推理是否正常。 +/// 性能自检:跑固定 prompt,展示当前后端(MNN·SME2 / MNN·NEON / MLX·GPU)的 +/// prefill / decode 实测速度,并按后端存档对比 —— 挑战赛考核点的可见证据(§12 卖点 2/6)。 struct ModelSelfTestView: View { @State private var output = "" @State private var phase: Phase = .idle @State private var rate: Double = 0 + @State private var lastResult: BenchmarkResult? + @State private var history: [String: BenchmarkResult] = [:] private enum Phase: Equatable { case idle, loading, running, done, failed(String) @@ -21,8 +23,6 @@ struct ModelSelfTestView: View { } } - private let prompt = "用中文一句话介绍肝功能里 ALT 这个指标。" - private var isBusy: Bool { phase == .loading || phase == .running } private var statusColor: Color { @@ -34,79 +34,162 @@ struct ModelSelfTestView: View { } var body: some View { - VStack(alignment: .leading, spacing: 16) { - VStack(alignment: .leading, spacing: 6) { - Text("测试 PROMPT") - .font(.tjScaled( 11, weight: .semibold)) - .tracking(0.5) - .foregroundStyle(Tj.Palette.text3) - Text(prompt) - .font(.tjScaled( 14)) - .foregroundStyle(Tj.Palette.text) - } - .padding(14) - .frame(maxWidth: .infinity, alignment: .leading) - .tjCard() + ScrollView { + VStack(alignment: .leading, spacing: 16) { + promptCard + HStack { + Text(phase.label) + .font(.tjScaled( 13, weight: .medium)) + .foregroundStyle(statusColor) + .lineLimit(1) + Spacer() + if rate > 0 { + Text(String(format: "%.1f tok/s", rate)) + .font(.tjScaled( 12, design: .monospaced)) + .foregroundStyle(Tj.Palette.text3) + } + } + + Button { + Task { await run() } + } label: { + Text(isBusy ? "运行中…" : "运行性能自检").frame(maxWidth: .infinity) + } + .buttonStyle(TjPrimaryButton()) + .disabled(isBusy) + + if isBusy { AIFlowBar() } + + if let r = lastResult { statsCard(r) } + + outputCard + + if !history.isEmpty { historyCard } + } + .padding(16) + } + .background(Tj.Palette.sand.ignoresSafeArea()) + .navigationTitle("性能自检") + .navigationBarTitleDisplayMode(.inline) + .onAppear { history = BenchmarkService.load() } + } + + private var promptCard: some View { + VStack(alignment: .leading, spacing: 6) { + Text("测试 PROMPT") + .font(.tjScaled( 11, weight: .semibold)) + .tracking(0.5) + .foregroundStyle(Tj.Palette.text3) + Text(BenchmarkService.fixedPrompt) + .font(.tjScaled( 14)) + .foregroundStyle(Tj.Palette.text) + } + .padding(14) + .frame(maxWidth: .infinity, alignment: .leading) + .tjCard() + } + + private func statsCard(_ r: BenchmarkResult) -> some View { + VStack(alignment: .leading, spacing: 10) { HStack { - Text(phase.label) - .font(.tjScaled( 13, weight: .medium)) - .foregroundStyle(statusColor) - .lineLimit(1) + Text("本次结果") + .font(.tjScaled( 12, weight: .semibold)) + .foregroundStyle(Tj.Palette.text2) Spacer() - if rate > 0 { - Text(String(format: "%.1f tok/s", rate)) - .font(.tjScaled( 12, design: .monospaced)) - .foregroundStyle(Tj.Palette.text3) + TjBadge(text: r.backendLabel, style: .leaf) + } + HStack(spacing: 0) { + metric(String(appLoc: "读入"), r.prefillTokensPerSecond > 0 + ? String(format: "%.0f tok/s", r.prefillTokensPerSecond) : "—") + metric(String(appLoc: "生成"), String(format: "%.1f tok/s", r.decodeTokensPerSecond)) + metric(String(appLoc: "总耗时"), String(format: "%.1fs", r.totalSeconds)) + } + Text(String(appLoc: "prompt \(r.promptTokens) tok · 生成 \(r.genTokens) tok · 100% 本地")) + .font(.tjScaled( 10, design: .monospaced)) + .foregroundStyle(Tj.Palette.text3) + } + .padding(14) + .frame(maxWidth: .infinity, alignment: .leading) + .tjCard() + } + + private func metric(_ label: String, _ value: String) -> some View { + VStack(spacing: 3) { + Text(value) + .font(.tjScaled( 15, weight: .semibold, design: .monospaced)) + .foregroundStyle(Tj.Palette.text) + Text(label) + .font(.tjScaled( 10)) + .foregroundStyle(Tj.Palette.text3) + } + .frame(maxWidth: .infinity) + } + + private var outputCard: some View { + ScrollView { + Text(output.isEmpty ? "(暂无输出)" : output) + .font(.system(.footnote, design: .monospaced)) + .foregroundStyle(Tj.Palette.text) + .frame(maxWidth: .infinity, alignment: .leading) + .textSelection(.enabled) + .padding(12) + } + .frame(maxHeight: 220) + .background( + RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous) + .fill(Tj.Palette.paper) + ) + .overlay( + RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous) + .strokeBorder(Tj.Palette.lineSoft, lineWidth: 1) + ) + } + + private var historyCard: some View { + VStack(alignment: .leading, spacing: 10) { + Text("各引擎实测对比") + .font(.tjScaled( 12, weight: .semibold)) + .foregroundStyle(Tj.Palette.text2) + ForEach(history.keys.sorted(), id: \.self) { key in + if let r = history[key] { + HStack { + Text(key) + .font(.tjScaled( 12, weight: .medium)) + .foregroundStyle(Tj.Palette.text) + Spacer() + Text(String(format: String(appLoc: "生成 %.1f tok/s"), r.decodeTokensPerSecond)) + .font(.tjScaled( 12, design: .monospaced)) + .foregroundStyle(Tj.Palette.leaf) + Text(r.date.formatted(.dateTime.month().day())) + .font(.tjScaled( 10)) + .foregroundStyle(Tj.Palette.text3) + } } } - - Button { - Task { await run() } - } label: { - Text(isBusy ? "运行中…" : "运行推理自检").frame(maxWidth: .infinity) - } - .buttonStyle(TjPrimaryButton()) - .disabled(isBusy) - - ScrollView { - Text(output.isEmpty ? "(暂无输出)" : output) - .font(.system(.footnote, design: .monospaced)) - .foregroundStyle(Tj.Palette.text) - .frame(maxWidth: .infinity, alignment: .leading) - .textSelection(.enabled) - .padding(12) - } - .frame(maxHeight: 280) - .background( - RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous) - .fill(Tj.Palette.paper) - ) - .overlay( - RoundedRectangle(cornerRadius: Tj.Radius.md, style: .continuous) - .strokeBorder(Tj.Palette.lineSoft, lineWidth: 1) - ) - - Spacer() + Text("在「我的 · 推理引擎」切换引擎后再跑一次,即可对比 SME2 与 GPU。") + .font(.tjScaled( 10)) + .foregroundStyle(Tj.Palette.text3) } - .padding(16) - .background(Tj.Palette.sand.ignoresSafeArea()) - .navigationTitle("推理自检") - .navigationBarTitleDisplayMode(.inline) + .padding(14) + .frame(maxWidth: .infinity, alignment: .leading) + .tjCard() } @MainActor private func run() async { output = "" rate = 0 + lastResult = nil phase = .loading do { - try await AIRuntime.shared.prepare() - phase = .running - for try await chunk in await AIRuntime.shared.generate(prompt: prompt, maxTokens: 200) { - output += chunk.text - rate = chunk.decodeRate + let result = try await BenchmarkService.shared.run { piece, r in + output += piece + if r > 0 { rate = r } + if phase == .loading { phase = .running } } + lastResult = result + history = BenchmarkService.load() phase = .done } catch { phase = .failed(error.localizedDescription) diff --git a/康康/Services/BenchmarkService.swift b/康康/Services/BenchmarkService.swift new file mode 100644 index 0000000..4137c54 --- /dev/null +++ b/康康/Services/BenchmarkService.swift @@ -0,0 +1,67 @@ +import Foundation + +/// 单次性能自检结果。按后端标签归档,供「MNN·SME2 vs MLX·GPU」对比展示(§12 卖点 2/6)。 +struct BenchmarkResult: Codable, Equatable { + var backendLabel: String + var promptTokens: Int + var genTokens: Int + var prefillTokensPerSecond: Double + var decodeTokensPerSecond: Double + var totalSeconds: Double + var date: Date +} + +/// 性能自检服务:跑固定 prompt,取 AIRuntime 的归一统计,按后端标签存 UserDefaults。 +/// UI(ModelSelfTestView)只经本服务调 AIRuntime(§3.1)。 +@MainActor +struct BenchmarkService { + static let shared = BenchmarkService() + private init() {} + + nonisolated static let storeKey = "kk.benchmark.results" + + /// 固定测试 prompt:跨设备/引擎可比的前提。 + static let fixedPrompt = "用中文一句话介绍肝功能里 ALT 这个指标。" + + /// 跑一次自检。onToken 把流式输出交给 UI 展示。 + func run(onToken: @escaping @MainActor (String, Double) -> Void) async throws -> BenchmarkResult { + try await AIRuntime.shared.prepare() + let start = Date() + let stream = await AIRuntime.shared.generate(prompt: Self.fixedPrompt, maxTokens: 128) + for try await chunk in stream { + onToken(chunk.text, chunk.decodeRate) + } + let total = Date().timeIntervalSince(start) + let label = await AIRuntime.shared.activeBackendLabel + let stats = await AIRuntime.shared.lastGenerateStats + let result = BenchmarkResult( + backendLabel: label, + promptTokens: stats?.promptTokens ?? 0, + genTokens: stats?.genTokens ?? 0, + prefillTokensPerSecond: stats?.prefillTokensPerSecond ?? 0, + decodeTokensPerSecond: stats?.decodeTokensPerSecond ?? 0, + totalSeconds: total, + date: .now + ) + Self.save(result) + return result + } + + // MARK: - 存档(静态纯函数,单测覆盖;nonisolated:纯 UserDefaults 操作,无需主线程) + + nonisolated static func save(_ result: BenchmarkResult, defaults: UserDefaults = .standard) { + var all = load(defaults: defaults) + all[result.backendLabel] = result + if let data = try? JSONEncoder().encode(all) { + defaults.set(data, forKey: storeKey) + } + } + + nonisolated static func load(defaults: UserDefaults = .standard) -> [String: BenchmarkResult] { + guard let data = defaults.data(forKey: storeKey), + let all = try? JSONDecoder().decode([String: BenchmarkResult].self, from: data) else { + return [:] + } + return all + } +} diff --git a/康康Tests/BenchmarkStoreTests.swift b/康康Tests/BenchmarkStoreTests.swift new file mode 100644 index 0000000..4d5d95b --- /dev/null +++ b/康康Tests/BenchmarkStoreTests.swift @@ -0,0 +1,45 @@ +import Testing +import Foundation +@testable import 康康 + +struct BenchmarkStoreTests { + + /// 每个用例独立 suite,避免 Swift Testing 并行执行时互相清空数据。 + private func freshDefaults(_ name: String) -> UserDefaults { + let suite = "test.kk.benchmark.\(name)" + let d = UserDefaults(suiteName: suite)! + d.removePersistentDomain(forName: suite) + return d + } + + @Test func savesAndLoadsPerBackend() { + let d = freshDefaults("savesAndLoads") + let mnn = BenchmarkResult(backendLabel: "MNN · SME2", promptTokens: 30, genTokens: 80, + prefillTokensPerSecond: 120, decodeTokensPerSecond: 25, + totalSeconds: 4.2, date: .now) + let mlx = BenchmarkResult(backendLabel: "MLX · GPU", promptTokens: 30, genTokens: 80, + prefillTokensPerSecond: 300, decodeTokensPerSecond: 40, + totalSeconds: 2.5, date: .now) + BenchmarkService.save(mnn, defaults: d) + BenchmarkService.save(mlx, defaults: d) + let all = BenchmarkService.load(defaults: d) + #expect(all.count == 2) + #expect(all["MNN · SME2"]?.decodeTokensPerSecond == 25) + } + + @Test func overwritesSameBackend() { + let d = freshDefaults("overwrites") + let old = BenchmarkResult(backendLabel: "MLX · GPU", promptTokens: 1, genTokens: 1, + prefillTokensPerSecond: 1, decodeTokensPerSecond: 1, + totalSeconds: 1, date: .now) + var new = old + new.decodeTokensPerSecond = 99 + BenchmarkService.save(old, defaults: d) + BenchmarkService.save(new, defaults: d) + #expect(BenchmarkService.load(defaults: d)["MLX · GPU"]?.decodeTokensPerSecond == 99) + } + + @Test func loadOnEmptyReturnsEmpty() { + #expect(BenchmarkService.load(defaults: freshDefaults("loadEmpty")).isEmpty) + } +}