diff --git a/康康/AI/GenerateStats.swift b/康康/AI/GenerateStats.swift
new file mode 100644
index 0000000..b4cadd2
--- /dev/null
+++ b/康康/AI/GenerateStats.swift
@@ -0,0 +1,19 @@
+import Foundation
+
+/// Performance statistics for a single generation run, normalized across the two backends (MNN / MLX).
+/// MNN values come from LlmContext (prefill_us / decode_us); MLX values come from GenerateCompletionInfo.
+struct GenerateStats: Sendable, Equatable {
+    var promptTokens: Int
+    var genTokens: Int
+    /// Time spent in prefill (reading in the prompt), in seconds.
+    var prefillSeconds: Double
+    /// Time spent in decode (token-by-token generation), in seconds.
+    var decodeSeconds: Double
+    var prefillTokensPerSecond: Double { Self.rate(of: promptTokens, over: prefillSeconds) }
+    var decodeTokensPerSecond: Double { Self.rate(of: genTokens, over: decodeSeconds) }
+    /// Tokens per second; yields 0 whenever `seconds` is not greater than zero.
+    private static func rate(of tokens: Int, over seconds: Double) -> Double {
+        guard seconds > 0 else { return 0 }
+        return Double(tokens) / seconds
+    }
+}
diff --git a/康康/AI/LLMSession.swift b/康康/AI/LLMSession.swift
index c88d947..842f09e 100644
--- a/康康/AI/LLMSession.swift
+++ b/康康/AI/LLMSession.swift
@@ -8,6 +8,11 @@ import MLXLMCommon
 actor LLMSession {
     let container: ModelContainer
 
+    /// Statistics of the most recent generation (taken from the `.info` completion event at the end of the stream; used for performance self-checks).
+    private(set) var lastStats: GenerateStats?
+    /// Stores `stats` as the most recent generation statistics.
+    private func record(_ stats: GenerateStats) { self.lastStats = stats }
+
     init(container: ModelContainer) {
         self.container = container
     }
@@ -78,9 +83,14 @@ actor LLMSession {
                                     let rate = elapsed > 0 ? Double(produced) / elapsed : 0
                                     continuation.yield(TokenChunk(text: text, decodeRate: rate))
 
-                                case .info:
+                                case .info(let info):
                                     // 生成完成统计,是流的最后一个事件
-                                    break
+                                    await self.record(GenerateStats(
+                                        promptTokens: info.promptTokenCount,
+                                        genTokens: info.generationTokenCount,
+                                        prefillSeconds: info.promptTime,
+                                        decodeSeconds: info.generateTime
+                                    ))
 
                                 case .toolCall:
                                     // 纯文本生成不会触发,switch 穷举
diff --git a/康康/AI/MNNBackend.swift b/康康/AI/MNNBackend.swift
index 4357fbe..bee2acf 100644
--- a/康康/AI/MNNBackend.swift
+++ b/康康/AI/MNNBackend.swift
@@ -9,6 +9,11 @@ import Foundation
 actor MNNBackend {
     private var bridge: MNNLLMBridge?
 
+    /// Statistics of the most recent generation (for AIRuntime to pick up after the stream ends; used for performance self-checks).
+    private(set) var lastStats: GenerateStats?
+    /// Stores `stats` as the most recent generation statistics.
+    private func record(_ stats: GenerateStats) { self.lastStats = stats }
+
     var isLoaded: Bool { bridge?.isLoaded ?? false }
 
     /// 从 MNN 模型目录加载(目录含 MNN llm 的 config.json + llm.mnn + 权重 + tokenizer)。
@@ -35,10 +40,17 @@ actor MNNBackend {
         return AsyncThrowingStream { continuation in
             let meter = MNNRateMeter()
             let task = Task.detached(priority: .userInitiated) {
-                _ = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
+                let stats = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
                     let rate = meter.tick()
                     continuation.yield(TokenChunk(text: piece, decodeRate: rate))
                 }
+                // ObjC 统计对象先抽成 Sendable 的 GenerateStats 再跨 actor 记录。
+                await self.record(GenerateStats(
+                    promptTokens: Int(stats.promptTokens),
+                    genTokens: Int(stats.genTokens),
+                    prefillSeconds: stats.prefillMs / 1000.0,
+                    decodeSeconds: stats.decodeMs / 1000.0
+                ))
                 continuation.finish()
             }
             continuation.onTermination = { _ in
@@ -58,9 +70,15 @@ actor MNNBackend {
             Task.detached(priority: .userInitiated) {
                 let sink = MNNTextSink()
                 do {
-                    _ = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
+                    let stats = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
                         sink.append(piece)
                     }
+                    await self.record(GenerateStats(
+                        promptTokens: Int(stats.promptTokens),
+                        genTokens: Int(stats.genTokens),
+                        prefillSeconds: stats.prefillMs / 1000.0,
+                        decodeSeconds: stats.decodeMs / 1000.0
+                    ))
                     cont.resume(returning: sink.text)
                 } catch {
                     cont.resume(throwing: AIRuntimeError.inferenceFailed(error.localizedDescription))