feat(AI): 两后端归一的 GenerateStats(prefill/decode 实测统计)

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
link2026
2026-06-10 06:42:59 +08:00
parent 8c8599e77d
commit 070e016f81
3 changed files with 51 additions and 4 deletions

View File

@@ -0,0 +1,19 @@
import Foundation
/// ,(MNN / MLX)
/// MNN LlmContext(prefill_us / decode_us);MLX GenerateCompletionInfo
struct GenerateStats: Sendable, Equatable {
var promptTokens: Int
var genTokens: Int
/// prefill( prompt),
var prefillSeconds: Double
/// decode( token ),
var decodeSeconds: Double
var prefillTokensPerSecond: Double {
prefillSeconds > 0 ? Double(promptTokens) / prefillSeconds : 0
}
var decodeTokensPerSecond: Double {
decodeSeconds > 0 ? Double(genTokens) / decodeSeconds : 0
}
}

View File

@@ -8,6 +8,11 @@ import MLXLMCommon
actor LLMSession {
let container: ModelContainer
/// ( .info ,)
private(set) var lastStats: GenerateStats?
private func record(_ s: GenerateStats) { lastStats = s }
init(container: ModelContainer) {
self.container = container
}
@@ -78,9 +83,14 @@ actor LLMSession {
let rate = elapsed > 0 ? Double(produced) / elapsed : 0
continuation.yield(TokenChunk(text: text, decodeRate: rate))
case .info:
case .info(let info):
// ,
break
await self.record(GenerateStats(
promptTokens: info.promptTokenCount,
genTokens: info.generationTokenCount,
prefillSeconds: info.promptTime,
decodeSeconds: info.generateTime
))
case .toolCall:
// ,switch

View File

@@ -9,6 +9,11 @@ import Foundation
actor MNNBackend {
private var bridge: MNNLLMBridge?
/// ( AIRuntime ,)
private(set) var lastStats: GenerateStats?
private func record(_ s: GenerateStats) { lastStats = s }
var isLoaded: Bool { bridge?.isLoaded ?? false }
/// MNN ( MNN llm config.json + llm.mnn + + tokenizer)
@@ -35,10 +40,17 @@ actor MNNBackend {
return AsyncThrowingStream { continuation in
let meter = MNNRateMeter()
let task = Task.detached(priority: .userInitiated) {
_ = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
let stats = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
let rate = meter.tick()
continuation.yield(TokenChunk(text: piece, decodeRate: rate))
}
// ObjC Sendable GenerateStats actor
await self.record(GenerateStats(
promptTokens: Int(stats.promptTokens),
genTokens: Int(stats.genTokens),
prefillSeconds: stats.prefillMs / 1000.0,
decodeSeconds: stats.decodeMs / 1000.0
))
continuation.finish()
}
continuation.onTermination = { _ in
@@ -58,9 +70,15 @@ actor MNNBackend {
Task.detached(priority: .userInitiated) {
let sink = MNNTextSink()
do {
_ = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
let stats = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
sink.append(piece)
}
await self.record(GenerateStats(
promptTokens: Int(stats.promptTokens),
genTokens: Int(stats.genTokens),
prefillSeconds: stats.prefillMs / 1000.0,
decodeSeconds: stats.decodeMs / 1000.0
))
cont.resume(returning: sink.text)
} catch {
cont.resume(throwing: AIRuntimeError.inferenceFailed(error.localizedDescription))