feat(AI): 两后端归一的 GenerateStats(prefill/decode 实测统计)

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
link2026
2026-06-10 06:42:59 +08:00
parent 8c8599e77d
commit 070e016f81
3 changed files with 51 additions and 4 deletions

View File

@@ -9,6 +9,11 @@ import Foundation
actor MNNBackend {
private var bridge: MNNLLMBridge?
/// Most recent `GenerateStats` stored via `record(_:)`, if any; read-only from outside this type.
/// NOTE(review): presumably read by AIRuntime after a generation finishes — confirm against callers.
private(set) var lastStats: GenerateStats?
/// Stores `stats` as the latest generation statistics, replacing any previously stored value.
private func record(_ stats: GenerateStats) {
    lastStats = stats
}
/// `true` only when a bridge exists and its `isLoaded` is `true`; `false` when there is no bridge.
var isLoaded: Bool {
    bridge?.isLoaded == true
}
/// MNN ( MNN llm config.json + llm.mnn + + tokenizer)
@@ -35,10 +40,17 @@ actor MNNBackend {
return AsyncThrowingStream { continuation in
let meter = MNNRateMeter()
let task = Task.detached(priority: .userInitiated) {
_ = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
let stats = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
let rate = meter.tick()
continuation.yield(TokenChunk(text: piece, decodeRate: rate))
}
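// Copy the stats returned by generateText into a GenerateStats value before hopping onto the actor.
// NOTE(review): presumably because the ObjC stats object is not Sendable — confirm.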
await self.record(GenerateStats(
promptTokens: Int(stats.promptTokens),
genTokens: Int(stats.genTokens),
prefillSeconds: stats.prefillMs / 1000.0,
decodeSeconds: stats.decodeMs / 1000.0
))
continuation.finish()
}
continuation.onTermination = { _ in
@@ -58,9 +70,15 @@ actor MNNBackend {
Task.detached(priority: .userInitiated) {
let sink = MNNTextSink()
do {
_ = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
let stats = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
sink.append(piece)
}
await self.record(GenerateStats(
promptTokens: Int(stats.promptTokens),
genTokens: Int(stats.genTokens),
prefillSeconds: stats.prefillMs / 1000.0,
decodeSeconds: stats.decodeMs / 1000.0
))
cont.resume(returning: sink.text)
} catch {
cont.resume(throwing: AIRuntimeError.inferenceFailed(error.localizedDescription))