feat(AI): 两后端归一的 GenerateStats(prefill/decode 实测统计)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -9,6 +9,11 @@ import Foundation
|
||||
actor MNNBackend {
|
||||
private var bridge: MNNLLMBridge?
|
||||
|
||||
/// Statistics from the most recent generation; `nil` until `record(_:)` has stored a value.
/// Readable from outside the actor, writable only from inside it.
/// NOTE(review): the original note says AIRuntime picks this up after the stream ends,
/// for performance self-checks — confirm against the caller.
|
||||
private(set) var lastStats: GenerateStats?
|
||||
|
||||
/// Stores `s` as the latest generation statistics, overwriting any previously stored value.
/// - Parameter s: The statistics snapshot to keep in `lastStats`.
private func record(_ s: GenerateStats) {
    self.lastStats = s
}
|
||||
|
||||
/// `true` only when a bridge exists and reports itself as loaded;
/// `false` when there is no bridge or the bridge is not loaded.
var isLoaded: Bool {
    bridge?.isLoaded == true
}
|
||||
|
||||
/// Loads from an MNN model directory (the directory contains the MNN llm config.json + llm.mnn + weights + tokenizer).
|
||||
@@ -35,10 +40,17 @@ actor MNNBackend {
|
||||
return AsyncThrowingStream { continuation in
|
||||
let meter = MNNRateMeter()
|
||||
let task = Task.detached(priority: .userInitiated) {
|
||||
_ = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
|
||||
let stats = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
|
||||
let rate = meter.tick()
|
||||
continuation.yield(TokenChunk(text: piece, decodeRate: rate))
|
||||
}
|
||||
// ObjC 统计对象先抽成 Sendable 的 GenerateStats 再跨 actor 记录。
|
||||
await self.record(GenerateStats(
|
||||
promptTokens: Int(stats.promptTokens),
|
||||
genTokens: Int(stats.genTokens),
|
||||
prefillSeconds: stats.prefillMs / 1000.0,
|
||||
decodeSeconds: stats.decodeMs / 1000.0
|
||||
))
|
||||
continuation.finish()
|
||||
}
|
||||
continuation.onTermination = { _ in
|
||||
@@ -58,9 +70,15 @@ actor MNNBackend {
|
||||
Task.detached(priority: .userInitiated) {
|
||||
let sink = MNNTextSink()
|
||||
do {
|
||||
_ = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
|
||||
let stats = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
|
||||
sink.append(piece)
|
||||
}
|
||||
await self.record(GenerateStats(
|
||||
promptTokens: Int(stats.promptTokens),
|
||||
genTokens: Int(stats.genTokens),
|
||||
prefillSeconds: stats.prefillMs / 1000.0,
|
||||
decodeSeconds: stats.decodeMs / 1000.0
|
||||
))
|
||||
cont.resume(returning: sink.text)
|
||||
} catch {
|
||||
cont.resume(throwing: AIRuntimeError.inferenceFailed(error.localizedDescription))
|
||||
|
||||
Reference in New Issue
Block a user