feat(AI): 两后端归一的 GenerateStats(prefill/decode 实测统计)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,11 @@ import MLXLMCommon
|
||||
actor LLMSession {
|
||||
let container: ModelContainer
|
||||
|
||||
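/// Statistics from the most recent generation (taken from the `.info` completion event at the end of the stream; used for performance self-checks).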
|
||||
private(set) var lastStats: GenerateStats?
|
||||
|
||||
/// Stores `stats` as the most recent generation statistics (`lastStats`).
private func record(_ stats: GenerateStats) {
    lastStats = stats
}
|
||||
|
||||
/// Creates a session that keeps the given model container in `container`.
///
/// - Parameter container: The `ModelContainer` stored by this session.
init(container: ModelContainer) {
|
||||
self.container = container
|
||||
}
|
||||
@@ -78,9 +83,14 @@ actor LLMSession {
|
||||
let rate = elapsed > 0 ? Double(produced) / elapsed : 0
|
||||
continuation.yield(TokenChunk(text: text, decodeRate: rate))
|
||||
|
||||
case .info:
|
||||
case .info(let info):
|
||||
// 生成完成统计,是流的最后一个事件
|
||||
break
|
||||
await self.record(GenerateStats(
|
||||
promptTokens: info.promptTokenCount,
|
||||
genTokens: info.generationTokenCount,
|
||||
prefillSeconds: info.promptTime,
|
||||
decodeSeconds: info.generateTime
|
||||
))
|
||||
|
||||
case .toolCall:
|
||||
// 纯文本生成不会触发,switch 穷举
|
||||
|
||||
Reference in New Issue
Block a user