feat(AI): 两后端归一的 GenerateStats(prefill/decode 实测统计)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
19
康康/AI/GenerateStats.swift
Normal file
19
康康/AI/GenerateStats.swift
Normal file
@@ -0,0 +1,19 @@
|
||||
import Foundation
|
||||
|
||||
/// Performance statistics for a single generation run, normalized across both
/// backends (MNN / MLX).
///
/// MNN values are taken from `LlmContext` (`prefill_us` / `decode_us`); MLX values
/// are taken from `GenerateCompletionInfo`.
struct GenerateStats: Sendable, Equatable {
    /// Number of prompt tokens; the numerator of `prefillTokensPerSecond`.
    var promptTokens: Int

    /// Number of generated tokens; the numerator of `decodeTokensPerSecond`.
    var genTokens: Int

    /// Prefill (reading in the prompt) duration, in seconds.
    var prefillSeconds: Double

    /// Decode (token-by-token generation) duration, in seconds.
    var decodeSeconds: Double

    /// Prefill throughput in tokens per second.
    ///
    /// Returns 0 when `prefillSeconds` is not strictly positive, so a missing or
    /// zero duration never produces a division by zero.
    var prefillTokensPerSecond: Double {
        prefillSeconds > 0 ? Double(promptTokens) / prefillSeconds : 0
    }

    /// Decode throughput in tokens per second.
    ///
    /// Returns 0 when `decodeSeconds` is not strictly positive, so a missing or
    /// zero duration never produces a division by zero.
    var decodeTokensPerSecond: Double {
        decodeSeconds > 0 ? Double(genTokens) / decodeSeconds : 0
    }
}
|
||||
@@ -8,6 +8,11 @@ import MLXLMCommon
|
||||
actor LLMSession {
|
||||
let container: ModelContainer
|
||||
|
||||
/// 末次生成统计(取自流末尾的 .info 完成事件,性能自检用)。
|
||||
private(set) var lastStats: GenerateStats?
|
||||
|
||||
/// Overwrites `lastStats` with the statistics of the most recent generation.
private func record(_ stats: GenerateStats) {
    lastStats = stats
}
|
||||
|
||||
/// Creates a session that stores the given model container in `container`.
init(container: ModelContainer) {
|
||||
self.container = container
|
||||
}
|
||||
@@ -78,9 +83,14 @@ actor LLMSession {
|
||||
let rate = elapsed > 0 ? Double(produced) / elapsed : 0
|
||||
continuation.yield(TokenChunk(text: text, decodeRate: rate))
|
||||
|
||||
case .info:
|
||||
case .info(let info):
|
||||
// 生成完成统计,是流的最后一个事件
|
||||
break
|
||||
await self.record(GenerateStats(
|
||||
promptTokens: info.promptTokenCount,
|
||||
genTokens: info.generationTokenCount,
|
||||
prefillSeconds: info.promptTime,
|
||||
decodeSeconds: info.generateTime
|
||||
))
|
||||
|
||||
case .toolCall:
|
||||
// 纯文本生成不会触发,switch 穷举
|
||||
|
||||
@@ -9,6 +9,11 @@ import Foundation
|
||||
actor MNNBackend {
|
||||
private var bridge: MNNLLMBridge?
|
||||
|
||||
/// 末次生成统计(供 AIRuntime 在流结束后取走,性能自检用)。
|
||||
private(set) var lastStats: GenerateStats?
|
||||
|
||||
/// Overwrites `lastStats` with the statistics of the most recent generation.
private func record(_ stats: GenerateStats) {
    lastStats = stats
}
|
||||
|
||||
var isLoaded: Bool { bridge?.isLoaded ?? false }
|
||||
|
||||
/// 从 MNN 模型目录加载(目录含 MNN llm 的 config.json + llm.mnn + 权重 + tokenizer)。
|
||||
@@ -35,10 +40,17 @@ actor MNNBackend {
|
||||
return AsyncThrowingStream { continuation in
|
||||
let meter = MNNRateMeter()
|
||||
let task = Task.detached(priority: .userInitiated) {
|
||||
_ = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
|
||||
let stats = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
|
||||
let rate = meter.tick()
|
||||
continuation.yield(TokenChunk(text: piece, decodeRate: rate))
|
||||
}
|
||||
// ObjC 统计对象先抽成 Sendable 的 GenerateStats 再跨 actor 记录。
|
||||
await self.record(GenerateStats(
|
||||
promptTokens: Int(stats.promptTokens),
|
||||
genTokens: Int(stats.genTokens),
|
||||
prefillSeconds: stats.prefillMs / 1000.0,
|
||||
decodeSeconds: stats.decodeMs / 1000.0
|
||||
))
|
||||
continuation.finish()
|
||||
}
|
||||
continuation.onTermination = { _ in
|
||||
@@ -58,9 +70,15 @@ actor MNNBackend {
|
||||
Task.detached(priority: .userInitiated) {
|
||||
let sink = MNNTextSink()
|
||||
do {
|
||||
_ = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
|
||||
let stats = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
|
||||
sink.append(piece)
|
||||
}
|
||||
await self.record(GenerateStats(
|
||||
promptTokens: Int(stats.promptTokens),
|
||||
genTokens: Int(stats.genTokens),
|
||||
prefillSeconds: stats.prefillMs / 1000.0,
|
||||
decodeSeconds: stats.decodeMs / 1000.0
|
||||
))
|
||||
cont.resume(returning: sink.text)
|
||||
} catch {
|
||||
cont.resume(throwing: AIRuntimeError.inferenceFailed(error.localizedDescription))
|
||||
|
||||
Reference in New Issue
Block a user