feat(AI): 两后端归一的 GenerateStats(prefill/decode 实测统计)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
19
康康/AI/GenerateStats.swift
Normal file
19
康康/AI/GenerateStats.swift
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
import Foundation
|
||||||
|
|
||||||
|
/// Performance statistics for a single generation run, normalized across the two backends (MNN / MLX).
/// Per the original note: MNN values come from LlmContext (prefill_us / decode_us); MLX values come from GenerateCompletionInfo.
struct GenerateStats: Sendable, Equatable {
    /// Number of tokens in the prompt.
    var promptTokens: Int
    /// Number of tokens produced by the generation.
    var genTokens: Int
    /// Time spent in prefill (reading in the prompt), in seconds.
    var prefillSeconds: Double
    /// Time spent in decode (token-by-token generation), in seconds.
    var decodeSeconds: Double

    /// Prefill throughput in tokens per second; 0 when `prefillSeconds` is not strictly positive.
    var prefillTokensPerSecond: Double {
        Self.throughput(of: promptTokens, over: prefillSeconds)
    }

    /// Decode throughput in tokens per second; 0 when `decodeSeconds` is not strictly positive.
    var decodeTokensPerSecond: Double {
        Self.throughput(of: genTokens, over: decodeSeconds)
    }

    /// Divides a token count by a duration, yielding 0 instead of dividing when the
    /// duration is zero, negative, or NaN (all of which fail the `> 0` comparison).
    private static func throughput(of tokenCount: Int, over seconds: Double) -> Double {
        guard seconds > 0 else { return 0 }
        return Double(tokenCount) / seconds
    }
}
|
||||||
@@ -8,6 +8,11 @@ import MLXLMCommon
|
|||||||
actor LLMSession {
|
actor LLMSession {
|
||||||
let container: ModelContainer
|
let container: ModelContainer
|
||||||
|
|
||||||
|
/// 末次生成统计(取自流末尾的 .info 完成事件,性能自检用)。
|
||||||
|
private(set) var lastStats: GenerateStats?
|
||||||
|
|
||||||
|
/// Stores `stats` as the statistics of the most recent generation (`lastStats`).
private func record(_ stats: GenerateStats) {
    lastStats = stats
}
|
||||||
|
|
||||||
init(container: ModelContainer) {
|
init(container: ModelContainer) {
|
||||||
self.container = container
|
self.container = container
|
||||||
}
|
}
|
||||||
@@ -78,9 +83,14 @@ actor LLMSession {
|
|||||||
let rate = elapsed > 0 ? Double(produced) / elapsed : 0
|
let rate = elapsed > 0 ? Double(produced) / elapsed : 0
|
||||||
continuation.yield(TokenChunk(text: text, decodeRate: rate))
|
continuation.yield(TokenChunk(text: text, decodeRate: rate))
|
||||||
|
|
||||||
case .info:
|
case .info(let info):
|
||||||
// 生成完成统计,是流的最后一个事件
|
// 生成完成统计,是流的最后一个事件
|
||||||
break
|
await self.record(GenerateStats(
|
||||||
|
promptTokens: info.promptTokenCount,
|
||||||
|
genTokens: info.generationTokenCount,
|
||||||
|
prefillSeconds: info.promptTime,
|
||||||
|
decodeSeconds: info.generateTime
|
||||||
|
))
|
||||||
|
|
||||||
case .toolCall:
|
case .toolCall:
|
||||||
// 纯文本生成不会触发,switch 穷举
|
// 纯文本生成不会触发,switch 穷举
|
||||||
|
|||||||
@@ -9,6 +9,11 @@ import Foundation
|
|||||||
actor MNNBackend {
|
actor MNNBackend {
|
||||||
private var bridge: MNNLLMBridge?
|
private var bridge: MNNLLMBridge?
|
||||||
|
|
||||||
|
/// 末次生成统计(供 AIRuntime 在流结束后取走,性能自检用)。
|
||||||
|
private(set) var lastStats: GenerateStats?
|
||||||
|
|
||||||
|
/// Stores `stats` as the statistics of the most recent generation (`lastStats`).
private func record(_ stats: GenerateStats) {
    lastStats = stats
}
|
||||||
|
|
||||||
var isLoaded: Bool { bridge?.isLoaded ?? false }
|
var isLoaded: Bool { bridge?.isLoaded ?? false }
|
||||||
|
|
||||||
/// 从 MNN 模型目录加载(目录含 MNN llm 的 config.json + llm.mnn + 权重 + tokenizer)。
|
/// 从 MNN 模型目录加载(目录含 MNN llm 的 config.json + llm.mnn + 权重 + tokenizer)。
|
||||||
@@ -35,10 +40,17 @@ actor MNNBackend {
|
|||||||
return AsyncThrowingStream { continuation in
|
return AsyncThrowingStream { continuation in
|
||||||
let meter = MNNRateMeter()
|
let meter = MNNRateMeter()
|
||||||
let task = Task.detached(priority: .userInitiated) {
|
let task = Task.detached(priority: .userInitiated) {
|
||||||
_ = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
|
let stats = box.value.generateText(prompt, maxTokens: Int32(maxTokens)) { piece in
|
||||||
let rate = meter.tick()
|
let rate = meter.tick()
|
||||||
continuation.yield(TokenChunk(text: piece, decodeRate: rate))
|
continuation.yield(TokenChunk(text: piece, decodeRate: rate))
|
||||||
}
|
}
|
||||||
|
// ObjC 统计对象先抽成 Sendable 的 GenerateStats 再跨 actor 记录。
|
||||||
|
await self.record(GenerateStats(
|
||||||
|
promptTokens: Int(stats.promptTokens),
|
||||||
|
genTokens: Int(stats.genTokens),
|
||||||
|
prefillSeconds: stats.prefillMs / 1000.0,
|
||||||
|
decodeSeconds: stats.decodeMs / 1000.0
|
||||||
|
))
|
||||||
continuation.finish()
|
continuation.finish()
|
||||||
}
|
}
|
||||||
continuation.onTermination = { _ in
|
continuation.onTermination = { _ in
|
||||||
@@ -58,9 +70,15 @@ actor MNNBackend {
|
|||||||
Task.detached(priority: .userInitiated) {
|
Task.detached(priority: .userInitiated) {
|
||||||
let sink = MNNTextSink()
|
let sink = MNNTextSink()
|
||||||
do {
|
do {
|
||||||
_ = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
|
let stats = try box.value.analyzeImages(paths, prompt: prompt, maxTokens: Int32(maxTokens)) { piece in
|
||||||
sink.append(piece)
|
sink.append(piece)
|
||||||
}
|
}
|
||||||
|
await self.record(GenerateStats(
|
||||||
|
promptTokens: Int(stats.promptTokens),
|
||||||
|
genTokens: Int(stats.genTokens),
|
||||||
|
prefillSeconds: stats.prefillMs / 1000.0,
|
||||||
|
decodeSeconds: stats.decodeMs / 1000.0
|
||||||
|
))
|
||||||
cont.resume(returning: sink.text)
|
cont.resume(returning: sink.text)
|
||||||
} catch {
|
} catch {
|
||||||
cont.resume(throwing: AIRuntimeError.inferenceFailed(error.localizedDescription))
|
cont.resume(throwing: AIRuntimeError.inferenceFailed(error.localizedDescription))
|
||||||
|
|||||||
Reference in New Issue
Block a user