由于所提供的代码差异（code differences）中没有具体的代码变更内容，下面给出一个通用的 commit message 模板：

```
docs(readme): 更新文档说明

- 添加了项目使用指南
- 完善了API接口说明
- 修正了一些文字错误
```

注：由于未提供具体的代码差异信息，以上为示例格式。请提供具体的代码变更内容以便生成准确的commit message。
2026-06-17 08:35:59 +08:00
parent b3777d508d
commit de19d7abcd
23 changed files with 364 additions and 154 deletions
--- a/康康/AI/AIRuntime.swift
+++ b/康康/AI/AIRuntime.swift
@@ -34,7 +34,6 @@ actor AIRuntime {

    private(set) var status: Status = .notReady
    private(set) var vlStatus: Status = .notReady
-    private(set) var lastDecodeRate: Double = 0

    /// 末次文本生成的性能统计(性能自检页消费;两后端归一)。
    private(set) var lastGenerateStats: GenerateStats?
@@ -247,6 +246,8 @@ actor AIRuntime {
                }
                // 进闸门:保证本次 LLM 解码与任何 VL 解码 / 模型加载串行,绝不并发占显存。
                await self.acquireGate(priority)
+                // defer 保证正常结束 / 异常 / 取消都释放闸门;杜绝未来新增 early-return 导致全局推理死锁。
+                defer { self.releaseGate() }
                do {
                    // session.generate 跨 actor 边界,需要 await
                    let stream = await session.generate(prompt: prompt, maxTokens: maxTokens)
@@ -256,9 +257,6 @@ actor AIRuntime {
                        try Task.checkCancellation()
                        // 后台任务让位:前台请求在排队时,下一个 token 处主动退出。
                        if self.shouldPreempt(priority) { throw CancellationError() }
-                        // Task 闭包在 generate() 内启动,继承 AIRuntime 的 actor 隔离;
-                        // 调用同 actor 的 recordRate 不需要 await
-                        self.recordRate(chunk.decodeRate)
                        continuation.yield(chunk)
                    }
                    self.lastGenerateStats = await session.lastStats
@@ -269,9 +267,6 @@ actor AIRuntime {
                } catch {
                    continuation.finish(throwing: AIRuntimeError.inferenceFailed("\(error)"))
                }
-                // 正常结束 / 异常 / 取消(checkCancellation 抛出后被上面 catch 吞掉)都会走到这,
-                // 闸门一定释放,不会死锁后续推理。
-                self.releaseGate()
            }
            // 消费者取消/流终止时取消内部 Task(与 LLMSession / HealthExportService 一致)。
            continuation.onTermination = { _ in task.cancel() }
@@ -290,6 +285,7 @@ actor AIRuntime {
                    return
                }
                await self.acquireGate(priority)
+                defer { self.releaseGate() }   // 无论正常结束 / 异常 / 取消都释放闸门,防死锁
                do {
                    let stream = await self.mnn.generate(prompt: prompt, maxTokens: maxTokens)
                    for try await chunk in stream {
@@ -297,7 +293,6 @@ actor AIRuntime {
                        // 后台任务让位:前台请求在排队时,下一个 token 处主动退出
                        //(流终止触发 MNNBackend.onTermination → bridge.cancel())。
                        if self.shouldPreempt(priority) { throw CancellationError() }
-                        self.recordRate(chunk.decodeRate)
                        continuation.yield(chunk)
                    }
                    self.lastGenerateStats = await self.mnn.lastStats
@@ -307,16 +302,11 @@ actor AIRuntime {
                } catch {
                    continuation.finish(throwing: AIRuntimeError.inferenceFailed("\(error)"))
                }
-                self.releaseGate()
            }
            continuation.onTermination = { _ in task.cancel() }
        }
    }

-    private func recordRate(_ rate: Double) {
-        if rate > 0 { lastDecodeRate = rate }
-    }
-
    // MARK: - VL

    /// 加载 VL 模型。幂等,首调真正 load。