fix(concurrency): clear 4 Swift 6 warnings under default MainActor isolation

- ModelStore/FileVault: drop nonisolated(unsafe) on shared and mark all instance methods nonisolated (they only read the filesystem); the ModelKind enum is also nonisolated
- AIRuntime ↔ ModelStore: the cross-actor call warning is resolved by the change above
- LLMSession: replace the deprecated Device.setDefault(device:) with the task-scoped Device.withDefaultDevice(.cpu, body:); wrap both load and generate so the TaskLocal propagates through ModelContainer.perform
2026-05-25 23:18:08 +08:00
parent 53da442424
commit e4a68a1bdd
3 changed files with 67 additions and 57 deletions
--- a/康康/AI/LLMSession.swift
+++ b/康康/AI/LLMSession.swift
@@ -12,18 +12,27 @@ actor LLMSession {
        self.container = container
    }

+    /// 在 simulator 把默认设备强切为 CPU(MLX 的 Metal backend 在部分 Sim 路径会 abort)。
+    /// 真机走 body 默认设备(GPU/ANE)。
+    /// 用 task-scoped `withDefaultDevice`,TaskLocal 会传递到 child Task / actor 调用。
+    private static func withDeviceOverride<R>(
+        _ body: () async throws -> R
+    ) async rethrows -> R {
+        #if targetEnvironment(simulator)
+        return try await Device.withDefaultDevice(.cpu, body)
+        #else
+        return try await body()
+        #endif
+    }
+
    /// 从本地目录加载模型(包含 config.json + weights + tokenizer)。
    static func load(folderURL: URL) async throws -> LLMSession {
-        #if targetEnvironment(simulator)
-        // MLX 的 iOS Simulator GPU stream 初始化会在部分 Metal backend 路径中 abort。
-        // 模拟器只用于功能调试,强制走 CPU;真机仍保留默认 GPU/ANE 相关路径。
-        Device.setDefault(device: .cpu)
-        #endif
-
        let configuration = ModelConfiguration(directory: folderURL)
-        let container = try await LLMModelFactory.shared.loadContainer(
-            configuration: configuration
-        )
+        let container = try await withDeviceOverride {
+            try await LLMModelFactory.shared.loadContainer(
+                configuration: configuration
+            )
+        }
        return LLMSession(container: container)
    }

@@ -35,46 +44,48 @@ actor LLMSession {
        AsyncThrowingStream { continuation in
            let task = Task {
                do {
-                    let parameters = GenerateParameters(
-                        maxTokens: maxTokens,
-                        temperature: Float(0.6),
-                        topP: Float(0.9)
-                    )
+                    try await Self.withDeviceOverride {
+                        let parameters = GenerateParameters(
+                            maxTokens: maxTokens,
+                            temperature: Float(0.6),
+                            topP: Float(0.9)
+                        )

-                    try await container.perform { (context: ModelContext) in
-                        let userInput = UserInput(prompt: prompt)
-                        let lmInput = try await context.processor.prepare(input: userInput)
+                        try await container.perform { (context: ModelContext) in
+                            let userInput = UserInput(prompt: prompt)
+                            let lmInput = try await context.processor.prepare(input: userInput)

-                        let start = Date()
-                        var produced = 0
+                            let start = Date()
+                            var produced = 0

-                        for await event in try MLXLMCommon.generate(
-                            input: lmInput,
-                            parameters: parameters,
-                            context: context
-                        ) {
-                            if Task.isCancelled { break }
+                            for await event in try MLXLMCommon.generate(
+                                input: lmInput,
+                                parameters: parameters,
+                                context: context
+                            ) {
+                                if Task.isCancelled { break }

-                            switch event {
-                            case .chunk(let text):
-                                produced += 1
-                                let elapsed = Date().timeIntervalSince(start)
-                                let rate = elapsed > 0 ? Double(produced) / elapsed : 0
-                                continuation.yield(TokenChunk(text: text, decodeRate: rate))
+                                switch event {
+                                case .chunk(let text):
+                                    produced += 1
+                                    let elapsed = Date().timeIntervalSince(start)
+                                    let rate = elapsed > 0 ? Double(produced) / elapsed : 0
+                                    continuation.yield(TokenChunk(text: text, decodeRate: rate))

-                            case .info:
-                                // 生成完成统计,是流的最后一个事件
-                                break
+                                case .info:
+                                    // 生成完成统计,是流的最后一个事件
+                                    break

-                            case .toolCall:
-                                // 纯文本生成不会触发,switch 穷举
-                                break
+                                case .toolCall:
+                                    // 纯文本生成不会触发,switch 穷举
+                                    break
+                                }
                            }
+                            // 注:研究笔记里曾建议尾部 MLX.GPU.synchronize() 以确保
+                            // GPU 操作全部完成。但 AsyncStream 已经 yield 真实解码后的
+                            // 文字,GPU 是否完全空闲不影响数据正确性。去掉此调用同时省
+                            // 一份 transitive import MLX 的依赖,简化 SPM 链接。
                        }
-                        // 注:研究笔记里曾建议尾部 MLX.GPU.synchronize() 以确保
-                        // GPU 操作全部完成。但 AsyncStream 已经 yield 真实解码后的
-                        // 文字,GPU 是否完全空闲不影响数据正确性。去掉此调用同时省
-                        // 一份 transitive import MLX 的依赖,简化 SPM 链接。
                    }
                    continuation.finish()
                } catch {