fix(concurrency): clear 4 Swift 6 warnings under default MainActor isolation
- ModelStore/FileVault: drop `nonisolated(unsafe)` on `shared` and mark all instance methods `nonisolated` (they only read the filesystem); the `ModelKind` enum is also marked `nonisolated`.
- AIRuntime ↔ ModelStore: the cross-actor call warning is resolved by the change above.
- LLMSession: replace the deprecated `Device.setDefault(device:)` with the task-scoped `Device.withDefaultDevice(.cpu, body:)`; wrap both load and generate so the TaskLocal propagates through `ModelContainer.perform`.
This commit is contained in:
@@ -12,18 +12,27 @@ actor LLMSession {
|
||||
self.container = container
|
||||
}
|
||||
|
||||
/// Runs `body`, forcing the default device to CPU when built for the simulator
/// (per the original note, MLX's Metal backend aborts on some simulator paths).
|
||||
/// On a real device `body` runs unchanged with whatever the default device is
/// (GPU/ANE according to the original note).
|
||||
/// Uses the task-scoped `withDefaultDevice`; the original note states that the
/// TaskLocal propagates to child Tasks / actor calls.
///
/// - Parameter body: The async, possibly-throwing work to run.
/// - Returns: Whatever `body` returns.
/// - Throws: Rethrows any error thrown by `body`.
|
||||
private static func withDeviceOverride<R>(
|
||||
_ body: () async throws -> R
|
||||
) async rethrows -> R {
|
||||
#if targetEnvironment(simulator)
|
||||
return try await Device.withDefaultDevice(.cpu, body)
|
||||
#else
|
||||
return try await body()
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Loads a model from a local directory (containing config.json + weights + tokenizer).
///
/// - Parameter folderURL: Directory passed to `ModelConfiguration(directory:)`.
/// - Returns: An `LLMSession` wrapping the loaded container.
/// - Throws: Any error thrown while loading the container.
///
/// NOTE(review): this span appears to show both the pre-change and post-change
/// lines of the diff (the `Device.setDefault` block and two `let container`
/// bindings) — confirm which lines are live in the actual file.
|
||||
static func load(folderURL: URL) async throws -> LLMSession {
|
||||
#if targetEnvironment(simulator)
|
||||
// MLX's iOS Simulator GPU stream initialization aborts on some Metal backend paths.
|
||||
// The simulator is only used for functional debugging, so force CPU; real devices keep the default GPU/ANE-related paths.
|
||||
Device.setDefault(device: .cpu)
|
||||
#endif
|
||||
|
||||
let configuration = ModelConfiguration(directory: folderURL)
|
||||
let container = try await LLMModelFactory.shared.loadContainer(
|
||||
configuration: configuration
|
||||
)
|
||||
let container = try await withDeviceOverride {
|
||||
try await LLMModelFactory.shared.loadContainer(
|
||||
configuration: configuration
|
||||
)
|
||||
}
|
||||
return LLMSession(container: container)
|
||||
}
|
||||
|
||||
@@ -35,46 +44,48 @@ actor LLMSession {
|
||||
AsyncThrowingStream { continuation in
|
||||
let task = Task {
|
||||
do {
|
||||
let parameters = GenerateParameters(
|
||||
maxTokens: maxTokens,
|
||||
temperature: Float(0.6),
|
||||
topP: Float(0.9)
|
||||
)
|
||||
try await Self.withDeviceOverride {
|
||||
let parameters = GenerateParameters(
|
||||
maxTokens: maxTokens,
|
||||
temperature: Float(0.6),
|
||||
topP: Float(0.9)
|
||||
)
|
||||
|
||||
try await container.perform { (context: ModelContext) in
|
||||
let userInput = UserInput(prompt: prompt)
|
||||
let lmInput = try await context.processor.prepare(input: userInput)
|
||||
try await container.perform { (context: ModelContext) in
|
||||
let userInput = UserInput(prompt: prompt)
|
||||
let lmInput = try await context.processor.prepare(input: userInput)
|
||||
|
||||
let start = Date()
|
||||
var produced = 0
|
||||
let start = Date()
|
||||
var produced = 0
|
||||
|
||||
for await event in try MLXLMCommon.generate(
|
||||
input: lmInput,
|
||||
parameters: parameters,
|
||||
context: context
|
||||
) {
|
||||
if Task.isCancelled { break }
|
||||
for await event in try MLXLMCommon.generate(
|
||||
input: lmInput,
|
||||
parameters: parameters,
|
||||
context: context
|
||||
) {
|
||||
if Task.isCancelled { break }
|
||||
|
||||
switch event {
|
||||
case .chunk(let text):
|
||||
produced += 1
|
||||
let elapsed = Date().timeIntervalSince(start)
|
||||
let rate = elapsed > 0 ? Double(produced) / elapsed : 0
|
||||
continuation.yield(TokenChunk(text: text, decodeRate: rate))
|
||||
switch event {
|
||||
case .chunk(let text):
|
||||
produced += 1
|
||||
let elapsed = Date().timeIntervalSince(start)
|
||||
let rate = elapsed > 0 ? Double(produced) / elapsed : 0
|
||||
continuation.yield(TokenChunk(text: text, decodeRate: rate))
|
||||
|
||||
case .info:
|
||||
// 生成完成统计,是流的最后一个事件
|
||||
break
|
||||
case .info:
|
||||
// 生成完成统计,是流的最后一个事件
|
||||
break
|
||||
|
||||
case .toolCall:
|
||||
// 纯文本生成不会触发,switch 穷举
|
||||
break
|
||||
case .toolCall:
|
||||
// 纯文本生成不会触发,switch 穷举
|
||||
break
|
||||
}
|
||||
}
|
||||
// 注:研究笔记里曾建议尾部 MLX.GPU.synchronize() 以确保
|
||||
// GPU 操作全部完成。但 AsyncStream 已经 yield 真实解码后的
|
||||
// 文字,GPU 是否完全空闲不影响数据正确性。去掉此调用同时省
|
||||
// 一份 transitive import MLX 的依赖,简化 SPM 链接。
|
||||
}
|
||||
// 注:研究笔记里曾建议尾部 MLX.GPU.synchronize() 以确保
|
||||
// GPU 操作全部完成。但 AsyncStream 已经 yield 真实解码后的
|
||||
// 文字,GPU 是否完全空闲不影响数据正确性。去掉此调用同时省
|
||||
// 一份 transitive import MLX 的依赖,简化 SPM 链接。
|
||||
}
|
||||
continuation.finish()
|
||||
} catch {
|
||||
|
||||
Reference in New Issue
Block a user