Files
kangkang/康康/AI/VLSession.swift
link2026 1b01923c8e feat(capture): 统一报告捕获流程并集成视觉语言模型识别
- 替换 QuickCaptureFlow 和 ArchiveFlow 为 UnifiedCaptureFlow 统一流程
- 新增 VLSession 封装 Qwen2.5-VL 模型进行图像文本推理
- 实现 AIRuntime 中 VL 模型的准备和分析功能
- 添加 VLPrompts 定义体检化验单识别的 JSON 输出模板
- 创建 CaptureReviewForm 提供 VL 解析结果的可编辑表单界面
- 集成 VisionKit 文档扫描器支持真机多页文档扫描
- 为模拟器实现 PhotosPicker 回退方案选择已有照片
- 在 RootView 中统一使用 UnifiedCaptureFlow 处理快速和归档流程
- 添加 CustomMetricEditor 支持自定义监测指标的创建编辑删除
- 扩展 KangkangApp 模型配置以支持新数据类型
- 实现档案列表中症状结束功能通过时间线行点击触发
2026-05-26 11:18:00 +08:00

73 lines
2.6 KiB
Swift

import Foundation
import MLX
import MLXVLM
import MLXLMCommon
/// Actor that wraps an MLX vision-language `ModelContainer` and runs image + text
/// inference through it (the original note names Qwen2.5-VL as the model).
///
/// NOTE(review): the original header also mentions `LLMSession` and `AIRuntime` —
/// presumably a sibling text-only session and the owner of this one; confirm
/// against those files.
actor VLSession {
    let container: ModelContainer

    init(container: ModelContainer) {
        self.container = container
    }

    /// Runs `body`, making the CPU the MLX default device when compiled for the
    /// simulator; on every other target `body` is simply awaited as-is.
    private static func withDeviceOverride<R>(
        _ body: () async throws -> R
    ) async rethrows -> R {
        #if !targetEnvironment(simulator)
        return try await body()
        #else
        return try await Device.withDefaultDevice(.cpu, body)
        #endif
    }

    /// Loads a VL model from a local directory and wraps it in a new session.
    ///
    /// - Parameter folderURL: Directory handed to `ModelConfiguration(directory:)`
    ///   (the original note lists config.json, weights, tokenizer and processor
    ///   as its expected contents).
    /// - Returns: A `VLSession` holding the loaded container.
    /// - Throws: Any error raised by `VLMModelFactory.shared.loadContainer`.
    static func load(folderURL: URL) async throws -> VLSession {
        let modelConfiguration = ModelConfiguration(directory: folderURL)
        let loadedContainer = try await withDeviceOverride {
            try await VLMModelFactory.shared.loadContainer(
                configuration: modelConfiguration
            )
        }
        return VLSession(container: loadedContainer)
    }

    /// Runs one generation pass over the given images and prompt and returns the
    /// generated text as a single string (chunks are accumulated, not streamed out).
    ///
    /// If the surrounding task is cancelled while generating, the loop stops and the
    /// text collected up to that point is returned; no cancellation error is thrown here.
    ///
    /// - Parameters:
    ///   - imageURLs: URLs of the images to feed to the model. The original note says
    ///     these are `file://` URLs coming from FileVault — TODO confirm with the caller.
    ///   - prompt: Instruction text (the original note points at `VLPrompts.reportExtraction`).
    ///   - maxTokens: Generation cap, 512 by default (the original note estimates the
    ///     JSON output at roughly 200-400).
    /// - Returns: The concatenation of every `.chunk` text emitted by the generator.
    /// - Throws: Errors from input preparation or from starting generation.
    func analyze(imageURLs: [URL],
                 prompt: String,
                 maxTokens: Int = 512) async throws -> String {
        try await Self.withDeviceOverride {
            try await container.perform { (context: ModelContext) -> String in
                let request = UserInput(
                    prompt: prompt,
                    images: imageURLs.map { UserInput.Image.url($0) }
                )
                let prepared = try await context.processor.prepare(input: request)

                // Low temperature; the original comment ties this choice to the JSON output.
                let sampling = GenerateParameters(
                    maxTokens: maxTokens,
                    temperature: Float(0.2),
                    topP: Float(0.9)
                )

                var output = ""
                for await event in try MLXLMCommon.generate(
                    input: prepared,
                    parameters: sampling,
                    context: context
                ) {
                    // Cooperative cancellation: stop consuming and return what we have so far.
                    guard !Task.isCancelled else { break }
                    // Only text chunks contribute to the result; other events are ignored.
                    guard case .chunk(let piece) = event else { continue }
                    output += piece
                }
                return output
            }
        }
    }
}