Files
kangkang/康康/Services/SpeechDictationService.swift
link2026 9d856fcfc4 ```
feat(AI): 集成MNN推理引擎替换MLX作为主AI运行时

- 引入MNN(alibaba) + Arm SME2 + CPU作为主AI运行时,支持A19/iPhone17的
  SME2和A17的NEON加速
- 添加MLX Swift作为兜底GPU推理方案,实现双后端切换机制
- 使用单一Qwen3.5-2B多模态模型(1.2GB),替代原有的LLM+VL分离架构
- 实现InferenceEngine.current引擎选择逻辑,真机默认MNN,模拟器回退MLX
- 更新AIAgent架构,通过MNNLLMBridge(ObjC++) → MNNBackend进行推理
- 修改队列机制防止并发推理导致OOM,使用信号量闸门控制显存占用
- 更新文档中的技术栈说明、模块边界和周次交付计划
```
2026-06-15 09:24:59 +08:00

162 lines
6.5 KiB
Swift

import Foundation
import Speech
import AVFoundation
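/// On-device speech dictation service (spec 2026-06-10-voice-diary).
/// Streams AVAudioEngine input buffers into an SFSpeechAudioBufferRecognitionRequest
/// with `requiresOnDeviceRecognition = true`, so recognition is **on-device only**.
///
/// Usage: `start(onPartial:)` begins recording and reports partial transcripts; `stop()` ends recording and returns the final text.
/// Threading: the caller (DiaryQuickSheet) is expected to be on the MainActor, and state is meant to be touched on the MainActor only;
/// the audio tap runs on the audio thread and touches only the request, while recognition callbacks hop back via `Task { @MainActor }`.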
final class SpeechDictationService {
    /// Errors thrown by `start(onPartial:)`.
    enum DictationError: Error, LocalizedError {
        /// No recognizer with on-device support could be created, or it is not currently available.
        case unavailable
        /// The audio session or audio engine could not be started; the payload describes the cause.
        case audioEngineStartFailed(String)

        var errorDescription: String? {
            switch self {
            case .unavailable:
                return String(appLoc: "本机不支持端侧语音识别")
            case .audioEngineStartFailed(let m):
                return String(appLoc: "录音启动失败:\(m)")
            }
        }
    }

    /// Longest time `stop()` waits for the recognizer to deliver its final result.
    private static let finalResultTimeout: TimeInterval = 1.5
    /// Interval between checks of `didFinish` while `stop()` is waiting.
    private static let pollIntervalNanoseconds: UInt64 = 100_000_000

    /// Joins already-committed text (`prefix`) with a dictated `partial`.
    ///
    /// - If `partial` is empty, `prefix` is returned unchanged.
    /// - If `prefix` is empty, `partial` is returned unchanged.
    /// - If `prefix` already ends in whitespace, the two are concatenated directly.
    /// - Otherwise a single space is inserted between them.
    static func merge(prefix: String, partial: String) -> String {
        guard !partial.isEmpty else { return prefix }
        guard !prefix.isEmpty else { return partial }
        let separator = (prefix.last?.isWhitespace == true) ? "" : " "
        return prefix + separator + partial
    }

    /// Builds a recognizer that supports on-device recognition.
    ///
    /// Tries the current locale first and falls back to `zh-CN`; returns `nil`
    /// when neither locale yields a recognizer with on-device support.
    private static func makeRecognizer() -> SFSpeechRecognizer? {
        let candidates = [Locale.current, Locale(identifier: "zh-CN")]
        for locale in candidates {
            if let recognizer = SFSpeechRecognizer(locale: locale),
               recognizer.supportsOnDeviceRecognition {
                return recognizer
            }
        }
        return nil
    }

    /// `false` when no on-device-capable recognizer can be created, so the UI can
    /// decide whether to offer the microphone at all.
    static var isAvailable: Bool { makeRecognizer() != nil }

    private let audioEngine = AVAudioEngine()
    /// Request of the session currently in flight; `nil` when idle. Also used to
    /// tell live recognition callbacks apart from stale ones.
    private var request: SFSpeechAudioBufferRecognitionRequest?
    private var task: SFSpeechRecognitionTask?
    /// Most recent transcript. A final result (or an error) sets `didFinish`;
    /// `stop()` returns this value, which is the final text if it arrived in time
    /// and the latest partial otherwise.
    private var latestText = ""
    private var didFinish = false
    private(set) var isRecording = false

    /// Requests speech-recognition authorization and then microphone permission.
    ///
    /// - Returns: `true` only when both are granted; `false` as soon as speech
    ///   recognition is not authorized (the microphone prompt is then skipped).
    func requestAuthorization() async -> Bool {
        let speechStatus = await withCheckedContinuation {
            (continuation: CheckedContinuation<SFSpeechRecognizerAuthorizationStatus, Never>) in
            SFSpeechRecognizer.requestAuthorization { status in
                continuation.resume(returning: status)
            }
        }
        guard speechStatus == .authorized else { return false }
        return await AVAudioApplication.requestRecordPermission()
    }

    /// Starts recording and on-device recognition.
    ///
    /// Does nothing if a recording is already in progress.
    ///
    /// - Parameter onPartial: Called on the main actor with the best transcription
    ///   so far each time the recognizer reports a result for this session.
    /// - Throws: `DictationError.unavailable` when no on-device recognizer is
    ///   available; `DictationError.audioEngineStartFailed` when the audio session,
    ///   the input format, or the audio engine cannot be set up.
    func start(onPartial: @escaping (String) -> Void) throws {
        guard !isRecording else { return }
        guard let recognizer = Self.makeRecognizer(), recognizer.isAvailable else {
            throw DictationError.unavailable
        }

        let session = AVAudioSession.sharedInstance()
        do {
            try session.setCategory(.record, mode: .measurement, options: .duckOthers)
            try session.setActive(true, options: .notifyOthersOnDeactivation)
        } catch {
            throw DictationError.audioEngineStartFailed(error.localizedDescription)
        }

        let input = audioEngine.inputNode
        let format = input.outputFormat(forBus: 0)
        // With no usable input route the node reports 0 Hz / 0 channels, and
        // installTap would raise an Objective-C exception Swift cannot catch.
        guard format.sampleRate > 0, format.channelCount > 0 else {
            deactivateSession()
            throw DictationError.audioEngineStartFailed(
                "invalid input format (\(format.sampleRate) Hz, \(format.channelCount) ch)"
            )
        }

        let liveRequest = SFSpeechAudioBufferRecognitionRequest()
        liveRequest.requiresOnDeviceRecognition = true // hard requirement: on-device only
        liveRequest.shouldReportPartialResults = true
        liveRequest.addsPunctuation = true
        request = liveRequest
        latestText = ""
        didFinish = false

        // The tap runs off the main thread: it captures only the request, never self.
        input.installTap(onBus: 0, bufferSize: 1024, format: format) { buffer, _ in
            liveRequest.append(buffer)
        }
        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            input.removeTap(onBus: 0)
            request = nil
            deactivateSession()
            throw DictationError.audioEngineStartFailed(error.localizedDescription)
        }

        task = recognizer.recognitionTask(with: liveRequest) { [weak self] result, error in
            // Recognition callbacks arrive off the main thread; hop back before touching state.
            Task { @MainActor in
                // Drop callbacks that belong to a session which has already been
                // stopped, aborted, or replaced by a newer start().
                guard let self, self.request === liveRequest else { return }
                if let result {
                    self.latestText = result.bestTranscription.formattedString
                    onPartial(self.latestText)
                    if result.isFinal { self.didFinish = true }
                }
                if error != nil { self.didFinish = true }
            }
        }
        isRecording = true
    }

    /// Stops recording and returns the transcript.
    ///
    /// Waits up to 1.5 s for the recognizer to finish; if no final result arrives
    /// in time, the latest partial is returned instead.
    ///
    /// - Returns: The transcript, or `""` when no recording was in progress.
    ///
    /// NOTE(review): this method is `async` without an isolation annotation while
    /// `didFinish`/`latestText` are written on the main actor — confirm the
    /// intended isolation with the callers.
    func stop() async -> String {
        guard isRecording else { return "" }
        isRecording = false
        stopCapture()

        let deadline = Date().addingTimeInterval(Self.finalResultTimeout)
        while !didFinish && Date() < deadline {
            try? await Task.sleep(nanoseconds: Self.pollIntervalNanoseconds)
        }

        releaseRecognition()
        return latestText
    }

    /// Cancels the current recording immediately and discards its outcome
    /// (e.g. when the hosting sheet is dismissed). Does nothing when idle.
    func abort() {
        guard isRecording else { return }
        isRecording = false
        stopCapture()
        releaseRecognition()
    }

    /// Stops the audio engine, removes the input tap, and marks the end of audio on the request.
    private func stopCapture() {
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
        request?.endAudio()
    }

    /// Cancels the recognition task, drops the task and request, and deactivates the audio session.
    private func releaseRecognition() {
        task?.cancel()
        task = nil
        request = nil
        deactivateSession()
    }

    /// Best-effort deactivation of the shared audio session; failures are ignored.
    private func deactivateSession() {
        try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
    }
}