import Foundation
import Speech
import AVFoundation

/// On-device streaming speech-to-text (spec 2026-06-10-voice-diary).
///
/// Microphone buffers from `AVAudioEngine` are fed into an
/// `SFSpeechAudioBufferRecognitionRequest` with `requiresOnDeviceRecognition = true`,
/// so recognition is required to stay on the device. **No audio is ever written to disk.**
///
/// Lifecycle: `start(onPartial:)` begins recording and streams partial transcripts;
/// `stop()` ends recording and returns the final transcript; `abort()` tears everything
/// down immediately without producing a result.
///
/// Caller: DiaryQuickSheet. The project defaults to MainActor isolation, so this type is
/// MainActor-isolated. The audio tap and the recognition callback run on system threads;
/// those closures only touch locally captured objects and hop back to the main actor
/// through `Task { @MainActor }`.
final class SpeechDictationService {

    /// Errors surfaced by `start(onPartial:)`.
    enum DictationError: Error, LocalizedError {
        /// No on-device recognizer is available on this machine.
        case unavailable
        /// The audio session or audio engine could not be started; the payload is a detail message.
        case audioEngineStartFailed(String)

        var errorDescription: String? {
            switch self {
            case .unavailable:
                return String(appLoc: "本机不支持端侧语音识别")
            case .audioEngineStartFailed(let m):
                return String(appLoc: "录音启动失败:\(m)")
            }
        }
    }

    /// How long `stop()` waits for the recognizer's final result before falling back to the latest partial.
    private static let finalResultTimeout: TimeInterval = 1.5

    /// Polling interval used by `stop()` while waiting for the final result (100 ms).
    private static let pollIntervalNanoseconds: UInt64 = 100_000_000

    /// Builds a recognizer that supports on-device recognition.
    ///
    /// Prefers the system locale; when that locale has no on-device support, falls back to
    /// Chinese (`zh-CN`) so a demo device running in English still works.
    /// - Returns: A recognizer with `supportsOnDeviceRecognition == true`, or `nil` if neither locale qualifies.
    private static func makeRecognizer() -> SFSpeechRecognizer? {
        let candidates: [Locale] = [.current, Locale(identifier: "zh-CN")]
        for locale in candidates {
            if let recognizer = SFSpeechRecognizer(locale: locale), recognizer.supportsOnDeviceRecognition {
                return recognizer
            }
        }
        return nil
    }

    /// Whether this machine supports on-device recognition. When `false` (simulator / older
    /// hardware) the UI hides the mic entry point and degrades silently.
    static var isAvailable: Bool { makeRecognizer() != nil }

    private let audioEngine = AVAudioEngine()
    private var request: SFSpeechAudioBufferRecognitionRequest?
    private var task: SFSpeechRecognitionTask?

    /// Continuously refreshed by the recognition callback. `stop()` returns it, which yields
    /// "final result preferred, latest partial as fallback".
    private var latestText = ""

    /// Set once the recognizer delivers a final result or reports an error.
    private var didFinish = false

    /// Identifies the current recognition session. Bumped on every `start` and on teardown so
    /// that callbacks belonging to an earlier session are ignored instead of mutating the
    /// state of (or invoking the `onPartial` of) a session that has already ended.
    private var sessionGeneration = 0

    private(set) var isRecording = false

    /// Requests both the speech-recognition and the microphone permission.
    /// - Returns: `true` only when both are granted; `false` as soon as either is denied.
    func requestAuthorization() async -> Bool {
        let speech = await withCheckedContinuation { (c: CheckedContinuation<SFSpeechRecognizerAuthorizationStatus, Never>) in
            SFSpeechRecognizer.requestAuthorization { c.resume(returning: $0) }
        }
        guard speech == .authorized else { return false }
        return await AVAudioApplication.requestRecordPermission()
    }

    /// Starts recording and streaming recognition. Does nothing if a recording is already in progress.
    ///
    /// - Parameter onPartial: Invoked on the main actor with the latest transcript every time the
    ///   recognizer reports a result (live captions in the recording panel). It is not invoked
    ///   any more once the session has been torn down by `stop()` or `abort()`.
    /// - Throws: `DictationError.unavailable` when no on-device recognizer is usable;
    ///   `DictationError.audioEngineStartFailed` when the audio session, the input format or
    ///   the audio engine cannot be set up.
    func start(onPartial: @escaping (String) -> Void) throws {
        guard !isRecording else { return }
        guard let recognizer = Self.makeRecognizer(), recognizer.isAvailable else {
            throw DictationError.unavailable
        }

        let session = AVAudioSession.sharedInstance()
        do {
            try session.setCategory(.record, mode: .measurement, options: .duckOthers)
            try session.setActive(true, options: .notifyOthersOnDeactivation)
        } catch {
            throw DictationError.audioEngineStartFailed(error.localizedDescription)
        }

        let input = audioEngine.inputNode
        let format = input.outputFormat(forBus: 0)
        // Installing a tap with a 0 Hz / 0-channel format raises an Objective-C exception that
        // Swift cannot catch, so report it as a regular start failure instead.
        guard format.sampleRate > 0, format.channelCount > 0 else {
            deactivateSession()
            throw DictationError.audioEngineStartFailed(
                "invalid input format (\(format.sampleRate) Hz, \(format.channelCount) ch)"
            )
        }

        let request = SFSpeechAudioBufferRecognitionRequest()
        request.requiresOnDeviceRecognition = true // hard requirement: recognition never leaves the device
        request.shouldReportPartialResults = true
        request.addsPunctuation = true
        self.request = request
        latestText = ""
        didFinish = false
        sessionGeneration &+= 1
        let generation = sessionGeneration

        // The tap runs on the audio thread: it only touches the locally captured request, never self.
        input.installTap(onBus: 0, bufferSize: 1024, format: format) { buffer, _ in
            request.append(buffer)
        }

        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            input.removeTap(onBus: 0)
            self.request = nil // do not keep a request around for a session that never started
            deactivateSession()
            throw DictationError.audioEngineStartFailed(error.localizedDescription)
        }

        task = recognizer.recognitionTask(with: request) { [weak self] result, error in
            // System thread → main actor.
            Task { @MainActor in
                // Drop callbacks that belong to a session which has already been torn down or replaced.
                guard let self, self.sessionGeneration == generation else { return }
                if let result {
                    self.latestText = result.bestTranscription.formattedString
                    onPartial(self.latestText)
                    if result.isFinal { self.didFinish = true }
                }
                if error != nil { self.didFinish = true }
            }
        }
        isRecording = true
    }

    /// Stops recording and waits for the final recognition result.
    ///
    /// Waits at most 1.5 s; on timeout, or when the calling task is cancelled, the latest partial
    /// is used. If recognition failed midway, whatever partial was already received is still
    /// returned (spec error table: proceed into the normal post-processing flow).
    /// - Returns: The final transcript, or `""` when no recording was in progress.
    func stop() async -> String {
        guard isRecording else { return "" }
        isRecording = false
        haltAudioCapture()

        let deadline = Date().addingTimeInterval(Self.finalResultTimeout)
        while !didFinish && Date() < deadline {
            do {
                try await Task.sleep(nanoseconds: Self.pollIntervalNanoseconds)
            } catch {
                // The caller was cancelled: sleeping would keep throwing immediately and turn this
                // loop into a busy spin, so stop waiting and fall back to the latest partial.
                break
            }
        }

        let finalText = latestText
        teardownRecognition()
        return finalText
    }

    /// Cleanup for when the user dismisses the sheet directly: the result is irrelevant, stop immediately.
    func abort() {
        guard isRecording else { return }
        isRecording = false
        haltAudioCapture()
        teardownRecognition()
    }

    /// Stops the engine, removes the microphone tap and tells the request that no more audio will arrive.
    private func haltAudioCapture() {
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
        request?.endAudio()
    }

    /// Invalidates the current session, cancels the recognition task, releases the request and
    /// deactivates the audio session.
    private func teardownRecognition() {
        sessionGeneration &+= 1 // callbacks still in flight for this session are ignored from now on
        task?.cancel()
        task = nil
        request = nil
        deactivateSession()
    }

    /// Best-effort deactivation of the shared audio session; failures are deliberately ignored.
    private func deactivateSession() {
        try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
    }
}