// Source file: 康康/Services/SpeechDictationService.swift
// Introduced by patch db327afd79ce044213f5a9ae27aab89f0f91760c (link2026, 2026-06-10):
// "feat(voice diary): SpeechDictationService on-device streaming transcription (no audio persisted)".

import Foundation
import Speech
import AVFoundation

/// On-device streaming speech-to-text (spec 2026-06-10-voice-diary).
///
/// Microphone buffers from `AVAudioEngine` are appended to an
/// `SFSpeechAudioBufferRecognitionRequest` configured with
/// `requiresOnDeviceRecognition = true`. **No audio is ever written to disk.**
///
/// Lifecycle: `start(onPartial:)` begins recording and reports partial transcripts;
/// `stop()` ends the recording and returns the final transcript; `abort()` tears
/// everything down without producing a result.
///
/// Caller: DiaryQuickSheet. Per the original author's note the project defaults to
/// MainActor isolation, so this type is MainActor-isolated. The audio tap and the
/// recognition callback are invoked on system threads; those closures only touch
/// locally captured objects and hop back through `Task { @MainActor in ... }`.
///
/// NOTE(review): under the Swift 6 language mode, closures created in a MainActor
/// context and handed to `installTap` / `recognitionTask` may inherit MainActor
/// isolation and trap when invoked off the main thread — confirm the project's
/// language mode / isolation settings.
final class SpeechDictationService {

    /// Failures surfaced by `start(onPartial:)`.
    enum DictationError: Error, LocalizedError {
        /// No recognizer with on-device support is currently usable.
        case unavailable
        /// The audio session or engine could not be started; the payload is a human-readable reason.
        case audioEngineStartFailed(String)

        var errorDescription: String? {
            switch self {
            case .unavailable:
                return String(appLoc: "本机不支持端侧语音识别")
            case .audioEngineStartFailed(let m):
                return String(appLoc: "录音启动失败:\(m)")
            }
        }
    }

    // MARK: - Tunables

    /// How long `stop()` waits for the recognizer's final result before falling back to the latest partial.
    private static let finalResultTimeout: TimeInterval = 1.5
    /// Polling interval used by `stop()` while waiting for the final result (100 ms).
    private static let pollIntervalNanoseconds: UInt64 = 100_000_000
    /// Frame count requested for each microphone tap buffer.
    private static let tapBufferSize: AVAudioFrameCount = 1024

    // MARK: - Recognizer selection

    /// Returns a recognizer that supports on-device recognition.
    ///
    /// The current system locale is tried first; if it lacks on-device support,
    /// `zh-CN` is used as a fallback. Returns `nil` when neither qualifies.
    private static func makeRecognizer() -> SFSpeechRecognizer? {
        let candidateLocales = [Locale.current, Locale(identifier: "zh-CN")]
        for locale in candidateLocales {
            if let recognizer = SFSpeechRecognizer(locale: locale),
               recognizer.supportsOnDeviceRecognition {
                return recognizer
            }
        }
        return nil
    }

    /// Whether an on-device recognizer can be created on this device.
    /// The original author's note says the UI hides the mic entry point when this is `false`.
    static var isAvailable: Bool { makeRecognizer() != nil }

    // MARK: - State

    private let audioEngine = AVAudioEngine()
    private var request: SFSpeechAudioBufferRecognitionRequest?
    private var task: SFSpeechRecognitionTask?
    /// Most recent transcript reported by the recognition callback for the current session.
    private var latestText = ""
    /// Set once the current session delivered a final result or an error.
    private var didFinish = false
    /// Incremented whenever a session starts or is torn down. Recognition callbacks
    /// capture the value that was current when they were registered and are ignored
    /// once it no longer matches, so a late callback cannot leak into another session.
    private var sessionGeneration = 0

    private(set) var isRecording = false

    // MARK: - Authorization

    /// Requests speech-recognition permission, then microphone permission.
    ///
    /// - Returns: `true` only when both are granted. Microphone permission is not
    ///   requested if speech recognition was not authorized.
    func requestAuthorization() async -> Bool {
        let speechStatus = await withCheckedContinuation {
            (continuation: CheckedContinuation<SFSpeechRecognizerAuthorizationStatus, Never>) in
            SFSpeechRecognizer.requestAuthorization { status in
                continuation.resume(returning: status)
            }
        }
        guard speechStatus == .authorized else { return false }
        return await AVAudioApplication.requestRecordPermission()
    }

    // MARK: - Recording

    /// Starts recording and streaming recognition.
    ///
    /// Does nothing if a recording is already in progress.
    ///
    /// - Parameter onPartial: Called on the main actor with the latest transcript
    ///   each time the recognizer reports a result for this session.
    /// - Throws: `DictationError.unavailable` when no usable on-device recognizer exists;
    ///   `DictationError.audioEngineStartFailed` when the audio session, input format,
    ///   or audio engine cannot be set up.
    func start(onPartial: @escaping (String) -> Void) throws {
        guard !isRecording else { return }
        guard let recognizer = Self.makeRecognizer(), recognizer.isAvailable else {
            throw DictationError.unavailable
        }

        let session = AVAudioSession.sharedInstance()
        do {
            try session.setCategory(.record, mode: .measurement, options: .duckOthers)
            try session.setActive(true, options: .notifyOthersOnDeactivation)
        } catch {
            throw DictationError.audioEngineStartFailed(error.localizedDescription)
        }

        let input = audioEngine.inputNode
        let format = input.outputFormat(forBus: 0)
        // installTap raises an Objective-C exception (not catchable from Swift) when the
        // format has no sample rate or no channels, which is what the input node reports
        // when no usable input route exists. Fail gracefully instead of crashing.
        guard format.sampleRate > 0, format.channelCount > 0 else {
            deactivateSession()
            throw DictationError.audioEngineStartFailed("麦克风输入不可用")
        }

        let request = SFSpeechAudioBufferRecognitionRequest()
        request.requiresOnDeviceRecognition = true // hard requirement: recognition stays on device
        request.shouldReportPartialResults = true
        request.addsPunctuation = true

        // The tap block is not called on the main thread: it only touches the locally
        // captured request, never `self`.
        input.installTap(onBus: 0, bufferSize: Self.tapBufferSize, format: format) { buffer, _ in
            request.append(buffer)
        }
        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            input.removeTap(onBus: 0)
            deactivateSession()
            throw DictationError.audioEngineStartFailed(error.localizedDescription)
        }

        // Publish session state only after the engine is running, so a failed start
        // leaves no half-initialised request behind.
        sessionGeneration += 1
        let generation = sessionGeneration
        self.request = request
        latestText = ""
        didFinish = false

        task = recognizer.recognitionTask(with: request) { [weak self] result, error in
            // Extract plain values on the callback thread so only a String and two Bools
            // travel to the main actor.
            let text = result?.bestTranscription.formattedString
            let isFinal = result?.isFinal ?? false
            let failed = error != nil
            Task { @MainActor in
                // Drop callbacks that belong to a session which has since been stopped,
                // aborted, or replaced.
                guard let self, self.sessionGeneration == generation else { return }
                if let text {
                    self.latestText = text
                    onPartial(text)
                }
                if isFinal || failed { self.didFinish = true }
            }
        }
        isRecording = true
    }

    /// Stops recording and returns the transcript.
    ///
    /// Waits up to `finalResultTimeout` for the recognizer to deliver a final result
    /// (or an error); on timeout, or if the calling task is cancelled, the most recent
    /// partial transcript is returned instead. A recognition error mid-session does not
    /// discard text already received.
    ///
    /// - Returns: The final or latest partial transcript, or `""` if not recording.
    func stop() async -> String {
        guard isRecording else { return "" }
        isRecording = false

        let generation = sessionGeneration
        let finishingTask = task
        stopCapturingAudio()

        var transcript = latestText
        let deadline = Date().addingTimeInterval(Self.finalResultTimeout)
        while sessionGeneration == generation, !didFinish, Date() < deadline {
            do {
                try await Task.sleep(nanoseconds: Self.pollIntervalNanoseconds)
            } catch {
                // The calling task was cancelled: sleep would now return immediately on
                // every iteration, so stop waiting rather than spin until the deadline.
                break
            }
            if sessionGeneration == generation { transcript = latestText }
        }

        finishingTask?.cancel()
        // If start() ran while we were suspended, the shared state and the audio session
        // now belong to the new recording; leave them alone.
        guard sessionGeneration == generation else { return transcript }

        sessionGeneration += 1 // ignore any callback triggered by the cancel above
        task = nil
        request = nil
        deactivateSession()
        return transcript
    }

    /// Immediate teardown without producing a result (e.g. the sheet is dismissed while recording).
    /// Does nothing if no recording is in progress.
    func abort() {
        guard isRecording else { return }
        isRecording = false
        sessionGeneration += 1 // ignore callbacks still in flight for this session
        stopCapturingAudio()
        task?.cancel()
        task = nil
        request = nil
        deactivateSession()
    }

    // MARK: - Helpers

    /// Stops the engine, removes the microphone tap, and tells the request no more audio is coming.
    private func stopCapturingAudio() {
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
        request?.endAudio()
    }

    /// Best-effort deactivation of the shared audio session; a failure here is deliberately ignored.
    private func deactivateSession() {
        try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
    }
}