feat(语音日记): SpeechDictationService 端侧流式转写(不落盘音频)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
151
康康/Services/SpeechDictationService.swift
Normal file
151
康康/Services/SpeechDictationService.swift
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
import Foundation
|
||||||
|
import Speech
|
||||||
|
import AVFoundation
|
||||||
|
|
||||||
|
/// On-device streaming speech-to-text (spec 2026-06-10-voice-diary).
///
/// Microphone buffers from `AVAudioEngine` are appended to an
/// `SFSpeechAudioBufferRecognitionRequest` whose `requiresOnDeviceRecognition`
/// is `true`, so recognition is required to stay on the device. This type never
/// writes audio to disk.
///
/// Lifecycle: `start(onPartial:)` begins recording and streams partial
/// transcripts, `stop()` ends the recording and returns the final transcript,
/// and `abort()` tears everything down without waiting for a result.
///
/// Caller: `DiaryQuickSheet`. Per the original author's note the project defaults
/// to MainActor isolation, which makes this type MainActor-isolated. The audio tap
/// and the recognition callback run on system threads; those closures only touch
/// locally captured objects and hop back with `Task { @MainActor }`.
final class SpeechDictationService {

    /// Errors thrown by `start(onPartial:)`.
    enum DictationError: Error, LocalizedError {
        /// No available recognizer supports on-device recognition.
        case unavailable
        /// The audio session or audio engine could not be started; the payload
        /// is a human-readable reason.
        case audioEngineStartFailed(String)

        var errorDescription: String? {
            switch self {
            case .unavailable:
                return String(appLoc: "本机不支持端侧语音识别")
            case .audioEngineStartFailed(let m):
                return String(appLoc: "录音启动失败:\(m)")
            }
        }
    }

    /// State that belongs to exactly one recording.
    ///
    /// Keeping it per recording (instead of on the service) means a late callback
    /// from an earlier recognition task cannot modify the transcript or the
    /// completion flag of a newer recording.
    private final class Session {
        /// Retained defensively for the lifetime of the task; whether the task
        /// keeps its recognizer alive on its own is not something this file can rely on.
        let recognizer: SFSpeechRecognizer
        let request: SFSpeechAudioBufferRecognitionRequest
        var task: SFSpeechRecognitionTask?
        /// Most recent transcript delivered by the recognition callback.
        var latestText = ""
        /// Set when a final result or an error has been delivered.
        var didFinish = false
        /// Once `true`, recognition callbacks for this session are ignored.
        private(set) var isClosed = false

        init(recognizer: SFSpeechRecognizer, request: SFSpeechAudioBufferRecognitionRequest) {
            self.recognizer = recognizer
            self.request = request
        }

        /// Cancels the recognition task and stops accepting callbacks.
        func close() {
            isClosed = true
            task?.cancel()
            task = nil
        }
    }

    /// Longest time `stop()` waits for the final result before falling back to
    /// the latest partial transcript.
    private static let finalResultTimeout: TimeInterval = 1.5
    /// Polling interval used while waiting for the final result (100 ms).
    private static let pollIntervalNanoseconds: UInt64 = 100_000_000

    /// Returns a recognizer that supports on-device recognition.
    ///
    /// The current system locale is tried first. If it has no on-device support,
    /// `zh-CN` is used as a fallback so that a device set to another language
    /// (for example an English demo device) still works.
    private static func makeRecognizer() -> SFSpeechRecognizer? {
        for locale in [Locale.current, Locale(identifier: "zh-CN")] {
            if let recognizer = SFSpeechRecognizer(locale: locale),
               recognizer.supportsOnDeviceRecognition {
                return recognizer
            }
        }
        return nil
    }

    /// Whether this device offers on-device recognition. When `false` (simulator,
    /// older hardware) the UI is expected to hide the microphone entry point.
    static var isAvailable: Bool { makeRecognizer() != nil }

    private let audioEngine = AVAudioEngine()
    /// The session of the recording currently in progress, if any.
    private var activeSession: Session?

    /// `true` between a successful `start(onPartial:)` and the matching
    /// `stop()` / `abort()`.
    private(set) var isRecording = false

    /// Requests speech-recognition permission and then microphone permission.
    ///
    /// - Returns: `true` only when both are granted. The microphone prompt is
    ///   skipped when speech recognition is not authorized.
    func requestAuthorization() async -> Bool {
        let speechStatus = await withCheckedContinuation { (continuation: CheckedContinuation<SFSpeechRecognizerAuthorizationStatus, Never>) in
            SFSpeechRecognizer.requestAuthorization { continuation.resume(returning: $0) }
        }
        guard speechStatus == .authorized else { return false }
        return await AVAudioApplication.requestRecordPermission()
    }

    /// Starts recording and streaming recognition.
    ///
    /// Does nothing when a recording is already in progress.
    ///
    /// - Parameter onPartial: Called on the main actor with the latest transcript
    ///   every time the recognizer delivers a result. It is no longer called once
    ///   the recording has been fully stopped or aborted.
    /// - Throws: `DictationError.unavailable` when no on-device recognizer is
    ///   available, `DictationError.audioEngineStartFailed` when the audio session,
    ///   the input format, or the audio engine is unusable.
    func start(onPartial: @escaping (String) -> Void) throws {
        guard !isRecording else { return }
        guard let recognizer = Self.makeRecognizer(), recognizer.isAvailable else {
            throw DictationError.unavailable
        }

        let audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
            try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
        } catch {
            throw DictationError.audioEngineStartFailed(error.localizedDescription)
        }

        let input = audioEngine.inputNode
        let format = input.outputFormat(forBus: 0)
        // Installing a tap with a zero sample rate or zero channels (no usable
        // input route) raises an Objective-C exception that `do/catch` cannot
        // intercept, so reject such a format up front.
        guard format.sampleRate > 0, format.channelCount > 0 else {
            deactivateAudioSession()
            throw DictationError.audioEngineStartFailed(String(appLoc: "麦克风输入不可用"))
        }

        let request = SFSpeechAudioBufferRecognitionRequest()
        request.requiresOnDeviceRecognition = true  // hard requirement: recognition stays on device
        request.shouldReportPartialResults = true
        request.addsPunctuation = true

        // The tap runs on an audio thread: it only touches the locally captured
        // request and never `self`.
        input.installTap(onBus: 0, bufferSize: 1024, format: format) { buffer, _ in
            request.append(buffer)
        }
        audioEngine.prepare()
        do {
            try audioEngine.start()
        } catch {
            input.removeTap(onBus: 0)
            deactivateAudioSession()
            throw DictationError.audioEngineStartFailed(error.localizedDescription)
        }

        // The session is stored only after the engine is running, so a failed
        // start leaves no stale request behind.
        let session = Session(recognizer: recognizer, request: request)
        session.task = recognizer.recognitionTask(with: request) { [weak session] result, error in
            // Copy plain values out on the callback thread, then hop to the main actor.
            let text = result?.bestTranscription.formattedString
            let isFinal = result?.isFinal ?? false
            let failed = error != nil
            Task { @MainActor in
                // A closed session belongs to a recording that already ended.
                guard let session, !session.isClosed else { return }
                if let text {
                    session.latestText = text
                    onPartial(text)
                }
                if isFinal || failed { session.didFinish = true }
            }
        }
        activeSession = session
        isRecording = true
    }

    /// Stops recording and returns the transcript.
    ///
    /// Waits up to 1.5 s for the recognizer's final result and falls back to the
    /// latest partial transcript on timeout. If recognition failed midway, the
    /// partial text received so far is still returned (spec error table: it goes
    /// through the normal post-processing flow).
    ///
    /// - Returns: The transcript, or `""` when no recording was in progress.
    func stop() async -> String {
        guard isRecording, let session = activeSession else { return "" }
        isRecording = false
        // Detach first: a `start(onPartial:)` issued while this method is
        // suspended then owns its own session and is not torn down below.
        activeSession = nil

        stopCapture()
        session.request.endAudio()

        let deadline = Date().addingTimeInterval(Self.finalResultTimeout)
        while !session.didFinish && Date() < deadline {
            do {
                try await Task.sleep(nanoseconds: Self.pollIntervalNanoseconds)
            } catch {
                // The surrounding task was cancelled: the sleep would keep
                // throwing immediately, so stop waiting instead of spinning.
                break
            }
        }

        session.close()
        // Leave the audio session alone if a new recording started meanwhile.
        if !isRecording { deactivateAudioSession() }
        return session.latestText
    }

    /// Immediate cleanup for when the user dismisses the sheet: the result is
    /// discarded. Does nothing when no recording is in progress.
    func abort() {
        guard isRecording, let session = activeSession else { return }
        isRecording = false
        activeSession = nil

        stopCapture()
        session.request.endAudio()
        session.close()
        deactivateAudioSession()
    }

    /// Stops the audio engine and removes the microphone tap.
    private func stopCapture() {
        audioEngine.stop()
        audioEngine.inputNode.removeTap(onBus: 0)
    }

    /// Deactivates the shared audio session; failures are deliberately ignored
    /// because this is best-effort cleanup.
    private func deactivateAudioSession() {
        try? AVAudioSession.sharedInstance().setActive(false, options: .notifyOthersOnDeactivation)
    }
}
|
||||||
Reference in New Issue
Block a user