import SwiftUI
import UIKit

/// "Long-press + voice shortcut" sheet: the user says what they want to log →
/// on-device transcription (`SpeechDictationService`) → LLM intent classification
/// (`VoiceIntentService`) → callback so RootView can open the matching "new entry" flow.
///
/// State machine:
/// ```
/// requesting (permissions) → recording (live captions) → classifying → onResolve(intent), sheet closes
///        │ refused → denied          │ nothing heard / not understood → failed (say it again / open menu)
/// ```
/// Everything runs on device: transcription uses requiresOnDeviceRecognition,
/// classification goes through the on-device LLM.
struct VoiceCommandSheet: View {
    /// Called when an intent was recognised; RootView is responsible for closing this sheet and routing.
    let onResolve: (VoiceIntent) -> Void
    /// Fallback: open the regular "new entry" menu (RecordSheet).
    let onOpenMenu: () -> Void

    @Environment(\.dismiss) private var dismiss

    /// UI phase of the sheet; see the state machine in the type documentation.
    enum Phase: Equatable {
        case requesting
        case denied
        case recording
        case classifying
        case failed(message: String)
    }

    @State private var phase: Phase = .requesting
    @State private var transcript = ""
    @State private var seconds = 0
    /// `@State` keeps a single instance for the lifetime of the view's identity (same rationale
    /// as the note in DiaryQuickSheet: avoids a dangling microphone after the struct is rebuilt).
    @State private var dictation = SpeechDictationService()
    /// One-second timer that drives `seconds` while recording.
    @State private var ticker: Task<Void, Never>?
    /// The in-flight unstructured task (retry or stop + classify). Kept so it can be cancelled
    /// when the sheet disappears; otherwise it would outlive the sheet and still call `onResolve`.
    @State private var pendingWork: Task<Void, Never>?

    /// Recording auto-stops after 20 s: voice shortcuts are short phrases, so a long
    /// recording means the user forgot to tap "done".
    private let maxSeconds = 20

    var body: some View {
        VStack(spacing: 0) {
            Capsule()
                .fill(Tj.Palette.line)
                .frame(width: 40, height: 4)
                .padding(.top, 10)
                .padding(.bottom, 16)

            HStack {
                VStack(alignment: .leading, spacing: 2) {
                    Text("说出想记的内容")
                        .font(.tjH2())
                        .foregroundStyle(Tj.Palette.text)
                    Text("比如:记一下血压 / 我头疼 / 拍个药盒")
                        .font(.tjScaled(11))
                        .foregroundStyle(Tj.Palette.text3)
                }
                Spacer()
                Text("全程本机")
                    .font(.tjScaled(12))
                    .foregroundStyle(Tj.Palette.text3)
            }
            .padding(.horizontal, 20)
            .padding(.bottom, 16)

            content
                .frame(maxWidth: .infinity, maxHeight: .infinity, alignment: .top)
                .padding(.horizontal, 20)

            buttons
                .padding(.horizontal, 20)
                .padding(.vertical, 14)
        }
        .background(
            Tj.Palette.sand
                .clipShape(RoundedRectangle(cornerRadius: Tj.Radius.xl, style: .continuous))
                .ignoresSafeArea(edges: .bottom)
        )
        .presentationDetents([.fraction(0.5)])
        .presentationDragIndicator(.hidden)
        .presentationBackground(Tj.Palette.sand)
        .presentationCornerRadius(Tj.Radius.xl)
        .task { await begin() }
        .onDisappear {
            // Stop everything that could touch the UI or call back into the host
            // once the sheet is gone.
            ticker?.cancel()
            pendingWork?.cancel()
            dictation.abort()
        }
    }

    // MARK: - Per-phase content

    /// Main area of the sheet; one layout per `Phase`.
    @ViewBuilder
    private var content: some View {
        switch phase {
        case .requesting:
            ProgressView().tint(Tj.Palette.ink)
                .frame(maxWidth: .infinity)
                .padding(.top, 30)

        case .denied:
            VStack(spacing: 10) {
                Image(systemName: "mic.slash")
                    .font(.tjScaled(30))
                    .foregroundStyle(Tj.Palette.text3)
                Text("需要麦克风与语音识别权限")
                    .font(.tjScaled(14, weight: .semibold))
                    .foregroundStyle(Tj.Palette.text)
                Text("语音和文字都只在本机处理,不会上传。")
                    .font(.tjScaled(12))
                    .foregroundStyle(Tj.Palette.text3)
                Button("前往设置") {
                    if let url = URL(string: UIApplication.openSettingsURLString) {
                        UIApplication.shared.open(url)
                    }
                }
                .font(.tjScaled(13, weight: .semibold))
                .foregroundStyle(Tj.Palette.ink)
            }
            .frame(maxWidth: .infinity)
            .padding(.top, 16)

        case .recording:
            VStack(spacing: 14) {
                HStack(spacing: 8) {
                    Circle()
                        .fill(Tj.Palette.brick)
                        .frame(width: 8, height: 8)
                    Text("正在听 · \(seconds)s")
                        .font(.tjScaled(12, weight: .semibold))
                        .foregroundStyle(Tj.Palette.brick)
                }
                transcriptBox(placeholder: String(appLoc: "请开口说话…"))
            }

        case .classifying:
            VStack(spacing: 14) {
                HStack(spacing: 8) {
                    ProgressView().tint(Tj.Palette.ink)
                    Text("正在理解…")
                        .font(.tjScaled(12, weight: .semibold))
                        .foregroundStyle(Tj.Palette.text2)
                }
                transcriptBox(placeholder: "")
            }

        case .failed(let message):
            VStack(spacing: 10) {
                Image(systemName: "questionmark.bubble")
                    .font(.tjScaled(28))
                    .foregroundStyle(Tj.Palette.text3)
                Text(message)
                    .font(.tjScaled(13))
                    .foregroundStyle(Tj.Palette.text2)
                    .multilineTextAlignment(.center)
                // Echo what was heard so the user can see why it was not understood.
                if !transcript.isEmpty {
                    Text("“\(transcript)”")
                        .font(.tjScaled(12))
                        .foregroundStyle(Tj.Palette.text3)
                        .lineLimit(2)
                }
            }
            .frame(maxWidth: .infinity)
            .padding(.top, 12)
        }
    }

    /// Bordered, scrollable box showing the live transcript.
    /// - Parameter placeholder: Text shown (in a dimmer colour) while the transcript is empty.
    private func transcriptBox(placeholder: String) -> some View {
        ScrollView(showsIndicators: false) {
            Text(transcript.isEmpty ? placeholder : transcript)
                .font(.tjScaled(15))
                .foregroundStyle(transcript.isEmpty ? Tj.Palette.text3 : Tj.Palette.text)
                .frame(maxWidth: .infinity, alignment: .leading)
        }
        .frame(minHeight: 64, maxHeight: 110)
        .padding(.horizontal, 14)
        .padding(.vertical, 12)
        .background(
            RoundedRectangle(cornerRadius: Tj.Radius.sm, style: .continuous)
                .fill(Tj.Palette.paper)
        )
        .overlay(
            RoundedRectangle(cornerRadius: Tj.Radius.sm, style: .continuous)
                .strokeBorder(Tj.Palette.line, lineWidth: 1)
        )
    }

    // MARK: - Bottom buttons

    /// Action row at the bottom of the sheet; the available actions depend on `phase`.
    @ViewBuilder
    private var buttons: some View {
        switch phase {
        case .recording:
            HStack(spacing: 12) {
                Button("取消") { dismiss() }
                    .buttonStyle(TjGhostButton(height: 44, fontSize: 15, horizontalPadding: 18))
                Button("说完了") { finishRecording() }
                    .buttonStyle(TjPrimaryButton(height: 44, fontSize: 15, horizontalPadding: 18))
            }

        case .failed:
            HStack(spacing: 12) {
                Button("打开新建菜单") { onOpenMenu() }
                    .buttonStyle(TjGhostButton(height: 44, fontSize: 14, horizontalPadding: 14))
                Button("再说一次") {
                    // Tracked so that dismissing the sheet while the retry is still
                    // awaiting authorization cancels it before the microphone starts.
                    pendingWork = Task { await begin() }
                }
                .buttonStyle(TjPrimaryButton(height: 44, fontSize: 14, horizontalPadding: 18))
            }

        case .denied, .requesting, .classifying:
            Button("取消") { dismiss() }
                .buttonStyle(TjGhostButton(height: 44, fontSize: 15, horizontalPadding: 18))
        }
    }

    // MARK: - Flow

    /// Resets the sheet, asks for authorization and starts dictation.
    ///
    /// Ends in `.recording` on success, `.denied` when authorization is refused, or `.failed`
    /// when on-device recognition is unavailable or `dictation.start` throws. If the surrounding
    /// task was cancelled while waiting for authorization, nothing is started.
    private func begin() async {
        ticker?.cancel()
        transcript = ""
        seconds = 0

        guard SpeechDictationService.isAvailable else {
            phase = .failed(message: String(appLoc: "本机不支持端侧语音识别,试试下面的新建菜单"))
            return
        }

        phase = .requesting
        let authorized = await dictation.requestAuthorization()

        // The sheet may have been dismissed while the permission prompt was up. `onDisappear`
        // has already called `abort()`, so starting now would leave the microphone running
        // with nothing left to stop it.
        guard !Task.isCancelled else { return }

        guard authorized else {
            phase = .denied
            return
        }

        do {
            try dictation.start { transcript = $0 }
            phase = .recording
            startTicker()
        } catch {
            phase = .failed(message: error.localizedDescription)
        }
    }

    /// Starts the one-second timer that advances `seconds` and auto-finishes at `maxSeconds`.
    private func startTicker() {
        ticker = Task { @MainActor in
            while !Task.isCancelled {
                do {
                    try await Task.sleep(nanoseconds: 1_000_000_000)
                } catch {
                    // Sleep throws when the task is cancelled: stop without counting a
                    // partial second or triggering an auto-finish.
                    return
                }
                guard phase == .recording else { return }
                seconds += 1
                if seconds >= maxSeconds {
                    finishRecording()
                    return
                }
            }
        }
    }

    /// Stops recording, classifies the transcript and reports the intent through `onResolve`.
    ///
    /// No-op unless the sheet is currently recording. Moves to `.failed` when nothing was
    /// heard or the intent could not be classified.
    private func finishRecording() {
        guard phase == .recording else { return }
        ticker?.cancel()

        // Live captions already on screen: `stop()` occasionally returns an empty string
        // (final-result race / cancellation). Do not throw away what the user has already
        // seen; fall back to the live captions instead.
        let live = transcript
        phase = .classifying

        pendingWork = Task {
            let finalText = await dictation.stop()
            // Dismissed while stopping: leave the state alone.
            guard !Task.isCancelled else { return }

            let text = finalText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? live : finalText
            transcript = text

            let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
            guard !trimmed.isEmpty else {
                phase = .failed(message: String(appLoc: "没听到内容,再试一次?"))
                return
            }

            let intent = await VoiceIntentService.classify(trimmed)
            // The user may have cancelled while classification was running; routing to a
            // new-entry screen at that point would contradict the cancel.
            guard !Task.isCancelled else { return }

            if let intent {
                onResolve(intent)
            } else {
                phase = .failed(message: String(appLoc: "没听懂想记什么,再说一次,或直接选菜单"))
            }
        }
    }
}

#Preview {
    Text("bg")
        .sheet(isPresented: .constant(true)) {
            VoiceCommandSheet(onResolve: { print($0) }, onOpenMenu: {})
        }
}