import Foundation
import SwiftData

/// Service layer for the "export health profile" feature.
///
/// Pipeline (aligned with spec §6):
///   prepare → extractingIntent → retrieving → generating → completed
///
/// Project red lines this file adheres to:
/// - The UI only reaches the AI through this service (§3.1).
/// - Both LLM calls go through `AIRuntime.shared`, so they are serialized with CaptureService (§3.1).
/// - If the intent JSON cannot be parsed, fall back to 30 days + empty keywords; the pipeline
///   is never interrupted (§3.2 / spec §9).
/// - No cloud, no cryptography, no restructuring of existing models (§10).
@MainActor
struct HealthExportService {
    static let shared = HealthExportService()
    private init() {}

    // MARK: - Public types

    /// Coarse progress stages reported to the UI.
    enum Phase: String, Sendable {
        case extractingIntent
        case retrieving
        case generating
        case completed

        /// Localized, user-facing name of the phase.
        var label: String {
            switch self {
            case .extractingIntent: return String(appLoc: "理解意图")
            case .retrieving: return String(appLoc: "检索数据")
            case .generating: return String(appLoc: "撰写报告")
            case .completed: return String(appLoc: "已完成")
            }
        }
    }

    /// Events emitted on the export stream.
    /// Failures are delivered by the stream throwing, not as an `Event` case.
    enum Event {
        case phaseChanged(Phase)
        case token(TokenChunk)
        case completed(persistentID: PersistentIdentifier)
    }

    /// Errors the export stream can finish with.
    enum ServiceError: Error, LocalizedError {
        case modelNotReady
        case generationFailed(String)
        case cancelled

        var errorDescription: String? {
            switch self {
            case .modelNotReady:
                return String(appLoc: "AI 模型尚未准备好,请先到「我的 · 模型管理」下载。")
            case .generationFailed(let m):
                return String(appLoc: "生成失败:\(m)")
            case .cancelled:
                return String(appLoc: "已取消")
            }
        }
    }

    // MARK: - Entry point

    /// Main entry point. Runs the whole export pipeline and reports progress as a stream.
    ///
    /// Terminating the stream (e.g. the UI dismisses the sheet) cancels the underlying task;
    /// a cancelled run throws before the persistence step, so nothing is stored.
    /// Must be called on the main actor.
    ///
    /// - Parameters:
    ///   - prompt: The user's free-text request.
    ///   - modelContext: SwiftData context used for retrieval and for storing the result.
    /// - Returns: A stream of `Event`s that finishes normally after `.completed`, or throws a
    ///   `ServiceError`.
    func export(prompt: String, in modelContext: ModelContext) -> AsyncThrowingStream<Event, Error> {
        AsyncThrowingStream { continuation in
            let task = Task { @MainActor in
                do {
                    // —— Warm up the model (idempotent) ——
                    do {
                        try await AIRuntime.shared.prepare()
                    } catch is CancellationError {
                        // A cancelled warm-up is a cancellation, not a missing model.
                        throw ServiceError.cancelled
                    } catch {
                        throw ServiceError.modelNotReady
                    }

                    // —— Phase 1: intent extraction ——
                    continuation.yield(.phaseChanged(.extractingIntent))
                    let intent = await Self.extractIntent(userPrompt: prompt)
                    try Task.checkCancellation()

                    // —— Phase 2: retrieval ——
                    continuation.yield(.phaseChanged(.retrieving))
                    let snapshot = Self.retrieve(intent: intent, ctx: modelContext)
                    try Task.checkCancellation()

                    // —— Phase 3: generation ——
                    continuation.yield(.phaseChanged(.generating))
                    let dataJSON = Self.serializeData(snapshot: snapshot)

                    var generated = ""
                    var lastRate: Double = 0

                    if Self.isEffectivelyEmpty(snapshot) {
                        // No real records at all: skip the LLM and emit a deterministic
                        // "no records" summary, so a small model cannot fabricate a case
                        // history from empty data (red line: stick strictly to recorded history).
                        generated = Self.fallbackReport(label: intent.labelCN, userPrompt: prompt)
                        continuation.yield(.token(TokenChunk(text: generated, decodeRate: 0)))
                    } else {
                        let genPrompt = HealthExportPrompts.reportGeneration(
                            userPrompt: prompt,
                            intentLabelCN: intent.labelCN,
                            dataJSON: dataJSON
                        )

                        // —— Streaming removal of <think>...</think> as a safety net ——
                        // The prompt already carries Qwen3's `/no_think`, but the model
                        // occasionally still emits thinking. Strategy: accumulate the full raw
                        // text, re-clean it on every chunk, and yield only the diff:
                        // - while thinking, the UI-visible `generated` stays empty
                        // - once </think> is seen, real content starts streaming
                        var rawAccum = ""
                        let stream = await AIRuntime.shared.generate(
                            prompt: genPrompt,
                            maxTokens: 1024
                        )
                        for try await chunk in stream {
                            try Task.checkCancellation()
                            if chunk.decodeRate > 0 { lastRate = chunk.decodeRate }
                            rawAccum += chunk.text
                            let clean = Self.stripThinkBlocks(rawAccum)
                            if clean.count > generated.count, clean.hasPrefix(generated) {
                                let delta = String(clean.dropFirst(generated.count))
                                generated = clean
                                continuation.yield(.token(TokenChunk(
                                    text: delta,
                                    decodeRate: chunk.decodeRate
                                )))
                            } else if clean != generated {
                                // Rare: the cleaned text is not an extension of what was shown
                                // (e.g. the model emitted an opening tag). Re-sync `generated`
                                // without yielding so the UI never appears to go backwards.
                                generated = clean
                            }
                        }
                    }

                    guard !generated.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
                        throw ServiceError.generationFailed("模型未输出任何内容")
                    }

                    // —— Phase 4: persistence ——
                    let export = HealthExport(
                        prompt: prompt,
                        content: generated,
                        referencedIndicatorIDs: snapshot.indicators.map { Self.idString($0.persistentModelID) },
                        referencedReportIDs: snapshot.reports.map { Self.idString($0.persistentModelID) },
                        referencedSymptomIDs: snapshot.symptoms.map { Self.idString($0.persistentModelID) },
                        referencedDiaryIDs: snapshot.diaries.map { Self.idString($0.persistentModelID) },
                        inferredTimeFromDate: snapshot.fromDate,
                        inferredTimeToDate: snapshot.toDate,
                        inferredIntent: intent.intent,
                        inferredLabelCN: intent.labelCN,
                        // Tag of the LLM kind rather than a hard-coded default
                        // (local-inference evidence, §12#6).
                        modelTag: ModelKind.llm.rawValue,
                        decodeRate: lastRate
                    )
                    modelContext.insert(export)
                    do {
                        try modelContext.save()
                    } catch {
                        // A failed save must not block the UI from showing the text;
                        // only log it (W6 may hook this up to telemetry).
                        print("[HealthExportService] save failed: \(error)")
                    }

                    continuation.yield(.phaseChanged(.completed))
                    continuation.yield(.completed(persistentID: export.persistentModelID))
                    continuation.finish()
                } catch is CancellationError {
                    continuation.finish(throwing: ServiceError.cancelled)
                } catch let e as ServiceError {
                    continuation.finish(throwing: e)
                } catch {
                    continuation.finish(throwing: ServiceError.generationFailed("\(error)"))
                }
            }
            continuation.onTermination = { _ in task.cancel() }
        }
    }

    // MARK: - Phase 1: intent extraction

    /// Structured intent extracted from the user's prompt.
    struct Intent: Sendable {
        var timeRangeDays: Int
        var keywords: [String]
        var symptomKeywords: [String]
        var intent: String
        var labelCN: String

        /// Fallback used when nothing can be extracted: 30 days + empty keywords.
        static let fallback = Intent(
            timeRangeDays: 30,
            keywords: [],
            symptomKeywords: [],
            intent: "general_review",
            labelCN: "近期健康摘要"
        )
    }

    /// Calls the LLM once to obtain the intent JSON; any failure yields `Intent.fallback`.
    ///
    /// Not streamed to the UI: the chunks are concatenated into one string and parsed afterwards.
    /// Errors thrown by the stream (including cancellation) are swallowed here; `export` checks
    /// for cancellation right after this call returns.
    private static func extractIntent(userPrompt: String) async -> Intent {
        let prompt = HealthExportPrompts.intentExtraction(userPrompt: userPrompt)
        var collected = ""
        do {
            let stream = await AIRuntime.shared.generate(prompt: prompt, maxTokens: 200)
            for try await chunk in stream {
                collected += chunk.text
            }
        } catch {
            return .fallback
        }
        // Drop any thinking the model emitted so braces inside it cannot be taken for the
        // intent JSON; text without think tags only loses leading whitespace.
        return parseIntent(stripThinkBlocks(collected)) ?? .fallback
    }

    /// Parses the intent JSON leniently: the JSON text is obtained via
    /// `CaptureService.extractJSONObject(from:)`, and missing or empty fields get defaults.
    ///
    /// Internal (not private) so unit tests can call it.
    ///
    /// - Returns: The parsed intent, or `nil` when `raw` contains no JSON object that decodes
    ///   to a dictionary.
    static func parseIntent(_ raw: String) -> Intent? {
        let jsonString = CaptureService.extractJSONObject(from: raw)
        guard let data = jsonString.data(using: .utf8),
              let obj = try? JSONSerialization.jsonObject(with: data, options: [.fragmentsAllowed]),
              let dict = obj as? [String: Any] else {
            return nil
        }

        let days = clampDays(dict["time_range_days"])
        let keywords = stringArray(dict["keywords"])
        let symptomKeywords = stringArray(dict["symptom_keywords"])
        let intent = (dict["intent"] as? String)?.trimmingCharacters(in: .whitespaces) ?? "general_review"
        let labelCN = (dict["intent_label_cn"] as? String)?.trimmingCharacters(in: .whitespaces) ?? "近期健康摘要"

        return Intent(
            timeRangeDays: days,
            keywords: keywords,
            symptomKeywords: symptomKeywords,
            intent: intent.isEmpty ? "general_review" : intent,
            labelCN: labelCN.isEmpty ? "近期健康摘要" : labelCN
        )
    }

    /// Coerces a loosely-typed JSON value into a day count within 1...365.
    ///
    /// Accepts integers, floating-point numbers and numeric strings; anything else
    /// (including non-finite numbers) yields the default of 30.
    private static func clampDays(_ raw: Any?) -> Int {
        if let n = raw as? Int {
            return max(1, min(365, n))
        }
        if let n = raw as? Double {
            // Clamp before converting: `Int(_:)` traps on values outside Int's range,
            // and the number comes from model output.
            guard n.isFinite else { return 30 }
            return Int(max(1, min(365, n)))
        }
        if let s = raw as? String, let n = Int(s) {
            return max(1, min(365, n))
        }
        return 30
    }

    /// Extracts the non-empty, whitespace-trimmed strings from a loosely-typed JSON array.
    /// Non-array input and non-string elements are ignored.
    private static func stringArray(_ raw: Any?) -> [String] {
        guard let arr = raw as? [Any] else { return [] }
        return arr
            .compactMap { ($0 as? String)?.trimmingCharacters(in: .whitespaces) }
            .filter { !$0.isEmpty }
    }

    // MARK: - Phase 2: retrieve

    /// The records selected for one export, plus the time window they were selected from.
    struct Snapshot {
        var fromDate: Date
        var toDate: Date
        var indicators: [Indicator]
        var symptoms: [Symptom]
        var reports: [Report]
        var diaries: [DiaryEntry]
        var profile: UserProfile
    }

    /// Synchronous SwiftData queries (main actor) that gather the records matching `intent`.
    ///
    /// Fetch errors are treated as "no records" for the affected category.
    private static func retrieve(intent: Intent, ctx: ModelContext) -> Snapshot {
        let toDate = Date()
        let fromDate = Calendar.current.date(
            byAdding: .day, value: -intent.timeRangeDays, to: toDate
        ) ?? toDate.addingTimeInterval(-30 * 86400)

        // —— Indicators (time window + soft keyword filter) ——
        let indDesc = FetchDescriptor<Indicator>(
            predicate: #Predicate<Indicator> { $0.capturedAt >= fromDate && $0.capturedAt <= toDate },
            sortBy: [SortDescriptor(\.capturedAt, order: .reverse)]
        )
        var indicators = (try? ctx.fetch(indDesc)) ?? []
        if !intent.keywords.isEmpty {
            let filtered = indicators.filter { ind in
                intent.keywords.contains { kw in
                    ind.name.localizedCaseInsensitiveContains(kw)
                }
            }
            // Keyword hits come first, but every abnormal item is kept as well
            // (so nothing a doctor would care about is dropped).
            let abnormal = indicators.filter { $0.status != .normal }
            // Order-preserving de-duplication by persistent identifier.
            var seen = Set<PersistentIdentifier>()
            let combined = (filtered + abnormal).filter { seen.insert($0.persistentModelID).inserted }
            indicators = combined.isEmpty ? indicators : combined
        }
        indicators = Array(indicators.prefix(20))

        // —— Symptoms (anything overlapping the time window) ——
        let symptomDesc = FetchDescriptor<Symptom>(
            sortBy: [SortDescriptor(\.startedAt, order: .reverse)]
        )
        let allSymptoms = (try? ctx.fetch(symptomDesc)) ?? []
        let symptoms = Array(
            allSymptoms.filter { sym in
                // A symptom without an end date is treated as extending indefinitely.
                let overlapsStart = sym.startedAt <= toDate
                let overlapsEnd = (sym.endedAt ?? Date.distantFuture) >= fromDate
                return overlapsStart && overlapsEnd
            }.prefix(10)
        )

        // —— Reports (time window) ——
        let reportDesc = FetchDescriptor<Report>(
            predicate: #Predicate<Report> { $0.reportDate >= fromDate && $0.reportDate <= toDate },
            sortBy: [SortDescriptor(\.reportDate, order: .reverse)]
        )
        let reports = Array(((try? ctx.fetch(reportDesc)) ?? []).prefix(8))

        // —— Diary ——
        // With concrete symptom words → filter by those words (targeted, preserves privacy).
        // Without symptom words (a general request such as "recent abnormalities") → include
        // the 5 most recent diary entries inside the time window.
        // The earlier "no words → no diaries" behavior kept real records out of the prompt
        // entirely → empty data → the small model fabricated content; that was one of the main
        // causes of the bug this addresses.
        let diaryDesc = FetchDescriptor<DiaryEntry>(
            predicate: #Predicate<DiaryEntry> { $0.createdAt >= fromDate && $0.createdAt <= toDate },
            sortBy: [SortDescriptor(\.createdAt, order: .reverse)]
        )
        let allDiaries = (try? ctx.fetch(diaryDesc)) ?? []
        let diaries: [DiaryEntry]
        if intent.symptomKeywords.isEmpty {
            diaries = Array(allDiaries.prefix(5))
        } else {
            diaries = Array(
                allDiaries.filter { d in
                    intent.symptomKeywords.contains { kw in
                        d.content.localizedCaseInsensitiveContains(kw)
                    }
                }.prefix(5)
            )
        }

        // —— Profile (singleton) ——
        let profile = UserProfileStore.loadOrCreate(in: ctx)

        return Snapshot(
            fromDate: fromDate,
            toDate: toDate,
            indicators: indicators,
            symptoms: symptoms,
            reports: reports,
            diaries: diaries,
            profile: profile
        )
    }

    // MARK: - Phase 3: serialize data for prompt

    /// Serializes a `Snapshot` into the compact JSON handed to the LLM.
    ///
    /// Codable is deliberately not used: the keys must match the English names described in
    /// the prompt, and the output order must be stable (hence `.sortedKeys`).
    ///
    /// - Returns: Pretty-printed JSON, or `"{}"` if serialization fails.
    static func serializeData(snapshot: Snapshot) -> String {
        let df = DateFormatter()
        df.locale = Locale(identifier: "en_US_POSIX")
        df.dateFormat = "yyyy-MM-dd"

        let profile = snapshot.profile
        var root: [String: Any] = [:]

        // profile — only fields that carry a value are emitted
        var profDict: [String: Any] = [:]
        if let age = profile.age { profDict["age"] = age }
        if profile.sex != .undisclosed { profDict["sex"] = profile.sex.label }
        if let h = profile.heightCM { profDict["height_cm"] = h }
        if let w = profile.weightKG {
            // Whole numbers are emitted as integers, everything else rounded to one decimal.
            profDict["weight_kg"] = w.truncatingRemainder(dividingBy: 1) == 0 ? Int(w) : Double(round(w * 10) / 10)
        }
        if !profile.bloodTypeRaw.isEmpty { profDict["blood_type"] = profile.bloodTypeRaw }
        if !profile.allergies.isEmpty { profDict["allergies"] = profile.allergies }
        if !profile.chronicConditions.isEmpty { profDict["chronic"] = profile.chronicConditions }
        if !profile.familyHistory.isEmpty { profDict["family_history"] = profile.familyHistory }
        if !profile.currentMedications.isEmpty { profDict["current_meds"] = profile.currentMedications }
        root["profile"] = profDict

        // symptoms
        root["symptoms"] = snapshot.symptoms.map { s -> [String: Any] in
            var d: [String: Any] = [
                "name": s.name,
                "started": df.string(from: s.startedAt),
                "severity": s.severity,
                "ongoing": s.isOngoing
            ]
            if let ended = s.endedAt { d["ended"] = df.string(from: ended) }
            if let note = s.note, !note.isEmpty { d["note"] = note }
            return d
        }

        // indicators
        root["indicators"] = snapshot.indicators.map { i -> [String: Any] in
            [
                "name": i.name,
                "value": i.value,
                "unit": i.unit,
                "range": i.range,
                "status": i.status.rawValue,
                "date": df.string(from: i.capturedAt)
            ]
        }

        // reports
        root["reports"] = snapshot.reports.map { r -> [String: Any] in
            var d: [String: Any] = [
                "title": r.title,
                "type": r.type.label,
                "date": df.string(from: r.reportDate)
            ]
            if let inst = r.institution, !inst.isEmpty { d["institution"] = inst }
            if let sum = r.summary, !sum.isEmpty { d["summary"] = sum }
            return d
        }

        // diaries — only the first 80 characters of each entry are included
        root["diaries"] = snapshot.diaries.map { d -> [String: Any] in
            let excerpt = String(d.content.prefix(80))
            return [
                "date": df.string(from: d.createdAt),
                "excerpt": excerpt
            ]
        }

        // The time window is shown to the LLM as well.
        root["time_window"] = [
            "from": df.string(from: snapshot.fromDate),
            "to": df.string(from: snapshot.toDate)
        ]

        guard let data = try? JSONSerialization.data(
            withJSONObject: root,
            options: [.prettyPrinted, .sortedKeys]
        ), let str = String(data: data, encoding: .utf8) else {
            return "{}"
        }
        return str
    }

    // MARK: - Empty-data fallback (prevents fabrication)

    /// Whether the retrieval result is "effectively empty": no symptoms / indicators /
    /// reports / diaries, and the profile has no field that would be written either.
    ///
    /// When true, the LLM is skipped in favor of the deterministic "no records" summary, so a
    /// small model cannot invent a case history from its priors.
    static func isEffectivelyEmpty(_ s: Snapshot) -> Bool {
        guard s.symptoms.isEmpty, s.indicators.isEmpty,
              s.reports.isEmpty, s.diaries.isEmpty else {
            return false
        }
        let p = s.profile
        return p.age == nil
            && p.sex == .undisclosed
            && p.heightCM == nil
            && p.weightKG == nil
            && p.bloodTypeRaw.isEmpty
            && p.allergies.isEmpty
            && p.chronicConditions.isEmpty
            && p.familyHistory.isEmpty
            && p.currentMedications.isEmpty
    }

    /// Deterministic summary used when there are no real records: all six sections read
    /// "no records", and the chief complaint merely repeats the patient's own words without
    /// any inference.
    ///
    /// - Parameters:
    ///   - label: Intent label appended to the title; omitted from the title when empty.
    ///   - userPrompt: The user's original request, used verbatim (trimmed) as the complaint.
    static func fallbackReport(label: String, userPrompt: String) -> String {
        let title = label.isEmpty ? "# 就诊摘要" : "# 就诊摘要 — \(label)"
        let complaint = userPrompt.trimmingCharacters(in: .whitespacesAndNewlines)
        let complaintLine = complaint.isEmpty ? "无记录" : complaint
        return """
        \(title)

        > 本次未检索到可用的健康记录(指标 / 症状 / 报告 / 日记均为空),以下仅据患者原话,未做任何推断。

        ## 主诉
        \(complaintLine)

        ## 患者背景
        无记录

        ## 近期症状(按时间倒序)
        无记录

        ## 关键指标(异常项优先)
        无记录

        ## 在服药与过敏
        无记录

        ## 患者疑问
        无记录
        """
    }

    // MARK: - Helpers

    /// Encodes a SwiftData persistent identifier as a string via `String(describing:)`.
    /// W3 is meant to use this string to look the source record back up when following
    /// reference links (not implemented yet).
    private static func idString(_ id: PersistentIdentifier) -> String {
        String(describing: id)
    }

    // MARK: - <think> tag cleanup

    /// Cleans the full accumulated model output and returns the text that should be shown
    /// to the user.
    ///
    /// Intended to be called in an "accumulate + re-clean + yield the diff" loop. It guarantees:
    /// - paired `<think>...</think>` blocks are removed entirely (including empty ones)
    /// - an unclosed `<think>...` (closing tag not seen yet) hides everything from the opening
    ///   tag onwards, until the closing tag arrives
    /// - a lone `</think>` (Qwen3 occasionally emits only the closing tag) discards everything
    ///   before it as thinking
    /// - leading whitespace is trimmed, so no blank lines precede a `## heading`
    static func stripThinkBlocks(_ raw: String) -> String {
        var s = raw
        // 1. Repeatedly remove paired <think>...</think> (including an empty block body).
        while let openR = s.range(of: "<think>"),
              let closeR = s.range(of: "</think>", range: openR.upperBound..<s.endIndex) {
            s.removeSubrange(openR.lowerBound..<closeR.upperBound)
        }
        // 2. Unclosed <think>: keep only what precedes the opening tag.
        if let openR = s.range(of: "<think>") {
            s = String(s[..<openR.lowerBound])
        }
        // 3. Orphan </think>: drop it and everything before it.
        if let closeR = s.range(of: "</think>") {
            s = String(s[closeR.upperBound...])
        }
        // 4. Trim leading whitespace.
        return String(s.drop(while: \.isWhitespace))
    }
}