```
docs(health-profile): 添加防编造加固修订记录到导出健康档案设计文档 补充了关于导出摘要出现虚构病例问题的详细分析和修复方案, 包括检索策略优化、空数据兜底处理和prompt重写等三层防护措施。 ```
This commit is contained in:
@@ -85,40 +85,49 @@ struct HealthExportService {
|
||||
// —— Phase 3: 生成 ——
|
||||
continuation.yield(.phaseChanged(.generating))
|
||||
let dataJSON = Self.serializeData(snapshot: snapshot)
|
||||
let genPrompt = HealthExportPrompts.reportGeneration(
|
||||
userPrompt: prompt,
|
||||
intentLabelCN: intent.labelCN,
|
||||
dataJSON: dataJSON
|
||||
)
|
||||
|
||||
// —— 流式去 <think>...</think> 兜底 ——
|
||||
// Prompt 里已加 Qwen3 的 `/no_think`,但模型偶尔仍带 thinking。
|
||||
// 用「全文累计 + 每 chunk 重清 + diff yield」:
|
||||
// - thinking 阶段,UI 看到的 generated 始终为空
|
||||
// - 看到 </think> 后,真实内容流式出现
|
||||
var rawAccum = ""
|
||||
var generated = ""
|
||||
var lastRate: Double = 0
|
||||
let stream = await AIRuntime.shared.generate(
|
||||
prompt: genPrompt,
|
||||
maxTokens: 1024
|
||||
)
|
||||
for try await chunk in stream {
|
||||
try Task.checkCancellation()
|
||||
if chunk.decodeRate > 0 { lastRate = chunk.decodeRate }
|
||||
rawAccum += chunk.text
|
||||
let clean = Self.stripThinkBlocks(rawAccum)
|
||||
if clean.count > generated.count, clean.hasPrefix(generated) {
|
||||
let delta = String(clean.dropFirst(generated.count))
|
||||
generated = clean
|
||||
continuation.yield(.token(TokenChunk(
|
||||
text: delta,
|
||||
decodeRate: chunk.decodeRate
|
||||
)))
|
||||
} else if clean != generated {
|
||||
// 极少:清理后比上次还短(模型补了开标签)。让 UI 不要回退,
|
||||
// 直接对齐 generated = clean 但不 yield(避免显示倒退)。
|
||||
generated = clean
|
||||
|
||||
if Self.isEffectivelyEmpty(snapshot) {
|
||||
// 没有任何真实记录:跳过 LLM,直接产出确定性「无记录」摘要,
|
||||
// 从根上杜绝小模型在空数据上编造病例(用户红线:严格按历史信息)。
|
||||
generated = Self.fallbackReport(label: intent.labelCN, userPrompt: prompt)
|
||||
continuation.yield(.token(TokenChunk(text: generated, decodeRate: 0)))
|
||||
} else {
|
||||
let genPrompt = HealthExportPrompts.reportGeneration(
|
||||
userPrompt: prompt,
|
||||
intentLabelCN: intent.labelCN,
|
||||
dataJSON: dataJSON
|
||||
)
|
||||
|
||||
// —— 流式去 <think>...</think> 兜底 ——
|
||||
// Prompt 里已加 Qwen3 的 `/no_think`,但模型偶尔仍带 thinking。
|
||||
// 用「全文累计 + 每 chunk 重清 + diff yield」:
|
||||
// - thinking 阶段,UI 看到的 generated 始终为空
|
||||
// - 看到 </think> 后,真实内容流式出现
|
||||
var rawAccum = ""
|
||||
let stream = await AIRuntime.shared.generate(
|
||||
prompt: genPrompt,
|
||||
maxTokens: 1024
|
||||
)
|
||||
for try await chunk in stream {
|
||||
try Task.checkCancellation()
|
||||
if chunk.decodeRate > 0 { lastRate = chunk.decodeRate }
|
||||
rawAccum += chunk.text
|
||||
let clean = Self.stripThinkBlocks(rawAccum)
|
||||
if clean.count > generated.count, clean.hasPrefix(generated) {
|
||||
let delta = String(clean.dropFirst(generated.count))
|
||||
generated = clean
|
||||
continuation.yield(.token(TokenChunk(
|
||||
text: delta,
|
||||
decodeRate: chunk.decodeRate
|
||||
)))
|
||||
} else if clean != generated {
|
||||
// 极少:清理后比上次还短(模型补了开标签)。让 UI 不要回退,
|
||||
// 直接对齐 generated = clean 但不 yield(避免显示倒退)。
|
||||
generated = clean
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -292,18 +301,21 @@ struct HealthExportService {
|
||||
)
|
||||
let reports = Array(((try? ctx.fetch(reportDesc)) ?? []).prefix(8))
|
||||
|
||||
// —— Diary(隐私过滤:必须有 symptom_keyword 命中,否则不入 prompt) ——
|
||||
// —— Diary ——
|
||||
// 有具体症状词 → 按词过滤(targeted,保留隐私);
|
||||
// 无症状词(泛化请求,如「最近身体异常」)→ 纳入时间窗内最近 5 条日记。
|
||||
// 之前「无词即清空」会让真实记录完全不进 prompt → 数据为空 → 小模型编造,是本次 bug 主因之一。
|
||||
let diaryDesc = FetchDescriptor<DiaryEntry>(
|
||||
predicate: #Predicate { $0.createdAt >= fromDate && $0.createdAt <= toDate },
|
||||
sortBy: [SortDescriptor(\.createdAt, order: .reverse)]
|
||||
)
|
||||
let allDiaries = (try? ctx.fetch(diaryDesc)) ?? []
|
||||
let diaries: [DiaryEntry]
|
||||
if intent.symptomKeywords.isEmpty {
|
||||
diaries = []
|
||||
diaries = Array(allDiaries.prefix(5))
|
||||
} else {
|
||||
let diaryDesc = FetchDescriptor<DiaryEntry>(
|
||||
predicate: #Predicate { $0.createdAt >= fromDate && $0.createdAt <= toDate },
|
||||
sortBy: [SortDescriptor(\.createdAt, order: .reverse)]
|
||||
)
|
||||
let all = (try? ctx.fetch(diaryDesc)) ?? []
|
||||
diaries = Array(
|
||||
all.filter { d in
|
||||
allDiaries.filter { d in
|
||||
intent.symptomKeywords.contains { kw in
|
||||
d.content.localizedCaseInsensitiveContains(kw)
|
||||
}
|
||||
@@ -416,6 +428,56 @@ struct HealthExportService {
|
||||
return str
|
||||
}
|
||||
|
||||
// MARK: - 空数据兜底(杜绝编造)
|
||||
|
||||
/// Reports whether a retrieval snapshot is "effectively empty".
///
/// A snapshot counts as empty when it has no symptoms, indicators, reports or
/// diaries, and every profile field checked below is unset / blank.
/// The visible call site uses a `true` result to skip the LLM and emit the
/// deterministic "no records" summary instead.
///
/// - Parameter s: The snapshot to inspect.
/// - Returns: `true` only when every record list is empty and every inspected
///   profile field is unset; `false` as soon as any one of them carries data.
static func isEffectivelyEmpty(_ s: Snapshot) -> Bool {
    // Any retrieved record at all means there is real data to summarise.
    let hasAnyRecord = !(s.symptoms.isEmpty
        && s.indicators.isEmpty
        && s.reports.isEmpty
        && s.diaries.isEmpty)
    if hasAnyRecord {
        return false
    }

    let profile = s.profile

    // Scalar profile fields: all must still be at their "not provided" value.
    guard profile.age == nil,
          profile.sex == .undisclosed,
          profile.heightCM == nil,
          profile.weightKG == nil,
          profile.bloodTypeRaw.isEmpty
    else {
        return false
    }

    // Free-form / list profile fields: all must be blank.
    guard profile.allergies.isEmpty,
          profile.chronicConditions.isEmpty,
          profile.familyHistory.isEmpty
    else {
        return false
    }
    return profile.currentMedications.isEmpty
}
|
||||
|
||||
/// Builds the deterministic "no records" visit summary used when no real
/// health data is available.
///
/// All six sections are filled with the placeholder `无记录`, except the chief
/// complaint, which repeats the patient's own words verbatim (after trimming
/// surrounding whitespace). No inference of any kind is performed.
///
/// - Parameters:
///   - label: Intent label appended to the title. Surrounding whitespace and
///     newlines are trimmed; when nothing remains the title has no suffix.
///   - userPrompt: The patient's original request. Surrounding whitespace and
///     newlines are trimmed; when nothing remains the chief complaint is
///     rendered as `无记录`.
/// - Returns: The Markdown summary text.
static func fallbackReport(label: String, userPrompt: String) -> String {
    // Trim the label the same way the prompt is trimmed: a whitespace-only
    // label would otherwise pass the emptiness check and leave a dangling
    // "# 就诊摘要 — " heading, and a trailing newline would break the heading line.
    let trimmedLabel = label.trimmingCharacters(in: .whitespacesAndNewlines)
    let title = trimmedLabel.isEmpty ? "# 就诊摘要" : "# 就诊摘要 — \(trimmedLabel)"

    let complaint = userPrompt.trimmingCharacters(in: .whitespacesAndNewlines)
    let complaintLine = complaint.isEmpty ? "无记录" : complaint

    return """
    \(title)

    > 本次未检索到可用的健康记录(指标 / 症状 / 报告 / 日记均为空),以下仅据患者原话,未做任何推断。

    ## 主诉
    \(complaintLine)

    ## 患者背景
    无记录

    ## 近期症状(按时间倒序)
    无记录

    ## 关键指标(异常项优先)
    无记录

    ## 在服药与过敏
    无记录

    ## 患者疑问
    无记录
    """
}
|
||||
|
||||
// MARK: - Helpers
|
||||
|
||||
/// 把 SwiftData persistentModelID 编成稳定字符串。
|
||||
|
||||
Reference in New Issue
Block a user