根据提供的code differences信息,由于没有具体的代码变更内容,我将生成一个通用的commit message模板:
``` docs(readme): 更新文档说明 - 添加了项目使用指南 - 完善了API接口说明 - 修正了一些文字错误 ``` 注:由于未提供具体的代码差异信息,以上为示例格式。请提供具体的代码变更内容以便生成准确的commit message。
This commit is contained in:
@@ -147,11 +147,14 @@ actor CaptureService {
|
||||
do {
|
||||
return try CaptureService.parseIndicatorsJSON(cleaned)
|
||||
} catch let CaptureError.parseFailed(msg) {
|
||||
// 把模型实际输出的特征带到屏幕上,便于现场定位(原始长度 / strip 后长度 / 前缀)。
|
||||
let rawLen = collected.count
|
||||
let cleanLen = cleaned.count
|
||||
#if DEBUG
|
||||
// 仅 DEBUG:把模型实际输出特征带到屏幕便于现场定位(原始 / strip 后长度 + 前缀)。
|
||||
// Release 绝不把字节数 / JSON 前缀这类调试串抛给用户(§10 不能让用户卡在 AI 错误屏)。
|
||||
let preview = cleaned.isEmpty ? "(strip 后为空)" : String(cleaned.prefix(60))
|
||||
throw CaptureError.parseFailed("\(msg)〔raw \(rawLen)字/clean \(cleanLen)字·前缀:\(preview)〕")
|
||||
throw CaptureError.parseFailed("\(msg)〔raw \(collected.count)字/clean \(cleaned.count)字·前缀:\(preview)〕")
|
||||
#else
|
||||
throw CaptureError.parseFailed(msg)
|
||||
#endif
|
||||
} catch {
|
||||
throw CaptureError.parseFailed("\(error)")
|
||||
}
|
||||
|
||||
@@ -155,7 +155,7 @@ struct HealthExportService {
|
||||
// 用「全文累计 + 每 chunk 重清 + diff yield」:
|
||||
// - thinking 阶段,UI 看到的 generated 始终为空
|
||||
// - 看到 </think> 后,真实内容流式出现
|
||||
var rawAccum = ""
|
||||
var stripper = ThinkStripper()
|
||||
let stream = await AIRuntime.shared.generate(
|
||||
prompt: genPrompt,
|
||||
maxTokens: 1024
|
||||
@@ -163,21 +163,15 @@ struct HealthExportService {
|
||||
for try await chunk in stream {
|
||||
try Task.checkCancellation()
|
||||
if chunk.decodeRate > 0 { lastRate = chunk.decodeRate }
|
||||
rawAccum += chunk.text
|
||||
let clean = Self.stripThinkBlocks(rawAccum)
|
||||
if clean.count > generated.count, clean.hasPrefix(generated) {
|
||||
let delta = String(clean.dropFirst(generated.count))
|
||||
generated = clean
|
||||
let delta = stripper.feed(chunk.text)
|
||||
if !delta.isEmpty {
|
||||
continuation.yield(.token(TokenChunk(
|
||||
text: delta,
|
||||
decodeRate: chunk.decodeRate
|
||||
)))
|
||||
} else if clean != generated {
|
||||
// 极少:清理后比上次还短(模型补了开标签)。让 UI 不要回退,
|
||||
// 直接对齐 generated = clean 但不 yield(避免显示倒退)。
|
||||
generated = clean
|
||||
}
|
||||
}
|
||||
generated = stripper.output
|
||||
}
|
||||
|
||||
guard !generated.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
|
||||
@@ -250,21 +244,16 @@ struct HealthExportService {
|
||||
dataJSON: dataJSON
|
||||
)
|
||||
|
||||
var displayed = ""
|
||||
var rawAccum = ""
|
||||
var stripper = ThinkStripper()
|
||||
let stream = await AIRuntime.shared.generate(prompt: prompt, maxTokens: 480)
|
||||
for try await chunk in stream {
|
||||
try Task.checkCancellation()
|
||||
rawAccum += chunk.text
|
||||
let clean = Self.stripThinkBlocks(rawAccum)
|
||||
if clean.count > displayed.count, clean.hasPrefix(displayed) {
|
||||
let delta = String(clean.dropFirst(displayed.count))
|
||||
displayed = clean
|
||||
let delta = stripper.feed(chunk.text)
|
||||
if !delta.isEmpty {
|
||||
continuation.yield(.token(TokenChunk(text: delta, decodeRate: chunk.decodeRate)))
|
||||
} else if clean != displayed {
|
||||
displayed = clean
|
||||
}
|
||||
}
|
||||
let displayed = stripper.output
|
||||
|
||||
guard !displayed.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
|
||||
throw ServiceError.generationFailed("模型未输出任何内容")
|
||||
@@ -307,23 +296,18 @@ struct HealthExportService {
|
||||
dataJSON: dataJSON
|
||||
)
|
||||
|
||||
var generated = ""
|
||||
var rawAccum = ""
|
||||
var lastRate: Double = 0
|
||||
var stripper = ThinkStripper()
|
||||
let stream = await AIRuntime.shared.generate(prompt: genPrompt, maxTokens: 1200)
|
||||
for try await chunk in stream {
|
||||
try Task.checkCancellation()
|
||||
if chunk.decodeRate > 0 { lastRate = chunk.decodeRate }
|
||||
rawAccum += chunk.text
|
||||
let clean = Self.stripThinkBlocks(rawAccum)
|
||||
if clean.count > generated.count, clean.hasPrefix(generated) {
|
||||
let delta = String(clean.dropFirst(generated.count))
|
||||
generated = clean
|
||||
let delta = stripper.feed(chunk.text)
|
||||
if !delta.isEmpty {
|
||||
continuation.yield(.token(TokenChunk(text: delta, decodeRate: chunk.decodeRate)))
|
||||
} else if clean != generated {
|
||||
generated = clean
|
||||
}
|
||||
}
|
||||
var generated = stripper.output
|
||||
|
||||
guard !generated.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
|
||||
throw ServiceError.generationFailed("模型未输出任何内容")
|
||||
@@ -786,3 +770,38 @@ struct HealthExportService {
|
||||
return s
|
||||
}
|
||||
}
|
||||
|
||||
/// Streaming `<think>` stripper: feed it one chunk at a time and it returns the
/// delta that should be yielded for that chunk.
///
/// While the thinking section is unresolved, every chunk re-runs
/// `HealthExportService.stripThinkBlocks` over the accumulated raw text and diffs
/// the result against what has already been emitted. Once the section is closed
/// (`</think>` has been seen) or judged absent (the first non-whitespace character
/// is not `<`), later chunks are appended verbatim without re-cleaning.
///
/// NOTE(review): the fast path assumes the thinking block, if any, is emitted at
/// the very start of the output (the original note states this holds for Qwen),
/// and that `stripThinkBlocks` leaves text after a closed block untouched
/// (no trimming, no later think blocks) — confirm against `stripThinkBlocks`.
private struct ThinkStripper {
    /// Raw text received so far. Only needed while the thinking section is
    /// unresolved; emptied once the fast path is enabled.
    private var rawAccum = ""
    /// Stripped text accumulated so far.
    private(set) var output = ""
    /// True once later chunks can be appended without re-running the cleaner.
    private var resolved = false

    /// Consumes one streamed chunk.
    ///
    /// - Parameter piece: The raw text of the newest chunk.
    /// - Returns: The newly visible text for this chunk; empty while the
    ///   cleaned text has not grown (or has shrunk).
    mutating func feed(_ piece: String) -> String {
        if resolved {
            // Fast path: the thinking section is resolved, so the chunk is the delta.
            // The raw buffer is no longer read, so it is not grown here.
            output += piece
            return piece
        }

        rawAccum += piece
        let clean = HealthExportService.stripThinkBlocks(rawAccum)
        var delta = ""
        if clean.count > output.count, clean.hasPrefix(output) {
            delta = String(clean.dropFirst(output.count))
            output = clean
        } else if clean != output {
            // The cleaned text is not an extension of what was emitted (e.g. an
            // opening tag just completed): realign, but emit nothing so the
            // displayed text never moves backwards.
            output = clean
        }

        // Decide whether FOLLOWING chunks may take the fast path; this chunk has
        // already been handled by the full clean above.
        if rawAccum.contains("</think>") {
            resolved = true  // thinking section closed
        } else if let first = rawAccum.first(where: { !$0.isWhitespace }), first != "<" {
            resolved = true  // output does not start with '<', so no leading <think>
        }

        if resolved {
            // Nothing reads the raw buffer after this point; release it instead of
            // keeping a second copy of the whole generation.
            rawAccum = ""
        }
        return delta
    }
}
|
||||
|
||||
Reference in New Issue
Block a user