feat(AI): MNN 文本模型升到 Qwen3.5-4B(taobao-mnn 预转换)

现场机 iPhone 17(A19/SME2)内存与加速均可承载 4B,质量优于 2B。

- ModelKind.mnnLLM rawValue → "Qwen3.5-4B-MNN",displayName → Qwen3.5-4B (MNN/SME2)
- ModelManifest:7 个运行时文件(llm.mnn.weight ~2.45GiB + 拆分的
  visual.mnn.weight ~188MiB),总计 2,836,770,850 bytes(~2.64GiB)
- ModelManifestTests:文件数 7 / 总字节 / URL 更新到 Qwen3.5-4B-MNN
- CLAUDE.md §2:MNN 主模型记为 Qwen3.5-4B,MLX 兜底仍 2B

模拟器 ModelManifestTests TEST SUCCEEDED。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
link2026
2026-06-08 20:28:14 +08:00
parent 39b1521f00
commit cbacd9461a
4 changed files with 15 additions and 14 deletions

View File

@@ -24,7 +24,7 @@
| 图表 | Swift Charts | iOS 16+ 原生 |
| **AI 运行时(主)** | **MNN (alibaba) + Arm SME2 + CPU** | 挑战赛考核点:Qwen + MNN + SME2 端侧 CPU 推理。device-only(xcframework 见 `scripts/build-mnn-xcframework.sh`),A19/iPhone17 启用 SME2、A17 回退 NEON。经 `MNNLLMBridge`(ObjC++)→ `MNNBackend` |
| **AI 运行时(兜底)** | **MLX Swift (Apple 官方,Metal GPU)** | 双后端:`InferenceEngine` 切换,模拟器/兜底用 MLX。不要建议 Core ML / llama.cpp / Ollama |
| LLM | Qwen3.5-2B 4bit(MNN 格式 + MLX `mlx-community/Qwen3.5-2B-4bit`) | 文本生成、关键词抽取、趋势解读 |
| LLM | MNN 主:Qwen3.5-4B(`taobao-mnn/Qwen3.5-4B-MNN`,~2.64GiB);MLX 兜底:Qwen3.5-2B-4bit | 文本生成、关键词抽取、趋势解读 |
| VL | Qwen3-VL-4B-Instruct 4bit (MLX `mlx-community/Qwen3-VL-4B-Instruct-4bit`) | 拍照→结构化指标。MNN VL 需 OMNI 构建,暂走 MLX |
| 文档扫描 | VisionKit `VNDocumentCameraView` | 不要自己写透视校正 |
| Face ID | LocalAuthentication | |

View File

@@ -60,17 +60,18 @@ nonisolated enum ModelManifest {
ModelFile(path: "video_preprocessor_config.json", bytes: 817),
]
case .mnnLLM:
// taobao-mnn/Qwen3.5-2B-MNN MNN (HF API ,2026-06)
// taobao-mnn/Qwen3.5-4B-MNN MNN (HF API ,2026-06)
// :config.json(MNN llm )+ llm_config.json()+ llm.mnn()
// + llm.mnn.weight( ~1.1GB)+ tokenizer.txt + visual.mnn(, mllm)
// README/.gitattributes dump(llm.mnn.json / export_args.json)
// + llm.mnn.weight( ~2.45GiB)+ tokenizer.txt + visual.mnn/visual.mnn.weight(,
// mllm,) README/.gitattributes dump
return [
ModelFile(path: "config.json", bytes: 652),
ModelFile(path: "llm_config.json", bytes: 8_692),
ModelFile(path: "llm.mnn", bytes: 2_148_136),
ModelFile(path: "llm.mnn.weight", bytes: 1_176_647_702),
ModelFile(path: "llm_config.json", bytes: 8_693),
ModelFile(path: "llm.mnn", bytes: 3_651_096),
ModelFile(path: "llm.mnn.weight", bytes: 2_629_387_626),
ModelFile(path: "tokenizer.txt", bytes: 6_465_727),
ModelFile(path: "visual.mnn", bytes: 488_096),
ModelFile(path: "visual.mnn.weight", bytes: 196_768_960),
]
}
}

View File

@@ -4,16 +4,16 @@ nonisolated enum ModelKind: String, CaseIterable {
/// Models/ / CDN
/// - llm:MLX(GPU),Qwen3.5-2B(, qwen3_5 )
/// - vl :MLX(GPU),Qwen3-VL-4B
/// - mnnLLM:MNN(CPU/SME2,),Qwen3.5-2B MNN (taobao-mnn)
/// - mnnLLM:MNN(CPU/SME2,),Qwen3.5-4B MNN (taobao-mnn)
case llm = "Qwen3.5-2B-4bit"
case vl = "Qwen3-VL-4B-Instruct-4bit"
case mnnLLM = "Qwen3.5-2B-MNN"
case mnnLLM = "Qwen3.5-4B-MNN"
var displayName: String {
switch self {
case .llm: return "Qwen3.5-2B (MLX)"
case .vl: return "Qwen3-VL-4B"
case .mnnLLM: return "Qwen3.5-2B (MNN/SME2)"
case .mnnLLM: return "Qwen3.5-4B (MNN/SME2)"
}
}

View File

@@ -20,12 +20,12 @@ struct ModelManifestTests {
#expect(ModelManifest.totalBytes(for: .vl) == 3_109_729_929)
}
@Test func mnnHasSixFunctionalFiles() {
#expect(ModelManifest.files(for: .mnnLLM).count == 6)
@Test func mnnHasSevenFunctionalFiles() {
#expect(ModelManifest.files(for: .mnnLLM).count == 7)
}
@Test func mnnTotalBytesMatchesManifest() {
#expect(ModelManifest.totalBytes(for: .mnnLLM) == 1_185_759_005)
#expect(ModelManifest.totalBytes(for: .mnnLLM) == 2_836_770_850)
}
@Test func mnnHasEssentialRuntimeFiles() {
@@ -39,7 +39,7 @@ struct ModelManifestTests {
@Test func mnnFileURLUsesRepoPath() {
let file = ModelFile(path: "config.json", bytes: 652)
let url = ModelManifest.fileURL(for: .mnnLLM, file: file)
#expect(url.absoluteString == "https://file.myv0.com/Qwen3.5-2B-MNN/config.json")
#expect(url.absoluteString == "https://file.myv0.com/Qwen3.5-4B-MNN/config.json")
}
@Test func excludesReadmeAndGitattributes() {