```

feat(iOS): 更新MNN后端模型配置，优化性能。将MNN主模型从Qwen3.5-4B(~2.64GiB)降级为Qwen3.5-2B(~1.1GiB)，因为4B版本实测运行过慢，影响用户体验。iPhone17+/SME2设备使用2B模型，保留MLX兜底方案用于模拟器和备用场景，确保AI推理性能和存储效率的平衡。
```
2026-06-09 22:20:07 +08:00
parent ca5a3fa38b
commit b79ae54b7b
40 changed files with 1327 additions and 452 deletions
--- a/康康/AI/ModelManifest.swift
+++ b/康康/AI/ModelManifest.swift
@@ -18,18 +18,20 @@ nonisolated enum ModelManifest {
    static func files(for kind: ModelKind) -> [ModelFile] {
        switch kind {
        case .llm:
-            // Qwen3.5-4B-4bit:多模态仓库,MLX 兜底用它同时做文本(LLMModelFactory qwen3_5 文本路径)
-            // 与视觉(VLMModelFactory qwen3_5)。字节数取自 mlx-community/Qwen3.5-4B-4bit
-            // 仓库实际 blob 大小(HF API,2026-06 核对)。镜像全部运行文件(含视觉预处理配置),
-            // 排除 README.md / .gitattributes。
+            // Qwen3.5-2B-4bit:多模态仓库,但走 LLMModelFactory 的 qwen3_5 文本路径加载。
+            // 字节数取自 mlx-community/Qwen3.5-2B-4bit 仓库实际 blob 大小(HF API,2026-06 核对)。
+            // 该仓库 tokenizer 体系为 vocab.json + tokenizer.json(无 merges.txt /
+            // special_tokens_map.json / added_tokens.json),chat_template 改为 .jinja。
+            // 一并镜像视觉预处理配置(preprocessor / processor / video_preprocessor),
+            // 文本加载用不到但体积可忽略,保持与仓库一致避免漏文件。
            return [
-                ModelFile(path: "config.json", bytes: 3_366),
-                ModelFile(path: "model.safetensors", bytes: 3_034_300_695),
-                ModelFile(path: "model.safetensors.index.json", bytes: 101_944),
+                ModelFile(path: "config.json", bytes: 3_113),
+                ModelFile(path: "model.safetensors", bytes: 1_722_271_785),
+                ModelFile(path: "model.safetensors.index.json", bytes: 81_722),
                ModelFile(path: "tokenizer.json", bytes: 19_989_343),
                ModelFile(path: "tokenizer_config.json", bytes: 1_139),
                ModelFile(path: "vocab.json", bytes: 6_722_759),
-                ModelFile(path: "chat_template.jinja", bytes: 7_756),
+                ModelFile(path: "chat_template.jinja", bytes: 7_755),
                ModelFile(path: "preprocessor_config.json", bytes: 390),
                ModelFile(path: "processor_config.json", bytes: 1_300),
                ModelFile(path: "video_preprocessor_config.json", bytes: 385),
@@ -58,18 +60,17 @@ nonisolated enum ModelManifest {
                ModelFile(path: "video_preprocessor_config.json", bytes: 817),
            ]
        case .mnnLLM:
-            // taobao-mnn/Qwen3.5-4B-MNN 预转换 MNN 格式(HF API 实测,2026-06)。
+            // taobao-mnn/Qwen3.5-2B-MNN 预转换 MNN 格式(HF API 实测,2026-06)。
            // 运行时必需:config.json(MNN llm 配置)+ llm_config.json(超参)+ llm.mnn(图)
-            // + llm.mnn.weight(量化权重 ~2.45GB)+ tokenizer.txt + visual.mnn/visual.mnn.weight(多模态,
-            // 文本路径不用但配置含 mllm,带上避免缺文件)。排除 README/.gitattributes 与可读 dump。
+            // + llm.mnn.weight(量化权重 ~1.1GB)+ tokenizer.txt + visual.mnn(多模态,文本路径不用但配置含 mllm)。
+            // 排除 README/.gitattributes 与可读 dump(llm.mnn.json / export_args.json)。
            return [
                ModelFile(path: "config.json", bytes: 652),
-                ModelFile(path: "llm_config.json", bytes: 8_693),
-                ModelFile(path: "llm.mnn", bytes: 3_651_096),
-                ModelFile(path: "llm.mnn.weight", bytes: 2_629_387_626),
+                ModelFile(path: "llm_config.json", bytes: 8_692),
+                ModelFile(path: "llm.mnn", bytes: 2_148_136),
+                ModelFile(path: "llm.mnn.weight", bytes: 1_176_647_702),
                ModelFile(path: "tokenizer.txt", bytes: 6_465_727),
                ModelFile(path: "visual.mnn", bytes: 488_096),
-                ModelFile(path: "visual.mnn.weight", bytes: 196_768_960),
            ]
        }
    }