karlexmarin Claude Opus 4.7 (1M context) commited on
Commit
f09cd1d
·
1 Parent(s): fbec820

v0.7.1: Chat-template Sniffer (anti-bullshit #2) + 9th mode

Browse files

Ships anti-bullshit pack #2: detect when an evaluation framework will silently halve accuracy because the chat template wasn't applied. lm-eval-harness issue #1841: running against a vLLM-served API auto-applies the chat_template, but local hf/vllm mode does not — multi-turn evals drop ~50% with no warning.

NEW
- 📜 Chat-template mode: paste an HF model id (or raw tokenizer_config.json) → 1-second classification into a known family (Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek / custom / none) + exact CLI flags for lm-eval / vLLM / transformers.
- js/chat_template_sniffer.js: pure logic module (codes + params, no human strings). Fetches /raw/main/tokenizer_config.json from HF Hub, parses chat_template field, matches distinctive markers per family.
- Verdicts: ok (known family) · custom (template present, unrecognized) · missing (no chat_template — base model) · base_model · unknown.
- Per-framework command output: lm_eval --apply_chat_template, vllm serve --chat-template <name>, tokenizer.apply_chat_template().

VIRTUAL SIMULATION
- 7 HF fixtures classify correctly: Llama-3, Qwen/ChatML, Mistral, Gemma, Phi-3, DeepSeek (full-width unicode), Alpaca.
- Edge cases: base model → missing; custom unknown format → custom.
- Live HF fetch tested against 3 real models (Mistral-7B-Instruct-v0.3, Qwen2.5-7B-Instruct, Phi-3-mini-4k-instruct) — all classify correctly.

i18n
- 33 new template.* keys × 4 langs (modes.template, mode_desc.template, all warnings/verdicts/labels/status messages).
- modes.tip updated 8 → 9 modes in 4 langs.
- 456 keys × 4 langs, 0 missing / 0 extra (parity verified).

42/42 smoke tests passed locally.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (5) hide show
  1. index.html +29 -0
  2. js/chat_template_sniffer.js +124 -0
  3. js/i18n.js +144 -4
  4. js/main.js +159 -1
  5. style.css +27 -0
index.html CHANGED
@@ -335,6 +335,7 @@
335
  <button class="mode-btn" data-mode="diagnose" role="tab" aria-selected="false" data-i18n="modes.diagnose">🩺 Diagnose CLI</button>
336
  <button class="mode-btn" data-mode="phase" role="tab" aria-selected="false" data-i18n="modes.phase">📊 Phase diagram</button>
337
  <button class="mode-btn" data-mode="unmask" role="tab" aria-selected="false" data-i18n="modes.unmask">🪟 Unmask</button>
 
338
  </div>
339
  <p id="mode-desc" class="recipe-desc" data-i18n="modes.desc">
340
  <strong>Quickest start</strong>: paste any HuggingFace model id (e.g. <code>meta-llama/Meta-Llama-3-8B</code>),
@@ -652,6 +653,34 @@
652
  <div id="unmask-output" style="margin-top: 1em;"></div>
653
  </section>
654
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  <!-- Recipe selector (mode=recipe) -->
656
  <section id="recipe-section" style="display:none;">
657
  <h2 data-i18n="recipe.title">📋 Recipe</h2>
 
335
  <button class="mode-btn" data-mode="diagnose" role="tab" aria-selected="false" data-i18n="modes.diagnose">🩺 Diagnose CLI</button>
336
  <button class="mode-btn" data-mode="phase" role="tab" aria-selected="false" data-i18n="modes.phase">📊 Phase diagram</button>
337
  <button class="mode-btn" data-mode="unmask" role="tab" aria-selected="false" data-i18n="modes.unmask">🪟 Unmask</button>
338
+ <button class="mode-btn" data-mode="template" role="tab" aria-selected="false" data-i18n="modes.template">📜 Chat-template</button>
339
  </div>
340
  <p id="mode-desc" class="recipe-desc" data-i18n="modes.desc">
341
  <strong>Quickest start</strong>: paste any HuggingFace model id (e.g. <code>meta-llama/Meta-Llama-3-8B</code>),
 
653
  <div id="unmask-output" style="margin-top: 1em;"></div>
654
  </section>
655
 
656
<!-- Chat-template sniffer mode (v0.7.1 anti-bullshit pack #2) -->
<section id="template-section" style="display:none;">
  <h2>
    <span data-i18n="template.title">📜 Chat-template Sniffer</span>
    <span class="info"><span class="tooltip" data-i18n="template.tip">
      Paste an HF model id (or raw tokenizer_config.json). Detects the
      chat-template family (Llama-3, ChatML, Mistral, Gemma, Phi-3,
      Alpaca, DeepSeek, custom) and gives you the exact framework command
      to use it correctly. lm-eval-harness silently halves accuracy if you
      forget to apply it (issue #1841).
    </span></span>
  </h2>
  <p class="recipe-desc" data-i18n="template.desc">
    <strong>Did you forget <code>--apply_chat_template</code>?</strong> Most multi-turn evals fail by ~50% because the chat template wasn't applied. Paste a model id, get the exact CLI flag for your stack.
  </p>
  <div class="form-row">
    <label for="template-id" data-i18n="template.id_label">HF model id:</label>
    <input type="text" id="template-id" placeholder="e.g. mistralai/Mistral-7B-Instruct-v0.3" />
    <button type="button" id="template-fetch-btn" data-i18n="template.fetch_btn">📜 Sniff</button>
  </div>
  <p id="template-status" class="recipe-desc" style="font-size:0.92em;"></p>
  <details style="margin: 0.6em 0;">
    <summary style="cursor:pointer; font-size:0.92em;" data-i18n="template.paste_summary">Or paste raw tokenizer_config.json (private models)</summary>
    <textarea id="template-paste" rows="6" style="width:100%; font-family:monospace; font-size:0.85em; margin-top:0.4em;" placeholder='{"chat_template": "...", ...}'></textarea>
    <button type="button" id="template-paste-btn" data-i18n="template.paste_btn" style="margin-top:0.4em;">📜 Sniff pasted config</button>
  </details>
  <div id="template-output" style="margin-top: 1em;"></div>
</section>
683
+
684
  <!-- Recipe selector (mode=recipe) -->
685
  <section id="recipe-section" style="display:none;">
686
  <h2 data-i18n="recipe.title">📋 Recipe</h2>
js/chat_template_sniffer.js ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Chat-template sniffer (v0.7.1 anti-bullshit pack #2)
// Parses tokenizer_config.json and detects which chat-template family the
// model uses. Pure logic — no human-readable strings. main.js renders via i18n.
//
// Why this matters: lm-eval-harness applied via vLLM-served API auto-applies
// the chat_template; local `hf`/`vllm` mode does NOT. This silently halves
// accuracy on multi-turn evals. Issue #1841 in lm-evaluation-harness.

// Distinctive markers per family. Order matters: more specific first.
// Each entry in `markers` is either a string or an array of equivalent
// variants — any single variant found in the template counts as a hit.
const FAMILIES = [
  {
    id: "llama-3",
    label: "Llama-3 instruct",
    // begin_of_text uses bos_token variable in real templates, not literal —
    // these two are the reliable signature.
    markers: ["<|start_header_id|>", "<|eot_id|>"],
    chatTemplateName: "llama-3",
    vllmTemplate: "examples/template_llama_3.jinja",
  },
  {
    id: "chatml",
    label: "ChatML (Qwen, OpenAI-style)",
    markers: ["<|im_start|>", "<|im_end|>"],
    chatTemplateName: "chatml",
    vllmTemplate: "examples/template_chatml.jinja",
  },
  {
    id: "mistral",
    label: "Mistral instruct",
    markers: ["[INST]", "[/INST]"],
    chatTemplateName: "mistral",
    vllmTemplate: "examples/template_mistral.jinja",
  },
  {
    id: "gemma",
    label: "Gemma",
    markers: ["<start_of_turn>", "<end_of_turn>"],
    chatTemplateName: "gemma",
    vllmTemplate: "examples/template_gemma.jinja",
  },
  {
    id: "phi-3",
    label: "Phi-3",
    markers: ["<|user|>", "<|assistant|>", "<|end|>"],
    chatTemplateName: "phi-3",
    vllmTemplate: "examples/template_phi3.jinja",
  },
  {
    id: "deepseek",
    label: "DeepSeek",
    // DeepSeek templates use FULL-WIDTH vertical bars (U+FF5C), e.g.
    // <｜User｜>. Written as \uFF5C escapes so this file stays ASCII-safe.
    // Previous version checked ASCII "|User|" only, which never matches real
    // DeepSeek templates; ASCII variants are kept as a fallback for
    // hand-retyped configs.
    markers: [
      ["\uFF5CUser\uFF5C", "|User|"],
      ["\uFF5CAssistant\uFF5C", "|Assistant|"],
    ],
    chatTemplateName: "deepseek",
    vllmTemplate: null,
  },
  {
    id: "alpaca",
    label: "Alpaca",
    markers: ["### Instruction:", "### Response:"],
    chatTemplateName: "alpaca",
    vllmTemplate: null,
  },
];

/**
 * Classify the chat template found in a parsed tokenizer_config.json.
 *
 * @param {object|null|undefined} tokenizerConfig - parsed tokenizer_config.json.
 *   Only the `chat_template` field is inspected; it may be a Jinja string or
 *   the newer HF list form `[{ name, template }, ...]`.
 * @returns {object} result object: hasChatTemplate, rawTemplate (preview,
 *   truncated at 600 chars), rawTemplateLength, detectedFamily, detectedLabel,
 *   chatTemplateName, vllmTemplate, addGenerationPromptDetected,
 *   matchedMarkers, verdict ("ok" | "custom" | "missing" | "base_model" |
 *   "unknown"), warnings ([{ code, params }] — i18n codes, no human text).
 */
export function sniffChatTemplate(tokenizerConfig) {
  const out = {
    hasChatTemplate: false,
    rawTemplate: null,
    rawTemplateLength: 0,
    detectedFamily: null,
    detectedLabel: null,
    chatTemplateName: null,
    vllmTemplate: null,
    addGenerationPromptDetected: false,
    matchedMarkers: [],
    verdict: "unknown", // ok | custom | missing | base_model | unknown
    warnings: [], // each: { code, params }
  };

  let tpl = tokenizerConfig?.chat_template;
  // Newer tokenizer_config.json files may carry a LIST of named templates
  // ([{ name, template }, ...]). Prefer the entry named "default", else the
  // first one, so list-form configs classify the same as string-form.
  if (Array.isArray(tpl)) {
    const entry = tpl.find((t) => t?.name === "default") ?? tpl[0];
    tpl = entry?.template;
  }

  if (typeof tpl === "string" && tpl.length > 0) {
    out.hasChatTemplate = true;
    out.rawTemplate = tpl.length > 600 ? tpl.slice(0, 600) + "…" : tpl;
    out.rawTemplateLength = tpl.length;
    out.addGenerationPromptDetected = /add_generation_prompt/.test(tpl);

    // Try each family in order. Match if ALL markers are present in the
    // template (a marker given as an array matches when ANY variant is found).
    for (const fam of FAMILIES) {
      const hits = fam.markers
        .map((m) => (Array.isArray(m) ? m : [m]).find((v) => tpl.includes(v)))
        .filter((v) => v !== undefined);
      if (hits.length === fam.markers.length) {
        out.detectedFamily = fam.id;
        out.detectedLabel = fam.label;
        out.chatTemplateName = fam.chatTemplateName;
        out.vllmTemplate = fam.vllmTemplate;
        out.matchedMarkers = hits;
        out.verdict = "ok";
        break;
      }
    }
    if (!out.detectedFamily) {
      out.detectedFamily = "custom";
      out.detectedLabel = null;
      out.verdict = "custom";
      out.warnings.push({ code: "custom_template", params: { length: out.rawTemplateLength } });
    }
  } else {
    // No chat_template at all — typical for base / pretrained-only models.
    // Could still be a legitimate base model, so verdict depends on caller intent.
    out.verdict = "missing";
    out.warnings.push({ code: "no_chat_template", params: {} });
  }

  // Universal warning: lm-eval-harness silent halving.
  if (out.hasChatTemplate) {
    out.warnings.push({ code: "lm_eval_apply", params: {} });
  }
  // vLLM warning if template requires explicit --chat-template flag
  if (out.hasChatTemplate && out.detectedFamily !== "alpaca" && out.detectedFamily !== "deepseek") {
    out.warnings.push({ code: "vllm_apply", params: { name: out.chatTemplateName ?? "auto" } });
  }

  return out;
}
js/i18n.js CHANGED
@@ -189,6 +189,41 @@ export const TRANSLATIONS = {
189
  "mode_desc.phase": "γ × θ scatter of the paper's empirical panel. Hover a dot for details, click to load into Diagnose / Recipe forms.",
190
  "mode_desc.unmask": "Detects whether max_position_embeddings is misleading (SWA / YaRN / RoPE-scaling). Paste a model id, get a 1-line verdict.",
191
  "profile.preset_loaded": "✅ Loaded preset for <strong>{id}</strong>. Form pre-filled. (Click 📥 Fetch to override with the latest config from HF Hub.)",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  "share.import_desc": "Got a JSON file from someone else's TAF analysis? Load it here to see the verdict + chain locally. Same view as if you'd run it yourself.",
193
  "share.import_btn": "📂 Load shared JSON",
194
  "synthesis.system": "You are a precise transformer LLM diagnostic assistant. Given pre-computed TAF formula results, write a clear plain-English summary in 4-6 sentences. Cite the section number (§X.Y) for each number you mention. Always give a concrete recommendation. Do NOT invent numbers.",
@@ -281,7 +316,7 @@ export const TRANSLATIONS = {
281
  "common.no": "No",
282
 
283
  // Mode tooltips
284
- "modes.tip": "<strong>Eight ways to use the tool</strong>.<br><strong>📇 Profile</strong>: paste a model id → 5-recipe TAF Card.<br><strong>🆚 Compare</strong>: 2-3 models side-by-side on one recipe.<br><strong>🔍 Inspect config</strong>: paste raw config.json → full Profile.<br><strong>💬 Ask</strong>: free-form question, browser LLM picks the recipe.<br><strong>📋 Recipe</strong>: manual selection with full form control.<br><strong>🩺 Diagnose CLI</strong>: generate Python command for local γ measurement.<br><strong>📊 Phase diagram</strong>: 23-model panel on (log θ, γ) plane.<br><strong>🪟 Unmask</strong>: detect misleading max_position_embeddings (SWA / YaRN / RoPE-scaling).",
285
  "profile.tip": "<strong>One-click full diagnosis</strong>. Paste any HF model id (or pick preset). Tool runs all 5 recipes (long-context, KV-compression, custom-vs-API, budget, hardware) and produces a single <strong>TAF Card</strong> with verdict per dimension + key numbers + architecture classification.<br><br><strong>Use case</strong>: \"I'm evaluating Qwen2.5-32B for production — what's its full viability profile?\" → paste id → Profile → done.",
286
  "compare.tip": "<strong>Same recipe, multiple models</strong>. Pick 2-3 candidate models and one recipe. See verdicts in a single comparison table.<br><br><strong>Use case</strong>: \"I need long-context retrieval at 16K — which is best: Llama-3-8B, Mistral-7B, or Qwen-7B?\" → pick 3 + X-2 + 16K → see winner.",
287
 
@@ -802,6 +837,41 @@ export const TRANSLATIONS = {
802
  "mode_desc.phase": "Scatter γ × θ del panel empírico del paper. Hover sobre puntos para detalles, click para cargar en Diagnose / Recipe.",
803
  "mode_desc.unmask": "Detecta si max_position_embeddings es engañoso (SWA / YaRN / RoPE-scaling). Pega un model id, obtén un veredicto en 1 línea.",
804
  "profile.preset_loaded": "✅ Preset cargado para <strong>{id}</strong>. Formulario pre-rellenado. (Click 📥 Fetch para sobreescribir con el último config de HF Hub.)",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
805
  "share.import_desc": "¿Tienes un fichero JSON del análisis TAF de alguien? Cárgalo aquí para ver el veredicto + cadena localmente. La misma vista que si lo hubieras ejecutado tú.",
806
  "share.import_btn": "📂 Cargar JSON compartido",
807
  "synthesis.system": "Eres un asistente de diagnóstico preciso para LLMs transformer. Dados resultados de fórmulas TAF pre-calculados, escribe un resumen claro en español de 4-6 frases. Cita el número de sección (§X.Y) para cada número que menciones. Da siempre una recomendación concreta. NO inventes números.",
@@ -894,7 +964,7 @@ export const TRANSLATIONS = {
894
  "common.no": "No",
895
 
896
  // Tooltips de modos
897
- "modes.tip": "<strong>Ocho formas de usar la herramienta</strong>.<br><strong>📇 Perfil</strong>: pega un id → TAF Card de 5 recetas.<br><strong>🆚 Comparar</strong>: 2-3 modelos lado a lado en una receta.<br><strong>🔍 Inspeccionar config</strong>: pega config.json crudo → Perfil completo.<br><strong>💬 Pregunta</strong>: pregunta libre, el LLM del navegador elige la receta.<br><strong>📋 Receta</strong>: selección manual con control total del formulario.<br><strong>🩺 Diagnóstico CLI</strong>: genera comando Python para medir γ localmente.<br><strong>📊 Diagrama de fase</strong>: panel de 23 modelos en plano (log θ, γ).<br><strong>🪟 Desenmascarar</strong>: detecta max_position_embeddings engañoso (SWA / YaRN / RoPE-scaling).",
898
  "profile.tip": "<strong>Diagnóstico completo en un click</strong>. Pega cualquier id de modelo HF (o elige preset). La herramienta ejecuta las 5 recetas (contexto largo, compresión KV, custom vs API, presupuesto, hardware) y produce una única <strong>TAF Card</strong> con veredicto por dimensión + números clave + clasificación arquitectónica.<br><br><strong>Caso de uso</strong>: \"Estoy evaluando Qwen2.5-32B para producción — ¿cuál es su perfil completo de viabilidad?\" → pega id → Perfilar → listo.",
899
  "compare.tip": "<strong>Misma receta, múltiples modelos</strong>. Elige 2-3 modelos candidatos y una receta. Ve los veredictos en una única tabla comparativa.<br><br><strong>Caso de uso</strong>: \"Necesito recuperación de contexto largo a 16K — ¿cuál es mejor: Llama-3-8B, Mistral-7B o Qwen-7B?\" → elige 3 + X-2 + 16K → ve el ganador.",
900
 
@@ -1279,6 +1349,41 @@ export const TRANSLATIONS = {
1279
  "mode_desc.phase": "Scatter γ × θ du panel empirique du papier. Survolez les points pour détails, cliquez pour charger dans Diagnose / Recipe.",
1280
  "mode_desc.unmask": "Détecte si max_position_embeddings est trompeur (SWA / YaRN / RoPE-scaling). Collez un model id, obtenez un verdict en 1 ligne.",
1281
  "profile.preset_loaded": "✅ Préréglage chargé pour <strong>{id}</strong>. Formulaire pré-rempli. (Cliquez 📥 Fetch pour écraser avec le dernier config depuis HF Hub.)",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1282
  "share.import_desc": "Vous avez un fichier JSON de l'analyse TAF de quelqu'un ? Chargez-le ici pour voir le verdict + la chaîne localement. La même vue que si vous l'aviez exécuté vous-même.",
1283
  "share.import_btn": "📂 Charger JSON partagé",
1284
  "synthesis.system": "Vous êtes un assistant de diagnostic précis pour LLMs transformer. Étant donné des résultats de formules TAF pré-calculés, écrivez un résumé clair en français de 4-6 phrases. Citez le numéro de section (§X.Y) pour chaque nombre mentionné. Donnez toujours une recommandation concrète. N'INVENTEZ PAS de nombres.",
@@ -1371,7 +1476,7 @@ export const TRANSLATIONS = {
1371
  "common.no": "Non",
1372
 
1373
  // Tooltips des modes
1374
- "modes.tip": "<strong>Huit façons d'utiliser l'outil</strong>.<br><strong>📇 Profil</strong>: collez un id → TAF Card avec 5 recettes.<br><strong>🆚 Comparer</strong>: 2-3 modèles côte à côte sur une recette.<br><strong>🔍 Inspecter config</strong>: collez config.json brut → Profil complet.<br><strong>💬 Question</strong>: question libre, le LLM du navigateur choisit la recette.<br><strong>📋 Recette</strong>: sélection manuelle avec contrôle total du formulaire.<br><strong>🩺 Diagnostic CLI</strong>: génère commande Python pour mesurer γ localement.<br><strong>📊 Diagramme de phase</strong>: panel de 23 modèles dans le plan (log θ, γ).<br><strong>🪟 Démasquer</strong>: détecte un max_position_embeddings trompeur (SWA / YaRN / RoPE-scaling).",
1375
  "profile.tip": "<strong>Diagnostic complet en un clic</strong>. Collez n'importe quel id de modèle HF (ou choisissez préréglage). L'outil exécute les 5 recettes (contexte long, compression KV, custom vs API, budget, hardware) et produit une <strong>TAF Card</strong> unique avec verdict par dimension + nombres clés + classification architecturale.<br><br><strong>Cas d'usage</strong>: « J'évalue Qwen2.5-32B pour la production — quel est son profil complet de viabilité ? » → collez id → Profiler → fait.",
1376
  "compare.tip": "<strong>Même recette, plusieurs modèles</strong>. Choisissez 2-3 modèles candidats et une recette. Voyez les verdicts dans un seul tableau comparatif.<br><br><strong>Cas d'usage</strong>: « J'ai besoin de récupération longue contexte à 16K — quel est le meilleur : Llama-3-8B, Mistral-7B ou Qwen-7B ? » → choisissez 3 + X-2 + 16K → voyez le gagnant.",
1377
 
@@ -1756,6 +1861,41 @@ export const TRANSLATIONS = {
1756
  "mode_desc.phase": "论文经验面板的 γ × θ 散点图。悬停点查看详情,点击加载到 Diagnose / Recipe 表单。",
1757
  "mode_desc.unmask": "检测 max_position_embeddings 是否误导(SWA / YaRN / RoPE 缩放)。粘贴 model id,1 行判定。",
1758
  "profile.preset_loaded": "✅ 已为 <strong>{id}</strong> 加载预设。表单已预填。(点击 📥 Fetch 用 HF Hub 最新 config 覆盖。)",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1759
  "share.import_desc": "有他人 TAF 分析的 JSON 文件? 在这里加载以本地查看判定 + 链。与您自己运行的视图相同。",
1760
  "share.import_btn": "📂 加载共享的 JSON",
1761
  "synthesis.system": "您是 transformer LLM 的精确诊断助手。给定预先计算的 TAF 公式结果,用 4-6 句中文写出清晰的摘要。为每个提到的数字引用章节号 (§X.Y)。始终给出具体建议。不要编造数字。",
@@ -1848,7 +1988,7 @@ export const TRANSLATIONS = {
1848
  "common.no": "否",
1849
 
1850
  // 模式提示
1851
- "modes.tip": "<strong>种使用方式</strong>。<br><strong>📇 画像</strong>: 粘贴模型 id → 5 个配方的 TAF 卡。<br><strong>🆚 比较</strong>: 2-3 个模型在一个配方上并排比较。<br><strong>🔍 检查 config</strong>: 粘贴原始 config.json → 完整画像。<br><strong>💬 提问</strong>: 自由形式问题,浏览器 LLM 选择配方。<br><strong>📋 配方</strong>: 手动选择,完全控制表单。<br><strong>🩺 CLI 诊断</strong>: 生成 Python 命令在本地测量 γ。<br><strong>📊 相图</strong>: 23 个面板模型在 (log θ, γ) 平面上。<br><strong>🪟 揭示</strong>: 检测误导的 max_position_embeddings(SWA / YaRN / RoPE 缩放)。",
1852
  "profile.tip": "<strong>一键完整诊断</strong>。粘贴任意 HF 模型 id (或选择预设)。工具运行所有 5 个配方 (长上下文、KV 压缩、自定义 vs API、预算、硬件),生成单个 <strong>TAF 卡</strong>,显示每个维度的判定 + 关键数字 + 架构分类。<br><br><strong>用例</strong>: \"我正在为生产评估 Qwen2.5-32B — 它的完整可行性概况是什么?\" → 粘贴 id → 画像 → 完成。",
1853
  "compare.tip": "<strong>同一配方,多个模型</strong>。选择 2-3 个候选模型和一个配方。在单个比较表中查看判定。<br><br><strong>用例</strong>: \"我需要在 16K 进行长上下文检索 — 哪个最好: Llama-3-8B、Mistral-7B 或 Qwen-7B?\" → 选择 3 个 + X-2 + 16K → 看赢家。",
1854
 
 
189
  "mode_desc.phase": "γ × θ scatter of the paper's empirical panel. Hover a dot for details, click to load into Diagnose / Recipe forms.",
190
  "mode_desc.unmask": "Detects whether max_position_embeddings is misleading (SWA / YaRN / RoPE-scaling). Paste a model id, get a 1-line verdict.",
191
  "profile.preset_loaded": "✅ Loaded preset for <strong>{id}</strong>. Form pre-filled. (Click 📥 Fetch to override with the latest config from HF Hub.)",
192
+
193
+ // v0.7.1 — anti-bullshit pack #2: Chat-template Sniffer
194
+ "modes.template": "📜 Chat-template",
195
+ "mode_desc.template": "Detects which chat-template family a model uses (Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek). Gives the exact CLI flag for lm-eval / vLLM / transformers.",
196
+ "template.title": "📜 Chat-template Sniffer",
197
+ "template.tip": "Paste an HF model id (or raw tokenizer_config.json). Detects the chat-template family and gives you the exact framework command to use it correctly. lm-eval-harness silently halves accuracy if you forget to apply it (issue #1841).",
198
+ "template.desc": "<strong>Did you forget <code>--apply_chat_template</code>?</strong> Most multi-turn evals fail by ~50% because the chat template wasn't applied. Paste a model id, get the exact CLI flag for your stack.",
199
+ "template.id_label": "HF model id:",
200
+ "template.fetch_btn": "📜 Sniff",
201
+ "template.paste_summary": "Or paste raw tokenizer_config.json (private models)",
202
+ "template.paste_btn": "📜 Sniff pasted config",
203
+ "template.label.family": "Detected family",
204
+ "template.label.markers": "Matched markers",
205
+ "template.label.tpl_len": "Template length",
206
+ "template.section.warnings": "Warnings",
207
+ "template.section.commands": "Commands by framework",
208
+ "template.section.raw": "Raw template (preview)",
209
+ "template.family.custom": "custom (unknown family)",
210
+ "template.family.none": "(no chat_template)",
211
+ "template.verdict.ok": "✅ TEMPLATE DETECTED",
212
+ "template.verdict.custom": "⚠ CUSTOM TEMPLATE",
213
+ "template.verdict.missing": "❌ NO CHAT TEMPLATE",
214
+ "template.verdict.base_model": "ℹ BASE MODEL (no chat)",
215
+ "template.verdict.unknown": "❓ UNKNOWN",
216
+ "template.warn.no_chat_template": "No <code>chat_template</code> field in tokenizer_config.json. This is typical for base / pretrained-only models. If you intended an instruct-tuned model, the wrong file may be loaded.",
217
+ "template.warn.custom_template": "Template is non-standard ({length} chars). The tool could not match it against known families. Inspect the raw preview below and verify your eval framework supports it.",
218
+ "template.warn.lm_eval_apply": "<strong>lm-eval-harness:</strong> add <code>--apply_chat_template</code> or your accuracy will silently drop ~50% on multi-turn evals (issue #1841).",
219
+ "template.warn.vllm_apply": "<strong>vLLM serve:</strong> verify <code>--chat-template</code> is set (auto-detection sometimes fails for fine-tuned variants). Suggested: <code>{name}</code>.",
220
+ "template.status.empty_id": "⚠ Enter a model id (e.g. mistralai/Mistral-7B-Instruct-v0.3).",
221
+ "template.status.fetching": "⏳ Fetching tokenizer_config.json for {modelId}...",
222
+ "template.status.success": "✅ Sniffed {modelId} (verdict: {verdict})",
223
+ "template.status.empty_paste": "⚠ Paste a tokenizer_config.json first.",
224
+ "template.status.invalid_json":"❌ Not valid JSON: {error}",
225
+ "template.status.success_paste":"✅ Sniffed pasted config (verdict: {verdict})",
226
+ "template.pasted_label": "(pasted tokenizer_config)",
227
  "share.import_desc": "Got a JSON file from someone else's TAF analysis? Load it here to see the verdict + chain locally. Same view as if you'd run it yourself.",
228
  "share.import_btn": "📂 Load shared JSON",
229
  "synthesis.system": "You are a precise transformer LLM diagnostic assistant. Given pre-computed TAF formula results, write a clear plain-English summary in 4-6 sentences. Cite the section number (§X.Y) for each number you mention. Always give a concrete recommendation. Do NOT invent numbers.",
 
316
  "common.no": "No",
317
 
318
  // Mode tooltips
319
+ "modes.tip": "<strong>Nine ways to use the tool</strong>.<br><strong>📇 Profile</strong>: paste a model id → 5-recipe TAF Card.<br><strong>🆚 Compare</strong>: 2-3 models side-by-side on one recipe.<br><strong>🔍 Inspect config</strong>: paste raw config.json → full Profile.<br><strong>💬 Ask</strong>: free-form question, browser LLM picks the recipe.<br><strong>📋 Recipe</strong>: manual selection with full form control.<br><strong>🩺 Diagnose CLI</strong>: generate Python command for local γ measurement.<br><strong>📊 Phase diagram</strong>: 23-model panel on (log θ, γ) plane.<br><strong>🪟 Unmask</strong>: detect misleading max_position_embeddings (SWA / YaRN / RoPE-scaling).<br><strong>📜 Chat-template</strong>: detect family + give exact CLI flag for lm-eval / vLLM / transformers.",
320
  "profile.tip": "<strong>One-click full diagnosis</strong>. Paste any HF model id (or pick preset). Tool runs all 5 recipes (long-context, KV-compression, custom-vs-API, budget, hardware) and produces a single <strong>TAF Card</strong> with verdict per dimension + key numbers + architecture classification.<br><br><strong>Use case</strong>: \"I'm evaluating Qwen2.5-32B for production — what's its full viability profile?\" → paste id → Profile → done.",
321
  "compare.tip": "<strong>Same recipe, multiple models</strong>. Pick 2-3 candidate models and one recipe. See verdicts in a single comparison table.<br><br><strong>Use case</strong>: \"I need long-context retrieval at 16K — which is best: Llama-3-8B, Mistral-7B, or Qwen-7B?\" → pick 3 + X-2 + 16K → see winner.",
322
 
 
837
  "mode_desc.phase": "Scatter γ × θ del panel empírico del paper. Hover sobre puntos para detalles, click para cargar en Diagnose / Recipe.",
838
  "mode_desc.unmask": "Detecta si max_position_embeddings es engañoso (SWA / YaRN / RoPE-scaling). Pega un model id, obtén un veredicto en 1 línea.",
839
  "profile.preset_loaded": "✅ Preset cargado para <strong>{id}</strong>. Formulario pre-rellenado. (Click 📥 Fetch para sobreescribir con el último config de HF Hub.)",
840
+
841
+ // v0.7.1 — anti-bullshit pack #2: Chat-template Sniffer
842
+ "modes.template": "📜 Chat-template",
843
+ "mode_desc.template": "Detecta qué familia de chat-template usa un modelo (Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek). Da el flag CLI exacto para lm-eval / vLLM / transformers.",
844
+ "template.title": "📜 Detector de Chat-template",
845
+ "template.tip": "Pega un model id de HF (o tokenizer_config.json crudo). Detecta la familia del chat-template y te da el comando exacto para usarlo bien. lm-eval-harness divide la accuracy entre 2 silenciosamente si te olvidas de aplicarlo (issue #1841).",
846
+ "template.desc": "<strong>¿Olvidaste <code>--apply_chat_template</code>?</strong> La mayoría de evals multi-turn fallan ~50% porque el chat template no se aplicó. Pega un model id, obtén el flag CLI exacto para tu stack.",
847
+ "template.id_label": "ID modelo HF:",
848
+ "template.fetch_btn": "📜 Detectar",
849
+ "template.paste_summary": "O pega tokenizer_config.json crudo (modelos privados)",
850
+ "template.paste_btn": "📜 Detectar config pegado",
851
+ "template.label.family": "Familia detectada",
852
+ "template.label.markers": "Marcadores coincidentes",
853
+ "template.label.tpl_len": "Longitud template",
854
+ "template.section.warnings": "Avisos",
855
+ "template.section.commands": "Comandos por framework",
856
+ "template.section.raw": "Template crudo (preview)",
857
+ "template.family.custom": "custom (familia desconocida)",
858
+ "template.family.none": "(sin chat_template)",
859
+ "template.verdict.ok": "✅ TEMPLATE DETECTADO",
860
+ "template.verdict.custom": "⚠ TEMPLATE CUSTOM",
861
+ "template.verdict.missing": "❌ SIN CHAT TEMPLATE",
862
+ "template.verdict.base_model": "ℹ MODELO BASE (sin chat)",
863
+ "template.verdict.unknown": "❓ DESCONOCIDO",
864
+ "template.warn.no_chat_template": "Sin campo <code>chat_template</code> en tokenizer_config.json. Típico de modelos base / pretrained. Si esperabas un modelo instruct-tuned, puede que el archivo cargado sea incorrecto.",
865
+ "template.warn.custom_template": "Template no estándar ({length} chars). La herramienta no lo encajó en familias conocidas. Revisa el preview y verifica que tu framework de eval lo soporta.",
866
+ "template.warn.lm_eval_apply": "<strong>lm-eval-harness:</strong> añade <code>--apply_chat_template</code> o tu accuracy bajará ~50% silenciosamente en evals multi-turn (issue #1841).",
867
+ "template.warn.vllm_apply": "<strong>vLLM serve:</strong> verifica que <code>--chat-template</code> esté puesto (la auto-detección a veces falla en variantes fine-tuned). Sugerido: <code>{name}</code>.",
868
+ "template.status.empty_id": "⚠ Introduce un model id (ej. mistralai/Mistral-7B-Instruct-v0.3).",
869
+ "template.status.fetching": "⏳ Obteniendo tokenizer_config.json para {modelId}...",
870
+ "template.status.success": "✅ Detectado {modelId} (veredicto: {verdict})",
871
+ "template.status.empty_paste": "⚠ Pega un tokenizer_config.json primero.",
872
+ "template.status.invalid_json":"❌ JSON inválido: {error}",
873
+ "template.status.success_paste":"✅ Config pegado detectado (veredicto: {verdict})",
874
+ "template.pasted_label": "(tokenizer_config pegado)",
875
  "share.import_desc": "¿Tienes un fichero JSON del análisis TAF de alguien? Cárgalo aquí para ver el veredicto + cadena localmente. La misma vista que si lo hubieras ejecutado tú.",
876
  "share.import_btn": "📂 Cargar JSON compartido",
877
  "synthesis.system": "Eres un asistente de diagnóstico preciso para LLMs transformer. Dados resultados de fórmulas TAF pre-calculados, escribe un resumen claro en español de 4-6 frases. Cita el número de sección (§X.Y) para cada número que menciones. Da siempre una recomendación concreta. NO inventes números.",
 
964
  "common.no": "No",
965
 
966
  // Tooltips de modos
967
+ "modes.tip": "<strong>Nueve formas de usar la herramienta</strong>.<br><strong>📇 Perfil</strong>: pega un id → TAF Card de 5 recetas.<br><strong>🆚 Comparar</strong>: 2-3 modelos lado a lado en una receta.<br><strong>🔍 Inspeccionar config</strong>: pega config.json crudo → Perfil completo.<br><strong>💬 Pregunta</strong>: pregunta libre, el LLM del navegador elige la receta.<br><strong>📋 Receta</strong>: selección manual con control total del formulario.<br><strong>🩺 Diagnóstico CLI</strong>: genera comando Python para medir γ localmente.<br><strong>📊 Diagrama de fase</strong>: panel de 23 modelos en plano (log θ, γ).<br><strong>🪟 Desenmascarar</strong>: detecta max_position_embeddings engañoso (SWA / YaRN / RoPE-scaling).<br><strong>📜 Chat-template</strong>: detecta familia + da el flag CLI exacto para lm-eval / vLLM / transformers.",
968
  "profile.tip": "<strong>Diagnóstico completo en un click</strong>. Pega cualquier id de modelo HF (o elige preset). La herramienta ejecuta las 5 recetas (contexto largo, compresión KV, custom vs API, presupuesto, hardware) y produce una única <strong>TAF Card</strong> con veredicto por dimensión + números clave + clasificación arquitectónica.<br><br><strong>Caso de uso</strong>: \"Estoy evaluando Qwen2.5-32B para producción — ¿cuál es su perfil completo de viabilidad?\" → pega id → Perfilar → listo.",
969
  "compare.tip": "<strong>Misma receta, múltiples modelos</strong>. Elige 2-3 modelos candidatos y una receta. Ve los veredictos en una única tabla comparativa.<br><br><strong>Caso de uso</strong>: \"Necesito recuperación de contexto largo a 16K — ¿cuál es mejor: Llama-3-8B, Mistral-7B o Qwen-7B?\" → elige 3 + X-2 + 16K → ve el ganador.",
970
 
 
1349
  "mode_desc.phase": "Scatter γ × θ du panel empirique du papier. Survolez les points pour détails, cliquez pour charger dans Diagnose / Recipe.",
1350
  "mode_desc.unmask": "Détecte si max_position_embeddings est trompeur (SWA / YaRN / RoPE-scaling). Collez un model id, obtenez un verdict en 1 ligne.",
1351
  "profile.preset_loaded": "✅ Préréglage chargé pour <strong>{id}</strong>. Formulaire pré-rempli. (Cliquez 📥 Fetch pour écraser avec le dernier config depuis HF Hub.)",
1352
+
1353
+ // v0.7.1 — anti-bullshit pack #2: Chat-template Sniffer
1354
+ "modes.template": "📜 Chat-template",
1355
+ "mode_desc.template": "Détecte la famille de chat-template d'un modèle (Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek). Donne le flag CLI exact pour lm-eval / vLLM / transformers.",
1356
+ "template.title": "📜 Détecteur de Chat-template",
1357
+ "template.tip": "Collez un model id HF (ou tokenizer_config.json brut). Détecte la famille du chat-template et donne le commande exacte pour l'utiliser correctement. lm-eval-harness divise l'accuracy par 2 silencieusement si vous oubliez de l'appliquer (issue #1841).",
1358
+ "template.desc": "<strong>Avez-vous oublié <code>--apply_chat_template</code> ?</strong> La plupart des évals multi-tours échouent à ~50% parce que le chat template n'a pas été appliqué. Collez un model id, obtenez le flag CLI exact pour votre stack.",
1359
+ "template.id_label": "ID modèle HF :",
1360
+ "template.fetch_btn": "📜 Détecter",
1361
+ "template.paste_summary": "Ou collez tokenizer_config.json brut (modèles privés)",
1362
+ "template.paste_btn": "📜 Détecter config collé",
1363
+ "template.label.family": "Famille détectée",
1364
+ "template.label.markers": "Marqueurs correspondants",
1365
+ "template.label.tpl_len": "Longueur du template",
1366
+ "template.section.warnings": "Avertissements",
1367
+ "template.section.commands": "Commandes par framework",
1368
+ "template.section.raw": "Template brut (preview)",
1369
+ "template.family.custom": "custom (famille inconnue)",
1370
+ "template.family.none": "(pas de chat_template)",
1371
+ "template.verdict.ok": "✅ TEMPLATE DÉTECTÉ",
1372
+ "template.verdict.custom": "⚠ TEMPLATE CUSTOM",
1373
+ "template.verdict.missing": "❌ PAS DE CHAT TEMPLATE",
1374
+ "template.verdict.base_model": "ℹ MODÈLE DE BASE (sans chat)",
1375
+ "template.verdict.unknown": "❓ INCONNU",
1376
+ "template.warn.no_chat_template": "Pas de champ <code>chat_template</code> dans tokenizer_config.json. Typique des modèles base / pré-entraînés. Si vous attendiez un modèle instruct-tuned, le mauvais fichier peut être chargé.",
1377
+ "template.warn.custom_template": "Template non standard ({length} chars). L'outil n'a pas pu le faire correspondre aux familles connues. Inspectez le preview et vérifiez que votre framework d'éval le supporte.",
1378
+ "template.warn.lm_eval_apply": "<strong>lm-eval-harness :</strong> ajoutez <code>--apply_chat_template</code> ou votre accuracy chutera silencieusement de ~50% sur les évals multi-tours (issue #1841).",
1379
+ "template.warn.vllm_apply": "<strong>vLLM serve :</strong> vérifiez que <code>--chat-template</code> est défini (l'auto-détection échoue parfois sur les variantes fine-tunées). Suggéré : <code>{name}</code>.",
1380
+ "template.status.empty_id": "⚠ Saisissez un model id (ex. mistralai/Mistral-7B-Instruct-v0.3).",
1381
+ "template.status.fetching": "⏳ Récupération tokenizer_config.json pour {modelId}...",
1382
+ "template.status.success": "✅ {modelId} détecté (verdict : {verdict})",
1383
+ "template.status.empty_paste": "⚠ Collez d'abord un tokenizer_config.json.",
1384
+ "template.status.invalid_json":"❌ JSON invalide : {error}",
1385
+ "template.status.success_paste":"✅ Config collé détecté (verdict : {verdict})",
1386
+ "template.pasted_label": "(tokenizer_config collé)",
1387
  "share.import_desc": "Vous avez un fichier JSON de l'analyse TAF de quelqu'un ? Chargez-le ici pour voir le verdict + la chaîne localement. La même vue que si vous l'aviez exécuté vous-même.",
1388
  "share.import_btn": "📂 Charger JSON partagé",
1389
  "synthesis.system": "Vous êtes un assistant de diagnostic précis pour LLMs transformer. Étant donné des résultats de formules TAF pré-calculés, écrivez un résumé clair en français de 4-6 phrases. Citez le numéro de section (§X.Y) pour chaque nombre mentionné. Donnez toujours une recommandation concrète. N'INVENTEZ PAS de nombres.",
 
1476
  "common.no": "Non",
1477
 
1478
  // Tooltips des modes
1479
+ "modes.tip": "<strong>Neuf façons d'utiliser l'outil</strong>.<br><strong>📇 Profil</strong>: collez un id → TAF Card avec 5 recettes.<br><strong>🆚 Comparer</strong>: 2-3 modèles côte à côte sur une recette.<br><strong>🔍 Inspecter config</strong>: collez config.json brut → Profil complet.<br><strong>💬 Question</strong>: question libre, le LLM du navigateur choisit la recette.<br><strong>📋 Recette</strong>: sélection manuelle avec contrôle total du formulaire.<br><strong>🩺 Diagnostic CLI</strong>: génère commande Python pour mesurer γ localement.<br><strong>📊 Diagramme de phase</strong>: panel de 23 modèles dans le plan (log θ, γ).<br><strong>🪟 Démasquer</strong>: détecte un max_position_embeddings trompeur (SWA / YaRN / RoPE-scaling).<br><strong>📜 Chat-template</strong>: détecte la famille + donne le flag CLI exact pour lm-eval / vLLM / transformers.",
1480
  "profile.tip": "<strong>Diagnostic complet en un clic</strong>. Collez n'importe quel id de modèle HF (ou choisissez préréglage). L'outil exécute les 5 recettes (contexte long, compression KV, custom vs API, budget, hardware) et produit une <strong>TAF Card</strong> unique avec verdict par dimension + nombres clés + classification architecturale.<br><br><strong>Cas d'usage</strong>: « J'évalue Qwen2.5-32B pour la production — quel est son profil complet de viabilité ? » → collez id → Profiler → fait.",
1481
  "compare.tip": "<strong>Même recette, plusieurs modèles</strong>. Choisissez 2-3 modèles candidats et une recette. Voyez les verdicts dans un seul tableau comparatif.<br><br><strong>Cas d'usage</strong>: « J'ai besoin de récupération longue contexte à 16K — quel est le meilleur : Llama-3-8B, Mistral-7B ou Qwen-7B ? » → choisissez 3 + X-2 + 16K → voyez le gagnant.",
1482
 
 
1861
  "mode_desc.phase": "论文经验面板的 γ × θ 散点图。悬停点查看详情,点击加载到 Diagnose / Recipe 表单。",
1862
  "mode_desc.unmask": "检测 max_position_embeddings 是否误导(SWA / YaRN / RoPE 缩放)。粘贴 model id,1 行判定。",
1863
  "profile.preset_loaded": "✅ 已为 <strong>{id}</strong> 加载预设。表单已预填。(点击 📥 Fetch 用 HF Hub 最新 config 覆盖。)",
1864
+
1865
+ // v0.7.1 — anti-bullshit pack #2: Chat-template Sniffer
1866
+ "modes.template": "📜 Chat-template",
1867
+ "mode_desc.template": "检测模型使用的 chat-template 系列(Llama-3 / ChatML / Mistral / Gemma / Phi-3 / Alpaca / DeepSeek)。给出 lm-eval / vLLM / transformers 的精确 CLI flag。",
1868
+ "template.title": "📜 Chat-template 检测器",
1869
+ "template.tip": "粘贴 HF 模型 id(或原始 tokenizer_config.json)。检测 chat-template 系列并给出正确使用的精确框架命令。如果忘记应用,lm-eval-harness 会让 accuracy 静默对半(issue #1841)。",
1870
+ "template.desc": "<strong>忘了 <code>--apply_chat_template</code> 吗?</strong> 大多数 multi-turn eval 因为 chat template 未应用而失败 ~50%。粘贴 model id,获取你 stack 的精确 CLI flag。",
1871
+ "template.id_label": "HF 模型 id:",
1872
+ "template.fetch_btn": "📜 检测",
1873
+ "template.paste_summary": "或粘贴原始 tokenizer_config.json(私有模型)",
1874
+ "template.paste_btn": "📜 检测已粘贴 config",
1875
+ "template.label.family": "检测到的系列",
1876
+ "template.label.markers": "匹配的标记",
1877
+ "template.label.tpl_len": "Template 长度",
1878
+ "template.section.warnings": "警告",
1879
+ "template.section.commands": "各框架命令",
1880
+ "template.section.raw": "原始 template(预览)",
1881
+ "template.family.custom": "自定义(未知系列)",
1882
+ "template.family.none": "(无 chat_template)",
1883
+ "template.verdict.ok": "✅ 已检测到 TEMPLATE",
1884
+ "template.verdict.custom": "⚠ 自定义 TEMPLATE",
1885
+ "template.verdict.missing": "❌ 无 CHAT TEMPLATE",
1886
+ "template.verdict.base_model": "ℹ 基础模型(无 chat)",
1887
+ "template.verdict.unknown": "❓ 未知",
1888
+ "template.warn.no_chat_template": "tokenizer_config.json 中无 <code>chat_template</code> 字段。基础 / 仅预训练模型的典型情况。如果你期待 instruct-tuned 模型,可能加载了错误的文件。",
1889
+ "template.warn.custom_template": "非标准 template({length} 字符)。工具无法将其匹配到已知系列。检查下方预览并验证你的 eval 框架是否支持。",
1890
+ "template.warn.lm_eval_apply": "<strong>lm-eval-harness:</strong>添加 <code>--apply_chat_template</code>,否则 multi-turn eval 上 accuracy 会静默下降 ~50%(issue #1841)。",
1891
+ "template.warn.vllm_apply": "<strong>vLLM serve:</strong>验证 <code>--chat-template</code> 已设置(fine-tuned 变体的自动检测有时失败)。建议:<code>{name}</code>。",
1892
+ "template.status.empty_id": "⚠ 输入 model id(例如 mistralai/Mistral-7B-Instruct-v0.3)。",
1893
+ "template.status.fetching": "⏳ 正在获取 {modelId} 的 tokenizer_config.json...",
1894
+ "template.status.success": "✅ 已检测 {modelId}(判定:{verdict})",
1895
+ "template.status.empty_paste": "⚠ 请先粘贴 tokenizer_config.json。",
1896
+ "template.status.invalid_json":"❌ JSON 无效:{error}",
1897
+ "template.status.success_paste":"✅ 已检测粘贴的 config(判定:{verdict})",
1898
+ "template.pasted_label": "(已粘贴 tokenizer_config)",
1899
  "share.import_desc": "有他人 TAF 分析的 JSON 文件? 在这里加载以本地查看判定 + 链。与您自己运行的视图相同。",
1900
  "share.import_btn": "📂 加载共享的 JSON",
1901
  "synthesis.system": "您是 transformer LLM 的精确诊断助手。给定预先计算的 TAF 公式结果,用 4-6 句中文写出清晰的摘要。为每个提到的数字引用章节号 (§X.Y)。始终给出具体建议。不要编造数字。",
 
1988
  "common.no": "否",
1989
 
1990
  // 模式提示
1991
+ "modes.tip": "<strong>种使用方式</strong>。<br><strong>📇 画像</strong>: 粘贴模型 id → 5 个配方的 TAF 卡。<br><strong>🆚 比较</strong>: 2-3 个模型在一个配方上并排比较。<br><strong>🔍 检查 config</strong>: 粘贴原始 config.json → 完整画像。<br><strong>💬 提问</strong>: 自由形式问题,浏览器 LLM 选择配方。<br><strong>📋 配方</strong>: 手动选择,完全控制表单。<br><strong>🩺 CLI 诊断</strong>: 生成 Python 命令在本地测量 γ。<br><strong>📊 相图</strong>: 23 个面板模型在 (log θ, γ) 平面上。<br><strong>🪟 揭示</strong>: 检测误导的 max_position_embeddings(SWA / YaRN / RoPE 缩放)。<br><strong>📜 Chat-template</strong>: 检测系列 + 给出 lm-eval / vLLM / transformers 的精确 CLI flag。",
1992
  "profile.tip": "<strong>一键完整诊断</strong>。粘贴任意 HF 模型 id (或选择预设)。工具运行所有 5 个配方 (长上下文、KV 压缩、自定义 vs API、预算、硬件),生成单个 <strong>TAF 卡</strong>,显示每个维度的判定 + 关键数字 + 架构分类。<br><br><strong>用例</strong>: \"我正在为生产评估 Qwen2.5-32B — 它的完整可行性概况是什么?\" → 粘贴 id → 画像 → 完成。",
1993
  "compare.tip": "<strong>同一配方,多个模型</strong>。选择 2-3 个候选模型和一个配方。在单个比较表中查看判定。<br><br><strong>用例</strong>: \"我需要在 16K 进行长上下文检索 — 哪个最好: Llama-3-8B、Mistral-7B 或 Qwen-7B?\" → 选择 3 个 + X-2 + 16K → 看赢家。",
1994
 
js/main.js CHANGED
@@ -12,6 +12,7 @@ import { initPhaseDiagram } from "./phase_diagram.js";
12
  import { gammaCheckAll, REGIME_META } from "./gamma_check.js";
13
  import { loadLeanManifest, badgeHtml, badgesForUiBinding, renderTheoremTable, getManifest } from "./lean_badges.js";
14
  import { unmaskConfig } from "./swa_unmasker.js";
 
15
 
16
  const TAF_BROWSER_URL = "python/taf_browser.py";
17
  const ENABLE_WEBLLM = true;
@@ -186,7 +187,8 @@ document.querySelectorAll(".mode-btn").forEach(btn => {
186
  // Hide all mode sections
187
  ["ask-section", "recipe-section", "form-section",
188
  "profile-section", "compare-section", "inspector-section",
189
- "diagnose-section", "phase-section", "unmask-section"].forEach(id => {
 
190
  const el = $(id);
191
  if (el) el.style.display = "none";
192
  });
@@ -195,6 +197,7 @@ document.querySelectorAll(".mode-btn").forEach(btn => {
195
  ask: "ask-section", recipe: "recipe-section", profile: "profile-section",
196
  compare: "compare-section", inspector: "inspector-section",
197
  diagnose: "diagnose-section", phase: "phase-section", unmask: "unmask-section",
 
198
  };
199
  const sectionId = sectionMap[mode];
200
  if (sectionId) $(sectionId).style.display = "";
@@ -581,6 +584,161 @@ $("unmask-id")?.addEventListener("keydown", (e) => {
581
  if (e.key === "Enter") { e.preventDefault(); runUnmaskFromId(); }
582
  });
583
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
  function configToPreset(cfg, modelId) {
585
  const n_attn = cfg.num_attention_heads || cfg.n_head || 0;
586
  const n_kv = cfg.num_key_value_heads || cfg.num_attention_heads || cfg.n_head || 0;
 
12
  import { gammaCheckAll, REGIME_META } from "./gamma_check.js";
13
  import { loadLeanManifest, badgeHtml, badgesForUiBinding, renderTheoremTable, getManifest } from "./lean_badges.js";
14
  import { unmaskConfig } from "./swa_unmasker.js";
15
+ import { sniffChatTemplate } from "./chat_template_sniffer.js";
16
 
17
  const TAF_BROWSER_URL = "python/taf_browser.py";
18
  const ENABLE_WEBLLM = true;
 
187
  // Hide all mode sections
188
  ["ask-section", "recipe-section", "form-section",
189
  "profile-section", "compare-section", "inspector-section",
190
+ "diagnose-section", "phase-section", "unmask-section",
191
+ "template-section"].forEach(id => {
192
  const el = $(id);
193
  if (el) el.style.display = "none";
194
  });
 
197
  ask: "ask-section", recipe: "recipe-section", profile: "profile-section",
198
  compare: "compare-section", inspector: "inspector-section",
199
  diagnose: "diagnose-section", phase: "phase-section", unmask: "unmask-section",
200
+ template: "template-section",
201
  };
202
  const sectionId = sectionMap[mode];
203
  if (sectionId) $(sectionId).style.display = "";
 
584
  if (e.key === "Enter") { e.preventDefault(); runUnmaskFromId(); }
585
  });
586
 
587
// ════════════════════════════════════════════════════════════════════
// 📜 Chat-template Sniffer (v0.7.1 anti-bullshit pack #2)
// ════════════════════════════════════════════════════════════════════

// Accent color for the result card, keyed by the `verdict` code returned
// by sniffChatTemplate(). Used as border + headline color in renderTemplateCard().
const TEMPLATE_VERDICT_COLOR = {
  ok: "#3fb950",         // green  — known template family detected
  custom: "#f1c40f",     // yellow — template present but unrecognized
  missing: "#f85149",    // red    — no chat_template field at all
  base_model: "#8b949e", // gray   — base / pretrained model (no chat)
  unknown: "#8b949e",    // gray   — could not classify
};
598
+
599
/**
 * Download and parse tokenizer_config.json for a model from the HF Hub.
 * @param {string} modelId - HF model id, e.g. "mistralai/Mistral-7B-Instruct-v0.3".
 * @returns {Promise<object>} Parsed tokenizer_config.json.
 * @throws {Error} On gated models (401/403) or any other non-OK HTTP status.
 */
async function fetchHfTokenizerConfig(modelId) {
  const url = `https://huggingface.co/${modelId}/raw/main/tokenizer_config.json`;
  const response = await fetch(url);
  if (response.ok) {
    return response.json();
  }
  // 401/403 on the Hub almost always means a license gate, not a bad id.
  const gated = response.status === 401 || response.status === 403;
  if (gated) {
    throw new Error(`Model is gated (${response.status}). Accept license on HF Hub first.`);
  }
  throw new Error(`HTTP ${response.status} — tokenizer_config.json not found at ${url}`);
}
610
+
611
/**
 * Build the HTML result card for the Chat-template Sniffer.
 *
 * @param {object} result - Output of sniffChatTemplate(). Fields read here:
 *   verdict, detectedFamily, detectedLabel, matchedMarkers (array),
 *   warnings (array of {code, params}), hasChatTemplate (bool),
 *   vllmTemplate (string|falsy), rawTemplate (string|falsy), rawTemplateLength.
 * @param {string} [modelId=""] - Label shown on the card AND interpolated into
 *   the generated CLI commands.
 *   NOTE(review): the paste path passes a localized label like "(pasted config)"
 *   here, so it leaks into the generated commands — consider a separate display
 *   label; confirm intended.
 * @returns {string} HTML fragment for innerHTML injection; dynamic values are
 *   escaped via the local escapeHtml helper.
 */
function renderTemplateCard(result, modelId = "") {
  // Hero color by verdict; unknown gray for any unexpected verdict code.
  const color = TEMPLATE_VERDICT_COLOR[result.verdict] || TEMPLATE_VERDICT_COLOR.unknown;
  // Minimal HTML escaper for user-controlled text (model id, raw template, commands).
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c =>
    ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));

  // Localized UI strings with English fallbacks (t() is falsy on missing keys).
  const verdictLabel = t(`template.verdict.${result.verdict}`) || result.verdict;
  const labelFamily = t("template.label.family") || "Detected family";
  const labelMarkers = t("template.label.markers") || "Matched markers";
  const labelTplLen = t("template.label.tpl_len") || "Template length";
  const sectionWarn = t("template.section.warnings") || "Warnings";
  const sectionCmd = t("template.section.commands") || "Commands by framework";
  const sectionRaw = t("template.section.raw") || "Raw template (preview)";

  // Human-readable family name: sniffer label if known, else localized
  // "custom" / "(no chat_template)" depending on detectedFamily.
  const familyName = result.detectedLabel
    ? result.detectedLabel
    : (result.detectedFamily === "custom" ? (t("template.family.custom") || "custom (unknown family)")
      : (t("template.family.none") || "(no chat_template)"));

  // Warnings panel (open by default); each warning code maps to an i18n key
  // "template.warn.<code>" formatted with its params. Empty string when none.
  const warningsHtml = result.warnings.length
    ? `<details class="unmask-panel" open>
         <summary class="unmask-panel-title">${sectionWarn}</summary>
         <ul>${result.warnings.map(w => `<li>${tFmt("template.warn." + w.code, w.params)}</li>`).join("")}</ul>
       </details>`
    : "";

  // Framework commands — only show when we have a chat_template to apply.
  let cmdHtml = "";
  if (result.hasChatTemplate) {
    // lm-eval-harness invocation with the critical --apply_chat_template flag.
    const lmEvalCmd = "lm_eval --model hf --model_args pretrained=" + (modelId || "MODEL_ID") +
      " --tasks gsm8k --apply_chat_template --batch_size 8";
    // vLLM: pass the named template when the sniffer suggested one, else rely
    // on vLLM's auto-detection from tokenizer_config.
    const vllmCmd = result.vllmTemplate
      ? `vllm serve ${modelId || "MODEL_ID"} --chat-template ${result.vllmTemplate}`
      : `vllm serve ${modelId || "MODEL_ID"} # template auto-detected from tokenizer_config`;
    // transformers: three-line Python snippet using apply_chat_template().
    const transformersCmd =
      `from transformers import AutoTokenizer\n` +
      `tok = AutoTokenizer.from_pretrained("${modelId || "MODEL_ID"}")\n` +
      `prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)`;

    cmdHtml = `
      <details class="unmask-panel" open>
        <summary class="unmask-panel-title">${sectionCmd}</summary>
        <div class="template-cmd-block">
          <div class="template-cmd-label">lm-evaluation-harness</div>
          <pre class="template-cmd"><code>${escapeHtml(lmEvalCmd)}</code></pre>
          <div class="template-cmd-label">vLLM serve</div>
          <pre class="template-cmd"><code>${escapeHtml(vllmCmd)}</code></pre>
          <div class="template-cmd-label">transformers (Python)</div>
          <pre class="template-cmd"><code>${escapeHtml(transformersCmd)}</code></pre>
        </div>
      </details>
    `;
  }

  // Raw preview only when present (collapsed by default).
  const rawHtml = result.rawTemplate
    ? `<details class="unmask-panel">
         <summary class="unmask-panel-title">${sectionRaw}</summary>
         <pre class="template-cmd"><code>${escapeHtml(result.rawTemplate)}</code></pre>
       </details>`
    : "";

  // Final card: hero (verdict + key numbers) + detail panels. Reuses the
  // unmask-* CSS classes from the v0.7.0 Unmask mode.
  return `
    <div class="unmask-result">
      <div class="unmask-hero" style="border-color: ${color};">
        <div class="unmask-verdict" style="color: ${color};">${verdictLabel}</div>
        ${modelId ? `<div class="unmask-model"><code>${escapeHtml(modelId)}</code></div>` : ""}
        <div class="unmask-numbers">
          <div><span class="unmask-num-label">${labelFamily}</span><span class="unmask-num-val">${escapeHtml(familyName)}</span></div>
          <div><span class="unmask-num-label">${labelMarkers}</span><span class="unmask-num-val">${result.matchedMarkers.length}</span></div>
          <div><span class="unmask-num-label">${labelTplLen}</span><span class="unmask-num-val">${result.rawTemplateLength.toLocaleString()}</span></div>
        </div>
      </div>

      <div class="unmask-details">
        ${warningsHtml}
        ${cmdHtml}
        ${rawHtml}
      </div>
    </div>
  `;
}
693
+
694
/**
 * Handler for the "Detect" button / Enter key: fetch tokenizer_config.json
 * from the HF Hub for the typed model id, classify its chat template, and
 * render the result card. Disables the fetch button while in flight.
 */
async function runTemplateFromId() {
  const statusEl = $("template-status");
  const outputEl = $("template-output");
  const fetchBtn = $("template-fetch-btn");

  const modelId = ($("template-id").value || "").trim();
  if (modelId === "") {
    statusEl.textContent = t("template.status.empty_id") || "⚠ Enter a model id.";
    return;
  }

  statusEl.textContent = tFmt("template.status.fetching", { modelId });
  fetchBtn.disabled = true;
  try {
    const tokenizerCfg = await fetchHfTokenizerConfig(modelId);
    const sniffed = sniffChatTemplate(tokenizerCfg);
    outputEl.innerHTML = renderTemplateCard(sniffed, modelId);
    const verdict = t(`template.verdict.${sniffed.verdict}`) || sniffed.verdict;
    statusEl.textContent = tFmt("template.status.success", { modelId, verdict });
  } catch (err) {
    // Network / gating / parse failures: surface the message, clear stale output.
    statusEl.textContent = `❌ ${err.message}`;
    outputEl.innerHTML = "";
  } finally {
    fetchBtn.disabled = false;
  }
}
715
+
716
/**
 * Handler for the "Detect pasted config" button: parse the pasted
 * tokenizer_config.json (private-model path, no network), classify its
 * chat template, and render the result card.
 */
function runTemplateFromPaste() {
  const statusEl = $("template-status");
  const outputEl = $("template-output");

  const raw = ($("template-paste").value || "").trim();
  if (raw === "") {
    statusEl.textContent = t("template.status.empty_paste") || "⚠ Paste a tokenizer_config.json first.";
    return;
  }

  let cfg;
  try {
    cfg = JSON.parse(raw);
  } catch (e) {
    statusEl.textContent = tFmt("template.status.invalid_json", { error: e.message });
    return;
  }

  const sniffed = sniffChatTemplate(cfg);
  const pastedLabel = t("template.pasted_label") || "(pasted config)";
  outputEl.innerHTML = renderTemplateCard(sniffed, pastedLabel);
  const verdict = t(`template.verdict.${sniffed.verdict}`) || sniffed.verdict;
  statusEl.textContent = tFmt("template.status.success_paste", { verdict });
}
735
+
736
+ $("template-fetch-btn")?.addEventListener("click", runTemplateFromId);
737
+ $("template-paste-btn")?.addEventListener("click", runTemplateFromPaste);
738
+ $("template-id")?.addEventListener("keydown", (e) => {
739
+ if (e.key === "Enter") { e.preventDefault(); runTemplateFromId(); }
740
+ });
741
+
742
  function configToPreset(cfg, modelId) {
743
  const n_attn = cfg.num_attention_heads || cfg.n_head || 0;
744
  const n_kv = cfg.num_key_value_heads || cfg.num_attention_heads || cfg.n_head || 0;
style.css CHANGED
@@ -33,6 +33,33 @@
33
  flex: 1;
34
  }
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  /* v0.7.0 — Unmask mode (SWA + RoPE-scaling detector) */
37
  .unmask-result {
38
  margin-top: 0.8em;
 
33
  flex: 1;
34
  }
35
 
36
/* v0.7.1 — Chat-template Sniffer mode */

/* Vertical stack of the per-framework command snippets inside the card. */
.template-cmd-block {
  display: flex;
  flex-direction: column;
  gap: 0.5em;
}

/* Small uppercase caption above each command (the framework name). */
.template-cmd-label {
  font-size: 0.78em;
  font-weight: 600;
  color: #58a6ff;
  text-transform: uppercase;
  letter-spacing: 0.04em;
  margin-top: 0.4em;
}

/* The copy-pasteable command block: wraps long lines (pre-wrap) while
   still allowing horizontal scroll for unbreakable tokens. */
.template-cmd {
  margin: 0;
  padding: 0.6em 0.8em;
  background: rgba(0, 0, 0, 0.35);
  border: 1px solid rgba(255, 255, 255, 0.06);
  border-radius: 6px;
  font-family: monospace;
  font-size: 0.85em;
  line-height: 1.45;
  white-space: pre-wrap;
  overflow-x: auto;
}
62
+
63
  /* v0.7.0 — Unmask mode (SWA + RoPE-scaling detector) */
64
  .unmask-result {
65
  margin-top: 0.8em;