// TAF Agent — main orchestration (v0.2 — i18n + Profile + Compare) // // Phases: // 1. Pyodide loads + TAF formulas → deterministic computation // 2. WebLLM loads on demand → plain-English synthesis // 3. Router (LLM) → free-form question → recipe + params // 4. Modes: Profile (all recipes) + Compare (multi-model side-by-side) // 5. i18n: EN/ES/FR/ZH import { initI18n, setLang, t } from "./i18n.js"; import { initPhaseDiagram } from "./phase_diagram.js"; import { gammaCheckAll, REGIME_META } from "./gamma_check.js"; import { loadLeanManifest, badgeHtml, badgesForUiBinding, renderTheoremTable, getManifest } from "./lean_badges.js"; import { unmaskConfig } from "./swa_unmasker.js"; import { sniffChatTemplate } from "./chat_template_sniffer.js"; import { parseVotesCSV, computeArenaCI, SAMPLE_VOTES_CSV } from "./arena_ci.js"; import { rateAllBenchmarks, BENCHMARK_DB } from "./contamination_prior.js"; import { predictQuantShift, predictAllSchemes, QUANT_SCHEMES } from "./quant_regime.js"; import { attachAllHfAutocompletes } from "./hf_autocomplete.js"; import { computeDriftBound, FRAMEWORKS as DRIFT_FRAMEWORKS, DTYPES as DRIFT_DTYPES } from "./cross_drift.js"; import { predictNIAHReasoning, sweepContextLengths, loadRulerKB, calibrateNIAH, listRulerModels } from "./niah_reasoning.js"; import { loadSaturationKB, classifyAll, classifyBenchmark, listBenchmarks, attribution as saturationAttribution, tryFetchLive, } from "./saturation_detector.js"; import { loadHub, listCategories, listEntries, searchEntries, hubStats, getCategoryMeta, } from "./solutions_hub.js"; import { lintJsonCot, reorderJsonText, classifyFieldName } from "./json_cot_linter.js"; import { lintPeftCode, ARCH_TARGET_MODULES } from "./peft_anti_pattern.js"; import { diffPromptCache, PROVIDERS as CACHE_PROVIDERS } from "./prompt_cache_diff.js"; import { checkCompatibility as specCheckCompat, parseParamHint } from "./spec_decode_compat.js"; import { tokenizeAll, detectLanguageBlocks, PRESET_TOKENIZERS as TAX_PRESETS, SAMPLE_TEXTS as TAX_SAMPLES, } from "./tokenizer_tax.js"; import { loadKB as loadLongscoreKB, lookup as longscoreLookup, rank as longscoreRank, } from "./longscore.js"; // Attach HF Hub search-as-you-type to all 5 model id inputs (Profile, Recipe, // Unmask, Template, Quant). Hits public huggingface.co/api/models. Idempotent. attachAllHfAutocompletes(); const TAF_BROWSER_URL = "python/taf_browser.py"; const ENABLE_WEBLLM = true; // Smaller model = fits in default browser quota (~350MB vs 700MB for Llama-1B) const WEBLLM_MODEL = "Qwen2.5-0.5B-Instruct-q4f16_1-MLC"; const WEBLLM_FALLBACK = "SmolLM2-360M-Instruct-q4f16_1-MLC"; const $ = (id) => document.getElementById(id); const state = { pyodide: null, webllm: null, presets: [], recipes: [], recipesById: {}, currentMode: "ask", currentRecipe: null, }; const EXAMPLES = [ "Will Meta-Llama-3-8B handle 32000-token NIAH retrieval reliably?", "I have $5000 to spend on training. What model can I afford?", "Should I use Mistral-7B-v0.1 at 16K context or extend it first?", "Compare cheapest GPU to serve Llama-3-8B at 10 million tokens per day.", "Should I use soft KV decay or hard cutoff for Qwen2.5-7B at 32K?", "Is it cheaper to train an 8B custom model or use GPT-4o for 50M tokens/month?", ]; // ════════════════════════════════════════════════════════════════════ // Bootstrap // ════════════════════════════════════════════════════════════════════ function showLoadingBar(show, progress=null) { const wrap = $("loading-bar-wrap"); const bar = $("loading-bar"); if (!wrap || !bar) return; if (!show) { wrap.style.display = "none"; return; } wrap.style.display = "block"; if (progress === null) { bar.classList.add("indeterminate"); bar.style.width = "100%"; } else { bar.classList.remove("indeterminate"); bar.style.width = `${Math.min(100, Math.max(0, progress * 100))}%`; } } async function loadPyodideAndTaf() { showLoadingBar(true, null); setStatus(t("status.loading_pyodide")); state.pyodide = await loadPyodide({ indexURL: "https://cdn.jsdelivr.net/pyodide/v0.26.4/full/", }); showLoadingBar(true, 0.5); setStatus(t("status.loading_taf")); const tafCode = await fetch(TAF_BROWSER_URL).then(r => r.text()); await state.pyodide.runPythonAsync(tafCode); state.presets = JSON.parse(state.pyodide.runPython("list_presets()")); state.recipes = JSON.parse(state.pyodide.runPython("list_recipes()")); state.recipesById = Object.fromEntries(state.recipes.map(r => [r.id, r])); showLoadingBar(true, 0.95); populatePresets(); populateRecipes(); enableUI(); showLoadingBar(false); setStatus(t("status.ready")); } function populatePresets() { // Recipe form preset ["preset", "profile-preset"].forEach(id => { const sel = $(id); if (!sel) return; sel.innerHTML = ''; state.presets.forEach(p => { const opt = document.createElement("option"); opt.value = p.id; opt.textContent = `${p.label} (θ=${p.theta.toLocaleString()}, T_train=${p.T_train})`; sel.appendChild(opt); }); }); // Compare slot presets document.querySelectorAll(".compare-preset").forEach(sel => { sel.innerHTML = ''; state.presets.forEach(p => { const opt = document.createElement("option"); opt.value = p.id; opt.textContent = p.label; sel.appendChild(opt); }); }); } function populateRecipes() { ["recipe-select", "compare-recipe"].forEach(id => { const sel = $(id); if (!sel) return; sel.innerHTML = ''; state.recipes.forEach(r => { const opt = document.createElement("option"); opt.value = r.id; opt.textContent = `${r.id} — ${r.name}`; sel.appendChild(opt); }); }); } function enableUI() { $("ask-btn").disabled = false; $("recipe-select").disabled = false; $("preset").disabled = false; $("profile-preset").disabled = false; $("profile-btn").disabled = false; $("compare-recipe").disabled = false; $("compare-btn").disabled = false; $("inspector-btn").disabled = false; // Render community feed + falsification (independent of Pyodide) renderFalsificationDashboard(); loadCommunityFeed(); // Restore from URL if present parseUrlState(); } function setStatus(msg) { $("status").textContent = msg; } // ════════════════════════════════════════════════════════════════════ // Main-panel wrap: every
section gets a foldable details/summary // shell at runtime so users can collapse any panel they don't need open. // h2 is moved INTO summary so its data-i18n binding survives. Idempotent. // ════════════════════════════════════════════════════════════════════ function wrapMainSectionsAsFoldable() { document.querySelectorAll("main > section").forEach(section => { if (section.id === "status-bar") return; // skip loading bar if (section.querySelector(":scope > details.main-panel")) return; // already wrapped const h2 = section.querySelector(":scope > h2"); if (!h2) return; const details = document.createElement("details"); details.className = "main-panel"; details.open = true; const summary = document.createElement("summary"); summary.className = "main-panel-title"; summary.appendChild(h2); // preserve h2 + its data-i18n + all children details.appendChild(summary); while (section.firstChild) details.appendChild(section.firstChild); section.appendChild(details); }); // Stop ⓘ tooltip clicks inside summaries from toggling the panel. document.querySelectorAll(".main-panel > .main-panel-title .info").forEach(el => { el.addEventListener("click", (e) => e.stopPropagation()); }); } wrapMainSectionsAsFoldable(); // v0.7.7 — task-tiles is the primary entry point; collapse the legacy 14-tab // strip by default so users don't see duplicated navigation. Power users can // still expand it with one click. const __modeDetails = document.querySelector("#mode-section > details.main-panel"); if (__modeDetails) __modeDetails.open = false; // ════════════════════════════════════════════════════════════════════ // Mode toggle // ════════════════════════════════════════════════════════════════════ // v0.7.7 — task tiles: clicking a tile-mode-link button triggers the equivalent mode-btn. // Reuses the mode switcher entirely (no duplicate state). Smoothly scrolls to the // activated section so the user immediately sees the form they expected. document.addEventListener("click", (e) => { const linkBtn = e.target.closest("[data-mode-link]"); if (!linkBtn) return; const targetMode = linkBtn.dataset.modeLink; const targetTab = document.querySelector(`.mode-btn[data-mode="${targetMode}"]`); if (targetTab) { targetTab.click(); // Scroll the activated section into view so the tile click feels responsive. const sectionId = { ask: "ask-section", recipe: "recipe-section", profile: "profile-section", compare: "compare-section", inspector: "inspector-section", diagnose: "diagnose-section", phase: "phase-section", unmask: "unmask-section", template: "template-section", arena: "arena-section", contam: "contam-section", quant: "quant-section", drift: "drift-section", niah: "niah-section", saturation: "saturation-section", cot: "cot-section", peft: "peft-section", cache: "cache-section", speculative: "speculative-section", tax: "tax-section", longscore: "longscore-section", hub: "hub-section", }[targetMode]; if (sectionId) { const sec = document.getElementById(sectionId); if (sec) sec.scrollIntoView({ behavior: "smooth", block: "start" }); } } }); document.querySelectorAll(".mode-btn").forEach(btn => { btn.addEventListener("click", () => { document.querySelectorAll(".mode-btn").forEach(b => { b.classList.remove("active"); b.setAttribute("aria-selected", "false"); }); btn.classList.add("active"); btn.setAttribute("aria-selected", "true"); const mode = btn.dataset.mode; state.currentMode = mode; // Hide all mode sections ["ask-section", "recipe-section", "form-section", "profile-section", "compare-section", "inspector-section", "diagnose-section", "phase-section", "unmask-section", "template-section", "arena-section", "contam-section", "quant-section", "drift-section", "niah-section", "saturation-section", "cot-section", "peft-section", "cache-section", "speculative-section", "tax-section", "longscore-section", "hub-section"].forEach(id => { const el = $(id); if (el) el.style.display = "none"; }); // Show selected const sectionMap = { ask: "ask-section", recipe: "recipe-section", profile: "profile-section", compare: "compare-section", inspector: "inspector-section", diagnose: "diagnose-section", phase: "phase-section", unmask: "unmask-section", template: "template-section", arena: "arena-section", contam: "contam-section", quant: "quant-section", drift: "drift-section", niah: "niah-section", saturation: "saturation-section", cot: "cot-section", peft: "peft-section", cache: "cache-section", speculative: "speculative-section", tax: "tax-section", longscore: "longscore-section", hub: "hub-section", }; const sectionId = sectionMap[mode]; if (sectionId) $(sectionId).style.display = ""; $("mode-desc").textContent = t(`mode_desc.${mode}`) || ""; if (mode === "phase") initPhaseDiagram(); if (mode === "saturation") initSaturation(); if (mode === "cot") initCot(); if (mode === "peft") initPeft(); if (mode === "cache") initCacheDiff(); if (mode === "speculative") initSpeculative(); if (mode === "tax") initTax(); if (mode === "longscore") initLongscore(); if (mode === "hub") initHub(); }); }); // ════════════════════════════════════════════════════════════════════ // Diagnose mode: build the diagnose_model.py CLI command // ════════════════════════════════════════════════════════════════════ function buildDiagnoseCommand() { const model = ($("diag-model")?.value || "").trim(); if (!model) { return "# Please enter a HuggingFace model id"; } const theta = ($("diag-theta")?.value || "").trim(); const N = ($("diag-N")?.value || "2000").trim(); const local = ($("diag-local")?.value || "").trim(); const fast = $("diag-fast")?.checked; const cpu = $("diag-cpu")?.checked; const fourbit = $("diag-4bit")?.checked; const parts = ["python cli/diagnose_model.py"]; parts.push(`--model ${model}`); if (theta) parts.push(`--theta ${theta}`); if (N && N !== "2000") parts.push(`--N ${N}`); if (local) parts.push(`--local "${local}"`); if (fast) parts.push("--fast"); if (cpu) parts.push("--cpu"); if (fourbit) parts.push("--load_in_4bit"); return parts.join(" \\\n "); } const _diagBuildBtn = $("diag-build-btn"); if (_diagBuildBtn) { _diagBuildBtn.addEventListener("click", () => { const cmd = buildDiagnoseCommand(); $("diag-cmd").textContent = cmd; $("diag-output").style.display = ""; }); } const _diagCopyBtn = $("diag-copy-btn"); if (_diagCopyBtn) { _diagCopyBtn.addEventListener("click", async () => { const cmd = $("diag-cmd").textContent; if (!cmd) return; try { await navigator.clipboard.writeText(cmd); _diagCopyBtn.textContent = "✓ Copied"; setTimeout(() => { _diagCopyBtn.textContent = (window.t ? window.t("diagnose.copy_btn") : "📋 Copy to clipboard"); }, 1800); } catch (e) { _diagCopyBtn.textContent = "✗ Copy failed (browser blocks)"; } }); } // Make sure inspector section is hidden initially const _inspectorSection = $("inspector-section"); if (_inspectorSection) _inspectorSection.style.display = "none"; // ════════════════════════════════════════════════════════════════════ // Recipe selector // ════════════════════════════════════════════════════════════════════ $("recipe-select").addEventListener("change", (e) => { const rid = e.target.value; if (!rid) { $("form-section").style.display = "none"; return; } const r = state.recipesById[rid]; state.currentRecipe = r; $("recipe-desc-display").textContent = r.description; $("form-section").style.display = ""; buildDynamicForm(r); }); function buildDynamicForm(recipe) { const container = $("dynamic-form"); container.innerHTML = ""; const defaults = getRecipeDefaults(recipe.id); recipe.params.forEach(name => { const div = document.createElement("div"); div.className = "form-field"; const labelWrap = document.createElement("label"); labelWrap.htmlFor = `param_${name}`; labelWrap.innerHTML = paramLabel(name); if (PARAM_TOOLTIPS[name]) { const info = document.createElement("span"); info.className = "info"; info.innerHTML = `${PARAM_TOOLTIPS[name]}`; labelWrap.appendChild(info); } div.appendChild(labelWrap); const input = document.createElement("input"); input.type = "text"; input.id = `param_${name}`; input.dataset.param = name; input.value = defaults[name] !== undefined ? String(defaults[name]) : ""; div.appendChild(input); container.appendChild(div); }); $("run-btn").disabled = false; } function paramLabel(name) { const labels = { theta: "θ (rope_theta)", T_train: "T_train", T_eval: "T_eval (target context)", n_attention_heads: "num_attention_heads", n_kv_heads: "num_key_value_heads", d_head: "head_dim", n_layers: "num_hidden_layers", n_params: "n_params (e.g. 8e9)", has_SWA: "Has SWA? (true/false)", N_params: "N_params (e.g. 8e9)", D_tokens: "D_tokens (or empty for Chinchilla)", gpu: "GPU", n_gpus: "n_gpus", mfu: "MFU (default 0.45)", api_model: "API model to compare", monthly_tokens_M: "Monthly tokens (M)", USD_budget: "USD budget", bytes_per_weight: "Bytes per weight (BF16=2)", target_tokens_per_day: "Target tokens/day", concurrent_users: "Concurrent users", }; return labels[name] || name; } const PARAM_TOOLTIPS = { theta: "RoPE base frequency. From config.rope_theta. Higher = more long-range capacity. Typical: 10000 early models, 500000 Llama-3, 1000000 Qwen2.5.", T_train: "Max context the model was trained on. From max_position_embeddings. The model has never seen positions beyond this; extrapolating much further usually fails.", T_eval: "Your target inference context length. The key knob. The whole question is: will the model behave well at this length?", n_attention_heads: "Number of query heads. From num_attention_heads.", n_kv_heads: "Number of K/V heads. If < n_attention_heads → model uses GQA (Grouped Query Attention). Smaller = more memory-efficient KV cache but pushes γ toward Hagedorn boundary.", d_head: "Per-head dimension. Typically hidden_size / n_attention_heads. Common: 64, 80, 128.", n_layers: "Number of transformer layers. From num_hidden_layers.", n_params: "Total parameter count. Use scientific notation: 8e9 for 8B. Threshold ~400M is the induction-head emergence boundary (sign-flip in Δγ).", has_SWA: "Sliding Window Attention. true for Mistral, gemma-2, phi-3. SWA lowers γ_decomposition by ~0.21.", N_params: "Same as n_params. Total parameter count, scientific notation (e.g. 8e9).", D_tokens: "Number of training tokens. Leave empty to use Chinchilla 20:1 default (D = 20·N).", gpu: "GPU model from the catalog. Options: H100 SXM, H100 PCIe, H200, B200, A100 80GB, A100 40GB, L40S, MI300X, RTX 4090, RTX 5090, RTX 5060Ti.", n_gpus: "Number of GPUs in your training/serving cluster.", mfu: "Model FLOPs Utilization. Realistic fraction of peak FLOPs achieved. Typical: 0.4-0.5 for well-tuned. Default 0.45.", api_model: "Frontier API to compare against. Options: GPT-4o, GPT-4o-mini, Claude-Opus-4, Claude-Sonnet-4, Claude-Haiku-4, Gemini-1.5-Pro, DeepSeek-V3, Llama-3.3-70B (Together).", monthly_tokens_M: "Expected monthly token volume in millions. e.g. 10 = 10 million tokens/month.", USD_budget: "Your training budget in US dollars (no symbol). e.g. 5000 for $5K.", bytes_per_weight: "Memory per parameter. BF16/FP16 = 2, INT8 = 1, INT4 = 0.5.", target_tokens_per_day: "How many tokens/day you need to serve. e.g. 10000000 = 10M tokens/day.", concurrent_users: "Simultaneous concurrent requests. Affects KV cache memory needed.", }; function getRecipeDefaults(recipeId) { const D = { "X-1": { N_params: "8e9", D_tokens: "", gpu: "H100 SXM", n_gpus: 8, mfu: 0.45, api_model: "GPT-4o", monthly_tokens_M: 10.0 }, "X-2": { theta: 500000, T_train: 8192, T_eval: 32000, n_attention_heads: 32, n_kv_heads: 8, d_head: 128, n_layers: 32, n_params: "8e9", has_SWA: false }, "X-3": { USD_budget: 5000, gpu: "H100 SXM", mfu: 0.45, n_gpus: 1 }, "X-5": { N_params: "8e9", T_eval: 4096, n_layers: 32, n_kv_heads: 8, d_head: 128, bytes_per_weight: 2.0, target_tokens_per_day: 10000000, concurrent_users: 1 }, "X-19": { theta: 500000, T_train: 8192, T_eval: 8192, n_attention_heads: 32, n_kv_heads: 8, d_head: 128, n_layers: 32, n_params: "8e9", has_SWA: false }, }; return D[recipeId] || {}; } // ════════════════════════════════════════════════════════════════════ // Preset autofill (works in recipe mode) // ════════════════════════════════════════════════════════════════════ $("preset").addEventListener("change", (e) => { if (!e.target.value) return; const modelId = e.target.value; state.lastModelId = modelId; // remember for filename/hash // Mirror behavior with profile-preset: also fill HF id input if present. if ($("hf-id")) { $("hf-id").value = modelId; if ($("hf-status")) $("hf-status").textContent = tFmt("profile.preset_loaded", { id: modelId }); } const proxy = state.pyodide.runPython(`get_preset(${JSON.stringify(modelId)})`); const preset = proxy.toJs ? proxy.toJs({ dict_converter: Object.fromEntries }) : proxy; if (!preset || Object.keys(preset).length === 0) return; fillRecipeForm(preset); }); function fillRecipeForm(p) { // Fill any matching field in dynamic form Object.entries(p).forEach(([k, v]) => { const map = { theta: "theta", T_train: "T_train", n_attention_heads: "n_attention_heads", n_kv_heads: "n_kv_heads", d_head: "d_head", n_layers: "n_layers", n_params: "n_params", has_SWA: "has_SWA", }; const formId = "param_" + (map[k] || k); const el = $(formId); if (el) el.value = (typeof v === "number" && (k === "n_params" || v > 1e6)) ? v.toExponential(2) : String(v); // Also fill N_params for cost recipes if (k === "n_params") { const np = $("param_N_params"); if (np) np.value = (typeof v === "number" ? v.toExponential(2) : String(v)); } }); } // ════════════════════════════════════════════════════════════════════ // HF Hub fetch (any model) // ════════════════════════════════════════════════════════════════════ // Build the same unsloth mirror candidates used in spec-decode. Lets us // fetch config.json for gated families (Llama / Mistral / Gemma) without // requiring HF auth — the unsloth redistributions are public and ship the // original config.json verbatim (they only quantize weights, not metadata). function _hfMirrorCandidates(modelId) { const last = modelId.split("/").slice(-1)[0]; if (!last) return []; const out = [ `unsloth/${last}`, last.startsWith("Meta-") ? null : `unsloth/Meta-${last}`, `unsloth/${last}-bnb-4bit`, last.startsWith("Meta-") ? null : `unsloth/Meta-${last}-bnb-4bit`, ].filter(c => c && c !== modelId); // Dedupe in case last starts with Meta- already. return [...new Set(out)]; } async function _tryConfigUrl(modelId) { // /resolve/main/ rather than /raw/main/ — same lesson as spec-decode: // /resolve follows LFS for large files (irrelevant for config.json which // is always small, but consistent & future-proof). CORS is granted on both. const url = `https://huggingface.co/${modelId}/resolve/main/config.json`; const resp = await fetch(url); if (!resp.ok) return { ok: false, status: resp.status }; try { const j = await resp.json(); return { ok: true, data: j }; } catch (e) { return { ok: false, error: "parse_failed" }; } } async function fetchHfConfig(modelId) { // 1. Try the user-pasted id directly. let r = await _tryConfigUrl(modelId); if (r.ok) return r.data; // 2. On 401/403, try open-mirror fallback (unsloth/...). On other // errors (404/network/parse), surface as before — mirror won't help. if (r.status === 401 || r.status === 403) { for (const cand of _hfMirrorCandidates(modelId)) { const m = await _tryConfigUrl(cand); if (m.ok) { // Stamp the mirror id so callers can surface a "fetched via mirror" // hint if they want; backwards-compatible with code that ignores it. m.data.__via_mirror = cand; m.data.__mirror_of = modelId; return m.data; } } const err = new Error(`🔒 ${modelId} is gated — accept license at https://huggingface.co/${modelId}`); err.code = "gated"; err.modelId = modelId; throw err; } throw new Error(`HTTP ${r.status} — config.json not found at https://huggingface.co/${modelId}/resolve/main/config.json`); } $("hf-fetch-btn").addEventListener("click", async () => { const modelId = $("hf-id").value.trim(); if (!modelId) { $("hf-status").textContent = "⚠ Enter a model id like 'Qwen/Qwen2.5-32B-Instruct'"; return; } $("hf-status").textContent = `⏳ Fetching config.json from HF Hub for ${modelId}...`; $("hf-fetch-btn").disabled = true; state.lastModelId = modelId; // remember for filename/hash try { const cfg = await fetchHfConfig(modelId); const preset = configToPreset(cfg, modelId); fillRecipeForm(preset); $("hf-status").innerHTML = `✅ Config loaded for ${modelId} (family: ${preset._family}). Verify values, click Analyze.`; } catch (err) { $("hf-status").textContent = `❌ ${err.message}`; } finally { $("hf-fetch-btn").disabled = false; } }); // ════════════════════════════════════════════════════════════════════ // 🪟 Unmask mode (v0.7.0 anti-bullshit pack #1) // ════════════════════════════════════════════════════════════════════ // Tiny string-template helper: t(key) with {placeholder} substitution. // Falls back to the raw key when the i18n entry is missing so dev sees the gap. function tFmt(key, params = {}) { let s = t(key) || key; for (const [k, v] of Object.entries(params)) { const fmtVal = v === null || v === undefined ? "—" : (typeof v === "number" ? v.toLocaleString() : String(v)); s = s.replace(new RegExp(`\\{${k}\\}`, "g"), fmtVal); } return s; } const VERDICT_COLOR = { honest: "#3fb950", inflated: "#f1c40f", severely_inflated: "#f85149", yarn_extended: "#f1c40f", unknown: "#8b949e", }; function renderUnmaskCard(result, modelId = "") { const color = VERDICT_COLOR[result.verdict] || VERDICT_COLOR.unknown; const ratioPct = (result.ratio * 100).toFixed(1); const f = result.flags; const fmtN = (x) => x === null || x === undefined ? "—" : Number(x).toLocaleString(); const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c => ({"&":"&","<":"<",">":">",'"':""","'":"'"}[c])); const verdictLabel = t(`unmask.verdict.${result.verdict}`) || result.verdict; const labelDeclared = t("unmask.label.declared") || "Declared context"; const labelEffective = t("unmask.label.effective") || "Effective (estimate)"; const labelRatio = t("unmask.label.ratio") || "Ratio"; const sectionFlags = t("unmask.section.flags") || "Architecture flags"; const sectionWarn = t("unmask.section.warnings")|| "Warnings"; const sectionReco = t("unmask.section.reco") || "Recommendation"; // Architecture flags row labels const flagSwa = t("unmask.flag.swa") || "SWA"; const flagRope = t("unmask.flag.rope") || "RoPE scaling"; const flagGqa = t("unmask.flag.gqa") || "GQA"; const flagLayers = t("unmask.flag.layers") || "Layers"; const flagDhead = t("unmask.flag.dhead") || "d_head"; const flagTheta = t("unmask.flag.theta") || "RoPE θ"; const flagYes = t("unmask.flag.yes") || "yes"; const flagNo = t("unmask.flag.no") || "no"; const swaText = f.hasSWA ? `${flagYes} (window = ${fmtN(f.swaWindow)})` : flagNo; const ropeText = f.hasYaRN ? `${f.ropeScalingType} (factor = ${f.yarnFactor}, original = ${fmtN(f.yarnOriginal)})` : flagNo; const gqaText = f.hasGQA ? `${flagYes} (${f.n_kv_heads} kv / ${f.n_attn_heads} attn heads)` : (t("unmask.flag.full_mha") || "no (full MHA, {n} heads)").replace("{n}", f.n_attn_heads ?? "?"); const warningsHtml = result.warnings.length ? `
${sectionWarn}
` : ""; const recoHtml = result.recoCode ? `
${sectionReco}

${tFmt("unmask.reco." + result.recoCode, result.recoParams)}

` : ""; return `
${verdictLabel}
${modelId ? `
${escapeHtml(modelId)}
` : ""}
${labelDeclared}${fmtN(result.declaredContext)}
${labelEffective}${fmtN(result.effectiveContext)}
${labelRatio}${ratioPct}%
${sectionFlags}
  • ${flagSwa}: ${swaText}
  • ${flagRope}: ${ropeText}
  • ${flagGqa}: ${gqaText}
  • ${flagLayers}: ${fmtN(f.n_layers)} · ${flagDhead}: ${fmtN(f.d_head)} · ${flagTheta}: ${fmtN(f.rope_theta)}
${warningsHtml} ${recoHtml}
`; } async function runUnmaskFromId() { const modelId = ($("unmask-id").value || "").trim(); if (!modelId) { $("unmask-status").textContent = t("unmask.status.empty_id") || "⚠ Enter a model id."; return; } $("unmask-status").textContent = tFmt("unmask.status.fetching", { modelId }); $("unmask-fetch-btn").disabled = true; try { const cfg = await fetchHfConfig(modelId); const result = unmaskConfig(cfg); $("unmask-output").innerHTML = renderUnmaskCard(result, modelId); const verdictLocalized = t(`unmask.verdict.${result.verdict}`) || result.verdict; $("unmask-status").textContent = tFmt("unmask.status.success", { modelId, verdict: verdictLocalized }); } catch (err) { if (err.code === "gated") { $("unmask-status").innerHTML = `🔒 ${err.modelId} ${t("hf_auto.gated_msg") || "is gated. Accept the license here:"} huggingface.co/${err.modelId}`; } else { $("unmask-status").textContent = `❌ ${err.message}`; } $("unmask-output").innerHTML = ""; } finally { $("unmask-fetch-btn").disabled = false; } } function runUnmaskFromPaste() { const raw = ($("unmask-paste").value || "").trim(); if (!raw) { $("unmask-status").textContent = t("unmask.status.empty_paste") || "⚠ Paste a config.json first."; return; } let cfg; try { cfg = JSON.parse(raw); } catch (e) { $("unmask-status").textContent = tFmt("unmask.status.invalid_json", { error: e.message }); return; } const result = unmaskConfig(cfg); const pastedLabel = t("unmask.pasted_label") || "(pasted config)"; $("unmask-output").innerHTML = renderUnmaskCard(result, pastedLabel); const verdictLocalized = t(`unmask.verdict.${result.verdict}`) || result.verdict; $("unmask-status").textContent = tFmt("unmask.status.success_paste", { verdict: verdictLocalized }); } $("unmask-fetch-btn")?.addEventListener("click", runUnmaskFromId); $("unmask-paste-btn")?.addEventListener("click", runUnmaskFromPaste); $("unmask-id")?.addEventListener("keydown", (e) => { if (e.key === "Enter") { e.preventDefault(); runUnmaskFromId(); } }); // ════════════════════════════════════════════════════════════════════ // 📜 Chat-template Sniffer (v0.7.1 anti-bullshit pack #2) // ════════════════════════════════════════════════════════════════════ const TEMPLATE_VERDICT_COLOR = { ok: "#3fb950", custom: "#f1c40f", missing: "#f85149", base_model: "#8b949e", unknown: "#8b949e", }; async function fetchHfTokenizerConfig(modelId) { const url = `https://huggingface.co/${modelId}/raw/main/tokenizer_config.json`; const resp = await fetch(url); if (!resp.ok) { if (resp.status === 401 || resp.status === 403) { const err = new Error(`🔒 ${modelId} is gated — accept license at https://huggingface.co/${modelId}`); err.code = "gated"; err.modelId = modelId; throw err; } throw new Error(`HTTP ${resp.status} — tokenizer_config.json not found at ${url}`); } return await resp.json(); } function renderTemplateCard(result, modelId = "") { const color = TEMPLATE_VERDICT_COLOR[result.verdict] || TEMPLATE_VERDICT_COLOR.unknown; const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c => ({"&":"&","<":"<",">":">",'"':""","'":"'"}[c])); const verdictLabel = t(`template.verdict.${result.verdict}`) || result.verdict; const labelFamily = t("template.label.family") || "Detected family"; const labelMarkers = t("template.label.markers") || "Matched markers"; const labelTplLen = t("template.label.tpl_len") || "Template length"; const sectionWarn = t("template.section.warnings") || "Warnings"; const sectionCmd = t("template.section.commands") || "Commands by framework"; const sectionRaw = t("template.section.raw") || "Raw template (preview)"; // Human-readable family name const familyName = result.detectedLabel ? result.detectedLabel : (result.detectedFamily === "custom" ? (t("template.family.custom") || "custom (unknown family)") : (t("template.family.none") || "(no chat_template)")); const warningsHtml = result.warnings.length ? `
${sectionWarn}
` : ""; // Framework commands — only show when we have a chat_template to apply. let cmdHtml = ""; if (result.hasChatTemplate) { const lmEvalCmd = "lm_eval --model hf --model_args pretrained=" + (modelId || "MODEL_ID") + " --tasks gsm8k --apply_chat_template --batch_size 8"; const vllmCmd = result.vllmTemplate ? `vllm serve ${modelId || "MODEL_ID"} --chat-template ${result.vllmTemplate}` : `vllm serve ${modelId || "MODEL_ID"} # template auto-detected from tokenizer_config`; const transformersCmd = `from transformers import AutoTokenizer\n` + `tok = AutoTokenizer.from_pretrained("${modelId || "MODEL_ID"}")\n` + `prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)`; cmdHtml = `
${sectionCmd}
lm-evaluation-harness
${escapeHtml(lmEvalCmd)}
vLLM serve
${escapeHtml(vllmCmd)}
transformers (Python)
${escapeHtml(transformersCmd)}
`; } // Raw preview only when present const rawHtml = result.rawTemplate ? `
${sectionRaw}
${escapeHtml(result.rawTemplate)}
` : ""; return `
${verdictLabel}
${modelId ? `
${escapeHtml(modelId)}
` : ""}
${labelFamily}${escapeHtml(familyName)}
${labelMarkers}${result.matchedMarkers.length}
${labelTplLen}${result.rawTemplateLength.toLocaleString()}
${warningsHtml} ${cmdHtml} ${rawHtml}
`; } async function runTemplateFromId() { const modelId = ($("template-id").value || "").trim(); if (!modelId) { $("template-status").textContent = t("template.status.empty_id") || "⚠ Enter a model id."; return; } $("template-status").textContent = tFmt("template.status.fetching", { modelId }); $("template-fetch-btn").disabled = true; try { const cfg = await fetchHfTokenizerConfig(modelId); const result = sniffChatTemplate(cfg); $("template-output").innerHTML = renderTemplateCard(result, modelId); const verdictLocalized = t(`template.verdict.${result.verdict}`) || result.verdict; $("template-status").textContent = tFmt("template.status.success", { modelId, verdict: verdictLocalized }); } catch (err) { if (err.code === "gated") { $("template-status").innerHTML = `🔒 ${err.modelId} ${t("hf_auto.gated_msg") || "is gated. Accept the license here:"} huggingface.co/${err.modelId}`; } else { $("template-status").textContent = `❌ ${err.message}`; } $("template-output").innerHTML = ""; } finally { $("template-fetch-btn").disabled = false; } } function runTemplateFromPaste() { const raw = ($("template-paste").value || "").trim(); if (!raw) { $("template-status").textContent = t("template.status.empty_paste") || "⚠ Paste a tokenizer_config.json first."; return; } let cfg; try { cfg = JSON.parse(raw); } catch (e) { $("template-status").textContent = tFmt("template.status.invalid_json", { error: e.message }); return; } const result = sniffChatTemplate(cfg); const pastedLabel = t("template.pasted_label") || "(pasted config)"; $("template-output").innerHTML = renderTemplateCard(result, pastedLabel); const verdictLocalized = t(`template.verdict.${result.verdict}`) || result.verdict; $("template-status").textContent = tFmt("template.status.success_paste", { verdict: verdictLocalized }); } $("template-fetch-btn")?.addEventListener("click", runTemplateFromId); $("template-paste-btn")?.addEventListener("click", runTemplateFromPaste); $("template-id")?.addEventListener("keydown", (e) => { if (e.key === "Enter") { e.preventDefault(); runTemplateFromId(); } }); // ════════════════════════════════════════════════════════════════════ // 🎯 Arena-Elo CI reconstructor (v0.7.2 anti-bullshit pack #3) // ════════════════════════════════════════════════════════════════════ function renderArenaCard(result) { const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c => ({"&":"&","<":"<",">":">",'"':""","'":"'"}[c])); const fmtN = (x) => x === null || x === undefined ? "—" : Number(x).toLocaleString(); const titleRanked = t("arena.section.ranked") || "Ranked Elos with 95% CIs"; const titleTies = t("arena.section.ties") || "Statistical ties (CI overlap)"; const titleSummary = t("arena.section.summary") || "Summary"; const colRank = t("arena.col.rank") || "#"; const colModel = t("arena.col.model") || "Model"; const colElo = t("arena.col.elo") || "Elo"; const colCi = t("arena.col.ci") || "95% CI"; const colSpread = t("arena.col.ci_width") || "CI width"; const colMatches = t("arena.col.matches") || "Matches"; const colWins = t("arena.col.wins") || "W / L / T"; const noTies = t("arena.no_ties") || "No statistical ties — all pairs distinguishable at 95% CI."; // Ranked table let tableRows = ""; for (const r of result.ratings) { tableRows += ` #${r.rank} ${escapeHtml(r.model)} ${fmtN(r.elo)} [${fmtN(r.ci_low)}, ${fmtN(r.ci_high)}] ±${fmtN(Math.round(r.ci_width / 2 * 10) / 10)} ${fmtN(r.matches)} ${fmtN(r.wins)} / ${fmtN(r.losses)} / ${fmtN(r.ties_count)} `; } // Ties section let tiesHtml = ""; if (result.ties.length === 0) { tiesHtml = `

${noTies}

`; } else { tiesHtml = ``; for (const tieEntry of result.ties) { tiesHtml += ``; } tiesHtml += `
${t("arena.col.tie_pair") || "Pair"} ${t("arena.col.tie_diff") || "Elo gap"} ${t("arena.col.tie_overlap") || "CI overlap"}
#${tieEntry.rank_a} ${escapeHtml(tieEntry.model_a)} vs #${tieEntry.rank_b} ${escapeHtml(tieEntry.model_b)} ${fmtN(Math.round(tieEntry.elo_diff * 10) / 10)} Elo ${fmtN(Math.round(tieEntry.overlap_elo * 10) / 10)} Elo
`; } // Summary panel const s = result.summary; const summaryHtml = ` `; return `
${titleRanked}
${tableRows}
${colRank}${colModel}${colElo} ${colCi}${colSpread} ${colMatches}${colWins}
${titleTies} (${result.ties.length}) ${tiesHtml}
${titleSummary} ${summaryHtml}
`; } function runArenaCompute() { const csv = ($("arena-csv").value || "").trim(); if (!csv) { $("arena-status").textContent = t("arena.status.empty") || "⚠ Paste vote CSV or click Load sample."; return; } let votes; try { votes = parseVotesCSV(csv); } catch (e) { $("arena-status").textContent = `❌ ${e.message}`; return; } if (votes.length < 10) { $("arena-status").textContent = tFmt("arena.status.too_few", { n: votes.length }); return; } $("arena-status").textContent = tFmt("arena.status.computing", { n: votes.length }); // Defer to next tick so the status text actually paints before the heavy bootstrap. setTimeout(() => { const t0 = performance.now(); const result = computeArenaCI(votes, { bootstrapN: 200, ciLevel: 0.95 }); const ms = Math.round(performance.now() - t0); $("arena-output").innerHTML = renderArenaCard(result); $("arena-status").textContent = tFmt("arena.status.done", { n: votes.length, models: result.summary.n_models, ties: result.summary.n_ties, ms, }); }, 30); } $("arena-sample-btn")?.addEventListener("click", () => { $("arena-csv").value = SAMPLE_VOTES_CSV; $("arena-status").textContent = t("arena.status.sample_loaded") || "✅ Sample loaded. Click Compute CIs."; }); $("arena-run-btn")?.addEventListener("click", runArenaCompute); $("arena-clear-btn")?.addEventListener("click", () => { $("arena-csv").value = ""; $("arena-output").innerHTML = ""; $("arena-status").textContent = ""; }); // ════════════════════════════════════════════════════════════════════ // 🧪 Contamination Prior (v0.7.3 anti-bullshit pack #4) // ════════════════════════════════════════════════════════════════════ const CONTAM_LEVEL_COLOR = { high: "#f85149", medium: "#f1c40f", low: "#3fb950" }; function renderContamCard(rows, modelCutoff) { const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c => ({"&":"&","<":"<",">":">",'"':""","'":"'"}[c])); const titleRanked = t("contam.section.ranked") || "Benchmark contamination priors"; const titleHigh = t("contam.section.high") || "🔴 High-risk benchmarks (treat scores as unreliable)"; const titleMed = t("contam.section.medium") || "🟡 Medium-risk (verify with alternates)"; const titleLow = t("contam.section.low") || "🟢 Low-risk (likely clean)"; const colBench = t("contam.col.benchmark") || "Benchmark"; const colReleased = t("contam.col.released") || "Released"; const colGap = t("contam.col.gap") || "Gap (months)"; const colPrior = t("contam.col.prior") || "P(contam)"; const colLevel = t("contam.col.level") || "Level"; const colCorpora = t("contam.col.corpora") || "In corpora"; const colCategory = t("contam.col.category") || "Category"; const high = rows.filter(r => r.level === "high"); const medium = rows.filter(r => r.level === "medium"); const low = rows.filter(r => r.level === "low"); function tableFor(group) { if (group.length === 0) return `

${t("contam.no_entries") || "(none in this category)"}

`; let body = ""; for (const r of group) { body += ` ${escapeHtml(r.benchmark)} ${escapeHtml(r.benchmark_released)} ${r.gap_months > 0 ? "+" : ""}${r.gap_months} ${(r.prior * 100).toFixed(0)}% ${r.benchmark_in_corpora ? "✓" : "✗"} ${escapeHtml(r.benchmark_category)} `; } return `${body}
${colBench}${colReleased}${colGap}${colPrior}${colCorpora}${colCategory}
`; } const adviceHigh = t("contam.advice.high") || "Treat these scores as unreliable. Replace with newer / private-test alternates (MMLU-Pro, GPQA, MUSR, MATH-500)."; const adviceMedium = t("contam.advice.medium") || "Take with caution. Look for replication on a held-out subset or community reproductions."; const adviceLow = t("contam.advice.low") || "Score likely uncontaminated, but absence of leak is not proof — still cross-check with alternate test."; return `
${tFmt("contam.summary.headline", { cutoff: modelCutoff, n: rows.length })}
🔴 ${t("contam.label.high") || "High risk"}${high.length}
🟡 ${t("contam.label.medium") || "Medium"}${medium.length}
🟢 ${t("contam.label.low") || "Low"}${low.length}
${titleHigh} (${high.length})

${adviceHigh}

${tableFor(high)}
${titleMed} (${medium.length})

${adviceMedium}

${tableFor(medium)}
${titleLow} (${low.length})

${adviceLow}

${tableFor(low)}
`; } function runContamCompute() { const cutoff = ($("contam-cutoff").value || "").trim(); if (!cutoff) { $("contam-status").textContent = t("contam.status.empty") || "⚠ Enter a model training cutoff date (e.g. 2023-12)."; return; } if (!/^\d{4}(-\d{1,2})?(-\d{1,2})?$/.test(cutoff)) { $("contam-status").textContent = t("contam.status.bad_date") || "⚠ Bad date format. Use YYYY-MM or YYYY-MM-DD."; return; } const rows = rateAllBenchmarks(cutoff); $("contam-output").innerHTML = renderContamCard(rows, cutoff); $("contam-status").textContent = tFmt("contam.status.done", { cutoff, n: rows.length, high: rows.filter(r => r.level === "high").length, }); } $("contam-run-btn")?.addEventListener("click", runContamCompute); $("contam-cutoff")?.addEventListener("keydown", (e) => { if (e.key === "Enter") { e.preventDefault(); runContamCompute(); } }); // ════════════════════════════════════════════════════════════════════ // ⚖️ Quant-regime classifier (v0.7.3 anti-bullshit pack #5) // ════════════════════════════════════════════════════════════════════ const QUANT_REGIME_COLOR = { safe: "#3fb950", mild: "#3fb950", significant: "#f1c40f", cliff: "#f85149", }; // Populate scheme dropdown from QUANT_SCHEMES on first render. Idempotent. function populateQuantSchemes() { const sel = $("quant-scheme"); if (!sel || sel.options.length > 1) return; for (const s of QUANT_SCHEMES) { const opt = document.createElement("option"); opt.value = s.id; opt.textContent = s.label; sel.appendChild(opt); } } // Cache config across "Fetch" + "Predict" / "Compare" actions on the same id. let __quantLastConfig = null; let __quantLastModelId = null; async function quantFetchConfig() { const modelId = ($("quant-id").value || "").trim(); if (!modelId) { $("quant-status").textContent = t("quant.status.empty_id") || "⚠ Enter a model id."; return null; } $("quant-status").textContent = tFmt("quant.status.fetching", { modelId }); $("quant-fetch-btn").disabled = true; try { const cfg = await fetchHfConfig(modelId); __quantLastConfig = cfg; __quantLastModelId = modelId; $("quant-status").textContent = tFmt("quant.status.fetched", { modelId }); return cfg; } catch (err) { if (err.code === "gated") { $("quant-status").innerHTML = `🔒 ${err.modelId} ${t("hf_auto.gated_msg") || "is gated. Accept the license here:"} huggingface.co/${err.modelId}`; } else { $("quant-status").textContent = `❌ ${err.message}`; } return null; } finally { $("quant-fetch-btn").disabled = false; } } function renderQuantSingle(result, modelId) { const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c => ({"&":"&","<":"<",">":">",'"':""","'":"'"}[c])); const fmtN = (x) => x === null || x === undefined ? "—" : Number(x).toLocaleString(); const color = QUANT_REGIME_COLOR[result.regime] || "#8b949e"; const regimeLabel = t(`quant.regime.${result.regime}`) || result.regime; let recoHtml = ""; if (result.recommend_code) { const recoText = result.recommend_scheme ? tFmt("quant.reco." + result.recommend_code, { scheme: QUANT_SCHEMES.find(s => s.id === result.recommend_scheme)?.label || result.recommend_scheme, }) : (t("quant.reco." + result.recommend_code) || result.recommend_code); recoHtml = `

${recoText}

`; } else { recoHtml = `

${t("quant.reco.no_action") || "No action needed — quantization is safe for this architecture."}

`; } return `
${regimeLabel}
${escapeHtml(modelId)} + ${escapeHtml(result.scheme_label)}
${t("quant.label.gamma_shift") || "γ shift"}+${result.gamma_shift.toFixed(3)}
${t("quant.label.delta_ppl") || "ΔPPL (est.)"}+${result.delta_ppl.mid.toFixed(2)}
${t("quant.label.arch_mult") || "Arch multiplier"}×${result.arch_multiplier}
${t("quant.section.breakdown") || "Breakdown"}
  • ${t("quant.field.scheme") || "Scheme"}: ${escapeHtml(result.scheme_label)} (${result.scheme_bits}-bit, ${result.scheme_calibrated ? (t("quant.field.calibrated") || "calibrated") : (t("quant.field.uncalibrated") || "uncalibrated")})
  • ${t("quant.field.base_penalty") || "Base penalty"}: ${result.base_penalty.toFixed(3)}
  • ${t("quant.field.arch_mult_full") || "Architecture multiplier"}: ×${result.arch_multiplier} (d_head, GQA, SWA, params)
  • ${t("quant.field.gamma_shift") || "Predicted γ shift"}: +${result.gamma_shift.toFixed(3)}
  • ${t("quant.field.ppl_band") || "ΔPPL band (est.)"}: ${result.delta_ppl.low.toFixed(2)} – ${result.delta_ppl.high.toFixed(2)}
  • ${t("quant.field.params") || "Parameters"}: ${fmtN(result.n_params)}
${t("quant.section.reco") || "Recommendation"} ${recoHtml}
`; } function renderQuantAll(rows, modelId) { const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c => ({"&":"&","<":"<",">":">",'"':""","'":"'"}[c])); let body = ""; for (const r of rows) { const color = QUANT_REGIME_COLOR[r.regime] || "#8b949e"; const regimeLabel = t(`quant.regime.${r.regime}`) || r.regime; body += ` ${escapeHtml(r.scheme_label)} ${r.scheme_bits}-bit ${r.scheme_calibrated ? "✓" : ""} +${r.gamma_shift.toFixed(3)} ${r.delta_ppl.low.toFixed(2)}–${r.delta_ppl.high.toFixed(2)} ${regimeLabel} `; } return `
${tFmt("quant.summary.headline_all", { modelId })}
${t("quant.section.compare") || "All schemes (sorted by safety)"} ${body}
${t("quant.col.scheme") || "Scheme"} ${t("quant.col.bits") || "Bits"} ${t("quant.col.gamma_shift") || "γ shift"} ${t("quant.col.ppl_band") || "ΔPPL band"} ${t("quant.col.regime") || "Regime"}
`; } async function runQuantPredict() { const cfg = __quantLastConfig || await quantFetchConfig(); if (!cfg) return; const schemeId = $("quant-scheme").value; if (!schemeId) { $("quant-status").textContent = t("quant.status.no_scheme") || "⚠ Pick a quant scheme."; return; } const result = predictQuantShift(cfg, schemeId); if (!result) { $("quant-status").textContent = "❌ Unknown scheme."; return; } $("quant-output").innerHTML = renderQuantSingle(result, __quantLastModelId); $("quant-status").textContent = tFmt("quant.status.done", { regime: t(`quant.regime.${result.regime}`) || result.regime }); } async function runQuantAll() { const cfg = __quantLastConfig || await quantFetchConfig(); if (!cfg) return; const rows = predictAllSchemes(cfg); $("quant-output").innerHTML = renderQuantAll(rows, __quantLastModelId); $("quant-status").textContent = tFmt("quant.status.done_all", { n: rows.length }); } populateQuantSchemes(); $("quant-fetch-btn")?.addEventListener("click", quantFetchConfig); $("quant-run-btn")?.addEventListener("click", runQuantPredict); $("quant-all-btn")?.addEventListener("click", runQuantAll); $("quant-id")?.addEventListener("keydown", (e) => { if (e.key === "Enter") { e.preventDefault(); quantFetchConfig(); } }); // ════════════════════════════════════════════════════════════════════ // 🔀 Cross-framework drift bound (v0.7.5 anti-bullshit pack #6) // ════════════════════════════════════════════════════════════════════ const DRIFT_VERDICT_COLOR = { noise: "#3fb950", suspicious: "#f1c40f", bug: "#f85149", bug_template: "#f85149", }; function populateDriftDropdowns() { for (const side of ["a", "b"]) { const fwSel = $(`drift-${side}-framework`); const dtSel = $(`drift-${side}-dtype`); if (fwSel && fwSel.options.length === 0) { for (const f of DRIFT_FRAMEWORKS) { const opt = document.createElement("option"); opt.value = f.id; opt.textContent = f.label; fwSel.appendChild(opt); } } if (dtSel && dtSel.options.length === 0) { for (const d of DRIFT_DTYPES) { const opt = document.createElement("option"); opt.value = d.id; opt.textContent = d.label; dtSel.appendChild(opt); } } } } function readDriftSetup(side) { return { score: parseFloat($(`drift-${side}-score`).value), framework: $(`drift-${side}-framework`).value, dtype: $(`drift-${side}-dtype`).value, batch: parseInt($(`drift-${side}-batch`).value, 10) || 1, chat_template: $(`drift-${side}-template`).value, }; } function renderDriftCard(result) { const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c => ({"&":"&","<":"<",">":">",'"':""","'":"'"}[c])); const color = DRIFT_VERDICT_COLOR[result.verdict] || "#8b949e"; const verdictLabel = t(`drift.verdict.${result.verdict}`) || result.verdict; const a = result.setup_a, b = result.setup_b; const fwLabel = (id) => DRIFT_FRAMEWORKS.find(f => f.id === id)?.label || id; const dtLabel = (id) => DRIFT_DTYPES.find(d => d.id === id)?.label || id; let causeHtml = ""; if (result.dominant_cause) { const causeText = t(`drift.cause.${result.dominant_cause}`) || result.dominant_cause; causeHtml = `

${t("drift.dominant_cause") || "Dominant cause"}: ${causeText}

`; } const recoText = t(`drift.reco.${result.verdict}`) || ""; return `
${verdictLabel}
${t("drift.label.observed") || "Observed gap"}${result.observed_gap.toFixed(2)}
${t("drift.label.band") || "Numerical band"}±${result.numerical_band.toFixed(2)}
${t("drift.label.ratio") || "Gap / band"}${result.numerical_band > 0 ? (result.observed_gap / result.numerical_band).toFixed(1) : "∞"}×
${t("drift.section.setups") || "Setups"}
${t("drift.setup_a") || "Setup A"}${t("drift.setup_b") || "Setup B"}
${t("drift.score") || "Score"}${a.score?.toFixed(2)}${b.score?.toFixed(2)}
${t("drift.framework") || "Framework"}${escapeHtml(fwLabel(a.framework))}${escapeHtml(fwLabel(b.framework))}
${t("drift.dtype") || "Dtype"}${escapeHtml(dtLabel(a.dtype))}${escapeHtml(dtLabel(b.dtype))}
${t("drift.batch") || "Batch"}${a.batch}${b.batch}
${t("drift.template") || "Chat-template"}${escapeHtml(t("drift.template." + a.chat_template) || a.chat_template)}${escapeHtml(t("drift.template." + b.chat_template) || b.chat_template)}
${t("drift.section.breakdown") || "Drift contributors (numerical band)"}
  • ${t("drift.contrib.dtype") || "Dtype mismatch"}: ${result.breakdown.dtype.toFixed(2)} pts
  • ${t("drift.contrib.framework") || "Framework"}: ${result.breakdown.framework.toFixed(2)} pts
  • ${t("drift.contrib.batch") || "Batch difference"}: ${result.breakdown.batch.toFixed(2)} pts
  • ${result.breakdown.template_mismatch !== null ? `
  • ${t("drift.contrib.template") || "Chat-template MISMATCH"}: ~${result.breakdown.template_mismatch.toFixed(0)} pts (dominant)
  • ` : ""}
${t("drift.section.verdict") || "Verdict & recommendation"} ${causeHtml} ${recoText ? `

${recoText}

` : ""}
`; } function runDriftCompute() { const a = readDriftSetup("a"); const b = readDriftSetup("b"); if (Number.isNaN(a.score) || Number.isNaN(b.score)) { $("drift-status").textContent = t("drift.status.empty_scores") || "⚠ Enter both scores."; return; } const result = computeDriftBound(a, b); $("drift-output").innerHTML = renderDriftCard(result); if (window.__taf_applyTranslations) window.__taf_applyTranslations(); $("drift-status").textContent = tFmt("drift.status.done", { verdict: t(`drift.verdict.${result.verdict}`) || result.verdict }); } function loadDriftSample() { // Canonical chat-template bug: same model on lm-eval-hf (no template applied) // gets ~50 on multi-turn, vLLM-served (template auto-applied) gets ~75. $("drift-a-score").value = 50.2; $("drift-a-framework").value = "lm-eval-hf"; $("drift-a-dtype").value = "bf16"; $("drift-a-batch").value = 1; $("drift-a-template").value = "not_applied"; $("drift-b-score").value = 74.8; $("drift-b-framework").value = "vllm-served"; $("drift-b-dtype").value = "bf16"; $("drift-b-batch").value = 8; $("drift-b-template").value = "applied"; $("drift-status").textContent = t("drift.status.sample_loaded") || "✅ Sample loaded (canonical chat-template bug). Click Compute drift bound."; } populateDriftDropdowns(); $("drift-run-btn")?.addEventListener("click", runDriftCompute); $("drift-sample-btn")?.addEventListener("click", loadDriftSample); // ════════════════════════════════════════════════════════════════════ // 🔍 NIAH → reasoning gap predictor (v0.7.6 anti-bullshit pack #7) // ════════════════════════════════════════════════════════════════════ const NIAH_VERDICT_COLOR = { robust: "#3fb950", marginal: "#f1c40f", degraded: "#f1c40f", retrieval_only: "#f85149", broken: "#f85149", }; let __niahLastConfig = null; let __niahLastModelId = null; async function niahFetchConfig() { const modelId = ($("niah-id").value || "").trim(); if (!modelId) { $("niah-status").textContent = t("niah.status.empty_id") || "⚠ Enter a model id."; return null; } $("niah-status").textContent = tFmt("niah.status.fetching", { modelId }); $("niah-fetch-btn").disabled = true; try { const cfg = await fetchHfConfig(modelId); __niahLastConfig = cfg; // Keep the user-pasted id for RULER lookup (it has the canonical // alias mapping). The mirror id is recorded in cfg.__via_mirror // for any UI that wants to surface "fetched via mirror" — niah // status string already shows it below. __niahLastModelId = modelId; if (cfg.__via_mirror) { $("niah-status").innerHTML = `${tFmt("niah.status.fetched", { modelId })} (via mirror ${cfg.__via_mirror})`; } else { $("niah-status").textContent = tFmt("niah.status.fetched", { modelId }); } return cfg; } catch (err) { if (err.code === "gated") { $("niah-status").innerHTML = `🔒 ${err.modelId} ${t("hf_auto.gated_msg") || "is gated. Accept the license here:"} huggingface.co/${err.modelId}`; } else { $("niah-status").textContent = `❌ ${err.message}`; } return null; } finally { $("niah-fetch-btn").disabled = false; } } function renderNIAHCard(result, modelId, calib = null) { const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c => ({"&":"&","<":"<",">":">",'"':""","'":"'"}[c])); const fmtN = (x) => x === null || x === undefined ? "—" : Number(x).toLocaleString(); const color = NIAH_VERDICT_COLOR[result.verdict] || "#8b949e"; const verdictLabel = t(`niah.verdict.${result.verdict}`) || result.verdict; const reco = t(`niah.reco.${result.verdict}`) || ""; const safeText = result.safe_context ? tFmt("niah.safe_context", { ctx: result.safe_context }) : (t("niah.safe_context_none") || "No safe context found below your target — model fails reasoning even at small contexts."); // RULER calibration block — appears only when KB lookup hits. // Shows measured RULER aggregate, derived NIAH/reasoning, and the // delta vs the heuristic so users see when the predictor was off. let calibBlock = ""; if (calib) { const fmtPct = (v) => `${(v * 100).toFixed(0)}%`; const fmtDelta = (d) => { if (d == null) return "—"; const pp = Math.round(d * 100); const sign = pp > 0 ? "+" : ""; const col = Math.abs(pp) >= 10 ? "#f0883e" : Math.abs(pp) >= 5 ? "#d29922" : "#8b949e"; return `${sign}${pp} pp`; }; const extrapNote = calib.extrapolated ? ` ⚠ ${t("niah.calib.extrapolated") || "extrapolated outside RULER's measured range"}` : ""; calibBlock = `
📊 ${t("niah.calib.heading") || "RULER-calibrated (NVIDIA published data)"}

${tFmt("niah.calib.matched", { alias: escapeHtml(calib.matched_alias), canonical: escapeHtml(calib.canonical_id), }) || `Matched ${escapeHtml(calib.matched_alias)} → KB row ${escapeHtml(calib.canonical_id)}.`}

${t("niah.calib.aggregate") || "RULER aggregate"} @ ${fmtN(result.T_eval)}: ${calib.ruler_avg_pct}% (${t("niah.calib.interp") || "interpolated between"} ${calib.interp_anchor})${extrapNote}

${t("niah.calib.col.heuristic") || "Heuristic"} ${t("niah.calib.col.calibrated") || "RULER-calibrated"} ${t("niah.calib.col.delta") || "Δ"}
NIAH ${fmtPct(result.niah_rate)} ${fmtPct(calib.niah_calibrated)} ${fmtDelta(calib.delta_niah)}
${t("niah.label.reasoning") || "Reasoning"} ${fmtPct(result.reasoning_rate)} ${fmtPct(calib.reasoning_calibrated)} ${fmtDelta(calib.delta_reasoning)}

${t("niah.calib.factors") || "Per-task factors from RULER paper Appendix Tables 13-16:"} retrieval = ${calib.retrieval_factor}× aggregate, reasoning = ${calib.reasoning_factor}× aggregate (${t("niah.calib.factors_caveat") || "honest range: retrieval 0.95-1.10×, reasoning 0.60-0.85×"}).

${t("niah.calib.claimed_vs_effective") || "Paper-reported"}: ${t("niah.calib.claimed") || "claimed"} ${fmtN(calib.claimed_context)} / ${t("niah.calib.effective") || "effective"} ${fmtN(calib.effective_context)}. ${t("niah.calib.source") || "Source"}: RULER paper (Hsieh et al., COLM 2024)

`; } else if (modelId) { // KB miss — explicitly state we're heuristic-only. calibBlock = `

💡 ${t("niah.calib.miss") || "RULER calibration unavailable for this model — using architectural heuristic only. Add to data/ruler_kb.json if you have measured numbers."}

`; } return `
${verdictLabel}
${escapeHtml(modelId)} @ ${fmtN(result.T_eval)} tokens
${t("niah.label.niah") || "NIAH pass rate"}${(result.niah_rate * 100).toFixed(0)}%
${t("niah.label.reasoning") || "Reasoning pass rate"}${(result.reasoning_rate * 100).toFixed(0)}%
${t("niah.label.gap") || "Gap"}${(result.gap * 100).toFixed(0)} pts
${calibBlock}
${t("niah.section.breakdown") || "Architecture breakdown"}
  • γ_Padé @ T_eval: ${result.gamma_pade}
  • ${t("niah.field.dhorizon") || "d_horizon (effective)"}: ${fmtN(result.d_horizon)} tokens
  • ${t("niah.field.ratio") || "T_eval / d_horizon"}: ${result.horizon_ratio}×
  • ${t("niah.field.arch_pressure") || "Arch pressure (small d_head + GQA + SWA)"}: ×${result.arch_pressure}
  • ${t("niah.field.theta") || "RoPE θ"}: ${fmtN(result.theta)}
  • ${t("niah.field.t_train") || "T_train (claimed)"}: ${fmtN(result.T_train)}
${t("niah.section.reco") || "Recommendation"}

${reco}

${t("niah.label.safe_ctx") || "Safe reasoning context"}: ${safeText}

`; } function renderNIAHSweep(rows, modelId) { const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c => ({"&":"&","<":"<",">":">",'"':""","'":"'"}[c])); const fmtN = (x) => x === null || x === undefined ? "—" : Number(x).toLocaleString(); let body = ""; for (const r of rows) { const color = NIAH_VERDICT_COLOR[r.verdict] || "#8b949e"; const label = t(`niah.verdict.${r.verdict}`) || r.verdict; body += ` ${fmtN(r.T_eval)} ${(r.niah_rate * 100).toFixed(0)}% ${(r.reasoning_rate * 100).toFixed(0)}% ${(r.gap * 100).toFixed(0)} pts ${label} `; } return `
${tFmt("niah.summary.sweep", { modelId })}
${t("niah.section.sweep") || "Pass rate sweep across context lengths"} ${body}
${t("niah.col.context") || "T_eval"} ${t("niah.col.niah") || "NIAH"} ${t("niah.col.reasoning") || "Reasoning"} ${t("niah.col.gap") || "Gap"} ${t("niah.col.verdict") || "Verdict"}
`; } async function runNIAHPredict() { const cfg = __niahLastConfig || await niahFetchConfig(); if (!cfg) return; const T_eval = parseInt($("niah-teval").value, 10); if (Number.isNaN(T_eval) || T_eval < 512) { $("niah-status").textContent = t("niah.status.bad_teval") || "⚠ Enter a target context (≥512)."; return; } const result = predictNIAHReasoning(cfg, T_eval); // Ensure RULER KB is loaded once; idempotent. No-op if already loaded. await loadRulerKB(); // Calibrate against published RULER measurements if available. const calib = calibrateNIAH(__niahLastModelId, T_eval, result); $("niah-output").innerHTML = renderNIAHCard(result, __niahLastModelId, calib); $("niah-status").textContent = tFmt("niah.status.done", { verdict: t(`niah.verdict.${result.verdict}`) || result.verdict, niah: (result.niah_rate * 100).toFixed(0), reasoning: (result.reasoning_rate * 100).toFixed(0), }); } async function runNIAHSweep() { const cfg = __niahLastConfig || await niahFetchConfig(); if (!cfg) return; const rows = sweepContextLengths(cfg); $("niah-output").innerHTML = renderNIAHSweep(rows, __niahLastModelId); $("niah-status").textContent = tFmt("niah.status.sweep_done", { n: rows.length }); } $("niah-fetch-btn")?.addEventListener("click", niahFetchConfig); $("niah-run-btn")?.addEventListener("click", runNIAHPredict); $("niah-sweep-btn")?.addEventListener("click", runNIAHSweep); $("niah-id")?.addEventListener("keydown", (e) => { if (e.key === "Enter") { e.preventDefault(); niahFetchConfig(); } }); function configToPreset(cfg, modelId) { const n_attn = cfg.num_attention_heads || cfg.n_head || 0; const n_kv = cfg.num_key_value_heads || cfg.num_attention_heads || cfg.n_head || 0; const hidden = cfg.hidden_size || cfg.d_model || cfg.n_embd || 0; const d_head = cfg.head_dim || (n_attn > 0 ? Math.floor(hidden / n_attn) : 0); const theta = cfg.rope_theta || cfg.rotary_emb_base || (cfg.alibi ? null : (cfg.position_embedding_type === "absolute" ? null : 10000)); const T_train = cfg.max_position_embeddings || cfg.max_sequence_length || cfg.n_positions || cfg.n_ctx || 0; const n_layers = cfg.num_hidden_layers || cfg.n_layer || 0; const has_SWA = !!(cfg.sliding_window || cfg.use_sliding_window); let family = "rope-mha"; if (cfg.alibi) family = "alibi"; else if (cfg.model_type === "mamba" || cfg.model_type === "mamba2") family = "ssm"; else if (theta == null) family = "abspe"; else if (n_kv < n_attn) family = "rope-gqa"; const n_params_est = estimateParams(cfg); return { theta: theta || 10000, T_train: T_train || 2048, n_attention_heads: n_attn, n_kv_heads: n_kv, d_head: d_head, n_layers: n_layers, n_params: n_params_est, has_SWA: has_SWA, _family: family, _model_id: modelId, }; } function estimateParams(cfg) { const h = cfg.hidden_size || cfg.d_model || 0; const L = cfg.num_hidden_layers || cfg.n_layer || 0; const V = cfg.vocab_size || 32000; return Math.round(12 * h * h * L + 2 * V * h); } // ════════════════════════════════════════════════════════════════════ // Run recipe (manual mode) // ════════════════════════════════════════════════════════════════════ $("run-btn").addEventListener("click", async () => { if (!state.currentRecipe) { alert("Select a recipe first."); return; } const rid = state.currentRecipe.id; const params = collectParams(state.currentRecipe.params); await runAndDisplay(rid, params); }); function collectParams(paramNames) { const p = {}; paramNames.forEach(name => { const el = $("param_" + name); if (!el || el.value === "") return; let v = el.value; if (v === "true" || v === "false") { p[name] = (v === "true"); } else if (!isNaN(parseFloat(v)) && isFinite(v)) { p[name] = parseFloat(v); } else { p[name] = v; } }); return p; } // ════════════════════════════════════════════════════════════════════ // Ask mode (free-form question via router) // ════════════════════════════════════════════════════════════════════ $("ask-btn").addEventListener("click", async () => { const q = $("question").value.trim(); if (!q) { alert("Please type a question."); return; } $("ask-btn").disabled = true; setStatus("🤔 Asking the in-browser LLM to pick a recipe..."); try { const route = await routeQuestion(q); setStatus(`📋 Selected recipe ${route.recipe_id}. Running...`); await runAndDisplay(route.recipe_id, route.params, q); } catch (err) { setStatus(`❌ Routing failed: ${err.message}`); $("output-section").style.display = "block"; $("verdict-box").className = "verdict-no"; $("verdict-box").innerHTML = `Could not route question.
${escapeHtml(err.message)}

Try the Recipe mode for full manual control.`; } finally { $("ask-btn").disabled = false; } }); $("example-btn").addEventListener("click", () => { const ex = EXAMPLES[Math.floor(Math.random() * EXAMPLES.length)]; $("question").value = ex; }); async function routeQuestion(question) { const engine = await loadWebLLM(); const recipesDesc = state.recipes.map(r => ` ${r.id}: ${r.name} — ${r.description}\n params: ${r.params.join(", ")}` ).join("\n"); const systemPrompt = `You are a routing function. Given a user's free-form question about transformer LLM viability, you MUST output a single JSON object with two fields: - recipe_id: one of [${state.recipes.map(r => r.id).join(", ")}] - params: an object with parameter values inferred from the question Available recipes: ${recipesDesc} Common model facts you may use: Meta-Llama-3-8B: theta=500000, T_train=8192, n_attention_heads=32, n_kv_heads=8, d_head=128, n_layers=32, n_params=8e9 Mistral-7B-v0.1: theta=10000, T_train=8192, n_attention_heads=32, n_kv_heads=8, d_head=128, n_layers=32, n_params=7e9, has_SWA=true Qwen2.5-7B: theta=1000000, T_train=32768, n_attention_heads=28, n_kv_heads=4, d_head=128, n_layers=28, n_params=7.6e9 Llama-3.3-70B-Instruct: theta=500000, T_train=131072, n_attention_heads=64, n_kv_heads=8, d_head=128, n_layers=80, n_params=70e9 Respond with ONLY the JSON object. No prose, no markdown fences, no explanation.`; const reply = await engine.chat.completions.create({ messages: [ { role: "system", content: systemPrompt }, { role: "user", content: question }, ], max_tokens: 400, temperature: 0.0, response_format: { type: "json_object" }, }); const raw = reply.choices[0].message.content.trim(); let parsed; try { parsed = JSON.parse(raw); } catch (e) { // Try extracting JSON from markdown fences const m = raw.match(/\{[\s\S]*\}/); if (!m) throw new Error(`LLM returned non-JSON: ${raw.slice(0, 200)}`); parsed = JSON.parse(m[0]); } if (!parsed.recipe_id || !state.recipesById[parsed.recipe_id]) { throw new Error(`Unknown recipe: ${parsed.recipe_id}`); } return parsed; } // ════════════════════════════════════════════════════════════════════ // Run + display + synthesize // ════════════════════════════════════════════════════════════════════ async function runAndDisplay(recipeId, params, originalQuestion=null) { setStatus("🧮 Computing TAF chain..."); state.pyodide.globals.set("__rid", recipeId); state.pyodide.globals.set("__params", state.pyodide.toPy(params)); const resultJSON = state.pyodide.runPython(` import json result = run_recipe(__rid, **__params) json.dumps(result) `); const result = JSON.parse(resultJSON); result._original_question = originalQuestion; renderResult(result); $("output-section").style.display = "block"; $("profile-output").style.display = "none"; $("compare-output").style.display = "none"; state.lastResult = { type: "recipe", recipeId, params }; state.lastFullResult = result; setStatus("✅ Done. Numbers below."); if (ENABLE_WEBLLM) { await synthesizeAnswer(result); } } function renderResult(r) { console.log("[TAF] renderResult called with:", r); if (r.error) { $("verdict-box").className = "verdict-no"; $("verdict-box").innerHTML = `Error: ${escapeHtml(r.error)}`; $("chain-box").innerHTML = ""; return; } const vBox = $("verdict-box"); if (!vBox) { console.error("[TAF] verdict-box element not found!"); return; } const verdictStr = String(r.verdict || "UNKNOWN"); let vClass = ""; if (verdictStr.startsWith("YES") || verdictStr === "GO" || verdictStr.startsWith("USE SOFT")) vClass = "verdict-yes"; else if (verdictStr.startsWith("NO") || verdictStr.startsWith("MEMORY") || verdictStr === "TINY-MODEL") vClass = "verdict-no"; else vClass = "verdict-degraded"; vBox.className = vClass; const verdictEmoji = vClass === "verdict-yes" ? "✅" : (vClass === "verdict-no" ? "❌" : "⚠"); vBox.innerHTML = `
${verdictEmoji} ${escapeHtml(verdictStr)}
${escapeHtml(r.recipe_id || "")} — ${escapeHtml(r.recipe_name || "")}
Reason: ${escapeHtml(r.reason || "(none)")}
${r.mitigation && r.mitigation !== "None required." && r.mitigation !== "None — proceed with Chinchilla-optimal recipe." ? `
Action: ${escapeHtml(r.mitigation)}
` : ""} `; console.log("[TAF] verdict-box populated with class:", vClass, "verdict:", verdictStr); const cBox = $("chain-box"); cBox.innerHTML = ""; r.chain.forEach(step => { const div = document.createElement("details"); div.className = "chain-step"; div.innerHTML = ` Step ${step.step} — ${escapeHtml(step.name)} ${escapeHtml(step.section)}
${escapeHtml(step.formula)}
Inputs: ${escapeHtml(JSON.stringify(step.inputs))}
Result: ${formatResult(step.result)}
${step.interpretation ? `
${escapeHtml(step.interpretation)}
` : ""} `; cBox.appendChild(div); }); } function formatResult(r) { if (r === null || r === undefined) return "n/a (not applicable)"; if (typeof r === "number") return r.toLocaleString(undefined, { maximumFractionDigits: 4 }); if (typeof r === "object") return `
${escapeHtml(JSON.stringify(r, null, 2))}
`; return String(r); } function escapeHtml(s) { return String(s) .replace(/&/g, "&").replace(//g, ">") .replace(/"/g, """).replace(/'/g, "'"); } // ════════════════════════════════════════════════════════════════════ // WebLLM (synthesis + router) // ════════════════════════════════════════════════════════════════════ async function loadWebLLM() { if (state.webllm) return state.webllm; // Request persistent storage to avoid quota issues with cached model weights if (navigator.storage && navigator.storage.persist) { try { const persistent = await navigator.storage.persist(); console.log(persistent ? "Persistent storage granted" : "Persistent storage denied"); } catch (e) { console.warn("storage.persist() failed:", e); } } setStatus(`⏳ Loading WebLLM library + ${WEBLLM_MODEL.split("-")[0]} (~350MB first time, cached after)...`); const { CreateMLCEngine } = await import("https://esm.run/@mlc-ai/web-llm"); const tryLoad = async (modelId) => { return await CreateMLCEngine(modelId, { initProgressCallback: (info) => setStatus(`⏳ ${info.text || "Loading model..."}`), }); }; try { state.webllm = await tryLoad(WEBLLM_MODEL); } catch (err) { if (String(err).includes("QuotaExceeded") || String(err).includes("storage")) { setStatus(`⚠ Quota exceeded for ${WEBLLM_MODEL}. Trying smaller fallback ${WEBLLM_FALLBACK}...`); try { state.webllm = await tryLoad(WEBLLM_FALLBACK); } catch (err2) { throw new Error( `Both models failed. Browser storage too constrained. ` + `Try: (1) Settings → Privacy → Site settings → allow more storage for this site, ` + `(2) clear browser cache, (3) use Chrome/Edge in non-incognito mode. ` + `Original error: ${err2.message || err2}` ); } } else { throw err; } } return state.webllm; } async function synthesizeAnswer(result) { $("answer-header").style.display = "block"; $("answer-box").style.display = "block"; $("answer-box").innerHTML = 'Generating plain-English summary...'; let engine; try { engine = await loadWebLLM(); } catch (err) { $("answer-box").innerHTML = `⚠ WebLLM failed: ${escapeHtml(String(err))}
Numbers above are still correct.
`; return; } const prompt = buildSynthesisPrompt(result); let answer = ""; try { const reply = await engine.chat.completions.create({ messages: [ { role: "system", content: t("synthesis.system") }, { role: "user", content: prompt }, ], max_tokens: 400, temperature: 0.2, }); answer = reply.choices[0].message.content; } catch (err) { $("answer-box").innerHTML = `⚠ Synthesis failed: ${escapeHtml(String(err))}`; return; } $("answer-box").innerHTML = `
${escapeHtml(answer)}
↑ Synthesised by Llama-3.2-1B in your browser. Numbers are deterministic Python.
`; setStatus("✅ Done."); } function buildSynthesisPrompt(r) { const numbersBlock = r.chain.map(s => `Step ${s.step} (${s.section}) ${s.name}: ${formatResultPlain(s.result)} — ${s.interpretation || ""}` ).join("\n"); return `Recipe: ${r.recipe_id} — ${r.recipe_name} ${r._original_question ? `User question: "${r._original_question}"\n` : ""} Computed chain: ${numbersBlock} Verdict: ${r.verdict} Reason: ${r.reason} Action: ${r.mitigation} Summarize for non-technical user in 4-6 sentences. Cite section numbers (§X.Y). Mention verdict and most important action.`; } function formatResultPlain(r) { if (r === null || r === undefined) return "n/a"; if (typeof r === "number") return r.toLocaleString(undefined, { maximumFractionDigits: 4 }); if (typeof r === "object") return JSON.stringify(r); return String(r); } // ════════════════════════════════════════════════════════════════════ // INSPECTOR mode (paste raw config.json) // ════════════════════════════════════════════════════════════════════ $("inspector-btn").addEventListener("click", async () => { const raw = $("inspector-json").value.trim(); if (!raw) { $("inspector-status").textContent = "⚠ Paste a config.json first"; return; } let cfg; try { cfg = JSON.parse(raw); } catch (e) { $("inspector-status").textContent = `❌ Invalid JSON: ${e.message}`; return; } $("inspector-status").textContent = "⏳ Parsing + profiling..."; $("inspector-btn").disabled = true; try { const preset = configToPreset(cfg, cfg.model_type ? `` : ""); state.lastModelId = preset._model_id || ""; const T_eval = parseInt($("inspector-T_eval").value) || preset.T_train; const params = { theta: preset.theta, T_train: preset.T_train, T_eval: T_eval, n_attention_heads: preset.n_attention_heads, n_kv_heads: preset.n_kv_heads, d_head: preset.d_head, n_layers: preset.n_layers, n_params: preset.n_params, has_SWA: preset.has_SWA, }; state.pyodide.globals.set("__pp", state.pyodide.toPy(params)); const json = state.pyodide.runPython(` import json result = profile_model(**__pp) json.dumps(result) `); const profile = JSON.parse(json); renderProfile(profile, params); state.lastResult = { type: "profile", params }; state.lastFullResult = profile; $("inspector-status").innerHTML = `✅ Profiled: ${preset._family} (${preset.n_params.toExponential(2)} params)`; } catch (err) { $("inspector-status").textContent = `❌ ${err.message}`; console.error(err); } finally { $("inspector-btn").disabled = false; } }); // ════════════════════════════════════════════════════════════════════ // What-if T_eval slider — interactive exploration // ════════════════════════════════════════════════════════════════════ function renderWhatIfSlider(profile, params, targetEl) { if (!profile || !params) return; const minL = 256; const maxL = Math.max(params.T_eval * 4, 200000); const initialL = params.T_eval; targetEl.innerHTML = `

🎚 What-if: drag T_eval to see γ change live

Pure JS recompute (no Pyodide call). Shows the geometric γ_Padé and d_horizon as you slide. The full chain re-runs on click.

T_eval${initialL.toLocaleString()}
γ_Padé
d_horizon
L_NIAH ceiling
Predicted geometric verdict
`; if (window.__taf_applyTranslations) window.__taf_applyTranslations(); const update = () => { const T = parseInt($("whatif-slider").value); const sqrt2 = Math.SQRT2; const g_pade = (2 * params.theta - T * sqrt2) / (2 * params.theta + T * sqrt2); // Apply same decomposition as Python const g_corr = g_pade + (params.n_kv_heads < params.n_attention_heads ? 0.11 : 0) + (params.has_SWA ? -0.21 : 0) + (params.n_params >= 4e8 ? -0.15 : 0); let dh = null, niah = null, verdict, vClass; if (g_corr > 0 && g_corr < 1) { dh = params.theta * (1 - g_corr) * sqrt2 / (1 + g_corr); niah = 2 * dh; if (T < dh) { verdict = `✅ YES (margin ${((1 - T / dh) * 100).toFixed(0)}%)`; vClass = "yes"; } else if (T < niah) { verdict = `⚠ DEGRADED`; vClass = "deg"; } else { verdict = `❌ NO (past NIAH ceiling)`; vClass = "no"; } } else { verdict = `❌ NO (Phase B)`; vClass = "no"; } $("whatif-T_eval").textContent = T.toLocaleString(); $("whatif-gamma").textContent = g_pade.toFixed(4) + (g_corr !== g_pade ? ` → ${g_corr.toFixed(4)}` : ""); $("whatif-dh").textContent = dh !== null ? Math.round(dh).toLocaleString() : "n/a (Phase B)"; $("whatif-niah").textContent = niah !== null ? Math.round(niah).toLocaleString() : "n/a"; const vEl = $("whatif-verdict"); vEl.textContent = verdict; vEl.className = "verdict-text " + vClass; }; $("whatif-slider").addEventListener("input", update); $("whatif-rerun").addEventListener("click", () => { const T = parseInt($("whatif-slider").value); // Update params and trigger full re-profile $("profile-T_eval").value = T; $("profile-btn").click(); }); update(); } // ════════════════════════════════════════════════════════════════════ // FALSIFICATION dashboard inline // ════════════════════════════════════════════════════════════════════ const FALSIFICATION_STATUS = [ { id: "F1", claim: "γ_Padé MAE < 5% on non-anomalous Phase A models", status: "confirmed", evidence: "n=9, paper Tab. 4" }, { id: "F2", claim: "d_horizon predicts NIAH collapse within 1% (pythia-70m)", status: "confirmed", evidence: "predicted 4078, observed 4096" }, { id: "F3", claim: "Fisher info predicts forward-hook recovery within 0.2%", status: "confirmed", evidence: "12.5% predicted vs 12.3% observed" }, { id: "F4", claim: "Layer asymmetry early/late ratio ≈ 13.5× (pythia-70m)", status: "confirmed", evidence: "F2 thermostat experiment" }, { id: "F5", claim: "Area law S_γ = O(log N) for all γ > 0", status: "confirmed", evidence: "n=56, r=-0.954" }, { id: "F6", claim: "KV truncation at D_f gives ΔPPL ≤ 0 in γ ∈ [0.65, 0.85]", status: "confirmed", evidence: "pythia-2.8b ΔPPL=-0.51" }, { id: "F7", claim: "Linear pruning cost: ΔPPL ≈ 0.18 × %Q/K_pruned", status: "confirmed", evidence: "pythia-1b 0.17, 2.8b 0.18" }, { id: "F8", claim: "Padé saturates at [1,1] in LLM regime z<<1", status: "confirmed", evidence: "sage round 4" }, { id: "F9", claim: "RoPE attention is Euclidean fractional (d_eff=1/γ), not hyperbolic", status: "confirmed", evidence: "EXP-METRIC-RoPE sage" }, { id: "F10", claim: "Δγ < -0.1 in models ≥ 400M ⇒ GQA / induction-head dominance", status: "confirmed", evidence: "n=20+ models" }, { id: "F11", claim: "Δγ > +0.3 ⇒ alternating SWA (Gemma family signature)", status: "confirmed", evidence: "Gemma-2-9b Δγ=+0.51" }, { id: "F12", claim: "Mamba L_crit = 45, α = 0.703", status: "confirmed", evidence: "3 seeds" }, { id: "F13", claim: "Phase boundary at γ = 1 (Hagedorn)", status: "confirmed", evidence: "χ → ∞" }, { id: "F14", claim: "RLHF Δγ shift ≤ 0.072 (recipe-specific)", status: "partial", evidence: "n=8 recipe-locked" }, { id: "F15", claim: "R_c boundary at R_c★ ≈ 1.68", status: "refuted", evidence: "overlap zone [0.92, 3.08] n=9" }, { id: "F16", claim: "Holographic pruning: alive bands in ℓ > L_crit ΔPPL ≈ 0", status: "refuted", evidence: "linear cost law instead" }, { id: "F17", claim: "Soft d_horizon decay beats hard in regime d_h ≳ T_train/2", status: "partial", evidence: "n=2/3 (pythia-1b refuted)" }, { id: "F18", claim: "Mittag-Leffler prefactor 1/Γ(1-γ) governs A_0", status: "refuted", evidence: "n=39, ratio 0.23" }, { id: "F19", claim: "γ_Padé predicts γ_obs across-model variance", status: "partial", evidence: "centroid OK, ~0.1% var explained, see §sec:gamma_decomposition" }, { id: "F20", claim: "β-flow exactly equivalent to logistic ODE", status: "confirmed", evidence: "sage symbolic check" }, { id: "F21", claim: "tanh trajectory γ(t)~tanh(log step) on pythia-1b checkpoints", status: "refuted", evidence: "R²=0.15 on 4 checkpoints" }, { id: "F22", claim: "χ(z*) = (5+√17)/4 closed form at Cayley fixed point", status: "confirmed", evidence: "sage symbolic, minimal poly 2y²-5y+1" }, { id: "F23", claim: "T ↔ d_horizon involution: θ_design ∘ γ_Padé = id", status: "confirmed", evidence: "sage symbolic" }, ]; function renderFalsificationDashboard() { const target = $("falsification-table"); if (!target) return; const counts = { confirmed: 0, partial: 0, refuted: 0, untested: 0 }; FALSIFICATION_STATUS.forEach(f => counts[f.status]++); const summary = `

${counts.confirmed} confirmed · ⚠ ${counts.partial} partial · ❌ ${counts.refuted} refuted · ⏳ ${counts.untested} untested (out of ${FALSIFICATION_STATUS.length} total predictions)

`; let table = ``; FALSIFICATION_STATUS.forEach(f => { const icon = ({ confirmed: "✅", partial: "⚠", refuted: "❌", untested: "⏳" })[f.status]; table += ``; }); table += "
IDClaimStatusEvidence
${f.id} ${escapeHtml(f.claim)} ${icon} ${f.status} ${escapeHtml(f.evidence)}
"; target.innerHTML = summary + table; } // ════════════════════════════════════════════════════════════════════ // Browse community submissions (live from GitHub Issues API) // ════════════════════════════════════════════════════════════════════ async function loadCommunityFeed() { const target = $("community-feed"); if (!target) return; try { const resp = await fetch(`https://api.github.com/repos/${REGISTRY_REPO}/issues?state=open&per_page=15&sort=created&direction=desc`); if (!resp.ok) { if (resp.status === 404) { target.innerHTML = `The registry repo isn't created yet. Once ${REGISTRY_REPO} exists with submissions, they'll appear here live.`; return; } throw new Error(`HTTP ${resp.status}`); } const issues = await resp.json(); if (!issues || issues.length === 0) { target.innerHTML = `No submissions yet. Be the first — generate a Profile and click 📤 Submit to registry.`; return; } const html = issues.map(issue => { const verdict = extractVerdictFromTitle(issue.title); const vClass = verdictClass(verdict); const time = relativeTime(new Date(issue.created_at)); return `
${escapeHtml(verdict)} ${escapeHtml(issue.title)} ${time}
`; }).join(""); target.innerHTML = html; } catch (err) { target.innerHTML = `⚠ Couldn't load community feed: ${escapeHtml(err.message)}`; } } function extractVerdictFromTitle(title) { const m = title.match(/→\s*(\S+)/); if (m) return m[1]; if (title.includes("YES")) return "YES"; if (title.includes("NO")) return "NO"; if (title.includes("DEGRADED")) return "DEG"; if (title.includes("Profile")) return "📇"; if (title.includes("Compare")) return "🆚"; return "?"; } function verdictClass(v) { if (v.startsWith("YES") || v === "GO") return "yes"; if (v.startsWith("NO")) return "no"; if (v === "DEG" || v === "DEGRADED") return "deg"; return ""; } function relativeTime(d) { const sec = Math.floor((Date.now() - d.getTime()) / 1000); if (sec < 60) return `${sec}s ago`; if (sec < 3600) return `${Math.floor(sec / 60)}m ago`; if (sec < 86400) return `${Math.floor(sec / 3600)}h ago`; return `${Math.floor(sec / 86400)}d ago`; } // ════════════════════════════════════════════════════════════════════ // PROFILE mode // ════════════════════════════════════════════════════════════════════ $("profile-preset").addEventListener("change", (e) => { if (!e.target.value) return; const modelId = e.target.value; state.lastModelId = modelId; // remember for filename/hash // Preset keys ARE valid HF model ids (e.g. "meta-llama/Llama-3.2-1B"). Auto-fill // the HF id input so the user can also click 📥 Fetch to refresh from HF Hub // without retyping. Status hint clarifies the dual source of truth. if ($("profile-hf-id")) { $("profile-hf-id").value = modelId; if ($("profile-hf-status")) { $("profile-hf-status").textContent = tFmt("profile.preset_loaded", { id: modelId }); } } const proxy = state.pyodide.runPython(`get_preset(${JSON.stringify(modelId)})`); const p = proxy.toJs ? proxy.toJs({ dict_converter: Object.fromEntries }) : proxy; if (!p || Object.keys(p).length === 0) return; $("profile-theta").value = p.theta; $("profile-T_train").value = p.T_train; $("profile-n_attn").value = p.n_attention_heads; $("profile-n_kv").value = p.n_kv_heads; $("profile-d_head").value = p.d_head; $("profile-n_layers").value = p.n_layers; $("profile-n_params").value = p.n_params.toExponential(2); $("profile-has_swa").value = String(p.has_SWA); }); $("profile-fetch-btn").addEventListener("click", async () => { const id = $("profile-hf-id").value.trim(); if (!id) { $("profile-hf-status").textContent = "⚠ Enter a model id"; return; } $("profile-hf-status").textContent = `⏳ Fetching ${id}...`; $("profile-fetch-btn").disabled = true; state.lastModelId = id; // remember for filename/hash try { const cfg = await fetchHfConfig(id); const p = configToPreset(cfg, id); $("profile-theta").value = p.theta; $("profile-T_train").value = p.T_train; $("profile-n_attn").value = p.n_attention_heads; $("profile-n_kv").value = p.n_kv_heads; $("profile-d_head").value = p.d_head; $("profile-n_layers").value = p.n_layers; $("profile-n_params").value = p.n_params.toExponential(2); $("profile-has_swa").value = String(p.has_SWA); $("profile-hf-status").innerHTML = `✅ ${id} (${p._family})`; } catch (err) { $("profile-hf-status").textContent = `❌ ${err.message}`; } finally { $("profile-fetch-btn").disabled = false; } }); $("profile-btn").addEventListener("click", async () => { const params = { theta: parseFloat($("profile-theta").value), T_train: parseInt($("profile-T_train").value), T_eval: parseInt($("profile-T_eval").value), n_attention_heads: parseInt($("profile-n_attn").value), n_kv_heads: parseInt($("profile-n_kv").value), d_head: parseInt($("profile-d_head").value), n_layers: parseInt($("profile-n_layers").value), n_params: parseFloat($("profile-n_params").value), has_SWA: $("profile-has_swa").value === "true", }; setStatus("🧮 Profiling — running all 5 recipes..."); $("profile-btn").disabled = true; try { state.pyodide.globals.set("__pp", state.pyodide.toPy(params)); const json = state.pyodide.runPython(` import json result = profile_model(**__pp) json.dumps(result) `); const profile = JSON.parse(json); renderProfile(profile, params); state.lastResult = { type: "profile", params }; state.lastFullResult = profile; setStatus("✅ Profile ready."); } catch (err) { setStatus(`❌ ${err.message}`); console.error(err); } finally { $("profile-btn").disabled = false; } }); function renderProfile(p, params) { $("profile-output").style.display = "block"; // Hide other outputs $("output-section").style.display = "none"; $("compare-output").style.display = "none"; const verdictClass = (v) => { if (v.startsWith("YES") || v === "GO" || v.startsWith("USE SOFT")) return "v-yes"; if (v.startsWith("NO") || v.startsWith("MEMORY") || v === "TINY-MODEL") return "v-no"; return "v-deg"; }; const verdictEmoji = (v) => verdictClass(v) === "v-yes" ? "✅" : verdictClass(v) === "v-no" ? "❌" : "⚠"; const ms = p.model_summary; const kn = p.key_numbers; const formatN = (x) => x === null || x === undefined ? "n/a" : (typeof x === "number" ? x.toLocaleString(undefined, { maximumFractionDigits: 4 }) : String(x)); const recipesHtml = Object.entries(p.recipes).map(([rid, r]) => `
${escapeHtml(rid)} — ${escapeHtml(r.name)} ${verdictEmoji(r.verdict)} ${escapeHtml(r.verdict)}
${escapeHtml(r.reason || "")}
${r.mitigation && r.mitigation !== "None required." && r.mitigation !== "None — proceed with Chinchilla-optimal recipe." ? `
Action: ${escapeHtml(r.mitigation)}
` : ""}
`).join(""); // Reusable tooltip helper — keeps tooltip pattern uniform across the card const ttip = (key, fallback) => `${fallback}`; const numbersHtml = `
γ_Padé(T_eval) ${ttip("tooltip.gamma_pade", "Closed-form prediction (2−z)/(2+z), z = T√2/θ. Paper §sec:gamma_decomposition.")}${formatN(kn.gamma_pade)}
γ_decomposed ${ttip("tooltip.gamma_decomposed", "γ from full architectural decomposition: Padé baseline + GQA shift + SWA shift + post-IH shift.")}${formatN(kn.gamma_decomposed)}
d_horizon ${ttip("tooltip.d_horizon", "Effective attention horizon at T_eval. Beyond this, attention scores fall below the noise floor (paper §26).")}${formatN(kn.d_horizon)}
L_NIAH ceiling ${ttip("tooltip.L_NIAH", "Predicted ceiling for needle-in-a-haystack retrieval reliability at the current d_horizon.")}${formatN(kn.L_NIAH_ceiling)}
χ susceptibility ${ttip("tooltip.chi", "Susceptibility exponent χ = 1/(1−γ). Diverges at the Hagedorn line γ=1.")}${formatN(kn.chi_susceptibility)}
KV memory @ T_eval (BF16) ${ttip("tooltip.kv_memory", "Per-request KV cache memory at T_eval in BF16 = 2 · n_layers · n_kv_heads · d_head · T_eval bytes.")}${formatN(kn.kv_memory_per_request_GB)} GB
`; const falsHtml = (p.falsification_status || []).map(f => `
${escapeHtml(f.id)} — ${escapeHtml(f.claim)}: ${escapeHtml(f.status)}
` ).join(""); // Per-verdict count breakdown — recipes test orthogonal axes (long-context, // budget, hardware, custom-vs-API, KV-compression). Worst-of-N would conflate // a "use API" recommendation with a long-context failure, so we show counts. const verdictCounts = Object.values(p.recipes).reduce((acc, r) => { const c = verdictClass(r.verdict); acc[c] = (acc[c] || 0) + 1; return acc; }, {}); const nYes = verdictCounts["v-yes"] || 0; const nDeg = verdictCounts["v-deg"] || 0; const nNo = verdictCounts["v-no"] || 0; const breakdownCls = nNo ? "v-no" : nDeg ? "v-deg" : "v-yes"; const gammaForPill = kn.gamma_decomposed ?? kn.gamma_pade; const recipeCount = Object.keys(p.recipes).length; $("profile-box").innerHTML = `
${escapeHtml(ms.architecture_class)}
n_params=${formatN(ms.n_params)} · T_train=${ms.T_train} · T_eval=${ms.T_eval} · θ=${formatN(ms.rope_theta)} · ${ms.has_GQA ? "GQA" : "MHA"}${ms.has_SWA ? " + SWA" : ""}
✅ ${nYes} · ⚠ ${nDeg} · ❌ ${nNo} ${ttip("tooltip.verdict_breakdown", "Per-recipe breakdown across the orthogonal axes (long-context, budget, hardware, custom-vs-API, KV-compression). Recipes are independent decisions — a ❌ on X-1 means \"use API\" not \"model fails\". Open the Recipes section for per-axis verdict.")} ${gammaForPill !== null && gammaForPill !== undefined ? `γ = ${formatN(gammaForPill)} ${ttip("tooltip.gamma_pill", "γ_decomposed (full architectural decomposition) or γ_Padé as fallback. Range (0,1) = Phase A (anti-Ising). γ ≥ 1 = Hagedorn / Phase B.")}` : ''} ${gammaForPill > 0 && gammaForPill < 1 ? `🧲 Anti-Ising (β=γ−1<0, machine-verified) ${ttip("tooltip.anti_ising", "Phase A class: β = γ−1 < 0 (anti-Ising). Machine-verified by Sage Groebner basis + Lean Mathlib4. See §35 v0.5.")} ${badgesForUiBinding("anti_ising_pill")}` : ''}
📋 Recipes — verdict per dimension ${recipeCount} ${t("tafcard.recipes_count_label", "dimensions")}
${recipesHtml}
🔬 Diagnostics — numbers + γ check + what-if

🔢 Key numbers (paper §26)

${numbersHtml}

🔍 γ predicted vs observed

Enter your empirically measured γ. Tool detects regime: fraud (θ inflated) / compressed / over-Padé / SWA-random / normal.

🎚️ What-if explorer

✓ Verification — Lean + Sage + falsification

📑 Lean+Mathlib theorem table

Every entry below is machine-proven against Lean 4 + Mathlib4. Click any L# link to jump to the source line on GitHub. The table is grouped by topic; click a header to expand.

🔬 Algebraic consistency (Sage + Lean v0.5)

Verifies 12 D-SAGE algebraic identities of TAF critical exponents (machine-proof Sage Groebner basis + Lean Mathlib4). Pass = framework intact. Fail = bf16 outlier / quantization artifact.
${badgesForUiBinding("algebraic_consistency_check")}

🔬 Falsification status (F1-F23)

${falsHtml || '
No falsifications applicable.
'}
📂 Provenance & share
🔬 v0.5.3 — Calibration audit (2026-05-02)
`; // Render the what-if slider for interactive exploration renderWhatIfSlider(p, params, $("whatif-container")); // Render Lean+Mathlib theorem table (graceful no-op if manifest missed). // Loaded async at bootstrap; if Profile clicked before fetch resolves we // wait once and then render. const renderLeanTable = () => { const host = $("lean-table-host"); if (!host) return; if (getManifest()) { host.innerHTML = renderTheoremTable(); if (window.__taf_applyTranslations) window.__taf_applyTranslations(); } else { host.innerHTML = `
Loading Lean manifest…
`; loadLeanManifest() .then(() => { host.innerHTML = renderTheoremTable(); if (window.__taf_applyTranslations) window.__taf_applyTranslations(); }) .catch(err => { host.innerHTML = `
Lean manifest unavailable: ${err.message}
`; }); } }; renderLeanTable(); // Re-apply translations to dynamically inserted buttons if (window.__taf_applyTranslations) window.__taf_applyTranslations(); // Wire share/download/submit buttons $("profile-share-btn").addEventListener("click", () => copyShareLink("profile", params)); $("profile-download-btn").addEventListener("click", async () => { const filename = await makeFilename("profile", p); const data = await exportableData("profile", p); downloadJSON(filename, data); $("profile-share-status").textContent = `✅ Downloaded ${filename}`; setTimeout(() => $("profile-share-status").textContent = "", 5000); }); $("profile-download-md-btn").addEventListener("click", async () => { const hash = await inputHash("profile", p); const base = (await makeFilename("profile", p)).replace(/\.json$/, ""); downloadText(`${base}.md`, profileToMarkdown(p, hash), "text/markdown;charset=utf-8"); $("profile-share-status").textContent = `✅ Downloaded ${base}.md`; setTimeout(() => $("profile-share-status").textContent = "", 5000); }); $("profile-download-tex-btn").addEventListener("click", async () => { const hash = await inputHash("profile", p); const base = (await makeFilename("profile", p)).replace(/\.json$/, ""); downloadText(`${base}.tex`, profileToLatex(p, hash), "application/x-tex;charset=utf-8"); $("profile-share-status").textContent = `✅ Downloaded ${base}.tex`; setTimeout(() => $("profile-share-status").textContent = "", 5000); }); $("profile-submit-btn").addEventListener("click", async () => { await submitToRegistry("profile", p, $("profile-share-status")); setTimeout(() => $("profile-share-status").textContent = "", 8000); }); // v0.6: γ predicted-vs-observed panel — interactive const updateGammaCheck = () => { const gObs = parseFloat($("gc-gobs").value); const isRandom = $("gc-random").value === "true"; const r = gammaCheckAll({ theta: params.theta, T: params.T_eval, gObs, isRandom }); const meta = REGIME_META[r.regime] || REGIME_META.unknown; const fmt = (x, d=4) => (x === null || x === undefined || Number.isNaN(x)) ? "n/a" : (!Number.isFinite(x) ? "∞" : Number(x).toLocaleString(undefined, { maximumFractionDigits: d })); $("gamma-check-results").innerHTML = `
γ_Padé(T_eval) ${ttip("tooltip.gamma_pade", "Closed-form prediction (2−z)/(2+z), z = T√2/θ.")}${fmt(r.gammaPade)}
θ_eff (observed) ${ttip("tooltip.theta_eff_obs", "Effective θ implied by your γ_observed: T√2 / (1 − γ_obs).")}${fmt(r.thetaEffObs, 1)}
θ_eff (Padé) ${ttip("tooltip.theta_eff_pade", "Effective θ predicted by closed-form: θ + T/√2.")}${fmt(r.thetaEffPade, 1)}
η = θ_eff_obs / θ_eff_Padé ${ttip("tooltip.efficiency", "Efficiency ratio. ≈1 = normal · <0.01 = fraud · <0.5 = compressed · >1.5 = over-Padé.")}${fmt(r.efficiency)}
ΔH_Cardy = log(θ_eff_obs / θ_nominal) ${ttip("tooltip.delta_h_cardy", "Cardy entropy shift. Negative = compression entropy. ~0 = nominal match.")}${fmt(r.deltaHCardy)}
Regime ${meta.emoji} ${r.regime}
ⓘ What do these mean?
`; if (window.__taf_applyTranslations) window.__taf_applyTranslations(); }; $("gc-gobs").addEventListener("input", updateGammaCheck); $("gc-random").addEventListener("change", updateGammaCheck); updateGammaCheck(); // v0.5.1: Algebraic consistency check button $("verify-consistency-btn").addEventListener("click", () => { const gammaVal = kn.gamma_decomposed ?? kn.gamma_pade; if (gammaVal === null || gammaVal === undefined) { $("consistency-result").innerHTML = `
⚠ No γ value available for verification.
`; return; } if (gammaVal <= 0 || gammaVal >= 1) { $("consistency-result").innerHTML = `
γ = ${gammaVal.toFixed(4)} out of Phase A — verification requires γ ∈ (0, 1). ${gammaVal >= 1 ? "Hagedorn boundary reached." : "Phase B / negative regime."}
`; return; } try { const json = state.pyodide.runPython(` import json result = verify_algebraic_consistency(${gammaVal}) json.dumps(result) `); const r = JSON.parse(json); const passed = r.n_checks_passed; const total = r.n_checks_total; const allOk = r.all_consistent; const tooltipText = (id) => { const key = `v05.tooltip.${id.replace(/[^a-zA-Z0-9]/g, '_')}`; const tip = t(key); return (tip === key) ? '' : tip; }; const checksRows = Object.entries(r.checks).map(([id, c]) => { const tip = tooltipText(id); return `
${escapeHtml(id)}: ${escapeHtml(c.claim)} ${c.passes ? "✓" : "✗"}
`; }).join(""); $("consistency-result").innerHTML = `
${allOk ? "✅" : "❌"} ${passed}/${total} D-SAGE identities ${allOk ? "consistent" : "FAILED"}
${escapeHtml(r.interpretation)}
Verified by: ${escapeHtml(r.framework_verified_by)}
🔍 Per-identity details (${total} checks)
${checksRows}
`; } catch (err) { $("consistency-result").innerHTML = `
❌ Error: ${escapeHtml(err.message || String(err))}
`; console.error(err); } }); } // ════════════════════════════════════════════════════════════════════ // COMPARE mode // ════════════════════════════════════════════════════════════════════ $("compare-recipe").addEventListener("change", () => { $("compare-btn").disabled = !$("compare-recipe").value; }); document.querySelectorAll(".compare-preset").forEach(sel => { sel.addEventListener("change", (e) => { const slot = e.target.closest(".compare-slot"); if (e.target.value) { slot.querySelector(".compare-hf-id").value = e.target.value; } }); }); $("compare-btn").addEventListener("click", async () => { const recipeId = $("compare-recipe").value; if (!recipeId) { alert("Pick a recipe first."); return; } const T_eval = parseInt($("compare-T_eval").value); const slots = document.querySelectorAll(".compare-slot"); const specs = []; setStatus("⏳ Fetching configs for compared models..."); $("compare-btn").disabled = true; for (const slot of slots) { const id = slot.querySelector(".compare-hf-id").value.trim(); if (!id) continue; try { let preset = null; const presetProxy = state.pyodide.runPython(`get_preset(${JSON.stringify(id)})`); const p = presetProxy.toJs ? presetProxy.toJs({ dict_converter: Object.fromEntries }) : presetProxy; if (p && Object.keys(p).length > 0) { preset = p; } else { const cfg = await fetchHfConfig(id); preset = configToPreset(cfg, id); } specs.push({ ...preset, label: id.split("/").pop() }); } catch (err) { console.error("compare fetch fail for", id, err); setStatus(`⚠ Skipped ${id}: ${err.message}`); } } if (specs.length < 2) { setStatus("❌ Need at least 2 models to compare."); $("compare-btn").disabled = false; return; } setStatus(`🧮 Comparing ${specs.length} models on ${recipeId}...`); try { state.pyodide.globals.set("__cspecs", state.pyodide.toPy(specs)); state.pyodide.globals.set("__crid", recipeId); state.pyodide.globals.set("__cshared", state.pyodide.toPy({ T_eval })); const json = state.pyodide.runPython(` import json result = compare_models(__cspecs.to_py(), __crid, __cshared.to_py()) json.dumps(result) `); const cmp = JSON.parse(json); renderCompare(cmp); state.lastResult = { type: "compare", recipeId, T_eval, specs }; state.lastFullResult = cmp; setStatus("✅ Comparison ready."); } catch (err) { setStatus(`❌ ${err.message}`); console.error(err); } finally { $("compare-btn").disabled = false; } }); function renderCompare(cmp) { $("compare-output").style.display = "block"; $("output-section").style.display = "none"; $("profile-output").style.display = "none"; const verdictClass = (v) => { if (v.startsWith("YES") || v === "GO" || v.startsWith("USE SOFT")) return "v-yes"; if (v.startsWith("NO") || v.startsWith("MEMORY")) return "v-no"; return "v-deg"; }; // Collect all unique key_numbers across rows const allKeys = new Set(); cmp.rows.forEach(r => Object.keys(r.key_numbers || {}).forEach(k => allKeys.add(k))); let html = `

Recipe: ${escapeHtml(cmp.recipe_id)} — ${escapeHtml(cmp.recipe_name)}

Shared params: ${escapeHtml(JSON.stringify(cmp.shared_params))}

`; allKeys.forEach(k => html += ``); html += ""; cmp.rows.forEach(r => { const cls = verdictClass(r.verdict); html += ``; html += ``; html += ``; allKeys.forEach(k => { const v = r.key_numbers ? r.key_numbers[k] : null; html += ``; }); html += ""; }); html += `
ModelVerdictReason${escapeHtml(k)}
${escapeHtml(r.label)}${escapeHtml(r.verdict)}${escapeHtml(r.reason)}${v === undefined || v === null ? "—" : (typeof v === "number" ? v.toLocaleString(undefined, { maximumFractionDigits: 2 }) : escapeHtml(String(v)))}
`; $("compare-box").innerHTML = html; if (window.__taf_applyTranslations) window.__taf_applyTranslations(); $("compare-share-btn").addEventListener("click", () => { const params = { recipeId: cmp.recipe_id, T_eval: cmp.shared_params.T_eval, models: cmp.rows.map(r => r.label) }; copyShareLink("compare", params); }); $("compare-download-btn").addEventListener("click", async () => { const filename = await makeFilename("compare", cmp); const data = await exportableData("compare", cmp); downloadJSON(filename, data); $("compare-share-status").textContent = `✅ Downloaded ${filename}`; setTimeout(() => $("compare-share-status").textContent = "", 5000); }); $("compare-download-md-btn").addEventListener("click", async () => { const hash = await inputHash("compare", cmp); const base = (await makeFilename("compare", cmp)).replace(/\.json$/, ""); downloadText(`${base}.md`, compareToMarkdown(cmp, hash), "text/markdown;charset=utf-8"); $("compare-share-status").textContent = `✅ Downloaded ${base}.md`; setTimeout(() => $("compare-share-status").textContent = "", 5000); }); $("compare-download-tex-btn").addEventListener("click", async () => { const hash = await inputHash("compare", cmp); const base = (await makeFilename("compare", cmp)).replace(/\.json$/, ""); downloadText(`${base}.tex`, compareToLatex(cmp, hash), "application/x-tex;charset=utf-8"); $("compare-share-status").textContent = `✅ Downloaded ${base}.tex`; setTimeout(() => $("compare-share-status").textContent = "", 5000); }); $("compare-submit-btn").addEventListener("click", async () => { await submitToRegistry("compare", cmp, $("compare-share-status")); setTimeout(() => $("compare-share-status").textContent = "", 8000); }); } // ════════════════════════════════════════════════════════════════════ // SHARE — encode current state to URL // ════════════════════════════════════════════════════════════════════ function copyShareLink(mode, params) { const url = new URL(window.location.href.split("?")[0]); url.searchParams.set("mode", mode); url.searchParams.set("p", btoa(JSON.stringify(params))); navigator.clipboard.writeText(url.toString()).then( () => { const tgt = $("share-status") || $("profile-share-status") || $("compare-share-status"); if (tgt) { tgt.textContent = "✅ Copied to clipboard!"; setTimeout(() => tgt.textContent = "", 3000); } }, () => alert("Copy failed. Manually copy: " + url.toString()) ); } function parseUrlState() { const params = new URLSearchParams(window.location.search); const mode = params.get("mode"); const pData = params.get("p"); if (!mode || !pData) return; try { const decoded = JSON.parse(atob(pData)); // Switch to right mode tab const btn = document.querySelector(`.mode-btn[data-mode="${mode}"]`); if (btn) btn.click(); // Wait a tick for tab to render setTimeout(() => { if (mode === "profile") { Object.entries(decoded).forEach(([k, v]) => { const map = { theta: "profile-theta", T_train: "profile-T_train", T_eval: "profile-T_eval", n_attention_heads: "profile-n_attn", n_kv_heads: "profile-n_kv", d_head: "profile-d_head", n_layers: "profile-n_layers", n_params: "profile-n_params", has_SWA: "profile-has_swa" }; const id = map[k]; if (id && $(id)) $(id).value = String(v); }); setTimeout(() => $("profile-btn").click(), 200); } // Other modes: future }, 200); } catch (e) { console.warn("Failed to parse URL state:", e); } } // Wire single-recipe share/download/submit buttons $("share-btn").addEventListener("click", () => { if (!state.lastResult) return; copyShareLink(state.lastResult.type || "recipe", state.lastResult.params || {}); }); $("recipe-download-btn").addEventListener("click", async () => { if (!state.lastFullResult) return; const filename = await makeFilename("recipe", state.lastFullResult); const data = await exportableData("recipe", state.lastFullResult); downloadJSON(filename, data); $("share-status").textContent = `✅ Downloaded ${filename}`; setTimeout(() => $("share-status").textContent = "", 5000); }); $("recipe-download-md-btn").addEventListener("click", async () => { if (!state.lastFullResult) return; const r = state.lastFullResult; const hash = await inputHash("recipe", r); const base = (await makeFilename("recipe", r)).replace(/\.json$/, ""); downloadText(`${base}.md`, recipeToMarkdown(r, hash), "text/markdown;charset=utf-8"); $("share-status").textContent = `✅ Downloaded ${base}.md`; setTimeout(() => $("share-status").textContent = "", 5000); }); $("recipe-download-tex-btn").addEventListener("click", async () => { if (!state.lastFullResult) return; const r = state.lastFullResult; const hash = await inputHash("recipe", r); const base = (await makeFilename("recipe", r)).replace(/\.json$/, ""); downloadText(`${base}.tex`, recipeToLatex(r, hash), "application/x-tex;charset=utf-8"); $("share-status").textContent = `✅ Downloaded ${base}.tex`; setTimeout(() => $("share-status").textContent = "", 5000); }); $("recipe-submit-btn").addEventListener("click", async () => { if (!state.lastFullResult) return; await submitToRegistry("recipe", state.lastFullResult, $("share-status")); setTimeout(() => $("share-status").textContent = "", 8000); }); // ════════════════════════════════════════════════════════════════════ // Help modal // ════════════════════════════════════════════════════════════════════ // a11y: focus trap + restore + Esc handling, generalized to any modal that follows // the [role="dialog"] + .open pattern. Each call to wireModal() returns { open, close } // and registers the modal so the global keyboard handler can find the active one. const __modalCloseFns = new Map(); function wireModal(modalId, btnId, closeId) { const modal = $(modalId); if (!modal) return null; let returnFocus = null; const open = () => { returnFocus = document.activeElement; modal.classList.add("open"); modal.setAttribute("aria-hidden", "false"); setTimeout(() => $(closeId)?.focus(), 0); }; const close = () => { modal.classList.remove("open"); modal.setAttribute("aria-hidden", "true"); if (returnFocus && typeof returnFocus.focus === "function") returnFocus.focus(); returnFocus = null; }; $(btnId)?.addEventListener("click", open); $(closeId)?.addEventListener("click", close); modal.addEventListener("click", (e) => { if (e.target.id === modalId) close(); }); __modalCloseFns.set(modalId, close); return { open, close }; } wireModal("help-modal", "help-btn", "help-close"); wireModal("quickstart-modal", "quickstart-btn", "quickstart-close"); wireModal("inventory-modal", "inventory-btn", "inventory-close"); // Quick-start modal "↓ Start now" link should also close the modal so user lands on mode-section. $("qs-start-link")?.addEventListener("click", () => __modalCloseFns.get("quickstart-modal")?.()); // Esc closes whichever modal is open; Tab cycles within it. document.addEventListener("keydown", (e) => { const openModal = document.querySelector('[role="dialog"].open'); if (!openModal) return; if (e.key === "Escape") { e.preventDefault(); __modalCloseFns.get(openModal.id)?.(); return; } if (e.key !== "Tab") return; const focusables = openModal.querySelectorAll( 'a[href], button:not([disabled]), input:not([disabled]), select:not([disabled]), textarea:not([disabled]), [tabindex]:not([tabindex="-1"])' ); if (!focusables.length) return; const first = focusables[0]; const last = focusables[focusables.length - 1]; if (e.shiftKey && document.activeElement === first) { e.preventDefault(); last.focus(); } else if (!e.shiftKey && document.activeElement === last) { e.preventDefault(); first.focus(); } }); // ════════════════════════════════════════════════════════════════════ // SHARING — Download / Upload / Submit to registry // ════════════════════════════════════════════════════════════════════ const REGISTRY_REPO = "karlesmarin/tafagent-registry"; function downloadJSON(filename, data) { const blob = new Blob([JSON.stringify(data, null, 2)], { type: "application/json" }); const url = URL.createObjectURL(blob); const a = document.createElement("a"); a.href = url; a.download = filename; document.body.appendChild(a); a.click(); setTimeout(() => { document.body.removeChild(a); URL.revokeObjectURL(url); }, 100); } function downloadText(filename, text, mime = "text/plain;charset=utf-8") { const blob = new Blob([text], { type: mime }); const url = URL.createObjectURL(blob); const a = document.createElement("a"); a.href = url; a.download = filename; document.body.appendChild(a); a.click(); setTimeout(() => { document.body.removeChild(a); URL.revokeObjectURL(url); }, 100); } // LaTeX-escape a plain string for inclusion in a tabular cell. function latexEscape(s) { return String(s ?? "") .replace(/\\/g, "\\textbackslash{}") .replace(/[#$%&_{}]/g, m => "\\" + m) .replace(/~/g, "\\textasciitilde{}") .replace(/\^/g, "\\textasciicircum{}") .replace(//g, "\\textgreater{}"); } function profileToLatex(p, hash = "") { const ms = p.model_summary || {}; const kn = p.key_numbers || {}; let tex = `% TAF Profile — auto-generated by TAF Agent\n`; if (hash) tex += `% input hash: #${hash}\n`; tex += `\\begin{table}[ht]\n\\centering\n`; tex += `\\caption{TAF Profile — ${latexEscape(ms.architecture_class || "?")}${hash ? ` (\\#${latexEscape(hash)})` : ""}}\n`; tex += `\\begin{tabular}{lll}\n\\toprule\nRecipe & Verdict & Reason \\\\\n\\midrule\n`; Object.entries(p.recipes || {}).forEach(([rid, r]) => { tex += `${latexEscape(rid)} & ${latexEscape(r.verdict || "")} & ${latexEscape((r.reason || "").slice(0, 80))} \\\\\n`; }); tex += `\\bottomrule\n\\end{tabular}\n\\end{table}\n\n`; tex += `% Key numbers (JSON):\n`; for (const [k, v] of Object.entries(kn)) { tex += `% ${k} = ${typeof v === "object" ? JSON.stringify(v) : v}\n`; } return tex; } function compareToLatex(c, hash = "") { let tex = `% TAF Comparison — ${c.recipe_id} (${c.recipe_name})\n`; if (hash) tex += `% input hash: #${hash}\n`; tex += `\\begin{table}[ht]\n\\centering\n`; tex += `\\caption{TAF Comparison — ${latexEscape(c.recipe_id)} ${latexEscape(c.recipe_name || "")}${hash ? ` (\\#${latexEscape(hash)})` : ""}}\n`; tex += `\\begin{tabular}{lll}\n\\toprule\nModel & Verdict & Reason \\\\\n\\midrule\n`; c.rows.forEach(r => { tex += `${latexEscape(r.label)} & ${latexEscape(r.verdict)} & ${latexEscape((r.reason || "").slice(0, 80))} \\\\\n`; }); tex += `\\bottomrule\n\\end{tabular}\n\\end{table}\n`; return tex; } function recipeToLatex(r, hash = "") { let tex = `% TAF Recipe ${r.recipe_id} — ${r.recipe_name}\n`; if (hash) tex += `% input hash: #${hash}\n`; tex += `\\begin{table}[ht]\n\\centering\n`; tex += `\\caption{TAF Recipe \\texttt{${latexEscape(r.recipe_id)}} — verdict: ${latexEscape(r.verdict)}${hash ? ` (\\#${latexEscape(hash)})` : ""}}\n`; tex += `\\begin{tabular}{rll}\n\\toprule\nStep & Formula & Result \\\\\n\\midrule\n`; (r.chain || []).forEach(s => { tex += `${latexEscape(s.step)} & \\texttt{${latexEscape(s.formula || "")}} & ${latexEscape(formatResultPlain(s.result))} \\\\\n`; }); tex += `\\bottomrule\n\\end{tabular}\n\\end{table}\n\n`; tex += `% Reason: ${latexEscape(r.reason || "")}\n`; if (r.mitigation) tex += `% Mitigation: ${latexEscape(r.mitigation)}\n`; return tex; } // Sort object keys recursively for deterministic JSON function sortKeys(o) { if (Array.isArray(o)) return o.map(sortKeys); if (o && typeof o === "object") { return Object.keys(o).sort().reduce((acc, k) => { acc[k] = sortKeys(o[k]); return acc; }, {}); } return o; } // Compute 8-char hex hash of canonical inputs. // Identical inputs → identical hash (forever). Different inputs → different hash. async function inputHash(type, data) { let canonical; if (type === "profile") { const ms = data.model_summary || data; canonical = sortKeys({ type: "profile", theta: ms.rope_theta ?? ms.theta, T_train: ms.T_train, T_eval: ms.T_eval, n_attn: ms.n_attention_heads ?? ms.n_attn, n_kv: ms.n_kv_heads ?? ms.n_kv, d_head: ms.d_head, n_layers: ms.n_layers, n_params: ms.n_params, has_SWA: ms.has_SWA, }); } else if (type === "compare") { canonical = sortKeys({ type: "compare", recipe: data.recipe_id, T_eval: (data.shared_params || {}).T_eval, models: (data.rows || []).map(r => r.label).sort(), }); } else { canonical = sortKeys({ type: "recipe", recipe: data.recipe_id, inputs: data.inputs || {}, }); } const text = JSON.stringify(canonical); const buf = new TextEncoder().encode(text); const hashBuf = await crypto.subtle.digest("SHA-256", buf); return Array.from(new Uint8Array(hashBuf)).slice(0, 4) .map(b => b.toString(16).padStart(2, "0")).join(""); } function safeFilename(s) { return String(s).replace(/[/\\?%*:|"<>]/g, "-").replace(/^-+|-+$/g, "").slice(0, 60); } function modelShortName(data, fallback="model") { // Try to get from various places if (state.lastModelId) return safeFilename(state.lastModelId); if (data && data.model_summary) { const ms = data.model_summary; return safeFilename(`m${ms.n_params || 0}-θ${ms.rope_theta || 0}`); } if (data && data.inputs) { const i = data.inputs; return safeFilename(`m${i.n_params || ""}-θ${i.theta || ""}`); } return fallback; } async function exportableData(type, data) { const hash = await inputHash(type, data); return { _taf_export: true, _taf_type: type, _taf_version: "0.2", _taf_input_hash: hash, // identical inputs ⇒ identical hash _taf_timestamp: new Date().toISOString(), payload: data, }; } async function makeFilename(type, data) { const hash = await inputHash(type, data); const name = modelShortName(data); let suffix; if (type === "profile" && data.model_summary?.T_eval) suffix = `T${data.model_summary.T_eval}`; else if (type === "compare" && data.shared_params?.T_eval) suffix = `T${data.shared_params.T_eval}`; else if (type === "recipe" && data.inputs?.T_eval) suffix = `T${data.inputs.T_eval}`; else suffix = data.recipe_id || "result"; return `taf-${type}-${name}-${suffix}-${hash}.json`; } // v0.6 privacy fix: previously placed full JSON body in URL params → GH server logs + // referer headers captured user data. Now copy body to clipboard, open issue page // with title only, user pastes body manually. Title is non-sensitive (model name + // hash). On clipboard failure, fall back to console log so user can grab body. async function submitToRegistry(type, data, statusEl) { const hash = await inputHash(type, data); const modelName = modelShortName(data, "model"); let title, body; if (type === "profile") { const ms = data.model_summary || {}; title = `[TAF Profile] ${modelName} @ T=${ms.T_eval || "?"} #${hash}`; body = profileToMarkdown(data, hash); } else if (type === "compare") { title = `[TAF Compare] ${data.recipe_id} × ${data.rows.length} models #${hash}`; body = compareToMarkdown(data, hash); } else { title = `[TAF ${data.recipe_id}] ${modelName} → ${data.verdict} #${hash}`; body = recipeToMarkdown(data, hash); } const dedupNote = `\n\n> **Input hash**: \`#${hash}\` — search this hash in registry issues to find independent verifications. Same inputs always produce the same hash.`; const fullBody = body + dedupNote + "\n\n---\n*Submitted via [TAF Agent](https://karlesmarin.github.io/tafagent)*"; let clipboardOk = false; try { await navigator.clipboard.writeText(fullBody); clipboardOk = true; } catch (e) { console.warn("Clipboard write failed; body logged below:", e); console.log("[TAF Agent] Issue body to paste:\n\n" + fullBody); } // Title-only URL — body intentionally omitted to avoid leaking via GH server logs / referer. const params = new URLSearchParams({ title }); window.open(`https://github.com/${REGISTRY_REPO}/issues/new?${params.toString()}`, "_blank"); if (statusEl) { statusEl.textContent = clipboardOk ? (t("share.submit_clip_ok") || "↗ Opened GitHub. Body copied to clipboard — paste it into the issue body.") : (t("share.submit_clip_fail") || "↗ Opened GitHub. Clipboard blocked — body logged in browser console (F12)."); } } function profileToMarkdown(p, hash="") { const ms = p.model_summary || {}; const kn = p.key_numbers || {}; let md = `## TAF Profile`; if (hash) md += ` \`#${hash}\``; md += `\n\n`; md += `**Architecture**: ${ms.architecture_class || "?"}\n`; md += `**Params**: ${ms.n_params}, **T_train**: ${ms.T_train}, **T_eval**: ${ms.T_eval}\n`; md += `**θ**: ${ms.rope_theta}, GQA=${ms.has_GQA}, SWA=${ms.has_SWA}\n\n`; md += `### Recipes\n\n`; Object.entries(p.recipes || {}).forEach(([rid, r]) => { md += `- **${rid}** (${r.name || ""}): ${r.verdict} — ${r.reason}\n`; }); md += `\n### Key numbers\n\n\`\`\`json\n${JSON.stringify(kn, null, 2)}\n\`\`\`\n`; md += `\n### Full data\n\n
Click to expand\n\n\`\`\`json\n${JSON.stringify(p, null, 2)}\n\`\`\`\n\n
\n`; return md; } function compareToMarkdown(c, hash="") { let md = `## TAF Comparison — ${c.recipe_id} (${c.recipe_name})`; if (hash) md += ` \`#${hash}\``; md += `\n\n`; md += `**Shared params**: \`${JSON.stringify(c.shared_params)}\`\n\n`; md += `| Model | Verdict | Reason |\n|-------|---------|--------|\n`; c.rows.forEach(r => { md += `| ${r.label} | ${r.verdict} | ${r.reason.slice(0, 80)}${r.reason.length > 80 ? "..." : ""} |\n`; }); md += `\n
Full data\n\n\`\`\`json\n${JSON.stringify(c, null, 2)}\n\`\`\`\n\n
\n`; return md; } function recipeToMarkdown(r, hash="") { let md = `## TAF Recipe ${r.recipe_id} — ${r.recipe_name}`; if (hash) md += ` \`#${hash}\``; md += `\n\n`; md += `**Verdict**: ${r.verdict}\n`; md += `**Reason**: ${r.reason}\n`; if (r.mitigation) md += `**Action**: ${r.mitigation}\n`; md += `\n### Inputs\n\n\`\`\`json\n${JSON.stringify(r.inputs, null, 2)}\n\`\`\`\n`; md += `\n### Computation chain\n\n`; (r.chain || []).forEach(s => { md += `**Step ${s.step} ${s.section}** — ${s.name}: \`${s.formula}\` → ${formatResultPlain(s.result)}\n`; }); md += `\n
Full data\n\n\`\`\`json\n${JSON.stringify(r, null, 2)}\n\`\`\`\n\n
\n`; return md; } function importJSON(file, statusEl) { const reader = new FileReader(); reader.onload = (e) => { try { const data = JSON.parse(e.target.result); if (!data._taf_export) { statusEl.innerHTML = "❌ Not a TAF export file (missing _taf_export marker)."; return; } const type = data._taf_type; const payload = data.payload; if (type === "profile") { renderProfile(payload, payload.model_summary || {}); statusEl.innerHTML = `✅ Profile loaded (${data._taf_timestamp || "?"})`; } else if (type === "compare") { renderCompare(payload); statusEl.innerHTML = `✅ Comparison loaded (${data._taf_timestamp || "?"})`; } else if (type === "recipe") { renderResult(payload); $("output-section").style.display = "block"; statusEl.innerHTML = `✅ Recipe result loaded (${data._taf_timestamp || "?"})`; } else { statusEl.innerHTML = `❌ Unknown TAF type: ${type}`; } } catch (err) { statusEl.innerHTML = `❌ Failed to parse JSON: ${err.message}`; } }; reader.readAsText(file); } // Wire import button (always available) document.addEventListener("DOMContentLoaded", () => { const importBtn = document.getElementById("import-btn"); const importFile = document.getElementById("import-file"); if (importBtn && importFile) { importBtn.addEventListener("click", () => importFile.click()); importFile.addEventListener("change", (e) => { const file = e.target.files[0]; if (file) importJSON(file, document.getElementById("import-status")); }); } // Lean+Mathlib manifest — load in parallel with everything else; badges // appear once it resolves, but app stays usable if it fails. loadLeanManifest().catch(err => console.warn("Lean manifest unavailable:", err)); }); // ════════════════════════════════════════════════════════════════════ // Language switcher // ════════════════════════════════════════════════════════════════════ document.querySelectorAll(".lang-btn").forEach(btn => { btn.addEventListener("click", () => setLang(btn.dataset.lang)); }); // ════════════════════════════════════════════════════════════════════ // 📈 Benchmark Saturation Detector (v0.8.0 anti-bullshit pack #6) // ════════════════════════════════════════════════════════════════════ const SATURATION_VERDICT_COLOR = { saturated: "#f85149", near_saturated: "#d29922", discriminative: "#3fb950", sparse_data: "#8b949e", unknown_benchmark: "#8b949e", }; let __saturationInited = false; async function initSaturation() { if (__saturationInited) return; __saturationInited = true; try { await loadSaturationKB(); } catch (e) { $("saturation-status").textContent = (t("saturation.status.kb_fail") || "⚠ Could not load saturation KB.") + " " + (e.message || e); return; } const sel = $("saturation-select"); if (sel) { sel.innerHTML = ""; const allOpt = document.createElement("option"); allOpt.value = "__all__"; allOpt.textContent = t("saturation.select.all") || "— show all benchmarks —"; sel.appendChild(allOpt); listBenchmarks().forEach(name => { const opt = document.createElement("option"); opt.value = name; opt.textContent = name; sel.appendChild(opt); }); } // Try live fetch in the background; results that come back update _liveData. // If CORS / network fails the tool transparently uses the baked snapshot. tryFetchLive().then(live => { if (live) { $("saturation-status").textContent = tFmt("saturation.status.live", { count: live.model_count || (live.models?.length ?? 0) }); } else { $("saturation-status").textContent = t("saturation.status.baked") || "ℹ Using baked snapshot (live fetch unavailable)."; } }); } function renderSaturationCard(result) { if (result.code === "unknown_benchmark") { return `
${t("saturation.unknown") || "Unknown benchmark."}
`; } const color = SATURATION_VERDICT_COLOR[result.code] || "#8b949e"; const verdictLabel = t(`saturation.verdict.${result.code}`) || result.code; const top3Rows = (result.top3 || []) .filter(x => typeof x.score === "number") .map((x, i) => `${i + 1}${x.model}${x.score.toFixed(1)}`) .join(""); const recoItems = (result.recommendations || []) .map(r => `
  • ${r}
  • `) .join(""); const borderlineNote = result.borderline ? `

    ⚠ ${t("saturation.borderline") || "Borderline — within ±1pp of a threshold cutoff. Treat verdict as 'check carefully'."}

    ` : ""; const sourceTag = result.source === "live" ? `live` : (result.source === "baked_consensus" ? `consensus` : `baked`); const spreadStr = result.params.spread != null ? `${result.params.spread.toFixed(1)} pp` : "n/a"; const meanStr = result.params.mean != null ? `${result.params.mean.toFixed(1)}%` : "n/a"; return `
    ${result.params.name} — ${verdictLabel} ${sourceTag}
    ${t("saturation.col.spread") || "Top-3 spread"}${spreadStr}
    ${t("saturation.col.mean") || "Top-3 mean"}${meanStr}
    ${t("saturation.col.n") || "Models"}${result.params.n || 0}
    ${borderlineNote}
    ${top3Rows ? `
    ${t("saturation.section.top3") || "Top-3 frontier scores"} ${top3Rows}
    # ${t("saturation.col.model") || "Model"} ${t("saturation.col.score") || "Score"}
    ` : ""} ${recoItems ? `
    ${t("saturation.section.recommendations") || "Recommended alternatives"}
      ${recoItems}
    ` : ""} ${result.note ? `
    ${t("saturation.section.note") || "Notes"}

    ${result.note}

    ` : ""}
    `; } function renderSaturationAll(results) { const rows = results.map(r => { if (r.code === "unknown_benchmark") return ""; const color = SATURATION_VERDICT_COLOR[r.code] || "#8b949e"; const verdictLabel = t(`saturation.verdict.${r.code}`) || r.code; const spread = r.params.spread != null ? r.params.spread.toFixed(1) + " pp" : "—"; const mean = r.params.mean != null ? r.params.mean.toFixed(1) + "%" : "—"; const reco = (r.recommendations || []).slice(0, 2).join(", ") || "—"; const borderlineMark = r.borderline ? " ⚠" : ""; return ` ${r.params.name} ${spread} ${mean} ${verdictLabel}${borderlineMark} ${reco} `; }).join(""); return `
    ${t("saturation.section.all") || "All tracked benchmarks"} ${rows}
    ${t("saturation.col.bench") || "Benchmark"} ${t("saturation.col.spread") || "Spread"} ${t("saturation.col.mean") || "Mean"} ${t("saturation.col.verdict") || "Verdict"} ${t("saturation.col.reco") || "Top reco"}
    `; } function runSaturationOne() { const sel = $("saturation-select"); const name = sel?.value; if (!name || name === "__all__") { runSaturationAll(); return; } const result = classifyBenchmark(name); $("saturation-output").innerHTML = renderSaturationCard(result); $("saturation-status").textContent = tFmt("saturation.status.done", { name, verdict: t(`saturation.verdict.${result.code}`) || result.code, }); } function runSaturationAll() { const results = classifyAll(); $("saturation-output").innerHTML = renderSaturationAll(results); $("saturation-status").textContent = tFmt("saturation.status.all_done", { n: results.length }); } $("saturation-run-btn")?.addEventListener("click", runSaturationOne); $("saturation-all-btn")?.addEventListener("click", runSaturationAll); // ════════════════════════════════════════════════════════════════════ // 🧭 Solutions Hub (v0.8.1) — integrator portal // ════════════════════════════════════════════════════════════════════ const HUB_TYPE_BADGE = { tool: "🔧", leaderboard: "📊", paper: "📄", article: "📝", docs: "📘", issue: "🐛", spec: "📐", benchmark: "🧪", }; let __hubInited = false; async function initHub() { if (__hubInited) return; __hubInited = true; try { await loadHub(); } catch (e) { $("hub-status").textContent = (t("hub.status.fail") || "⚠ Could not load Solutions Hub.") + " " + (e.message || e); return; } const stats = hubStats(); $("hub-status").textContent = tFmt("hub.status.loaded", stats); renderHubAll(); } function renderEntry(e) { const modeBadge = e.tafagent_mode ? `${e.tafagent_mode}` : (e.tafagent_planned_mode ? `${t("hub.planned") || "planned:"} ${e.tafagent_planned_mode}` : `${t("hub.no_mode") || "external"}`); const tools = (e.external_tools || []) .map(tl => { const icon = HUB_TYPE_BADGE[tl.type] || "🔗"; return `
  • ${icon} ${tl.name} (${tl.type})
  • `; }) .join(""); const bestFor = e.best_for ? `

    ${t("hub.best_for") || "Best for"}: ${e.best_for}

    ` : ""; const notFor = e.not_for ? `

    ${t("hub.not_for") || "Not for"}: ${e.not_for}

    ` : ""; return `
    ${e.pain} ${modeBadge} ${bestFor} ${notFor} ${tools ? `

    ${t("hub.tools") || "External tools"}:

      ${tools}
    ` : ""}
    `; } function renderHubAll() { const cats = listCategories(); const html = cats.map(c => { const entries = listEntries(c.key); if (entries.length === 0) return ""; const inner = entries.map(renderEntry).join(""); return `
    ${c.icon} ${c.label} (${c.count})

    ${c.description}

    ${inner}
    `; }).join(""); $("hub-output").innerHTML = `
    ${html}
    `; } function renderHubSearch(query) { const matches = searchEntries(query); if (matches.length === 0) { $("hub-output").innerHTML = `

    ${tFmt("hub.search.empty", { query })}

    `; return; } const html = matches.map(renderEntry).join(""); $("hub-output").innerHTML = `

    ${tFmt("hub.search.results", { n: matches.length, query })}

    ${html}
    `; } let __hubSearchTimer = null; $("hub-search")?.addEventListener("input", (e) => { clearTimeout(__hubSearchTimer); const q = e.target.value; __hubSearchTimer = setTimeout(() => { if (!q.trim()) renderHubAll(); else renderHubSearch(q); }, 200); }); $("hub-clear-btn")?.addEventListener("click", () => { $("hub-search").value = ""; renderHubAll(); }); // ════════════════════════════════════════════════════════════════════ // 📋 JSON CoT-aware Linter (v0.8.2 anti-bullshit pack #8) // ════════════════════════════════════════════════════════════════════ const COT_FIELD_TYPE_BADGE = { reasoning: "🧠", answer: "🎯", other: "·", }; const COT_VERDICT_BADGE_BG = { good_order: "#3fb950", // green anti_pattern: "#f85149", // red missing_reasoning: "#d29922", // amber missing_answer: "#d29922", // amber no_cot_fields: "#8b949e", // gray non_object: "#8b949e", empty_fields: "#8b949e", invalid_json: "#f85149", // red }; let __cotInited = false; function initCot() { if (__cotInited) return; __cotInited = true; // No-op (no async data); placeholder kept for symmetry with other modes. } function renderCotResult(result, originalText) { const verdict = t(`cot.verdict.${result.code}`) || result.code; const verdictBg = COT_VERDICT_BADGE_BG[result.code] || "#8b949e"; const verdictBadge = `${verdict}`; // Failure cases short-circuit: just show the verdict + reason. if (result.code === "invalid_json") { const reason = result.params?.error || ""; return `

    ${verdictBadge}

    ${escapeHtml(reason)}
    `; } if (result.code === "empty_fields" || result.code === "non_object") { return `

    ${verdictBadge}

    ${t(`cot.hint.${result.code}`) || ""}

    `; } const fields = result.params?.fields || []; const fieldRows = fields.map(f => { const icon = COT_FIELD_TYPE_BADGE[f.type] || "·"; const typeLabel = t(`cot.field.${f.type}`) || f.type; const color = f.type === "reasoning" ? "#3fb950" : f.type === "answer" ? "#f0883e" : "#8b949e"; return ` ${f.idx} ${escapeHtml(f.name)} ${icon} ${typeLabel} `; }).join(""); const fieldTable = ` ${fieldRows}
    # Field Type
    `; // Suggested-fix block — only when there's a meaningful reorder. let fixBlock = ""; if (result.code === "anti_pattern") { const suggested = result.params?.suggested_order || []; const fixed = reorderJsonText(originalText, suggested); if (fixed) { fixBlock = `
    ${t("cot.suggested_fix.title") || "✓ Suggested fix"}

    ${t("cot.suggested_fix.desc") || ""}

    ${escapeHtml(fixed)}
    `; } } // Verdict explainer const explainer = t(`cot.explain.${result.code}`) || ""; const explainerBlock = explainer ? `

    ${explainer}

    ` : ""; // Source attribution footer const attribution = `

    ${t("cot.attribution") || ""} collinwilkins.com · JSONSchemaBench · llguidance

    `; return `

    ${verdictBadge} (${tFmt("cot.field_count", { n: result.params.field_count }) || `${result.params.field_count} fields`})

    ${explainerBlock} ${fieldTable} ${fixBlock} ${attribution}
    `; } function runCotLint() { const text = $("cot-input")?.value || ""; const result = lintJsonCot(text); $("cot-output").innerHTML = renderCotResult(result, text); $("cot-status").textContent = tFmt("cot.status.done", { verdict: t(`cot.verdict.${result.code}`) || result.code, }); } const COT_EXAMPLE_GOOD = JSON.stringify({ type: "object", properties: { reasoning: { type: "string", description: "Step-by-step rationale before committing to an answer.", }, answer: { type: "string", description: "Final answer, derived from the reasoning above.", }, }, required: ["reasoning", "answer"], }, null, 2); const COT_EXAMPLE_BAD = JSON.stringify({ type: "object", properties: { final_answer: { type: "string", description: "The model's final answer.", }, chain_of_thought: { type: "string", description: "Justification for the answer above.", }, }, required: ["final_answer", "chain_of_thought"], }, null, 2); $("cot-lint-btn")?.addEventListener("click", runCotLint); $("cot-example-good-btn")?.addEventListener("click", () => { $("cot-input").value = COT_EXAMPLE_GOOD; runCotLint(); }); $("cot-example-bad-btn")?.addEventListener("click", () => { $("cot-input").value = COT_EXAMPLE_BAD; runCotLint(); }); // ════════════════════════════════════════════════════════════════════ // 🔧 PEFT Anti-Pattern Checker (v0.8.3 anti-bullshit pack #9) // ════════════════════════════════════════════════════════════════════ const PEFT_SEVERITY_BG = { error: "#f85149", warning: "#d29922", info: "#58a6ff", }; const PEFT_VERDICT_BG = { errors_found: "#f85149", warnings_only: "#d29922", info_only: "#58a6ff", clean: "#3fb950", no_peft_calls: "#8b949e", empty_input: "#8b949e", }; let __peftInited = false; function initPeft() { if (__peftInited) return; __peftInited = true; // No-op (no async data); placeholder kept for symmetry with other modes. } function renderPeftFinding(f) { const sevBg = PEFT_SEVERITY_BG[f.severity] || "#8b949e"; const sevBadge = `${f.severity.toUpperCase()}`; const ruleLabel = t(`peft.rule.${f.rule}.label`) || f.rule; const lineLabel = f.line != null ? `${tFmt("peft.line", { n: f.line }) || `line ${f.line}`}` : ""; const explainer = t(`peft.rule.${f.rule}.explain`) || ""; const fixHint = t(`peft.rule.${f.rule}.fix`) || ""; // Per-rule rendering details let detail = ""; if (f.rule === "silent_base_load") { detail = `

    ${escapeHtml(f.params.checkpoint_hint)} ${t("peft.detected_at_line") || "appears at line"} ${f.params.checkpoint_line}

    ${t("peft.suggested_fix") || "Suggested:"} ${escapeHtml(f.params.fix)}

    `; } else if (f.rule === "qlora_order") { detail = `

    ${tFmt("peft.qlora_order.detail", f.params) || `prepare_model_for_kbit_training (line ${f.params.prepare_line}) runs AFTER get_peft_model (line ${f.params.get_peft_model_line}). Reverse the order.`}

    `; } else if (f.rule === "target_modules_mismatch") { detail = `

    ${t("peft.detected_arch") || "Detected arch"}: ${escapeHtml(f.params.detected_arch)} ${t("peft.from_model_id") || "(from model id"} ${escapeHtml(f.params.detected_from)})

    ${t("peft.your_modules") || "Your target_modules"}: ${escapeHtml(f.params.user_modules.join(", "))}

    ${t("peft.expected_modules") || "Expected for this arch"}: ${escapeHtml(f.params.expected_modules.join(", "))}

    ${tFmt("peft.match_ratio", f.params) || `${f.params.hits} of ${f.params.total} match.`}

    `; } else if (f.rule === "alpha_not_2r") { detail = `

    r=${f.params.r}, lora_alpha=${f.params.lora_alpha} → ${t("peft.ratio") || "ratio"} ${f.params.ratio}× (${t("peft.alpha.convention") || "convention is α=2r or α=r"})

    `; } else if (f.rule === "no_peft_calls") { detail = `

    ${t("peft.no_peft_calls.detail") || "No get_peft_model / PeftModel.from_pretrained / LoraConfig calls detected. Paste a PEFT/LoRA setup snippet."}

    `; } return `
    ${sevBadge} ${ruleLabel} ${lineLabel} ${explainer ? `

    ${explainer}

    ` : ""} ${detail} ${fixHint ? `

    ${fixHint}

    ` : ""}
    `; } function renderPeftResult(result) { const verdict = t(`peft.verdict.${result.code}`) || result.code; const verdictBg = PEFT_VERDICT_BG[result.code] || "#8b949e"; const verdictBadge = `${verdict}`; const findings = result.findings || []; const findingsHtml = findings.map(renderPeftFinding).join(""); const summary = result.summary ? `

    ${tFmt("peft.summary", result.summary) || `${result.summary.total} finding(s)`}

    ` : ""; // Source attribution const attribution = `

    ${t("peft.attribution") || "Refs:"} peft #2115 · PEFT troubleshooting · get_layer_status / get_model_status

    `; return `

    ${verdictBadge}

    ${summary} ${findingsHtml} ${attribution}
    `; } function runPeftLint() { const text = $("peft-input")?.value || ""; const result = lintPeftCode(text); $("peft-output").innerHTML = renderPeftResult(result); $("peft-status").textContent = tFmt("peft.status.done", { verdict: t(`peft.verdict.${result.code}`) || result.code, n: result.findings?.length || 0, }); } const PEFT_EXAMPLE_BUG = `from peft import LoraConfig, get_peft_model from transformers import AutoModelForCausalLM base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3-8B") config = LoraConfig( r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"], ) model = get_peft_model(base, config) # resume from saved checkpoint? model.load_state_dict("./outputs/checkpoint-1000/adapter_model.bin") `; const PEFT_EXAMPLE_QLORA = `from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training from transformers import AutoModelForCausalLM, BitsAndBytesConfig bnb = BitsAndBytesConfig(load_in_4bit=True) base = AutoModelForCausalLM.from_pretrained( "meta-llama/Llama-3-8B", quantization_config=bnb, ) config = LoraConfig(r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"]) model = get_peft_model(base, config) # WRONG ORDER: prepare_model_for_kbit_training must come BEFORE get_peft_model model = prepare_model_for_kbit_training(model) `; const PEFT_EXAMPLE_CLEAN = `from peft import PeftModel from transformers import AutoModelForCausalLM base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3-8B") # Resume from saved adapter — correct PEFT pattern. model = PeftModel.from_pretrained(base, "./outputs/checkpoint-1000") `; $("peft-lint-btn")?.addEventListener("click", runPeftLint); $("peft-example-bug-btn")?.addEventListener("click", () => { $("peft-input").value = PEFT_EXAMPLE_BUG; runPeftLint(); }); $("peft-example-qlora-btn")?.addEventListener("click", () => { $("peft-input").value = PEFT_EXAMPLE_QLORA; runPeftLint(); }); $("peft-example-clean-btn")?.addEventListener("click", () => { $("peft-input").value = PEFT_EXAMPLE_CLEAN; runPeftLint(); }); // ════════════════════════════════════════════════════════════════════ // 🔁 Prompt-Cache Diff Predictor (v0.8.4 anti-bullshit pack #10) // ════════════════════════════════════════════════════════════════════ const CACHE_VERDICT_BG = { identical: "#3fb950", divergent_can_cache: "#d29922", divergent_below_min: "#f0883e", fully_divergent: "#f85149", empty_input: "#8b949e", }; let __cacheInited = false; function initCacheDiff() { if (__cacheInited) return; __cacheInited = true; // No-op (no async data); placeholder kept for symmetry. } function fmtUsd(n) { if (n == null || isNaN(n)) return "—"; if (n === 0) return "$0"; if (n < 0.01) return `$${n.toFixed(6)}`; if (n < 1) return `$${n.toFixed(4)}`; return `$${n.toFixed(2)}`; } function fmtPct(n) { if (n == null || isNaN(n)) return "—"; return `${Math.round(n * 100)}%`; } function renderCacheProvider(p) { const bgRow = p.reason === "below_min" ? "#21262d" : "#161b22"; const noteHtml = []; if (p.requires_explicit && p.reason !== "below_min") { noteHtml.push(`${t("cache.note.requires_marker") || "(requires cache_control marker)"}`); } if (p.reason === "below_min") { noteHtml.push(`${tFmt("cache.note.below_min", { min: p.min_cache_tokens.toLocaleString() }) || `(prefix < ${p.min_cache_tokens.toLocaleString()} tokens — provider min)`}`); } const noteCell = noteHtml.length ? `
    ${noteHtml.join(" ")}` : ""; const ttlMin = p.cache_ttl_seconds >= 3600 ? `${Math.round(p.cache_ttl_seconds / 3600)}h` : `${Math.round(p.cache_ttl_seconds / 60)}min`; const savingsColor = p.savings_usd > 0 ? "#3fb950" : (p.reason ? "#8b949e" : "#d29922"); const writeRow = p.cache_write_surcharge_usd && p.cache_write_surcharge_usd > 0 ? `${tFmt("cache.write_surcharge", { cost: fmtUsd(p.cache_write_surcharge_usd) }) || `+ ${fmtUsd(p.cache_write_surcharge_usd)} cache-write surcharge first time (Anthropic)`}` : ""; return ` ${escapeHtml(p.provider_name)}${noteCell}
    TTL ${ttlMin} ${fmtPct(p.hit_ratio)} ${fmtUsd(p.base_cost_usd)} → ${fmtUsd(p.cached_cost_usd)} ${fmtUsd(p.savings_usd)} (${fmtPct(p.savings_pct ?? 0)}) ${writeRow} `; } function renderCacheDiffVisualization(oldText, newText, lcpChars) { // Truncate context — show last 200 chars of common prefix, and the // first 200 chars of each diverging suffix. Keeps UI tight. const ctxBefore = 200; const startCommon = Math.max(0, lcpChars - ctxBefore); const commonTail = oldText.slice(startCommon, lcpChars); const oldDiv = oldText.slice(lcpChars); const newDiv = newText.slice(lcpChars); const commonLeader = startCommon > 0 ? "…" : ""; return `
    ${t("cache.diff.title") || "Where the cache breaks"}
    ${escapeHtml(commonLeader + commonTail)}${escapeHtml(oldDiv.slice(0, 200))} ← old ${escapeHtml(commonLeader + commonTail)}${escapeHtml(newDiv.slice(0, 200))} ← new

    ${t("cache.diff.legend") || "Green = shared prefix (cacheable). Red = first edit (everything from here is re-billed)."}

    `; } function renderCacheResult(result, oldText, newText) { const verdict = t(`cache.verdict.${result.code}`) || result.code; const verdictBg = CACHE_VERDICT_BG[result.code] || "#8b949e"; const verdictBadge = `${verdict}`; if (result.code === "empty_input") { return `

    ${verdictBadge}

    ${t("cache.hint.empty") || "Paste two prompts, then Predict."}

    `; } const p = result.params; const summary = `

    ${tFmt("cache.summary.tokens", { common: p.tokens_common.toLocaleString(), total: p.tokens_total.toLocaleString(), pct: Math.round(p.hit_ratio * 100) }) || `Common prefix ${p.tokens_common.toLocaleString()} / ${p.tokens_total.toLocaleString()} tokens (${Math.round(p.hit_ratio * 100)}% theoretical hit ratio).`}

    ${tFmt("cache.summary.diff_at", { line: p.diff_point.line }) || `First difference at line ${p.diff_point.line}.`}

    `; const rows = (result.providers || []).map(renderCacheProvider).join(""); const table = rows ? ` ${rows}
    ${t("cache.col.provider") || "Provider"} ${t("cache.col.hit") || "Hit"} ${t("cache.col.cost") || "Base → cached"} ${t("cache.col.savings") || "Savings"}
    ` : ""; const diffViz = result.code !== "identical" ? renderCacheDiffVisualization(oldText, newText, p.lcp_chars) : ""; const attribution = `

    ${t("cache.attribution") || "Refs:"} Anthropic prompt caching · OpenAI prompt caching · Gemini context caching
    ${t("cache.attribution.snapshot") || "Prices snapshot 2026-01; verify against current provider docs before acting on $."}

    `; return `

    ${verdictBadge}

    ${summary} ${table} ${diffViz} ${attribution}
    `; } function runCacheDiff() { const oldText = $("cache-old")?.value || ""; const newText = $("cache-new")?.value || ""; const profile = $("cache-profile")?.value || "english"; const outputTokens = parseInt($("cache-output-tokens")?.value || "500", 10); const result = diffPromptCache(oldText, newText, { profile, outputTokensEstimate: outputTokens, }); $("cache-output").innerHTML = renderCacheResult(result, oldText, newText); $("cache-status").textContent = tFmt("cache.status.done", { verdict: t(`cache.verdict.${result.code}`) || result.code, hit: Math.round((result.params?.hit_ratio || 0) * 100), }); } const CACHE_LONG_SYS = "You are a helpful, harmless, and honest assistant. " + "Always cite your sources. ".repeat(40) + "Always show your reasoning step by step. ".repeat(40) + "Be concise. Format code with backticks. ".repeat(40) + "\n\nUser tools available:\n- search\n- calculator\n- code_runner\n"; const CACHE_EXAMPLE_GOOD_OLD = CACHE_LONG_SYS + "\nUser: What is 2 + 2?"; const CACHE_EXAMPLE_GOOD_NEW = CACHE_LONG_SYS + "\nUser: What is 2 + 3?"; const CACHE_EXAMPLE_BROKEN_OLD = CACHE_LONG_SYS.replace("helpful, harmless, and honest", "helpful AND honest") + "\nUser: What is 2 + 2?"; const CACHE_EXAMPLE_BROKEN_NEW = CACHE_LONG_SYS + "\nUser: What is 2 + 2?"; const CACHE_EXAMPLE_BELOWMIN_OLD = "Q: name 3 colors"; const CACHE_EXAMPLE_BELOWMIN_NEW = "Q: name 4 colors"; $("cache-diff-btn")?.addEventListener("click", runCacheDiff); $("cache-example-good-btn")?.addEventListener("click", () => { $("cache-old").value = CACHE_EXAMPLE_GOOD_OLD; $("cache-new").value = CACHE_EXAMPLE_GOOD_NEW; runCacheDiff(); }); $("cache-example-broken-btn")?.addEventListener("click", () => { $("cache-old").value = CACHE_EXAMPLE_BROKEN_OLD; $("cache-new").value = CACHE_EXAMPLE_BROKEN_NEW; runCacheDiff(); }); $("cache-example-belowmin-btn")?.addEventListener("click", () => { $("cache-old").value = CACHE_EXAMPLE_BELOWMIN_OLD; $("cache-new").value = CACHE_EXAMPLE_BELOWMIN_NEW; runCacheDiff(); }); // ════════════════════════════════════════════════════════════════════ // 🔬 Speculative-Decode Compatibility (v0.8.5 anti-bullshit pack #11) // ════════════════════════════════════════════════════════════════════ const SPEC_VERDICT_BG = { compatible: "#3fb950", compatible_with_caveats: "#3fb950", partial_compatible: "#d29922", type_mismatch: "#f85149", vocab_size_mismatch: "#f85149", incompatible: "#f85149", fetch_failed: "#8b949e", identical_models: "#58a6ff", missing_input: "#8b949e", }; let __specInited = false; function initSpeculative() { if (__specInited) return; __specInited = true; // No-op (no async preload); placeholder kept for symmetry. } function fmtParams(p) { if (!p) return "—"; if (p >= 1e9) return `${(p / 1e9).toFixed(1)}B`; if (p >= 1e6) return `${(p / 1e6).toFixed(1)}M`; return p.toLocaleString(); } function renderSpecResult(result) { const verdict = t(`speculative.verdict.${result.code}`) || result.code; const verdictBg = SPEC_VERDICT_BG[result.code] || "#8b949e"; const verdictBadge = `${verdict}`; // Failure-mode short-circuits if (result.code === "missing_input" || result.code === "identical_models") { return `

    ${verdictBadge}

    ${t(`speculative.hint.${result.code}`) || ""}

    `; } if (result.code === "fetch_failed") { const errs = (result.errors || []).map(e => { const sideLabel = e.side === "target" ? (t("speculative.side.target") || "Target") : (t("speculative.side.draft") || "Draft"); const reason = t(`speculative.fetch_error.${e.error}`) || e.error; return `
  • ${sideLabel}: ${reason}${e.status ? ` (HTTP ${e.status})` : ""}
  • `; }).join(""); return `

    ${verdictBadge}

      ${errs}

    ${t("speculative.fetch_error.hint") || "Check the model id spelling. For gated models you'll need to view the tokenizer file via your HF account — this tool can't auth."}

    `; } const p = result.params; // Mirror banner — when a gated model was fetched via an open mirror. let mirrorBanner = ""; if (p.target_via_mirror || p.draft_via_mirror) { const lines = []; if (p.target_via_mirror) { lines.push(tFmt("speculative.mirror.target_used", { original: escapeHtml(p.targetId), mirror: escapeHtml(p.target_via_mirror), }) || `Target was gated; used mirror ${escapeHtml(p.target_via_mirror)}.`); } if (p.draft_via_mirror) { lines.push(tFmt("speculative.mirror.draft_used", { original: escapeHtml(p.draftId), mirror: escapeHtml(p.draft_via_mirror), }) || `Draft was gated; used mirror ${escapeHtml(p.draft_via_mirror)}.`); } mirrorBanner = `
    ℹ ${t("speculative.mirror.heading") || "Open-mirror fallback"} ${lines.map(l => `
    ${l}`).join("")}
    ${t("speculative.mirror.warn") || "Mirror tokenizers (e.g. unsloth/) are usually byte-identical to the gated original because quantization touches weights, not tokens. Verify chat-template if exact match is required."}
    `; } // Section 1 — vocab summary const typeBadge = (label, val, bg) => `${label}: ${val ?? "—"}`; const typeRow = ` ${typeBadge(t("speculative.target_label_short") || "target", p.target_type, p.type_match ? "#3fb950" : "#f85149")} ${typeBadge(t("speculative.draft_label_short") || "draft", p.draft_type, p.type_match ? "#3fb950" : "#f85149")} ${p.type_match ? "" : ` ← ${t("speculative.type_mismatch_note") || "tokenizer types differ; spec-dec impossible"}`} `; const sizeRow = ` ${t("speculative.vocab_size") || "Vocab size"}: target = ${p.target_vocab_size.toLocaleString()}, draft = ${p.draft_vocab_size.toLocaleString()} ${p.vocab_size_match ? "" : ` ← ${t("speculative.size_diff") || "differ — every reused id is a misalignment"}`} `; // Sampled match const matchPct = p.sampled_total > 0 ? Math.round(p.sampled_match_ratio * 100) : 0; const matchColor = matchPct >= 99.9 ? "#3fb950" : matchPct >= 95 ? "#d29922" : "#f85149"; const sampleRow = ` ${t("speculative.sampled") || "Token-id sample match"}: ${matchPct}% (${p.sampled_match_count.toLocaleString()} / ${p.sampled_total.toLocaleString()} tokens) ${p.first_mismatch ? `
    ${t("speculative.first_mismatch") || "First mismatch"}: ${escapeHtml(p.first_mismatch.token).slice(0, 40)} → target id ${p.first_mismatch.target_id ?? "—"}, draft id ${p.first_mismatch.draft_id ?? "—"}` : ""} `; // Special / added token diffs const specDiffRows = (p.special_tokens_diff || []).map(d => `
  • ${d.name}: target=${escapeHtml(String(d.target ?? "—"))}, draft=${escapeHtml(String(d.draft ?? "—"))}
  • ` ).join(""); const specDiffBlock = specDiffRows ? `
    ${t("speculative.special_diff") || "Special-token differences"} (${p.special_tokens_diff.length})
      ${specDiffRows}
    ` : ""; const addedDiffPreview = (p.added_tokens_diff || []).slice(0, 12).map(d => `
  • ${d.side === "target_only" ? "target only" : "draft only"}: ${escapeHtml(d.token).slice(0, 40)}
  • ` ).join(""); const addedDiffBlock = addedDiffPreview ? `
    ${t("speculative.added_diff") || "Added-token differences"} (${(p.added_tokens_diff||[]).length})
      ${addedDiffPreview}${p.added_tokens_diff.length > 12 ? `
    • ${t("speculative.added_diff_more") || "+ more …"}
    • ` : ""}
    ` : ""; // Section 2 — speedup band (only when compatible-ish) let speedupBlock = ""; if (p.speedup_expected != null) { const ratio = p.param_ratio ? `${(p.param_ratio * 100).toFixed(1)}%` : "—"; speedupBlock = `
    ${t("speculative.speedup.title") || "Estimated speedup band"}
    ${tFmt("speculative.speedup.params", { target: fmtParams(p.target_params), draft: fmtParams(p.draft_params), ratio }) || `target ${fmtParams(p.target_params)} / draft ${fmtParams(p.draft_params)} (param ratio ${ratio})`}
    ${t("speculative.speedup.low") || "Low (α=0.50)"}:
    ${p.speedup_low}×
    ${t("speculative.speedup.expected") || "Expected (α=0.70)"}:
    ${p.speedup_expected}×
    ${t("speculative.speedup.high") || "High (α=0.85)"}:
    ${p.speedup_high}×

    ${t("speculative.speedup.disclaimer") || "α = draft acceptance rate. Real speedup depends on prompt domain, lookahead K, and engine overhead. Bands assume ideal verifier batching."}

    `; } else if (p.target_params && p.draft_params && p.param_ratio >= 1) { speedupBlock = `

    ${t("speculative.speedup.draft_not_smaller") || "Draft is not smaller than target — spec-dec is misuse here."}

    `; } // Attribution const attribution = `

    ${t("speculative.attribution") || "Refs:"} vLLM spec-dec docs · SGLang · transformers assistant_model · Leviathan et al. 2022

    `; return `

    ${verdictBadge}

    ${mirrorBanner}

    ${typeRow}

    ${sizeRow}

    ${sampleRow}

    ${specDiffBlock} ${addedDiffBlock} ${speedupBlock} ${attribution}
    `; } async function runSpecCheck() { const targetId = $("spec-target-id")?.value?.trim() || ""; const draftId = $("spec-draft-id")?.value?.trim() || ""; $("spec-status").textContent = t("speculative.status.fetching") || "🔄 Fetching tokenizer.json from HF Hub for both models…"; $("spec-output").innerHTML = ""; try { const result = await specCheckCompat(targetId, draftId); $("spec-output").innerHTML = renderSpecResult(result); $("spec-status").textContent = tFmt("speculative.status.done", { verdict: t(`speculative.verdict.${result.code}`) || result.code, }); } catch (e) { $("spec-status").textContent = (t("speculative.status.error") || "❌ Error") + " " + (e.message || e); } } $("spec-check-btn")?.addEventListener("click", runSpecCheck); // Examples mix gated + open: gated ids (Llama) trigger the open-mirror // fallback (unsloth/...) so the user sees both the demo result AND the // mirror-resolution mechanism. Pure open-weight pairs (Qwen + Phi) // stay as the "no fallback needed" path for the second example. $("spec-example-good-btn")?.addEventListener("click", () => { // Gated → triggers unsloth mirror fallback for both sides. $("spec-target-id").value = "meta-llama/Llama-3.1-70B-Instruct"; $("spec-draft-id").value = "meta-llama/Llama-3.1-8B-Instruct"; runSpecCheck(); }); $("spec-example-bad-btn")?.addEventListener("click", () => { // Open-weight cross-family → no fallback, plain incompatibility demo. $("spec-target-id").value = "Qwen/Qwen2.5-7B-Instruct"; $("spec-draft-id").value = "microsoft/Phi-3.5-mini-instruct"; runSpecCheck(); }); // (HF autocomplete on spec-target-id / spec-draft-id is registered via // the known-id list in hf_autocomplete.js; no extra wiring needed here.) // ════════════════════════════════════════════════════════════════════ // 🌍 Multilingual Tokenizer Tax (v0.8.7 anti-bullshit pack #13) // ════════════════════════════════════════════════════════════════════ let __taxInited = false; function initTax() { if (__taxInited) return; __taxInited = true; // No async preload — transformers.js + tokenizer.json are lazy-loaded // on the first Tokenize click so users don't pay download cost just // for opening the tab. Status string explains the wait. } function fmtBlocks(blocks) { // Build a compact "60% latin · 35% cjk · 5% other" string from the // detector output. Drops zero-counts and orders by descending size. if (!blocks || !blocks.blocks || !blocks.total_chars) return ""; const total = blocks.total_chars; const entries = Object.entries(blocks.blocks) .filter(([, n]) => n > 0) .sort((a, b) => b[1] - a[1]); if (entries.length === 0) return ""; const parts = entries.map(([name, n]) => { const pct = Math.round((n / total) * 100); return `${pct}% ${name}`; }); return parts.join(" · "); } function renderTaxResult(res, presetMeta) { if (res.code === "empty_input") { return `

    ${t("tax.hint.empty") || "Paste some text and click Tokenize."}

    `; } if (res.code === "all_failed") { const errLines = res.results.map(r => { const meta = presetMeta.find(p => p.id === r.modelId); return `
  • ${escapeHtml(r.modelId)} ${meta ? `(${escapeHtml(meta.label)})` : ""}: ${t(`tax.error.${r.error}`) || r.error}
  • `; }).join(""); return `

    ❌ ${t("tax.all_failed") || "All tokenizers failed to load."}

      ${errLines}
    `; } const baselineCount = res.baseline_count; const blocks = detectLanguageBlocks($("tax-input").value); const ratioColor = (r) => { if (r == null) return "#8b949e"; if (r >= 1.5) return "#f85149"; // big tax — red if (r >= 1.15) return "#f0883e"; // moderate if (r >= 0.85) return "#3fb950"; // about same return "#58a6ff"; // BETTER than baseline (rare) }; const fmtRatio = (r) => r == null ? "—" : `${r.toFixed(2)}×`; const rows = res.results.map(r => { const meta = presetMeta.find(p => p.id === r.modelId) || { label: r.modelId, family: "" }; if (!r.ok) { return ` ${escapeHtml(meta.label)}
    ${escapeHtml(meta.family)} ${t(`tax.error.${r.error}`) || r.error} `; } const isBaseline = r.modelId === res.baseline_id; const baselineMark = isBaseline ? ` (baseline)` : ""; return ` ${escapeHtml(meta.label)}${baselineMark}
    ${escapeHtml(meta.family)} ${r.token_count.toLocaleString()} ${r.chars_per_token != null ? r.chars_per_token.toFixed(2) : "—"} ${fmtRatio(r.ratio_vs_baseline)} `; }).join(""); // Worst-tax explanation — find the tokenizer that scored ≥1.5× baseline. const worst = res.results .filter(r => r.ok && r.ratio_vs_baseline != null) .sort((a, b) => b.ratio_vs_baseline - a.ratio_vs_baseline)[0]; let interpretation = ""; if (worst && worst.ratio_vs_baseline >= 1.3) { const meta = presetMeta.find(p => p.id === worst.modelId); const pct = Math.round((worst.ratio_vs_baseline - 1) * 100); interpretation = `

    ${tFmt("tax.interp.worst", { label: meta?.label || worst.modelId, pct, }) || `${meta?.label || worst.modelId} costs ${pct}% more tokens than baseline for this text.`}

    `; } else if (worst && worst.ratio_vs_baseline <= 1.05) { interpretation = `

    ${t("tax.interp.uniform") || "✓ All tokenizers within ±5% — text is well-handled across vendors."}

    `; } return `

    ${tFmt("tax.summary.input", { chars: res.chars.toLocaleString(), bytes: res.bytes.toLocaleString() }) || `Input: ${res.chars.toLocaleString()} chars, ${res.bytes.toLocaleString()} bytes`} ${blocks.dominant ? ` · ${t("tax.script_breakdown") || "scripts"}: ${fmtBlocks(blocks)}` : ""}

    ${interpretation} ${rows}
    ${t("tax.col.tokenizer") || "Tokenizer"} ${t("tax.col.tokens") || "Tokens"} ${t("tax.col.cpt") || "Chars/tok"} ${t("tax.col.ratio") || "Ratio"}

    ${t("tax.attribution") || "Tokenizers via"} @huggingface/transformers (browser BPE runtime). ${t("tax.attribution.privacy") || "Text is tokenized locally — never leaves the browser."}

    `; } async function runTaxTokenize() { const text = $("tax-input")?.value || ""; if (!text) { $("tax-status").textContent = t("tax.hint.empty") || "⚠ Paste some text first."; return; } $("tax-status").textContent = t("tax.status.loading") || "⏳ Loading transformers.js + tokenizers (first run can take 5-15s)…"; $("tax-output").innerHTML = ""; const ids = TAX_PRESETS.map(p => p.id); try { const t0 = Date.now(); const res = await tokenizeAll(ids, text); const ms = Date.now() - t0; $("tax-output").innerHTML = renderTaxResult(res, TAX_PRESETS); const okN = res.results.filter(r => r.ok).length; $("tax-status").textContent = tFmt("tax.status.done", { n: okN, total: ids.length, ms, }) || `✅ ${okN}/${ids.length} tokenizers ran in ${ms}ms`; } catch (e) { $("tax-status").textContent = `❌ ${e.message || e}`; } } $("tax-tokenize-btn")?.addEventListener("click", runTaxTokenize); $("tax-sample-en-btn")?.addEventListener("click", () => { $("tax-input").value = TAX_SAMPLES.english; runTaxTokenize(); }); $("tax-sample-zh-btn")?.addEventListener("click", () => { $("tax-input").value = TAX_SAMPLES.chinese; runTaxTokenize(); }); $("tax-sample-ar-btn")?.addEventListener("click", () => { $("tax-input").value = TAX_SAMPLES.arabic; runTaxTokenize(); }); $("tax-sample-mixed-btn")?.addEventListener("click", () => { $("tax-input").value = TAX_SAMPLES.mixed; runTaxTokenize(); }); $("tax-sample-code-btn")?.addEventListener("click", () => { $("tax-input").value = TAX_SAMPLES.code; runTaxTokenize(); }); // ════════════════════════════════════════════════════════════════════ // LongScore mode (v0.8.8 anti-bullshit pack #14) // ════════════════════════════════════════════════════════════════════ let __longscoreInited = false; function initLongscore() { if (__longscoreInited) return; __longscoreInited = true; // Eager-load KB so the first lookup is instant (KB is ~70KB, no real cost) loadLongscoreKB().catch(e => { console.warn("longscore_kb preload failed", e); }); } function lsFmtPct(x, sign) { if (x == null) return "—"; const v = (x * 100); return `${sign && v >= 0 ? "+" : ""}${v.toFixed(1)}%`; } function lcColor(avg) { if (avg == null) return "#8b949e"; if (avg >= -0.02) return "#3fb950"; // green: no degradation if (avg >= -0.10) return "#a5d36a"; // light green if (avg >= -0.20) return "#f0883e"; // orange if (avg >= -0.30) return "#f85149"; // red return "#a01b1b"; // dark red: extreme } function renderLongscoreResult(res) { if (res.code === "miss") { return `

    ${t("longscore.miss.title") || "Model not found in KB"}

    ${tFmt("longscore.miss.body", { id: res.normalized_id, n: res.n_kb_total }) || `Looked up ${res.normalized_id}. KB has ${res.n_kb_total} models. Try a canonical HF id (e.g. Qwen2.5-72B-Instruct, Llama-3.1-70B-Instruct, Jamba-1.5-Mini).`}

    ${t("longscore.miss.suggest") || "Check coverage at"} RULER · HELMET.

    `; } const verdictMap = { no_degradation: { color: "#3fb950", label: t("longscore.verdict.no_degradation") || "✅ No degradation past short context" }, mild: { color: "#a5d36a", label: t("longscore.verdict.mild") || "🟢 Mild degradation (<10%)" }, moderate: { color: "#f0883e", label: t("longscore.verdict.moderate") || "🟠 Moderate degradation (10-20%)" }, severe: { color: "#f85149", label: t("longscore.verdict.severe") || "🔴 Severe degradation (20-30%)" }, extreme: { color: "#a01b1b", label: t("longscore.verdict.extreme") || "🚨 Extreme degradation (>30%)" }, }; let html = `
    `; html += `

    ${escapeHtml(res.display_name)}`; if (res.params_b) html += ` · ${res.params_b}B params`; if (res.recipe_class) html += ` · ${escapeHtml(res.recipe_class)}`; if (res.native_context_k) html += ` · native ctx ${res.native_context_k}K`; html += `

    `; // RULER per-length + LongScore if (res.ruler_long_score) { const ls = res.ruler_long_score; const v = verdictMap[res.verdict] || { color: "#8b949e", label: res.verdict }; html += `

    ${t("longscore.score_label") || "LongScore"}: ${lsFmtPct(ls.avg_lc, true)} · Base = ${ls.base.toFixed(1)}% (mean of 4K, 8K)

    `; html += `

    ${v.label}

    `; // Per-length bars html += ``; const ctxKeys = ["4k", "8k", "16k", "32k", "64k", "128k"]; for (const k of ctxKeys) { const score = res.ruler_per_ctx?.[k]; if (score == null) continue; const isShort = k === "4k" || k === "8k"; const lc = ls.per_length_lc?.[k]; html += ``; } html += `
    ${t("longscore.col.ctx") || "Context"} ${t("longscore.col.score") || "Score"} ${t("longscore.col.lc") || "LC"}
    ${k.toUpperCase()}${isShort ? ` (base)` : ""} ${score.toFixed(1)}% ${lc != null ? lsFmtPct(lc, true) : "—"}
    `; } else { // Helmet-only or partial html += `

    ${t("longscore.no_ruler") || "⚠ No per-length data — LongScore not computable. Showing HELMET aggregate at 128K instead."}

    `; } // HELMET breakdown if available if (res.helmet) { html += `
    ${t("longscore.helmet_label") || "HELMET 7-task breakdown"} (at 128K) `; if (res.helmet.overall != null) { html += ``; } if (res.helmet.categories) { for (const [task, score] of Object.entries(res.helmet.categories)) { html += ``; } } html += `
    ${t("longscore.col.task") || "Task"} ${t("longscore.col.score") || "Score"}
    Overall${res.helmet.overall.toFixed(1)}
    ${escapeHtml(task)}${score != null ? score.toFixed(1) : "—"}
    `; } html += `

    ${t("longscore.source_note") || "Data source"}: ${escapeHtml(res.source)} · LongScore metric

    `; html += `
    `; return html; } async function runLongscoreLookup() { const id = $("longscore-input")?.value?.trim(); if (!id) { $("longscore-status").textContent = t("longscore.hint.empty") || "⚠ Paste a model id first."; return; } $("longscore-status").textContent = t("longscore.status.lookup") || "⏳ Looking up…"; $("longscore-output").innerHTML = ""; try { const res = await longscoreLookup(id); $("longscore-output").innerHTML = renderLongscoreResult(res); if (res.code === "miss") { $("longscore-status").textContent = t("longscore.status.miss") || "ℹ Model not in KB"; } else if (res.code === "ruler_hit") { $("longscore-status").textContent = t("longscore.status.ruler_hit") || "✅ RULER per-length data found"; } else { $("longscore-status").textContent = t("longscore.status.helmet_only") || "ℹ HELMET aggregate only (no per-length data)"; } } catch (e) { $("longscore-status").textContent = `❌ ${e.message || e}`; console.error(e); } } $("longscore-lookup-btn")?.addEventListener("click", runLongscoreLookup); $("longscore-input")?.addEventListener("keydown", e => { if (e.key === "Enter") { e.preventDefault(); runLongscoreLookup(); } }); $("longscore-example-good-btn")?.addEventListener("click", () => { $("longscore-input").value = "Jamba-1.5-Large"; runLongscoreLookup(); }); $("longscore-example-mid-btn")?.addEventListener("click", () => { $("longscore-input").value = "Llama-3.1-70B-Instruct"; runLongscoreLookup(); }); $("longscore-example-bad-btn")?.addEventListener("click", () => { $("longscore-input").value = "dbrx"; runLongscoreLookup(); }); // ════════════════════════════════════════════════════════════════════ // Bootstrap // ════════════════════════════════════════════════════════════════════ initI18n(); loadPyodideAndTaf().catch(err => { setStatus(`❌ Failed to initialise: ${err.message || err}`); console.error(err); });