taf-agent / js /main.js
karlexmarin's picture
fix(v0.8.8): rename fmtPct → lsFmtPct in LongScore (collided with existing top-level fmtPct)
b942b06
// TAF Agent — main orchestration (v0.2 — i18n + Profile + Compare)
//
// Phases:
// 1. Pyodide loads + TAF formulas → deterministic computation
// 2. WebLLM loads on demand → plain-English synthesis
// 3. Router (LLM) → free-form question → recipe + params
// 4. Modes: Profile (all recipes) + Compare (multi-model side-by-side)
// 5. i18n: EN/ES/FR/ZH
import { initI18n, setLang, t } from "./i18n.js";
import { initPhaseDiagram } from "./phase_diagram.js";
import { gammaCheckAll, REGIME_META } from "./gamma_check.js";
import { loadLeanManifest, badgeHtml, badgesForUiBinding, renderTheoremTable, getManifest } from "./lean_badges.js";
import { unmaskConfig } from "./swa_unmasker.js";
import { sniffChatTemplate } from "./chat_template_sniffer.js";
import { parseVotesCSV, computeArenaCI, SAMPLE_VOTES_CSV } from "./arena_ci.js";
import { rateAllBenchmarks, BENCHMARK_DB } from "./contamination_prior.js";
import { predictQuantShift, predictAllSchemes, QUANT_SCHEMES } from "./quant_regime.js";
import { attachAllHfAutocompletes } from "./hf_autocomplete.js";
import { computeDriftBound, FRAMEWORKS as DRIFT_FRAMEWORKS, DTYPES as DRIFT_DTYPES } from "./cross_drift.js";
import { predictNIAHReasoning, sweepContextLengths, loadRulerKB, calibrateNIAH, listRulerModels } from "./niah_reasoning.js";
import {
loadSaturationKB, classifyAll, classifyBenchmark,
listBenchmarks, attribution as saturationAttribution, tryFetchLive,
} from "./saturation_detector.js";
import {
loadHub, listCategories, listEntries, searchEntries,
hubStats, getCategoryMeta,
} from "./solutions_hub.js";
import { lintJsonCot, reorderJsonText, classifyFieldName } from "./json_cot_linter.js";
import { lintPeftCode, ARCH_TARGET_MODULES } from "./peft_anti_pattern.js";
import { diffPromptCache, PROVIDERS as CACHE_PROVIDERS } from "./prompt_cache_diff.js";
import { checkCompatibility as specCheckCompat, parseParamHint } from "./spec_decode_compat.js";
import {
tokenizeAll, detectLanguageBlocks,
PRESET_TOKENIZERS as TAX_PRESETS, SAMPLE_TEXTS as TAX_SAMPLES,
} from "./tokenizer_tax.js";
import {
loadKB as loadLongscoreKB, lookup as longscoreLookup, rank as longscoreRank,
} from "./longscore.js";
// Attach HF Hub search-as-you-type to all 5 model id inputs (Profile, Recipe,
// Unmask, Template, Quant). Hits public huggingface.co/api/models. Idempotent.
attachAllHfAutocompletes();
// Relative URL of the Python source that loadPyodideAndTaf() fetches and then
// executes inside Pyodide.
const TAF_BROWSER_URL = "python/taf_browser.py";
// Feature switch for the WebLLM path; its consumers are outside this part of the file.
const ENABLE_WEBLLM = true;
// Smaller model = fits in default browser quota (~350MB vs 700MB for Llama-1B)
const WEBLLM_MODEL = "Qwen2.5-0.5B-Instruct-q4f16_1-MLC";
// NOTE(review): presumably tried when WEBLLM_MODEL cannot be loaded — confirm at the call site.
const WEBLLM_FALLBACK = "SmolLM2-360M-Instruct-q4f16_1-MLC";
// Shorthand for document.getElementById; yields null when no element has that id.
const $ = (id) => document.getElementById(id);
// Shared mutable UI state for this module. Handlers further down also attach
// `lastModelId` at runtime; it is not declared here.
const state = {
pyodide: null, // Pyodide instance, assigned by loadPyodideAndTaf()
webllm: null, // NOTE(review): presumably the WebLLM engine once loaded — assigned elsewhere
presets: [], // parsed JSON returned by the Python list_presets()
recipes: [], // parsed JSON returned by the Python list_recipes()
recipesById: {}, // the same recipes, indexed by recipe id
currentMode: "ask", // updated by the .mode-btn click handler
currentRecipe: null, // recipe object picked in #recipe-select
};
// Sample free-form questions. NOTE(review): presumably offered as clickable
// examples in Ask mode — the consumer is not visible in this part of the file.
const EXAMPLES = [
"Will Meta-Llama-3-8B handle 32000-token NIAH retrieval reliably?",
"I have $5000 to spend on training. What model can I afford?",
"Should I use Mistral-7B-v0.1 at 16K context or extend it first?",
"Compare cheapest GPU to serve Llama-3-8B at 10 million tokens per day.",
"Should I use soft KV decay or hard cutoff for Qwen2.5-7B at 32K?",
"Is it cheaper to train an 8B custom model or use GPT-4o for 50M tokens/month?",
];
// ════════════════════════════════════════════════════════════════════
// Bootstrap
// ════════════════════════════════════════════════════════════════════
// Show, hide, or update the page-level loading bar.
//   show     — falsy hides the bar wrapper and leaves the bar itself untouched.
//   progress — null selects the indeterminate animation at full width;
//              otherwise a 0..1 fraction, clamped and rendered as a width in %.
// Does nothing when either bar element is missing from the page.
function showLoadingBar(show, progress=null) {
  const container = $("loading-bar-wrap");
  const fill = $("loading-bar");
  if (!(container && fill)) return;

  if (!show) {
    container.style.display = "none";
    return;
  }

  container.style.display = "block";
  const isIndeterminate = progress === null;
  if (isIndeterminate) {
    fill.classList.add("indeterminate");
    fill.style.width = "100%";
    return;
  }

  const percent = Math.min(100, Math.max(0, progress * 100));
  fill.classList.remove("indeterminate");
  fill.style.width = `${percent}%`;
}
// Boot the deterministic compute layer: load Pyodide, fetch and execute the
// TAF Python source, cache the preset/recipe catalogs in `state`, then fill
// the selects and enable the UI.
// Rejects when Pyodide fails to load, when the TAF source cannot be fetched
// (network error or non-2xx response), or when the Python code raises.
async function loadPyodideAndTaf() {
  showLoadingBar(true, null);
  setStatus(t("status.loading_pyodide"));
  state.pyodide = await loadPyodide({
    indexURL: "https://cdn.jsdelivr.net/pyodide/v0.26.4/full/",
  });

  showLoadingBar(true, 0.5);
  setStatus(t("status.loading_taf"));
  const resp = await fetch(TAF_BROWSER_URL);
  // fetch() resolves on HTTP errors too; without this check an HTML error page
  // would be handed to the Python interpreter and fail with a confusing
  // SyntaxError instead of a clear HTTP error.
  if (!resp.ok) {
    throw new Error(`HTTP ${resp.status} — could not load ${TAF_BROWSER_URL}`);
  }
  const tafCode = await resp.text();
  await state.pyodide.runPythonAsync(tafCode);

  // The Python helpers return JSON strings.
  state.presets = JSON.parse(state.pyodide.runPython("list_presets()"));
  state.recipes = JSON.parse(state.pyodide.runPython("list_recipes()"));
  state.recipesById = Object.fromEntries(state.recipes.map(r => [r.id, r]));

  showLoadingBar(true, 0.95);
  populatePresets();
  populateRecipes();
  enableUI();
  showLoadingBar(false);
  setStatus(t("status.ready"));
}
// Rebuild every preset <select> from state.presets.
// The recipe-form and profile selects get a detailed label (θ and T_train);
// the compare-slot selects get just the preset label. Selects that are not on
// the page are skipped.
function populatePresets() {
  const makeOption = (value, text) => {
    const option = document.createElement("option");
    option.value = value;
    option.textContent = text;
    return option;
  };

  // Recipe form + profile presets (detailed labels).
  for (const selectId of ["preset", "profile-preset"]) {
    const select = $(selectId);
    if (!select) continue;
    select.innerHTML = '<option value="">— select to autofill —</option>';
    for (const preset of state.presets) {
      const detail = `${preset.label} (θ=${preset.theta.toLocaleString()}, T_train=${preset.T_train})`;
      select.appendChild(makeOption(preset.id, detail));
    }
  }

  // Compare slot presets (short labels).
  for (const select of document.querySelectorAll(".compare-preset")) {
    select.innerHTML = '<option value="">— or preset —</option>';
    for (const preset of state.presets) {
      select.appendChild(makeOption(preset.id, preset.label));
    }
  }
}
// Rebuild the recipe <select>s (single-recipe mode and compare mode) from
// state.recipes. Each option's value is the recipe id and its text is the id
// immediately followed by the recipe name. Missing selects are skipped.
function populateRecipes() {
  for (const selectId of ["recipe-select", "compare-recipe"]) {
    const select = $(selectId);
    if (!select) continue;

    select.innerHTML = '<option value="">— select a recipe —</option>';
    for (const recipe of state.recipes) {
      const option = document.createElement("option");
      option.value = recipe.id;
      option.textContent = `${recipe.id}${recipe.name}`;
      select.appendChild(option);
    }
  }
}
// Unlock the controls that stay disabled until Pyodide and the TAF code are
// ready, then render the panels that do not depend on Pyodide and restore any
// state encoded in the URL.
function enableUI() {
  const controlIds = [
    "ask-btn",
    "recipe-select",
    "preset",
    "profile-preset",
    "profile-btn",
    "compare-recipe",
    "compare-btn",
    "inspector-btn",
  ];
  for (const controlId of controlIds) {
    $(controlId).disabled = false;
  }

  // Render community feed + falsification (independent of Pyodide)
  renderFalsificationDashboard();
  loadCommunityFeed();

  // Restore from URL if present
  parseUrlState();
}
// Write a plain-text message into the #status element.
function setStatus(msg) {
  const statusEl = $("status");
  statusEl.textContent = msg;
}
// ════════════════════════════════════════════════════════════════════
// Main-panel wrap: every <main> section gets a foldable details/summary
// shell at runtime so users can collapse any panel they don't need open.
// h2 is moved INTO summary so its data-i18n binding survives. Idempotent.
// ════════════════════════════════════════════════════════════════════
// Wrap the contents of each <main> section in an open <details class="main-panel">
// whose <summary> holds the section's own <h2>. Sections are left alone when they
// are the status bar, already contain such a wrapper, or have no direct <h2>.
function wrapMainSectionsAsFoldable() {
  for (const section of document.querySelectorAll("main > section")) {
    if (section.id === "status-bar") continue; // skip loading bar
    if (section.querySelector(":scope > details.main-panel")) continue; // already wrapped

    const heading = section.querySelector(":scope > h2");
    if (!heading) continue;

    const panel = document.createElement("details");
    panel.className = "main-panel";
    panel.open = true;

    const title = document.createElement("summary");
    title.className = "main-panel-title";
    title.appendChild(heading); // moving the node keeps its data-i18n and children
    panel.appendChild(title);

    // Move everything still left in the section into the panel, in order.
    let child;
    while ((child = section.firstChild)) {
      panel.appendChild(child);
    }
    section.appendChild(panel);
  }

  // Stop ⓘ tooltip clicks inside summaries from toggling the panel.
  const swallowClick = (event) => event.stopPropagation();
  for (const infoEl of document.querySelectorAll(".main-panel > .main-panel-title .info")) {
    infoEl.addEventListener("click", swallowClick);
  }
}
// Wrap the panels once at module load; the lookup below relies on the
// <details class="main-panel"> wrapper that this call creates.
wrapMainSectionsAsFoldable();
// v0.7.7 — task-tiles is the primary entry point; collapse the legacy 14-tab
// strip by default so users don't see duplicated navigation. Power users can
// still expand it with one click.
const __modeDetails = document.querySelector("#mode-section > details.main-panel");
// Null when #mode-section is absent or was not wrapped (e.g. it has no direct <h2>).
if (__modeDetails) __modeDetails.open = false;
// ════════════════════════════════════════════════════════════════════
// Mode toggle
// ════════════════════════════════════════════════════════════════════
// v0.7.7 — task tiles: clicking a tile-mode-link button triggers the equivalent mode-btn.
// Reuses the mode switcher entirely (no duplicate state). Smoothly scrolls to the
// activated section so the user immediately sees the form they expected.
// Mode switching: task tiles ([data-mode-link]) and the tab strip (.mode-btn).
// Both handlers live in one block scope so they can share the lookup tables
// below without introducing new module-level names.
{
  // Single source of truth: mode name → id of the <section> that mode reveals.
  const SECTION_ID_BY_MODE = {
    ask: "ask-section", recipe: "recipe-section", profile: "profile-section",
    compare: "compare-section", inspector: "inspector-section",
    diagnose: "diagnose-section", phase: "phase-section", unmask: "unmask-section",
    template: "template-section", arena: "arena-section", contam: "contam-section",
    quant: "quant-section", drift: "drift-section", niah: "niah-section",
    saturation: "saturation-section",
    cot: "cot-section",
    peft: "peft-section",
    cache: "cache-section",
    speculative: "speculative-section",
    tax: "tax-section",
    longscore: "longscore-section",
    hub: "hub-section",
  };

  // Every section a mode switch hides. "form-section" has no tab of its own
  // (the recipe selector reveals it) but is hidden on every switch as well.
  const SWITCHABLE_SECTION_IDS = [...Object.values(SECTION_ID_BY_MODE), "form-section"];

  // Per-mode initialisers, wrapped in thunks so each init function is only
  // resolved when its tab is actually activated.
  const INIT_BY_MODE = {
    phase: () => initPhaseDiagram(),
    saturation: () => initSaturation(),
    cot: () => initCot(),
    peft: () => initPeft(),
    cache: () => initCacheDiff(),
    speculative: () => initSpeculative(),
    tax: () => initTax(),
    longscore: () => initLongscore(),
    hub: () => initHub(),
  };

  // Own-property lookup so a stray mode such as "constructor" resolves to undefined.
  const lookup = (table, key) =>
    (Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined);

  // v0.7.7 — task tiles: clicking a [data-mode-link] element triggers the
  // equivalent mode tab (no duplicate state), then scrolls the activated
  // section into view so the tile click feels responsive.
  document.addEventListener("click", (e) => {
    const linkBtn = e.target.closest("[data-mode-link]");
    if (!linkBtn) return;
    const targetMode = linkBtn.dataset.modeLink;
    const targetTab = document.querySelector(`.mode-btn[data-mode="${targetMode}"]`);
    if (!targetTab) return;
    targetTab.click();
    const sectionId = lookup(SECTION_ID_BY_MODE, targetMode);
    const sec = sectionId ? document.getElementById(sectionId) : null;
    if (sec) sec.scrollIntoView({ behavior: "smooth", block: "start" });
  });

  // Tab strip: mark the clicked tab active, show only its section, update the
  // mode description and run the mode's lazy initialiser (if it has one).
  document.querySelectorAll(".mode-btn").forEach(btn => {
    btn.addEventListener("click", () => {
      document.querySelectorAll(".mode-btn").forEach(b => {
        b.classList.remove("active");
        b.setAttribute("aria-selected", "false");
      });
      btn.classList.add("active");
      btn.setAttribute("aria-selected", "true");

      const mode = btn.dataset.mode;
      state.currentMode = mode;

      // Hide all mode sections
      for (const id of SWITCHABLE_SECTION_IDS) {
        const el = $(id);
        if (el) el.style.display = "none";
      }

      // Show selected. Guarded like the hide loop above: a section missing from
      // the page must not abort the description update and init hook below.
      const sectionId = lookup(SECTION_ID_BY_MODE, mode);
      const sectionEl = sectionId ? $(sectionId) : null;
      if (sectionEl) sectionEl.style.display = "";

      $("mode-desc").textContent = t(`mode_desc.${mode}`) || "";

      const init = lookup(INIT_BY_MODE, mode);
      if (init) init();
    });
  });
}
// ════════════════════════════════════════════════════════════════════
// Diagnose mode: build the diagnose_model.py CLI command
// ════════════════════════════════════════════════════════════════════
// Assemble the diagnose_model.py command line from the Diagnose form.
// Returns a shell-comment placeholder when no model id is entered; otherwise
// the command with one argument per line, joined by backslash continuations.
// --N is omitted when it is left at 2000.
function buildDiagnoseCommand() {
  const readText = (id, fallback = "") => ($(id)?.value || fallback).trim();
  const isChecked = (id) => $(id)?.checked;

  const model = readText("diag-model");
  if (!model) {
    return "# Please enter a HuggingFace model id";
  }

  const theta = readText("diag-theta");
  const sampleCount = readText("diag-N", "2000");
  const localPath = readText("diag-local");

  // null marks an argument that is left out of the final command.
  const optionalArgs = [
    theta ? `--theta ${theta}` : null,
    sampleCount && sampleCount !== "2000" ? `--N ${sampleCount}` : null,
    localPath ? `--local "${localPath}"` : null,
    isChecked("diag-fast") ? "--fast" : null,
    isChecked("diag-cpu") ? "--cpu" : null,
    isChecked("diag-4bit") ? "--load_in_4bit" : null,
  ].filter((arg) => arg !== null);

  return ["python cli/diagnose_model.py", `--model ${model}`, ...optionalArgs].join(" \\\n ");
}
// Diagnose mode: the build button renders the generated CLI command into
// #diag-cmd and reveals the output panel. No-op when the button is absent.
const _diagBuildBtn = $("diag-build-btn");
_diagBuildBtn?.addEventListener("click", function showDiagnoseCommand() {
  const commandText = buildDiagnoseCommand();
  $("diag-cmd").textContent = commandText;
  $("diag-output").style.display = "";
});
// Diagnose mode: copy the generated command to the clipboard and give brief
// feedback on the button itself. No-op when the button is absent.
const _diagCopyBtn = $("diag-copy-btn");
if (_diagCopyBtn) {
  // How long the "copied"/"failed" feedback stays on the button.
  const COPY_FEEDBACK_MS = 1800;

  // Uses the imported t() (like the rest of this module) rather than a
  // window-level global that this module never defines.
  const restoreCopyLabel = () => {
    _diagCopyBtn.textContent = t("diagnose.copy_btn") || "📋 Copy to clipboard";
  };

  _diagCopyBtn.addEventListener("click", async () => {
    const cmd = $("diag-cmd").textContent;
    if (!cmd) return;
    try {
      // Also throws (and is caught below) when navigator.clipboard is
      // unavailable, e.g. on a non-secure origin.
      await navigator.clipboard.writeText(cmd);
      _diagCopyBtn.textContent = "✓ Copied";
    } catch (e) {
      _diagCopyBtn.textContent = "✗ Copy failed (browser blocks)";
    }
    // Restore the label after success AND failure so the button never stays
    // stuck on the error text.
    setTimeout(restoreCopyLabel, COPY_FEEDBACK_MS);
  });
}
// Make sure inspector section is hidden initially
// (the .mode-btn click handler clears this inline display when the inspector
// tab is selected). Skipped when the section is not on the page.
const _inspectorSection = $("inspector-section");
if (_inspectorSection) _inspectorSection.style.display = "none";
// ════════════════════════════════════════════════════════════════════
// Recipe selector
// ════════════════════════════════════════════════════════════════════
// Recipe selector: choosing the blank option hides the parameter form; choosing
// a recipe records it in state, shows its description and rebuilds the form.
$("recipe-select").addEventListener("change", ({ target }) => {
  const recipeId = target.value;
  if (!recipeId) {
    $("form-section").style.display = "none";
    return;
  }

  const recipe = state.recipesById[recipeId];
  state.currentRecipe = recipe;
  $("recipe-desc-display").textContent = recipe.description;
  $("form-section").style.display = "";
  buildDynamicForm(recipe);
});
// Rebuild #dynamic-form with one labelled text input per recipe parameter.
// Inputs get id `param_<name>`, carry the name in data-param, and start from
// the recipe's default value (empty when it has none). Parameters with an entry
// in PARAM_TOOLTIPS get an ⓘ tooltip span inside the label. Enables #run-btn.
function buildDynamicForm(recipe) {
  const formRoot = $("dynamic-form");
  formRoot.innerHTML = "";
  const presetValues = getRecipeDefaults(recipe.id);

  for (const paramName of recipe.params) {
    const fieldId = `param_${paramName}`;

    const label = document.createElement("label");
    label.htmlFor = fieldId;
    label.innerHTML = paramLabel(paramName);
    const tooltipHtml = PARAM_TOOLTIPS[paramName];
    if (tooltipHtml) {
      const infoIcon = document.createElement("span");
      infoIcon.className = "info";
      infoIcon.innerHTML = `<span class="tooltip">${tooltipHtml}</span>`;
      label.appendChild(infoIcon);
    }

    const input = document.createElement("input");
    input.type = "text";
    input.id = fieldId;
    input.dataset.param = paramName;
    const initial = presetValues[paramName];
    input.value = initial === undefined ? "" : String(initial);

    const field = document.createElement("div");
    field.className = "form-field";
    field.appendChild(label);
    field.appendChild(input);
    formRoot.appendChild(field);
  }

  $("run-btn").disabled = false;
}
// Map a recipe parameter name to its human-readable form label.
// Unknown names are returned unchanged.
function paramLabel(name) {
  const labels = {
    theta: "θ (rope_theta)", T_train: "T_train", T_eval: "T_eval (target context)",
    n_attention_heads: "num_attention_heads", n_kv_heads: "num_key_value_heads",
    d_head: "head_dim", n_layers: "num_hidden_layers", n_params: "n_params (e.g. 8e9)",
    has_SWA: "Has SWA? (true/false)",
    N_params: "N_params (e.g. 8e9)", D_tokens: "D_tokens (or empty for Chinchilla)",
    gpu: "GPU", n_gpus: "n_gpus", mfu: "MFU (default 0.45)",
    api_model: "API model to compare", monthly_tokens_M: "Monthly tokens (M)",
    USD_budget: "USD budget", bytes_per_weight: "Bytes per weight (BF16=2)",
    target_tokens_per_day: "Target tokens/day", concurrent_users: "Concurrent users",
  };
  // Own-property check: a plain `labels[name]` also finds inherited members,
  // so a parameter called "constructor" or "toString" would yield a function
  // instead of falling back to the raw name.
  return Object.prototype.hasOwnProperty.call(labels, name) ? labels[name] : name;
}
// Tooltip text per recipe parameter name. The values are HTML fragments:
// buildDynamicForm() inserts them via innerHTML inside a <span class="tooltip">,
// so entries must stay trusted, static markup. Parameters without an entry get
// no ⓘ icon.
const PARAM_TOOLTIPS = {
theta: "<strong>RoPE base frequency</strong>. From <code>config.rope_theta</code>. Higher = more long-range capacity. Typical: <code>10000</code> early models, <code>500000</code> Llama-3, <code>1000000</code> Qwen2.5.",
T_train: "<strong>Max context the model was trained on</strong>. From <code>max_position_embeddings</code>. The model has never seen positions beyond this; extrapolating much further usually fails.",
T_eval: "<strong>Your target inference context length</strong>. The key knob. The whole question is: will the model behave well at <em>this</em> length?",
n_attention_heads: "Number of query heads. From <code>num_attention_heads</code>.",
n_kv_heads: "Number of K/V heads. If &lt; n_attention_heads → model uses GQA (Grouped Query Attention). Smaller = more memory-efficient KV cache but pushes γ toward Hagedorn boundary.",
d_head: "Per-head dimension. Typically <code>hidden_size / n_attention_heads</code>. Common: 64, 80, 128.",
n_layers: "Number of transformer layers. From <code>num_hidden_layers</code>.",
n_params: "<strong>Total parameter count</strong>. Use scientific notation: <code>8e9</code> for 8B. Threshold ~400M is the induction-head emergence boundary (sign-flip in Δγ).",
has_SWA: "Sliding Window Attention. <code>true</code> for Mistral, gemma-2, phi-3. SWA lowers γ_decomposition by ~0.21.",
N_params: "Same as n_params. Total parameter count, scientific notation (e.g. <code>8e9</code>).",
D_tokens: "Number of training tokens. Leave empty to use Chinchilla 20:1 default (D = 20·N).",
gpu: "GPU model from the catalog. Options: H100 SXM, H100 PCIe, H200, B200, A100 80GB, A100 40GB, L40S, MI300X, RTX 4090, RTX 5090, RTX 5060Ti.",
n_gpus: "Number of GPUs in your training/serving cluster.",
mfu: "<strong>Model FLOPs Utilization</strong>. Realistic fraction of peak FLOPs achieved. Typical: 0.4-0.5 for well-tuned. Default 0.45.",
api_model: "Frontier API to compare against. Options: GPT-4o, GPT-4o-mini, Claude-Opus-4, Claude-Sonnet-4, Claude-Haiku-4, Gemini-1.5-Pro, DeepSeek-V3, Llama-3.3-70B (Together).",
monthly_tokens_M: "Expected monthly token volume <em>in millions</em>. e.g. <code>10</code> = 10 million tokens/month.",
USD_budget: "Your training budget in US dollars (no symbol). e.g. <code>5000</code> for $5K.",
bytes_per_weight: "Memory per parameter. BF16/FP16 = 2, INT8 = 1, INT4 = 0.5.",
target_tokens_per_day: "How many tokens/day you need to serve. e.g. <code>10000000</code> = 10M tokens/day.",
concurrent_users: "Simultaneous concurrent requests. Affects KV cache memory needed.",
};
// Default form values for a recipe, keyed by parameter name.
// Returns a fresh object on every call; an empty object when the recipe has
// no defaults registered.
function getRecipeDefaults(recipeId) {
  const defaultsByRecipe = {
    "X-1": { N_params: "8e9", D_tokens: "", gpu: "H100 SXM", n_gpus: 8, mfu: 0.45,
             api_model: "GPT-4o", monthly_tokens_M: 10.0 },
    "X-2": { theta: 500000, T_train: 8192, T_eval: 32000,
             n_attention_heads: 32, n_kv_heads: 8, d_head: 128,
             n_layers: 32, n_params: "8e9", has_SWA: false },
    "X-3": { USD_budget: 5000, gpu: "H100 SXM", mfu: 0.45, n_gpus: 1 },
    "X-5": { N_params: "8e9", T_eval: 4096, n_layers: 32, n_kv_heads: 8, d_head: 128,
             bytes_per_weight: 2.0, target_tokens_per_day: 10000000, concurrent_users: 1 },
    "X-19": { theta: 500000, T_train: 8192, T_eval: 8192,
              n_attention_heads: 32, n_kv_heads: 8, d_head: 128,
              n_layers: 32, n_params: "8e9", has_SWA: false },
  };
  // Own-property check: a plain `table[id] || {}` also matches inherited
  // members, so an id such as "constructor" would return a function rather
  // than an empty defaults object.
  return Object.prototype.hasOwnProperty.call(defaultsByRecipe, recipeId)
    ? defaultsByRecipe[recipeId]
    : {};
}
// ════════════════════════════════════════════════════════════════════
// Preset autofill (works in recipe mode)
// ════════════════════════════════════════════════════════════════════
// Preset autofill: look the chosen preset up on the Python side and copy its
// fields into the recipe form. Also mirrors the id into the HF id input (when
// present) and remembers it in state.lastModelId for filename/hash.
$("preset").addEventListener("change", (e) => {
  const modelId = e.target.value;
  if (!modelId) return;
  state.lastModelId = modelId; // remember for filename/hash

  // Mirror behavior with profile-preset: also fill HF id input if present.
  const hfIdInput = $("hf-id");
  if (hfIdInput) {
    hfIdInput.value = modelId;
    const hfStatus = $("hf-status");
    if (hfStatus) hfStatus.textContent = tFmt("profile.preset_loaded", { id: modelId });
  }

  // JSON.stringify yields a safely quoted string literal for the Python call.
  const result = state.pyodide.runPython(`get_preset(${JSON.stringify(modelId)})`);

  // runPython returns a PyProxy for Python dicts but a plain JS value for
  // None/str/numbers, so `result` can be undefined here; `?.` keeps that case
  // from throwing.
  let preset = result;
  if (result?.toJs) {
    try {
      preset = result.toJs({ dict_converter: Object.fromEntries });
    } finally {
      // A PyProxy pins its Python object until destroyed; release it once the
      // data has been copied out so repeated preset changes do not leak.
      result.destroy?.();
    }
  }

  if (!preset || Object.keys(preset).length === 0) return;
  fillRecipeForm(preset);
});
// Copy preset/config values into the matching `param_<key>` inputs of the
// dynamic recipe form. Keys without a matching input are ignored.
//   p — plain object of parameter name → value (number, boolean or string).
// n_params is shown in 3-significant-digit scientific notation and mirrored
// into `param_N_params` (used by the cost recipes). Other numbers above 1e6 use
// scientific notation only when that is lossless.
function fillRecipeForm(p) {
  const formatValue = (key, value) => {
    if (typeof value !== "number") return String(value);
    // Parameter counts are deliberately rounded for readability (e.g. "8.03e+9").
    if (key === "n_params") return value.toExponential(2);
    if (value > 1e6) {
      // toExponential(2) keeps only three significant digits; using it blindly
      // would turn T_train=1048576 into "1.05e+6" and silently change the
      // analysed configuration. Fall back to the exact decimal form instead.
      const sci = value.toExponential(2);
      return Number(sci) === value ? sci : String(value);
    }
    return String(value);
  };

  for (const [key, value] of Object.entries(p)) {
    const field = $(`param_${key}`);
    if (field) field.value = formatValue(key, value);

    // Also fill N_params for cost recipes
    if (key === "n_params") {
      const costField = $("param_N_params");
      if (costField) costField.value = formatValue(key, value);
    }
  }
}
// ════════════════════════════════════════════════════════════════════
// HF Hub fetch (any model)
// ════════════════════════════════════════════════════════════════════
// Build the same unsloth mirror candidates used in spec-decode. Lets us
// fetch config.json for gated families (Llama / Mistral / Gemma) without
// requiring HF auth — the unsloth redistributions are public and ship the
// original config.json verbatim (they only quantize weights, not metadata).
// List the unsloth mirror repo ids worth trying for a model id, in priority
// order: plain name, "Meta-" prefixed name, then the "-bnb-4bit" variants of
// both. The "Meta-" variants are left out when the repo name already starts
// with "Meta-". The input id itself is never returned; an id with an empty
// last path segment yields [].
function _hfMirrorCandidates(modelId) {
  const repoName = modelId.split("/").pop();
  if (!repoName) return [];

  const stems = repoName.startsWith("Meta-") ? [repoName] : [repoName, `Meta-${repoName}`];
  const candidates = new Set(); // a Set keeps insertion order and drops repeats
  for (const suffix of ["", "-bnb-4bit"]) {
    for (const stem of stems) {
      const candidate = `unsloth/${stem}${suffix}`;
      if (candidate !== modelId) candidates.add(candidate);
    }
  }
  return [...candidates];
}
// Fetch one repo's config.json and report the outcome as a result object
// instead of throwing on HTTP or parse failures:
//   { ok: true, data }               — parsed JSON body
//   { ok: false, status }            — non-2xx HTTP response
//   { ok: false, error: "parse_failed" } — 2xx response whose body is not JSON
// A rejected fetch() (network failure) still propagates to the caller.
async function _tryConfigUrl(modelId) {
  // /resolve/main/ rather than /raw/main/ — same lesson as spec-decode:
  // /resolve follows LFS for large files (irrelevant for config.json which
  // is always small, but consistent & future-proof). CORS is granted on both.
  const response = await fetch(`https://huggingface.co/${modelId}/resolve/main/config.json`);
  if (!response.ok) {
    return { ok: false, status: response.status };
  }

  let parsed;
  try {
    parsed = await response.json();
  } catch {
    return { ok: false, error: "parse_failed" };
  }
  return { ok: true, data: parsed };
}
// Fetch config.json for a HF model id, falling back to public unsloth mirrors
// when the repo itself answers 401/403.
// Returns the parsed config object. A config obtained from a mirror is stamped
// with `__via_mirror` (mirror repo id) and `__mirror_of` (requested id).
// Throws:
//   - Error with code "gated" and `modelId` when the repo is gated and no
//     mirror has a usable config;
//   - Error naming the problem (invalid JSON, or the HTTP status) otherwise.
async function fetchHfConfig(modelId) {
  const configUrl = `https://huggingface.co/${modelId}/resolve/main/config.json`;

  // 1. Try the user-pasted id directly.
  const direct = await _tryConfigUrl(modelId);
  if (direct.ok) return direct.data;

  // 2. On 401/403, try open-mirror fallback (unsloth/...). On other
  //    errors (404/parse), a mirror won't help — surface them below.
  if (direct.status === 401 || direct.status === 403) {
    for (const cand of _hfMirrorCandidates(modelId)) {
      const mirrored = await _tryConfigUrl(cand);
      // Only accept object payloads: stamping a null/primitive body would throw.
      if (mirrored.ok && mirrored.data !== null && typeof mirrored.data === "object") {
        // Stamp the mirror id so callers can surface a "fetched via mirror"
        // hint if they want; backwards-compatible with code that ignores it.
        mirrored.data.__via_mirror = cand;
        mirrored.data.__mirror_of = modelId;
        return mirrored.data;
      }
    }
    const err = new Error(`🔒 ${modelId} is gated — accept license at https://huggingface.co/${modelId}`);
    err.code = "gated";
    err.modelId = modelId;
    throw err;
  }

  // A parse failure carries no HTTP status; report it as what it is rather
  // than as "HTTP undefined — config.json not found".
  if (direct.error === "parse_failed") {
    throw new Error(`config.json at ${configUrl} is not valid JSON`);
  }
  throw new Error(`HTTP ${direct.status} — config.json not found at ${configUrl}`);
}
// "Fetch from HF" button: download config.json for the typed model id, convert
// it to a preset and fill the recipe form. Progress and errors are reported in
// #hf-status; the button is disabled while the request is in flight.
$("hf-fetch-btn").addEventListener("click", async () => {
  const statusEl = $("hf-status");
  const fetchBtn = $("hf-fetch-btn");
  const modelId = $("hf-id").value.trim();
  if (!modelId) {
    statusEl.textContent = "⚠ Enter a model id like 'Qwen/Qwen2.5-32B-Instruct'";
    return;
  }

  statusEl.textContent = `⏳ Fetching config.json from HF Hub for ${modelId}...`;
  fetchBtn.disabled = true;
  state.lastModelId = modelId; // remember for filename/hash
  try {
    const cfg = await fetchHfConfig(modelId);
    const preset = configToPreset(cfg, modelId);
    fillRecipeForm(preset);

    // Build the message from DOM nodes instead of innerHTML: modelId is typed
    // by the user and the family is derived from a remote config, so neither
    // may be interpreted as markup.
    const strong = document.createElement("strong");
    strong.textContent = modelId;
    statusEl.textContent = "";
    statusEl.append(
      "✅ Config loaded for ",
      strong,
      ` (family: ${preset._family}). Verify values, click Analyze.`,
    );
  } catch (err) {
    statusEl.textContent = `❌ ${err.message}`;
  } finally {
    fetchBtn.disabled = false;
  }
});
// ════════════════════════════════════════════════════════════════════
// 🪟 Unmask mode (v0.7.0 anti-bullshit pack #1)
// ════════════════════════════════════════════════════════════════════
// Tiny string-template helper: t(key) with {placeholder} substitution.
// Falls back to the raw key when the i18n entry is missing so dev sees the gap.
// Tiny string-template helper: t(key) with {placeholder} substitution.
//   key    — i18n key; the raw key is used as the template when t() has no entry.
//   params — placeholder name → value. null/undefined values render as "—",
//            numbers via toLocaleString(), everything else via String().
//            A null/undefined `params` is treated as "no placeholders".
// Every occurrence of each placeholder is replaced, and values are inserted
// literally.
function tFmt(key, params = {}) {
  let s = t(key) || key;
  for (const [k, v] of Object.entries(params ?? {})) {
    const fmtVal = v === null || v === undefined ? "—"
      : (typeof v === "number" ? v.toLocaleString() : String(v));
    // split/join instead of a RegExp replace: the placeholder name needs no
    // regex escaping, and "$&" / "$$" sequences inside the value (e.g. in a
    // JSON parse error quoting user text) are not treated as replacement patterns.
    s = s.split(`{${k}}`).join(fmtVal);
  }
  return s;
}
// Accent color per unmask verdict, used by renderUnmaskCard() for the hero
// border and the verdict text. Verdicts without an entry fall back to `unknown`.
const VERDICT_COLOR = {
honest: "#3fb950",
inflated: "#f1c40f",
severely_inflated: "#f85149",
yarn_extended: "#f1c40f",
unknown: "#8b949e",
};
// Build the HTML for an unmask result card.
//   result  — object returned by unmaskConfig(): verdict, ratio,
//             declaredContext, effectiveContext, flags, warnings[], and
//             optional recoCode / recoParams.
//   modelId — label shown under the verdict (omitted when empty).
// Returns an HTML string meant for innerHTML. The config behind `result` comes
// from arbitrary HF repos or pasted text, so every config-derived value is
// HTML-escaped or number-formatted before it is interpolated. i18n strings are
// treated as trusted markup.
function renderUnmaskCard(result, modelId = "") {
  const color = VERDICT_COLOR[result.verdict] || VERDICT_COLOR.unknown;
  const ratioPct = (result.ratio * 100).toFixed(1);
  const f = result.flags;
  const fmtN = (x) => x === null || x === undefined ? "—" : Number(x).toLocaleString();
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c =>
    ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));
  // Escape placeholder values before they are merged into (trusted) i18n
  // templates. Numbers and null/undefined pass through so tFmt() can still
  // apply its own number / "—" formatting.
  const escapeParams = (params) => Object.fromEntries(
    Object.entries(params ?? {}).map(([k, v]) =>
      [k, (typeof v === "number" || v === null || v === undefined) ? v : escapeHtml(v)]));

  const verdictLabel = t(`unmask.verdict.${result.verdict}`) || result.verdict;
  const labelDeclared = t("unmask.label.declared") || "Declared context";
  const labelEffective = t("unmask.label.effective") || "Effective (estimate)";
  const labelRatio = t("unmask.label.ratio") || "Ratio";
  const sectionFlags = t("unmask.section.flags") || "Architecture flags";
  const sectionWarn = t("unmask.section.warnings") || "Warnings";
  const sectionReco = t("unmask.section.reco") || "Recommendation";

  // Architecture flags row labels
  const flagSwa = t("unmask.flag.swa") || "SWA";
  const flagRope = t("unmask.flag.rope") || "RoPE scaling";
  const flagGqa = t("unmask.flag.gqa") || "GQA";
  const flagLayers = t("unmask.flag.layers") || "Layers";
  const flagDhead = t("unmask.flag.dhead") || "d_head";
  const flagTheta = t("unmask.flag.theta") || "RoPE θ";
  const flagYes = t("unmask.flag.yes") || "yes";
  const flagNo = t("unmask.flag.no") || "no";

  const swaText = f.hasSWA
    ? `${flagYes} (window = ${fmtN(f.swaWindow)})`
    : flagNo;
  const ropeText = f.hasYaRN
    ? `${escapeHtml(f.ropeScalingType)} (factor = ${escapeHtml(f.yarnFactor)}, original = ${fmtN(f.yarnOriginal)})`
    : flagNo;
  const gqaText = f.hasGQA
    ? `${flagYes} (${escapeHtml(f.n_kv_heads)} kv / ${escapeHtml(f.n_attn_heads)} attn heads)`
    // Function replacer so "$" sequences in the value are inserted literally.
    : (t("unmask.flag.full_mha") || "no (full MHA, {n} heads)")
        .replace("{n}", () => escapeHtml(f.n_attn_heads ?? "?"));

  const warningsHtml = result.warnings.length
    ? `<details class="unmask-panel" open><summary class="unmask-panel-title">${sectionWarn}</summary><ul>${result.warnings.map(w =>
        `<li>${tFmt("unmask.warn." + w.code, escapeParams(w.params))}</li>`).join("")}</ul></details>`
    : "";
  const recoHtml = result.recoCode
    ? `<details class="unmask-panel" open><summary class="unmask-panel-title">${sectionReco}</summary><p class="unmask-reco">${tFmt("unmask.reco." + result.recoCode, escapeParams(result.recoParams))}</p></details>`
    : "";

  return `
    <div class="unmask-result">
      <div class="unmask-hero" style="border-color: ${color};">
        <div class="unmask-verdict" style="color: ${color};">${verdictLabel}</div>
        ${modelId ? `<div class="unmask-model"><code>${escapeHtml(modelId)}</code></div>` : ""}
        <div class="unmask-numbers">
          <div><span class="unmask-num-label">${labelDeclared}</span><span class="unmask-num-val">${fmtN(result.declaredContext)}</span></div>
          <div><span class="unmask-num-label">${labelEffective}</span><span class="unmask-num-val">${fmtN(result.effectiveContext)}</span></div>
          <div><span class="unmask-num-label">${labelRatio}</span><span class="unmask-num-val">${ratioPct}%</span></div>
        </div>
      </div>
      <div class="unmask-details">
        <details class="unmask-panel" open>
          <summary class="unmask-panel-title">${sectionFlags}</summary>
          <ul>
            <li><strong>${flagSwa}:</strong> ${swaText}</li>
            <li><strong>${flagRope}:</strong> ${ropeText}</li>
            <li><strong>${flagGqa}:</strong> ${gqaText}</li>
            <li><strong>${flagLayers}:</strong> ${fmtN(f.n_layers)} · <strong>${flagDhead}:</strong> ${fmtN(f.d_head)} · <strong>${flagTheta}:</strong> ${fmtN(f.rope_theta)}</li>
          </ul>
        </details>
        ${warningsHtml}
        ${recoHtml}
      </div>
    </div>
  `;
}
// Unmask a model by HF id: fetch its config.json, run unmaskConfig() and render
// the result card. Status and errors go to #unmask-status; the fetch button is
// disabled while the request is in flight and the output is cleared on failure.
async function runUnmaskFromId() {
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c =>
    ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));

  const modelId = ($("unmask-id").value || "").trim();
  if (!modelId) {
    $("unmask-status").textContent = t("unmask.status.empty_id") || "⚠ Enter a model id.";
    return;
  }

  $("unmask-status").textContent = tFmt("unmask.status.fetching", { modelId });
  $("unmask-fetch-btn").disabled = true;
  try {
    const cfg = await fetchHfConfig(modelId);
    const result = unmaskConfig(cfg);
    $("unmask-output").innerHTML = renderUnmaskCard(result, modelId);
    const verdictLocalized = t(`unmask.verdict.${result.verdict}`) || result.verdict;
    $("unmask-status").textContent = tFmt("unmask.status.success", { modelId, verdict: verdictLocalized });
  } catch (err) {
    if (err.code === "gated") {
      // err.modelId is whatever the user typed; escape it before it goes into
      // markup and into the href attribute.
      const safeId = escapeHtml(err.modelId);
      const gatedMsg = t("hf_auto.gated_msg") || "is gated. Accept the license here:";
      $("unmask-status").innerHTML = `🔒 <strong>${safeId}</strong> ${gatedMsg} <a href="https://huggingface.co/${safeId}" target="_blank" rel="noopener">huggingface.co/${safeId}</a>`;
    } else {
      $("unmask-status").textContent = `❌ ${err.message}`;
    }
    $("unmask-output").innerHTML = "";
  } finally {
    $("unmask-fetch-btn").disabled = false;
  }
}
// Unmask a config.json pasted into #unmask-paste. Empty input and invalid JSON
// are reported in #unmask-status without touching the output; otherwise the
// parsed config goes through unmaskConfig() and is rendered as a card labelled
// as a pasted config.
function runUnmaskFromPaste() {
  const pasted = ($("unmask-paste").value || "").trim();
  const statusEl = $("unmask-status");

  if (!pasted) {
    statusEl.textContent = t("unmask.status.empty_paste") || "⚠ Paste a config.json first.";
    return;
  }

  let parsedConfig;
  try {
    parsedConfig = JSON.parse(pasted);
  } catch (parseError) {
    statusEl.textContent = tFmt("unmask.status.invalid_json", { error: parseError.message });
    return;
  }

  const verdictInfo = unmaskConfig(parsedConfig);
  const cardLabel = t("unmask.pasted_label") || "(pasted config)";
  $("unmask-output").innerHTML = renderUnmaskCard(verdictInfo, cardLabel);

  const localizedVerdict = t(`unmask.verdict.${verdictInfo.verdict}`) || verdictInfo.verdict;
  statusEl.textContent = tFmt("unmask.status.success_paste", { verdict: localizedVerdict });
}
// Wire the Unmask controls. Optional chaining makes each line a no-op when the
// control is not on the page.
$("unmask-fetch-btn")?.addEventListener("click", runUnmaskFromId);
$("unmask-paste-btn")?.addEventListener("click", runUnmaskFromPaste);
// Pressing Enter in the model id field behaves like clicking the fetch button;
// the key's default action is cancelled.
$("unmask-id")?.addEventListener("keydown", (e) => {
if (e.key === "Enter") { e.preventDefault(); runUnmaskFromId(); }
});
// ════════════════════════════════════════════════════════════════════
// 📜 Chat-template Sniffer (v0.7.1 anti-bullshit pack #2)
// ════════════════════════════════════════════════════════════════════
// Accent color per chat-template verdict, used by renderTemplateCard() for the
// hero border and the verdict text. Verdicts without an entry fall back to `unknown`.
const TEMPLATE_VERDICT_COLOR = {
ok: "#3fb950",
custom: "#f1c40f",
missing: "#f85149",
base_model: "#8b949e",
unknown: "#8b949e",
};
// Download and parse tokenizer_config.json for a HF model id.
// Returns the parsed JSON. Throws an Error with code "gated" and `modelId` on
// HTTP 401/403, and a plain Error carrying the status and URL on any other
// non-2xx response.
async function fetchHfTokenizerConfig(modelId) {
  const url = `https://huggingface.co/${modelId}/raw/main/tokenizer_config.json`;
  const resp = await fetch(url);
  if (resp.ok) {
    return await resp.json();
  }

  const isGated = [401, 403].includes(resp.status);
  if (!isGated) {
    throw new Error(`HTTP ${resp.status} — tokenizer_config.json not found at ${url}`);
  }
  throw Object.assign(
    new Error(`🔒 ${modelId} is gated — accept license at https://huggingface.co/${modelId}`),
    { code: "gated", modelId },
  );
}
// Build the HTML for a chat-template sniffer result card.
//   result  — sniffer output: verdict, detectedLabel / detectedFamily,
//             hasChatTemplate, vllmTemplate, rawTemplate, rawTemplateLength,
//             matchedMarkers[], warnings[].
//   modelId — label shown under the verdict and substituted into the sample
//             commands ("MODEL_ID" when empty).
// Returns an HTML string: hero (verdict + three stats), then optional warnings,
// per-framework commands (only when a chat template exists) and a raw-template
// preview (only when one is present).
function renderTemplateCard(result, modelId = "") {
  const HTML_ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch]);
  const tr = (key, fallback) => t(key) || fallback;

  const accent = TEMPLATE_VERDICT_COLOR[result.verdict] || TEMPLATE_VERDICT_COLOR.unknown;
  const verdictLabel = tr(`template.verdict.${result.verdict}`, result.verdict);
  const labelFamily = tr("template.label.family", "Detected family");
  const labelMarkers = tr("template.label.markers", "Matched markers");
  const labelTplLen = tr("template.label.tpl_len", "Template length");
  const sectionWarn = tr("template.section.warnings", "Warnings");
  const sectionCmd = tr("template.section.commands", "Commands by framework");
  const sectionRaw = tr("template.section.raw", "Raw template (preview)");

  // Human-readable family name
  let familyName;
  if (result.detectedLabel) {
    familyName = result.detectedLabel;
  } else if (result.detectedFamily === "custom") {
    familyName = tr("template.family.custom", "custom (unknown family)");
  } else {
    familyName = tr("template.family.none", "(no chat_template)");
  }

  let warningsHtml = "";
  if (result.warnings.length) {
    const items = result.warnings
      .map((w) => `<li>${tFmt(`template.warn.${w.code}`, w.params)}</li>`)
      .join("");
    warningsHtml = [
      '<details class="unmask-panel" open>',
      `<summary class="unmask-panel-title">${sectionWarn}</summary>`,
      `<ul>${items}</ul>`,
      "</details>",
    ].join("\n");
  }

  // Framework commands — only show when we have a chat_template to apply.
  let cmdHtml = "";
  if (result.hasChatTemplate) {
    const shownId = modelId || "MODEL_ID";
    const lmEvalCmd = `lm_eval --model hf --model_args pretrained=${shownId} --tasks gsm8k --apply_chat_template --batch_size 8`;
    const vllmCmd = result.vllmTemplate
      ? `vllm serve ${shownId} --chat-template ${result.vllmTemplate}`
      : `vllm serve ${shownId} # template auto-detected from tokenizer_config`;
    const transformersCmd = [
      "from transformers import AutoTokenizer",
      `tok = AutoTokenizer.from_pretrained("${shownId}")`,
      "prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)",
    ].join("\n");
    const commandLines = (label, command) => [
      `<div class="template-cmd-label">${label}</div>`,
      `<pre class="template-cmd"><code>${escapeHtml(command)}</code></pre>`,
    ];
    cmdHtml = [
      "",
      '<details class="unmask-panel" open>',
      `<summary class="unmask-panel-title">${sectionCmd}</summary>`,
      '<div class="template-cmd-block">',
      ...commandLines("lm-evaluation-harness", lmEvalCmd),
      ...commandLines("vLLM serve", vllmCmd),
      ...commandLines("transformers (Python)", transformersCmd),
      "</div>",
      "</details>",
      "",
    ].join("\n");
  }

  // Raw preview only when present
  let rawHtml = "";
  if (result.rawTemplate) {
    rawHtml = [
      '<details class="unmask-panel">',
      `<summary class="unmask-panel-title">${sectionRaw}</summary>`,
      `<pre class="template-cmd"><code>${escapeHtml(result.rawTemplate)}</code></pre>`,
      "</details>",
    ].join("\n");
  }

  const stat = (label, value) =>
    `<div><span class="unmask-num-label">${label}</span><span class="unmask-num-val">${value}</span></div>`;
  const modelLine = modelId
    ? `<div class="unmask-model"><code>${escapeHtml(modelId)}</code></div>`
    : "";

  return [
    "",
    '<div class="unmask-result">',
    `<div class="unmask-hero" style="border-color: ${accent};">`,
    `<div class="unmask-verdict" style="color: ${accent};">${verdictLabel}</div>`,
    modelLine,
    '<div class="unmask-numbers">',
    stat(labelFamily, escapeHtml(familyName)),
    stat(labelMarkers, result.matchedMarkers.length),
    stat(labelTplLen, result.rawTemplateLength.toLocaleString()),
    "</div>",
    "</div>",
    '<div class="unmask-details">',
    warningsHtml,
    cmdHtml,
    rawHtml,
    "</div>",
    "</div>",
    "",
  ].join("\n");
}
/**
 * Fetch tokenizer_config.json for the id typed into #template-id, sniff its
 * chat template and render the result card into #template-output.
 *
 * Progress and errors are reported through #template-status. The fetch button
 * is disabled for the duration of the request and always re-enabled.
 *
 * @returns {Promise<void>}
 */
async function runTemplateFromId() {
  // The model id is user-typed; it must be escaped before it reaches innerHTML.
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, (c) =>
    ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));

  const modelId = ($("template-id").value || "").trim();
  if (!modelId) {
    $("template-status").textContent = t("template.status.empty_id") || "⚠ Enter a model id.";
    return;
  }
  $("template-status").textContent = tFmt("template.status.fetching", { modelId });
  $("template-fetch-btn").disabled = true;
  try {
    const cfg = await fetchHfTokenizerConfig(modelId);
    const result = sniffChatTemplate(cfg);
    $("template-output").innerHTML = renderTemplateCard(result, modelId);
    const verdictLocalized = t(`template.verdict.${result.verdict}`) || result.verdict;
    $("template-status").textContent = tFmt("template.status.success", { modelId, verdict: verdictLocalized });
  } catch (err) {
    if (err.code === "gated") {
      // Gated repos get a clickable license link, so this branch needs markup.
      const safeId = escapeHtml(err.modelId);
      const gatedMsg = t("hf_auto.gated_msg") || "is gated. Accept the license here:";
      $("template-status").innerHTML = `🔒 <strong>${safeId}</strong> ${gatedMsg} <a href="https://huggingface.co/${safeId}" target="_blank" rel="noopener">huggingface.co/${safeId}</a>`;
    } else {
      $("template-status").textContent = `❌ ${err.message}`;
    }
    $("template-output").innerHTML = "";
  } finally {
    $("template-fetch-btn").disabled = false;
  }
}
/**
 * Sniff a tokenizer_config.json pasted into #template-paste and render the
 * result card into #template-output.
 *
 * Empty input, unparsable JSON, and JSON whose top-level value is not an
 * object (null, a primitive, or an array) are reported through
 * #template-status without calling the sniffer.
 */
function runTemplateFromPaste() {
  const statusEl = $("template-status");
  const raw = ($("template-paste").value || "").trim();
  if (!raw) {
    statusEl.textContent = t("template.status.empty_paste") || "⚠ Paste a tokenizer_config.json first.";
    return;
  }
  let cfg;
  try {
    cfg = JSON.parse(raw);
  } catch (e) {
    statusEl.textContent = tFmt("template.status.invalid_json", { error: e.message });
    return;
  }
  // JSON.parse also accepts `null`, numbers, strings and arrays; none of those
  // is a tokenizer config, so reject them here instead of handing them on.
  if (cfg === null || typeof cfg !== "object" || Array.isArray(cfg)) {
    statusEl.textContent = tFmt("template.status.invalid_json", {
      error: "top-level value must be a JSON object",
    });
    return;
  }
  const result = sniffChatTemplate(cfg);
  const pastedLabel = t("template.pasted_label") || "(pasted config)";
  $("template-output").innerHTML = renderTemplateCard(result, pastedLabel);
  const verdictLocalized = t(`template.verdict.${result.verdict}`) || result.verdict;
  statusEl.textContent = tFmt("template.status.success_paste", { verdict: verdictLocalized });
}
// Wire the chat-template sniffer controls; each lookup is optional-chained so
// a missing element is skipped instead of throwing.
$("template-fetch-btn")?.addEventListener("click", runTemplateFromId);
$("template-paste-btn")?.addEventListener("click", runTemplateFromPaste);
$("template-id")?.addEventListener("keydown", (event) => {
  // Enter in the id field behaves like clicking Fetch.
  if (event.key !== "Enter") return;
  event.preventDefault();
  runTemplateFromId();
});
// ════════════════════════════════════════════════════════════════════
// 🎯 Arena-Elo CI reconstructor (v0.7.2 anti-bullshit pack #3)
// ════════════════════════════════════════════════════════════════════
/**
 * Render the Arena-Elo confidence-interval report as an HTML string.
 *
 * Reads from `result`: `ratings` (rank, model, elo, ci_low, ci_high, ci_width,
 * matches, wins, losses, ties_count), `ties` (rank_a, rank_b, model_a,
 * model_b, elo_diff, overlap_elo) and `summary` (total_votes, n_models,
 * n_ties, bootstrap_iters, ci_level).
 *
 * @param {object} result - Output of the CI computation.
 * @returns {string} HTML with three panels: ranked table, ties, summary.
 */
function renderArenaCard(result) {
  const ENTITY = {"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"};
  const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => ENTITY[ch]);
  const num = (x) => (x === null || x === undefined ? "—" : Number(x).toLocaleString());
  const oneDecimal = (x) => Math.round(x * 10) / 10;
  const joinLines = (...rows) => rows.join("\n");

  const rankedTitle = t("arena.section.ranked") || "Ranked Elos with 95% CIs";
  const tiesTitle = t("arena.section.ties") || "Statistical ties (CI overlap)";
  const summaryTitle = t("arena.section.summary") || "Summary";
  const headRank = t("arena.col.rank") || "#";
  const headModel = t("arena.col.model") || "Model";
  const headElo = t("arena.col.elo") || "Elo";
  const headCi = t("arena.col.ci") || "95% CI";
  const headSpread = t("arena.col.ci_width") || "CI width";
  const headMatches = t("arena.col.matches") || "Matches";
  const headWins = t("arena.col.wins") || "W / L / T";
  const noTiesText = t("arena.no_ties") || "No statistical ties — all pairs distinguishable at 95% CI.";

  // Ranked table body: one <tr> per rating, concatenated without separators.
  const rankedRows = Array.from(result.ratings, (row) => joinLines(
    `<tr>`,
    `<td class="arena-rank">#${row.rank}</td>`,
    `<td class="arena-model"><code>${esc(row.model)}</code></td>`,
    `<td class="arena-elo"><strong>${num(row.elo)}</strong></td>`,
    `<td class="arena-ci">[${num(row.ci_low)}, ${num(row.ci_high)}]</td>`,
    `<td class="arena-spread">±${num(oneDecimal(row.ci_width / 2))}</td>`,
    `<td class="arena-matches">${num(row.matches)}</td>`,
    `<td class="arena-wlt">${num(row.wins)} / ${num(row.losses)} / ${num(row.ties_count)}</td>`,
    `</tr>`,
  )).join("");

  // Ties panel: a sentence when every pair is distinguishable, else a table.
  let tiesBlock;
  if (result.ties.length === 0) {
    tiesBlock = `<p class="unmask-reco">${noTiesText}</p>`;
  } else {
    const tiesHead = joinLines(
      `<table class="arena-ties-table">`,
      `<thead><tr>`,
      `<th>${t("arena.col.tie_pair") || "Pair"}</th>`,
      `<th>${t("arena.col.tie_diff") || "Elo gap"}</th>`,
      `<th>${t("arena.col.tie_overlap") || "CI overlap"}</th>`,
      `</tr></thead><tbody>`,
    );
    const tiesRows = Array.from(result.ties, (pair) => joinLines(
      `<tr>`,
      `<td>#${pair.rank_a} <code>${esc(pair.model_a)}</code> vs #${pair.rank_b} <code>${esc(pair.model_b)}</code></td>`,
      `<td>${num(oneDecimal(pair.elo_diff))} Elo</td>`,
      `<td>${num(oneDecimal(pair.overlap_elo))} Elo</td>`,
      `</tr>`,
    )).join("");
    tiesBlock = `${tiesHead}${tiesRows}</tbody></table>`;
  }

  const stats = result.summary;
  const summaryBlock = joinLines(
    ``,
    `<ul>`,
    `<li><strong>${t("arena.summary.votes") || "Total votes"}:</strong> ${num(stats.total_votes)}</li>`,
    `<li><strong>${t("arena.summary.models") || "Models"}:</strong> ${num(stats.n_models)}</li>`,
    `<li><strong>${t("arena.summary.ties") || "Statistical ties"}:</strong> ${num(stats.n_ties)}</li>`,
    `<li><strong>${t("arena.summary.bootstrap") || "Bootstrap iters"}:</strong> ${num(stats.bootstrap_iters)}</li>`,
    `<li><strong>${t("arena.summary.ci_level") || "CI level"}:</strong> ${(stats.ci_level * 100).toFixed(0)}%</li>`,
    `</ul>`,
    ``,
  );

  return joinLines(
    ``,
    `<div class="arena-result">`,
    `<details class="unmask-panel" open>`,
    `<summary class="unmask-panel-title">${rankedTitle}</summary>`,
    `<div style="overflow-x:auto;">`,
    `<table class="arena-table">`,
    `<thead><tr>`,
    `<th>${headRank}</th><th>${headModel}</th><th>${headElo}</th>`,
    `<th>${headCi}</th><th>${headSpread}</th>`,
    `<th>${headMatches}</th><th>${headWins}</th>`,
    `</tr></thead>`,
    `<tbody>${rankedRows}</tbody>`,
    `</table>`,
    `</div>`,
    `</details>`,
    `<details class="unmask-panel" open>`,
    `<summary class="unmask-panel-title">${tiesTitle} <span class="arena-tie-count">(${result.ties.length})</span></summary>`,
    tiesBlock,
    `</details>`,
    `<details class="unmask-panel">`,
    `<summary class="unmask-panel-title">${summaryTitle}</summary>`,
    summaryBlock,
    `</details>`,
    `</div>`,
    ``,
  );
}
/**
 * Parse the vote CSV in #arena-csv, compute bootstrap Elo confidence
 * intervals and render the report into #arena-output.
 *
 * Validation problems (empty input, parse failure, fewer than 10 votes) are
 * reported through #arena-status. The computation is deferred with
 * setTimeout so the "computing" status is set before the heavy work starts.
 * A failure inside the deferred step is caught and reported instead of
 * leaving the status stuck on "computing".
 */
function runArenaCompute() {
  const statusEl = $("arena-status");
  const csv = ($("arena-csv").value || "").trim();
  if (!csv) {
    statusEl.textContent = t("arena.status.empty") || "⚠ Paste vote CSV or click Load sample.";
    return;
  }
  let votes;
  try {
    votes = parseVotesCSV(csv);
  } catch (e) {
    statusEl.textContent = `❌ ${e.message}`;
    return;
  }
  if (votes.length < 10) {
    statusEl.textContent = tFmt("arena.status.too_few", { n: votes.length });
    return;
  }
  statusEl.textContent = tFmt("arena.status.computing", { n: votes.length });
  // Defer to next tick so the status text actually paints before the heavy bootstrap.
  setTimeout(() => {
    try {
      const t0 = performance.now();
      const result = computeArenaCI(votes, { bootstrapN: 200, ciLevel: 0.95 });
      const ms = Math.round(performance.now() - t0);
      $("arena-output").innerHTML = renderArenaCard(result);
      statusEl.textContent = tFmt("arena.status.done", {
        n: votes.length, models: result.summary.n_models,
        ties: result.summary.n_ties, ms,
      });
    } catch (err) {
      // Nothing upstream can catch an exception thrown from a timer callback.
      $("arena-output").innerHTML = "";
      statusEl.textContent = `❌ ${err.message}`;
    }
  }, 30);
}
// Arena panel buttons. "Load sample" fills the textarea with the bundled CSV,
// "Compute" runs the analysis, "Clear" resets input, output and status.
$("arena-sample-btn")?.addEventListener("click", () => {
  $("arena-csv").value = SAMPLE_VOTES_CSV;
  const loadedMsg = t("arena.status.sample_loaded") || "✅ Sample loaded. Click Compute CIs.";
  $("arena-status").textContent = loadedMsg;
});
$("arena-run-btn")?.addEventListener("click", runArenaCompute);
$("arena-clear-btn")?.addEventListener("click", () => {
  const resets = [["arena-csv", "value"], ["arena-output", "innerHTML"], ["arena-status", "textContent"]];
  for (const [elementId, prop] of resets) {
    $(elementId)[prop] = "";
  }
});
// ════════════════════════════════════════════════════════════════════
// 🧪 Contamination Prior (v0.7.3 anti-bullshit pack #4)
// ════════════════════════════════════════════════════════════════════
// Accent color used for each contamination risk level.
const CONTAM_LEVEL_COLOR = {
  high: "#f85149",
  medium: "#f1c40f",
  low: "#3fb950",
};
/**
 * Render the contamination-prior report as an HTML string.
 *
 * Rows are split by `level` ("high" / "medium" / "low") into three panels,
 * each with an advice sentence and a table. Rows with any other level are
 * counted in the headline total but appear in no panel.
 *
 * Reads from each row: level, benchmark, benchmark_released, gap_months,
 * prior, benchmark_in_corpora, benchmark_category.
 *
 * @param {Array<object>} rows - Rated benchmarks.
 * @param {string} modelCutoff - Cutoff date shown in the headline; escaped
 *   before it is interpolated because the result is assigned to innerHTML.
 * @returns {string} HTML markup for the report.
 */
function renderContamCard(rows, modelCutoff) {
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, (c) =>
    ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));
  const joinLines = (...parts) => parts.join("\n");

  const titleHigh = t("contam.section.high") || "🔴 High-risk benchmarks (treat scores as unreliable)";
  const titleMed = t("contam.section.medium") || "🟡 Medium-risk (verify with alternates)";
  const titleLow = t("contam.section.low") || "🟢 Low-risk (likely clean)";
  const colBench = t("contam.col.benchmark") || "Benchmark";
  const colReleased = t("contam.col.released") || "Released";
  const colGap = t("contam.col.gap") || "Gap (months)";
  const colPrior = t("contam.col.prior") || "P(contam)";
  const colCorpora = t("contam.col.corpora") || "In corpora";
  const colCategory = t("contam.col.category") || "Category";

  const high = rows.filter((r) => r.level === "high");
  const medium = rows.filter((r) => r.level === "medium");
  const low = rows.filter((r) => r.level === "low");

  // Table for one risk group, or a placeholder sentence when the group is empty.
  const tableFor = (group) => {
    if (group.length === 0) {
      return `<p class="unmask-reco">${t("contam.no_entries") || "(none in this category)"}</p>`;
    }
    const body = group.map((r) => joinLines(
      `<tr>`,
      `<td><strong>${escapeHtml(r.benchmark)}</strong></td>`,
      `<td>${escapeHtml(r.benchmark_released)}</td>`,
      `<td class="arena-spread">${r.gap_months > 0 ? "+" : ""}${r.gap_months}</td>`,
      `<td class="arena-elo" style="color: ${CONTAM_LEVEL_COLOR[r.level]};"><strong>${(r.prior * 100).toFixed(0)}%</strong></td>`,
      `<td>${r.benchmark_in_corpora ? "✓" : "✗"}</td>`,
      `<td class="arena-spread">${escapeHtml(r.benchmark_category)}</td>`,
      `</tr>`,
    )).join("");
    return joinLines(
      `<table class="arena-table">`,
      `<thead><tr><th>${colBench}</th><th>${colReleased}</th><th>${colGap}</th><th>${colPrior}</th><th>${colCorpora}</th><th>${colCategory}</th></tr></thead>`,
      `<tbody>${body}</tbody></table>`,
    );
  };

  const adviceHigh = t("contam.advice.high") || "Treat these scores as unreliable. Replace with newer / private-test alternates (MMLU-Pro, GPQA, MUSR, MATH-500).";
  const adviceMedium = t("contam.advice.medium") || "Take with caution. Look for replication on a held-out subset or community reproductions.";
  const adviceLow = t("contam.advice.low") || "Score likely uncontaminated, but absence of leak is not proof — still cross-check with alternate test.";

  const headline = tFmt("contam.summary.headline", { cutoff: escapeHtml(modelCutoff), n: rows.length });

  return joinLines(
    ``,
    `<div class="arena-result">`,
    `<div class="unmask-hero" style="border-color: #58a6ff;">`,
    `<div class="unmask-verdict" style="color: #58a6ff;">${headline}</div>`,
    `<div class="unmask-numbers">`,
    `<div><span class="unmask-num-label" style="color:${CONTAM_LEVEL_COLOR.high}">🔴 ${t("contam.label.high") || "High risk"}</span><span class="unmask-num-val">${high.length}</span></div>`,
    `<div><span class="unmask-num-label" style="color:${CONTAM_LEVEL_COLOR.medium}">🟡 ${t("contam.label.medium") || "Medium"}</span><span class="unmask-num-val">${medium.length}</span></div>`,
    `<div><span class="unmask-num-label" style="color:${CONTAM_LEVEL_COLOR.low}">🟢 ${t("contam.label.low") || "Low"}</span><span class="unmask-num-val">${low.length}</span></div>`,
    `</div>`,
    `</div>`,
    `<div class="unmask-details">`,
    `<details class="unmask-panel" open>`,
    `<summary class="unmask-panel-title">${titleHigh} <span class="arena-tie-count">(${high.length})</span></summary>`,
    `<p class="unmask-reco">${adviceHigh}</p>`,
    tableFor(high),
    `</details>`,
    `<details class="unmask-panel" open>`,
    `<summary class="unmask-panel-title">${titleMed} <span class="arena-tie-count">(${medium.length})</span></summary>`,
    `<p class="unmask-reco">${adviceMedium}</p>`,
    tableFor(medium),
    `</details>`,
    `<details class="unmask-panel">`,
    `<summary class="unmask-panel-title">${titleLow} <span class="arena-tie-count">(${low.length})</span></summary>`,
    `<p class="unmask-reco">${adviceLow}</p>`,
    tableFor(low),
    `</details>`,
    `</div>`,
    `</div>`,
    ``,
  );
}
/**
 * Read the training-cutoff date from #contam-cutoff, rate every benchmark
 * against it and render the report into #contam-output.
 *
 * Accepted shapes are YYYY, YYYY-MM and YYYY-MM-DD (one- or two-digit month
 * and day). The month must be 1-12 and the day 1-31; anything else is
 * reported as a bad date through #contam-status and nothing is computed.
 */
function runContamCompute() {
  const statusEl = $("contam-status");
  const cutoff = ($("contam-cutoff").value || "").trim();
  if (!cutoff) {
    statusEl.textContent = t("contam.status.empty") || "⚠ Enter a model training cutoff date (e.g. 2023-12).";
    return;
  }
  const parts = /^(\d{4})(?:-(\d{1,2}))?(?:-(\d{1,2}))?$/.exec(cutoff);
  // The pattern alone would accept "2023-13" or "2023-02-99"; range-check the
  // optional month and day so impossible dates never reach the rater.
  const inRange = (text, lo, hi) => text === undefined || (Number(text) >= lo && Number(text) <= hi);
  if (!parts || !inRange(parts[2], 1, 12) || !inRange(parts[3], 1, 31)) {
    statusEl.textContent = t("contam.status.bad_date") || "⚠ Bad date format. Use YYYY-MM or YYYY-MM-DD.";
    return;
  }
  const rows = rateAllBenchmarks(cutoff);
  $("contam-output").innerHTML = renderContamCard(rows, cutoff);
  statusEl.textContent = tFmt("contam.status.done", {
    cutoff, n: rows.length,
    high: rows.filter((r) => r.level === "high").length,
  });
}
// Contamination panel: run on button click or on Enter in the cutoff field.
$("contam-run-btn")?.addEventListener("click", runContamCompute);
$("contam-cutoff")?.addEventListener("keydown", (event) => {
  if (event.key !== "Enter") return;
  event.preventDefault();
  runContamCompute();
});
// ════════════════════════════════════════════════════════════════════
// ⚖️ Quant-regime classifier (v0.7.3 anti-bullshit pack #5)
// ════════════════════════════════════════════════════════════════════
// Accent color per quantization regime; "safe" and "mild" share one color.
const QUANT_REGIME_COLOR = (() => {
  const green = "#3fb950";
  const yellow = "#f1c40f";
  const red = "#f85149";
  return { safe: green, mild: green, significant: yellow, cliff: red };
})();
// Fill the #quant-scheme <select> with one <option> per QUANT_SCHEMES entry.
// Does nothing when the element is missing or already holds more than one
// option, which makes repeated calls harmless.
function populateQuantSchemes() {
  const dropdown = $("quant-scheme");
  if (!dropdown) return;
  if (dropdown.options.length > 1) return;
  QUANT_SCHEMES.forEach((scheme) => {
    const option = Object.assign(document.createElement("option"), {
      value: scheme.id,
      textContent: scheme.label,
    });
    dropdown.appendChild(option);
  });
}
// Cache config across "Fetch" + "Predict" / "Compare" actions on the same id.
// Both variables are written together by quantFetchConfig() after a
// successful fetch and stay null until then; runQuantPredict() and
// runQuantAll() read them.
let __quantLastConfig = null;
let __quantLastModelId = null;
/**
 * Fetch the config for the id typed into #quant-id and cache it in
 * __quantLastConfig / __quantLastModelId.
 *
 * Progress and errors are reported through #quant-status. The fetch button
 * is disabled while the request is in flight and always re-enabled.
 *
 * @returns {Promise<object|null>} The fetched config, or null when the id is
 *   empty or the fetch failed.
 */
async function quantFetchConfig() {
  // The model id is user-typed; it must be escaped before it reaches innerHTML.
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, (c) =>
    ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));

  const modelId = ($("quant-id").value || "").trim();
  if (!modelId) {
    $("quant-status").textContent = t("quant.status.empty_id") || "⚠ Enter a model id.";
    return null;
  }
  $("quant-status").textContent = tFmt("quant.status.fetching", { modelId });
  $("quant-fetch-btn").disabled = true;
  try {
    const cfg = await fetchHfConfig(modelId);
    __quantLastConfig = cfg;
    __quantLastModelId = modelId;
    $("quant-status").textContent = tFmt("quant.status.fetched", { modelId });
    return cfg;
  } catch (err) {
    if (err.code === "gated") {
      // Gated repos get a clickable license link, so this branch needs markup.
      const safeId = escapeHtml(err.modelId);
      const gatedMsg = t("hf_auto.gated_msg") || "is gated. Accept the license here:";
      $("quant-status").innerHTML = `🔒 <strong>${safeId}</strong> ${gatedMsg} <a href="https://huggingface.co/${safeId}" target="_blank" rel="noopener">huggingface.co/${safeId}</a>`;
    } else {
      $("quant-status").textContent = `❌ ${err.message}`;
    }
    return null;
  } finally {
    $("quant-fetch-btn").disabled = false;
  }
}
/**
 * Render the prediction card for one model + quantization scheme.
 *
 * Reads from `result`: regime, recommend_code, recommend_scheme,
 * scheme_label, scheme_bits, scheme_calibrated, gamma_shift, base_penalty,
 * arch_multiplier, n_params and delta_ppl ({low, mid, high}).
 *
 * @param {object} result - Prediction for a single scheme.
 * @param {string} modelId - Model id shown in the hero (escaped here).
 * @returns {string} HTML markup for the card.
 */
function renderQuantSingle(result, modelId) {
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, (c) =>
    ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));
  const fmtN = (x) => (x === null || x === undefined ? "—" : Number(x).toLocaleString());
  const joinLines = (...parts) => parts.join("\n");

  const color = QUANT_REGIME_COLOR[result.regime] || "#8b949e";
  const regimeLabel = t(`quant.regime.${result.regime}`) || result.regime;

  let recoHtml;
  if (result.recommend_code) {
    // When an alternative scheme is suggested, show its label rather than its id.
    const recoText = result.recommend_scheme
      ? tFmt("quant.reco." + result.recommend_code, {
          scheme: QUANT_SCHEMES.find((s) => s.id === result.recommend_scheme)?.label || result.recommend_scheme,
        })
      : (t("quant.reco." + result.recommend_code) || result.recommend_code);
    recoHtml = `<p class="unmask-reco">${recoText}</p>`;
  } else {
    recoHtml = `<p class="unmask-reco">${t("quant.reco.no_action") || "No action needed — quantization is safe for this architecture."}</p>`;
  }

  const calibratedText = result.scheme_calibrated
    ? (t("quant.field.calibrated") || "calibrated")
    : (t("quant.field.uncalibrated") || "uncalibrated");
  // Low and high bounds need a visible separator; printed back to back they
  // read as a single number.
  const pplBand = `${result.delta_ppl.low.toFixed(2)} – ${result.delta_ppl.high.toFixed(2)}`;

  return joinLines(
    ``,
    `<div class="unmask-result">`,
    `<div class="unmask-hero" style="border-color: ${color};">`,
    `<div class="unmask-verdict" style="color: ${color};">${regimeLabel}</div>`,
    `<div class="unmask-model"><code>${escapeHtml(modelId)}</code> + <code>${escapeHtml(result.scheme_label)}</code></div>`,
    `<div class="unmask-numbers">`,
    `<div><span class="unmask-num-label">${t("quant.label.gamma_shift") || "γ shift"}</span><span class="unmask-num-val">+${result.gamma_shift.toFixed(3)}</span></div>`,
    `<div><span class="unmask-num-label">${t("quant.label.delta_ppl") || "ΔPPL (est.)"}</span><span class="unmask-num-val">+${result.delta_ppl.mid.toFixed(2)}</span></div>`,
    `<div><span class="unmask-num-label">${t("quant.label.arch_mult") || "Arch multiplier"}</span><span class="unmask-num-val">×${result.arch_multiplier}</span></div>`,
    `</div>`,
    `</div>`,
    `<div class="unmask-details">`,
    `<details class="unmask-panel" open>`,
    `<summary class="unmask-panel-title">${t("quant.section.breakdown") || "Breakdown"}</summary>`,
    `<ul>`,
    `<li><strong>${t("quant.field.scheme") || "Scheme"}:</strong> ${escapeHtml(result.scheme_label)} (${result.scheme_bits}-bit, ${calibratedText})</li>`,
    `<li><strong>${t("quant.field.base_penalty") || "Base penalty"}:</strong> ${result.base_penalty.toFixed(3)}</li>`,
    `<li><strong>${t("quant.field.arch_mult_full") || "Architecture multiplier"}:</strong> ×${result.arch_multiplier} (d_head, GQA, SWA, params)</li>`,
    `<li><strong>${t("quant.field.gamma_shift") || "Predicted γ shift"}:</strong> +${result.gamma_shift.toFixed(3)}</li>`,
    `<li><strong>${t("quant.field.ppl_band") || "ΔPPL band (est.)"}:</strong> ${pplBand}</li>`,
    `<li><strong>${t("quant.field.params") || "Parameters"}:</strong> ${fmtN(result.n_params)}</li>`,
    `</ul>`,
    `</details>`,
    `<details class="unmask-panel" open>`,
    `<summary class="unmask-panel-title">${t("quant.section.reco") || "Recommendation"}</summary>`,
    recoHtml,
    `</details>`,
    `</div>`,
    `</div>`,
    ``,
  );
}
/**
 * Render the comparison table covering every quantization scheme.
 *
 * Reads from each row: scheme_label, scheme_bits, scheme_calibrated,
 * gamma_shift, regime and delta_ppl ({low, high}).
 *
 * @param {Array<object>} rows - One prediction per scheme, in display order.
 * @param {string} modelId - Model id for the headline; escaped before it is
 *   interpolated because the result is assigned to innerHTML.
 * @returns {string} HTML markup for the comparison card.
 */
function renderQuantAll(rows, modelId) {
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, (c) =>
    ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));
  const joinLines = (...parts) => parts.join("\n");

  const body = rows.map((r) => {
    const color = QUANT_REGIME_COLOR[r.regime] || "#8b949e";
    const regimeLabel = t(`quant.regime.${r.regime}`) || r.regime;
    // Low and high bounds need a visible separator; printed back to back
    // they read as a single number.
    const pplBand = `${r.delta_ppl.low.toFixed(2)} – ${r.delta_ppl.high.toFixed(2)}`;
    return joinLines(
      `<tr>`,
      `<td><strong>${escapeHtml(r.scheme_label)}</strong></td>`,
      `<td class="arena-spread">${r.scheme_bits}-bit ${r.scheme_calibrated ? "✓" : ""}</td>`,
      `<td class="arena-elo">+${r.gamma_shift.toFixed(3)}</td>`,
      `<td class="arena-spread">${pplBand}</td>`,
      `<td style="color: ${color};"><strong>${regimeLabel}</strong></td>`,
      `</tr>`,
    );
  }).join("");

  const headline = tFmt("quant.summary.headline_all", { modelId: escapeHtml(modelId) });

  return joinLines(
    ``,
    `<div class="arena-result">`,
    `<div class="unmask-hero" style="border-color: #58a6ff;">`,
    `<div class="unmask-verdict" style="color: #58a6ff;">${headline}</div>`,
    `</div>`,
    `<div class="unmask-details">`,
    `<details class="unmask-panel" open>`,
    `<summary class="unmask-panel-title">${t("quant.section.compare") || "All schemes (sorted by safety)"}</summary>`,
    `<table class="arena-table">`,
    `<thead><tr>`,
    `<th>${t("quant.col.scheme") || "Scheme"}</th>`,
    `<th>${t("quant.col.bits") || "Bits"}</th>`,
    `<th>${t("quant.col.gamma_shift") || "γ shift"}</th>`,
    `<th>${t("quant.col.ppl_band") || "ΔPPL band"}</th>`,
    `<th>${t("quant.col.regime") || "Regime"}</th>`,
    `</tr></thead>`,
    `<tbody>${body}</tbody>`,
    `</table>`,
    `</details>`,
    `</div>`,
    `</div>`,
    ``,
  );
}
/**
 * Predict the quantization regime for the scheme selected in #quant-scheme
 * and render the single-scheme card into #quant-output.
 *
 * The cached config is reused only when #quant-id is empty or still holds
 * the id the cache was fetched for. If the user has typed a different id
 * since the last fetch, the config is fetched again so the prediction
 * matches the model on screen.
 *
 * @returns {Promise<void>}
 */
async function runQuantPredict() {
  const typedId = ($("quant-id").value || "").trim();
  const cacheMatches = Boolean(__quantLastConfig) &&
    (typedId === "" || typedId === __quantLastModelId);
  const cfg = cacheMatches ? __quantLastConfig : await quantFetchConfig();
  if (!cfg) return;
  const schemeId = $("quant-scheme").value;
  if (!schemeId) {
    $("quant-status").textContent = t("quant.status.no_scheme") || "⚠ Pick a quant scheme.";
    return;
  }
  const result = predictQuantShift(cfg, schemeId);
  if (!result) {
    $("quant-status").textContent = "❌ Unknown scheme.";
    return;
  }
  $("quant-output").innerHTML = renderQuantSingle(result, __quantLastModelId);
  $("quant-status").textContent = tFmt("quant.status.done", { regime: t(`quant.regime.${result.regime}`) || result.regime });
}
/**
 * Predict every quantization scheme for the current model and render the
 * comparison table into #quant-output.
 *
 * The cached config is reused only when #quant-id is empty or still holds
 * the id the cache was fetched for; otherwise the config is fetched again so
 * the table matches the model on screen.
 *
 * @returns {Promise<void>}
 */
async function runQuantAll() {
  const typedId = ($("quant-id").value || "").trim();
  const cacheMatches = Boolean(__quantLastConfig) &&
    (typedId === "" || typedId === __quantLastModelId);
  const cfg = cacheMatches ? __quantLastConfig : await quantFetchConfig();
  if (!cfg) return;
  const rows = predictAllSchemes(cfg);
  $("quant-output").innerHTML = renderQuantAll(rows, __quantLastModelId);
  $("quant-status").textContent = tFmt("quant.status.done_all", { n: rows.length });
}
// Quant panel start-up: fill the scheme dropdown, then hook up the three
// buttons and the Enter key in the id field (Enter triggers a fetch).
populateQuantSchemes();
for (const [buttonId, handler] of [
  ["quant-fetch-btn", quantFetchConfig],
  ["quant-run-btn", runQuantPredict],
  ["quant-all-btn", runQuantAll],
]) {
  $(buttonId)?.addEventListener("click", handler);
}
$("quant-id")?.addEventListener("keydown", (event) => {
  if (event.key !== "Enter") return;
  event.preventDefault();
  quantFetchConfig();
});
// ════════════════════════════════════════════════════════════════════
// 🔀 Cross-framework drift bound (v0.7.5 anti-bullshit pack #6)
// ════════════════════════════════════════════════════════════════════
// Accent color per drift verdict; both "bug" verdicts share one color.
const DRIFT_VERDICT_COLOR = (() => {
  const green = "#3fb950";
  const yellow = "#f1c40f";
  const red = "#f85149";
  return { noise: green, suspicious: yellow, bug: red, bug_template: red };
})();
/**
 * Fill the framework and dtype <select>s of both drift setups ("a" and "b")
 * from DRIFT_FRAMEWORKS and DRIFT_DTYPES. A select that is missing or that
 * already has options is left alone.
 */
function populateDriftDropdowns() {
  // Append one <option> per {id, label} entry, but only into an empty select.
  const fillIfEmpty = (select, entries) => {
    if (!select || select.options.length !== 0) return;
    entries.forEach((entry) => {
      const option = document.createElement("option");
      option.value = entry.id;
      option.textContent = entry.label;
      select.appendChild(option);
    });
  };

  ["a", "b"].forEach((side) => {
    const frameworkSelect = $(`drift-${side}-framework`);
    const dtypeSelect = $(`drift-${side}-dtype`);
    fillIfEmpty(frameworkSelect, DRIFT_FRAMEWORKS);
    fillIfEmpty(dtypeSelect, DRIFT_DTYPES);
  });
}
/**
 * Collect one evaluation setup from the drift form.
 *
 * @param {string} side - "a" or "b"; selects the `drift-<side>-*` inputs.
 * @returns {{score: number, framework: string, dtype: string, batch: number, chat_template: string}}
 *   `score` is NaN when the field does not parse as a number; `batch` falls
 *   back to 1 when it does not parse or is 0.
 */
function readDriftSetup(side) {
  const valueOf = (suffix) => $(`drift-${side}-${suffix}`).value;
  const score = parseFloat(valueOf("score"));
  const framework = valueOf("framework");
  const dtype = valueOf("dtype");
  const parsedBatch = parseInt(valueOf("batch"), 10);
  const chat_template = valueOf("template");
  return { score, framework, dtype, batch: parsedBatch || 1, chat_template };
}
/**
 * Render the cross-framework drift verdict card as an HTML string.
 *
 * Reads from `result`: verdict, setup_a / setup_b (score, framework, dtype,
 * batch, chat_template), dominant_cause, observed_gap, numerical_band and
 * breakdown ({dtype, framework, batch, template_mismatch}).
 *
 * The chat-template line in the breakdown is shown only when
 * `breakdown.template_mismatch` is neither null nor undefined.
 *
 * @param {object} result - Output of the drift-bound computation.
 * @returns {string} HTML markup for the card.
 */
function renderDriftCard(result) {
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, (c) =>
    ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));
  const joinLines = (...parts) => parts.join("\n");

  const color = DRIFT_VERDICT_COLOR[result.verdict] || "#8b949e";
  const verdictLabel = t(`drift.verdict.${result.verdict}`) || result.verdict;
  const a = result.setup_a;
  const b = result.setup_b;
  // Show the human label for a framework / dtype id, or the id itself when unknown.
  const fwLabel = (id) => DRIFT_FRAMEWORKS.find((f) => f.id === id)?.label || id;
  const dtLabel = (id) => DRIFT_DTYPES.find((d) => d.id === id)?.label || id;
  const templateLabel = (value) => escapeHtml(t("drift.template." + value) || value);

  let causeHtml = "";
  if (result.dominant_cause) {
    const causeText = t(`drift.cause.${result.dominant_cause}`) || result.dominant_cause;
    causeHtml = `<p class="unmask-reco"><strong>${t("drift.dominant_cause") || "Dominant cause"}:</strong> ${causeText}</p>`;
  }
  const recoText = t(`drift.reco.${result.verdict}`) || "";

  // `!= null` covers undefined too: a breakdown without the field must not
  // reach `.toFixed`.
  const mismatch = result.breakdown.template_mismatch;
  const mismatchRow = mismatch != null
    ? `<li style="color:${color};"><strong>${t("drift.contrib.template") || "Chat-template MISMATCH"}:</strong> ~${mismatch.toFixed(0)} pts (dominant)</li>`
    : "";

  const ratioText = result.numerical_band > 0
    ? (result.observed_gap / result.numerical_band).toFixed(1)
    : "∞";

  return joinLines(
    ``,
    `<div class="unmask-result">`,
    `<div class="unmask-hero" style="border-color: ${color};">`,
    `<div class="unmask-verdict" style="color: ${color};">${verdictLabel}</div>`,
    `<div class="unmask-numbers">`,
    `<div><span class="unmask-num-label">${t("drift.label.observed") || "Observed gap"}</span><span class="unmask-num-val">${result.observed_gap.toFixed(2)}</span></div>`,
    `<div><span class="unmask-num-label">${t("drift.label.band") || "Numerical band"}</span><span class="unmask-num-val">±${result.numerical_band.toFixed(2)}</span></div>`,
    `<div><span class="unmask-num-label">${t("drift.label.ratio") || "Gap / band"}</span><span class="unmask-num-val">${ratioText}×</span></div>`,
    `</div>`,
    `</div>`,
    `<div class="unmask-details">`,
    `<details class="unmask-panel" open>`,
    `<summary class="unmask-panel-title">${t("drift.section.setups") || "Setups"}</summary>`,
    `<table class="arena-table">`,
    `<thead><tr><th></th><th>${t("drift.setup_a") || "Setup A"}</th><th>${t("drift.setup_b") || "Setup B"}</th></tr></thead>`,
    `<tbody>`,
    `<tr><td>${t("drift.score") || "Score"}</td><td class="arena-elo">${a.score?.toFixed(2)}</td><td class="arena-elo">${b.score?.toFixed(2)}</td></tr>`,
    `<tr><td>${t("drift.framework") || "Framework"}</td><td>${escapeHtml(fwLabel(a.framework))}</td><td>${escapeHtml(fwLabel(b.framework))}</td></tr>`,
    `<tr><td>${t("drift.dtype") || "Dtype"}</td><td>${escapeHtml(dtLabel(a.dtype))}</td><td>${escapeHtml(dtLabel(b.dtype))}</td></tr>`,
    `<tr><td>${t("drift.batch") || "Batch"}</td><td>${a.batch}</td><td>${b.batch}</td></tr>`,
    `<tr><td>${t("drift.template") || "Chat-template"}</td><td>${templateLabel(a.chat_template)}</td><td>${templateLabel(b.chat_template)}</td></tr>`,
    `</tbody>`,
    `</table>`,
    `</details>`,
    `<details class="unmask-panel" open>`,
    `<summary class="unmask-panel-title">${t("drift.section.breakdown") || "Drift contributors (numerical band)"}</summary>`,
    `<ul>`,
    `<li><strong>${t("drift.contrib.dtype") || "Dtype mismatch"}:</strong> ${result.breakdown.dtype.toFixed(2)} pts</li>`,
    `<li><strong>${t("drift.contrib.framework") || "Framework"}:</strong> ${result.breakdown.framework.toFixed(2)} pts</li>`,
    `<li><strong>${t("drift.contrib.batch") || "Batch difference"}:</strong> ${result.breakdown.batch.toFixed(2)} pts</li>`,
    mismatchRow,
    `</ul>`,
    `</details>`,
    `<details class="unmask-panel" open>`,
    `<summary class="unmask-panel-title">${t("drift.section.verdict") || "Verdict & recommendation"}</summary>`,
    causeHtml,
    recoText ? `<p class="unmask-reco">${recoText}</p>` : "",
    `</details>`,
    `</div>`,
    `</div>`,
    ``,
  );
}
/**
 * Read both drift setups from the form, compute the drift bound and render
 * the verdict card into #drift-output.
 *
 * When either score fails to parse, a prompt is written to #drift-status and
 * nothing is computed. After rendering, window.__taf_applyTranslations is
 * invoked when it is set.
 */
function runDriftCompute() {
  const setupA = readDriftSetup("a");
  const setupB = readDriftSetup("b");
  const missingScore = [setupA, setupB].some((setup) => Number.isNaN(setup.score));
  if (missingScore) {
    $("drift-status").textContent = t("drift.status.empty_scores") || "⚠ Enter both scores.";
    return;
  }

  const outcome = computeDriftBound(setupA, setupB);
  $("drift-output").innerHTML = renderDriftCard(outcome);
  if (window.__taf_applyTranslations) {
    window.__taf_applyTranslations();
  }
  const verdictText = t(`drift.verdict.${outcome.verdict}`) || outcome.verdict;
  $("drift-status").textContent = tFmt("drift.status.done", { verdict: verdictText });
}
/**
 * Pre-fill both drift setups with the canonical chat-template bug and note
 * it in #drift-status.
 */
function loadDriftSample() {
  // Canonical chat-template bug: same model on lm-eval-hf (no template applied)
  // gets ~50 on multi-turn, vLLM-served (template auto-applied) gets ~75.
  const sample = {
    a: { score: 50.2, framework: "lm-eval-hf", dtype: "bf16", batch: 1, template: "not_applied" },
    b: { score: 74.8, framework: "vllm-served", dtype: "bf16", batch: 8, template: "applied" },
  };
  for (const [side, fields] of Object.entries(sample)) {
    for (const [field, value] of Object.entries(fields)) {
      $(`drift-${side}-${field}`).value = value;
    }
  }
  $("drift-status").textContent = t("drift.status.sample_loaded") || "✅ Sample loaded (canonical chat-template bug). Click Compute drift bound.";
}
// Drift panel start-up: fill the dropdowns, then hook up Compute and Sample.
populateDriftDropdowns();
for (const [buttonId, handler] of [
  ["drift-run-btn", runDriftCompute],
  ["drift-sample-btn", loadDriftSample],
]) {
  $(buttonId)?.addEventListener("click", handler);
}
// ════════════════════════════════════════════════════════════════════
// 🔍 NIAH → reasoning gap predictor (v0.7.6 anti-bullshit pack #7)
// ════════════════════════════════════════════════════════════════════
// Accent color per NIAH verdict: one green, two yellow and two red verdicts.
const NIAH_VERDICT_COLOR = (() => {
  const green = "#3fb950";
  const yellow = "#f1c40f";
  const red = "#f85149";
  return {
    robust: green,
    marginal: yellow,
    degraded: yellow,
    retrieval_only: red,
    broken: red,
  };
})();
// Last config fetched by niahFetchConfig() and the user-entered id it was
// fetched for. Both stay null until a fetch succeeds.
let __niahLastConfig = null;
let __niahLastModelId = null;
/**
 * Fetch the config for the id typed into #niah-id and cache it in
 * __niahLastConfig / __niahLastModelId.
 *
 * Progress and errors are reported through #niah-status. When the config
 * carries `__via_mirror`, the status also names the mirror it came from.
 * The fetch button is disabled while the request is in flight and always
 * re-enabled.
 *
 * @returns {Promise<object|null>} The fetched config, or null when the id is
 *   empty or the fetch failed.
 */
async function niahFetchConfig() {
  // Everything interpolated into innerHTML below is escaped first: the model
  // id is user-typed and the mirror id comes from fetched data.
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, (c) =>
    ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));

  const modelId = ($("niah-id").value || "").trim();
  if (!modelId) {
    $("niah-status").textContent = t("niah.status.empty_id") || "⚠ Enter a model id.";
    return null;
  }
  $("niah-status").textContent = tFmt("niah.status.fetching", { modelId });
  $("niah-fetch-btn").disabled = true;
  try {
    const cfg = await fetchHfConfig(modelId);
    __niahLastConfig = cfg;
    // Keep the user-pasted id for RULER lookup (it has the canonical
    // alias mapping). The mirror id is recorded in cfg.__via_mirror
    // for any UI that wants to surface "fetched via mirror" — niah
    // status string already shows it below.
    __niahLastModelId = modelId;
    const fetchedMsg = tFmt("niah.status.fetched", { modelId });
    if (cfg.__via_mirror) {
      $("niah-status").innerHTML = `${escapeHtml(fetchedMsg)} <span class="subtle" style="color:#d29922;">(via mirror <code>${escapeHtml(cfg.__via_mirror)}</code>)</span>`;
    } else {
      $("niah-status").textContent = fetchedMsg;
    }
    return cfg;
  } catch (err) {
    if (err.code === "gated") {
      // Gated repos get a clickable license link, so this branch needs markup.
      const safeId = escapeHtml(err.modelId);
      const gatedMsg = t("hf_auto.gated_msg") || "is gated. Accept the license here:";
      $("niah-status").innerHTML = `🔒 <strong>${safeId}</strong> ${gatedMsg} <a href="https://huggingface.co/${safeId}" target="_blank" rel="noopener">huggingface.co/${safeId}</a>`;
    } else {
      $("niah-status").textContent = `❌ ${err.message}`;
    }
    return null;
  } finally {
    $("niah-fetch-btn").disabled = false;
  }
}
function renderNIAHCard(result, modelId, calib = null) {
const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c =>
({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));
const fmtN = (x) => x === null || x === undefined ? "—" : Number(x).toLocaleString();
const color = NIAH_VERDICT_COLOR[result.verdict] || "#8b949e";
const verdictLabel = t(`niah.verdict.${result.verdict}`) || result.verdict;
const reco = t(`niah.reco.${result.verdict}`) || "";
const safeText = result.safe_context
? tFmt("niah.safe_context", { ctx: result.safe_context })
: (t("niah.safe_context_none") || "No safe context found below your target — model fails reasoning even at small contexts.");
// RULER calibration block — appears only when KB lookup hits.
// Shows measured RULER aggregate, derived NIAH/reasoning, and the
// delta vs the heuristic so users see when the predictor was off.
let calibBlock = "";
if (calib) {
const fmtPct = (v) => `${(v * 100).toFixed(0)}%`;
const fmtDelta = (d) => {
if (d == null) return "—";
const pp = Math.round(d * 100);
const sign = pp > 0 ? "+" : "";
const col = Math.abs(pp) >= 10 ? "#f0883e" : Math.abs(pp) >= 5 ? "#d29922" : "#8b949e";
return `<span style="color:${col};">${sign}${pp} pp</span>`;
};
const extrapNote = calib.extrapolated
? `<span class="subtle" style="color:#d29922;font-size:0.85em;"> ⚠ ${t("niah.calib.extrapolated") || "extrapolated outside RULER's measured range"}</span>`
: "";
calibBlock = `
<details class="unmask-panel" open style="border-left:3px solid #3fb950;">
<summary class="unmask-panel-title">📊 ${t("niah.calib.heading") || "RULER-calibrated (NVIDIA published data)"}</summary>
<p>${tFmt("niah.calib.matched", {
alias: escapeHtml(calib.matched_alias),
canonical: escapeHtml(calib.canonical_id),
}) || `Matched <code>${escapeHtml(calib.matched_alias)}</code> → KB row <code>${escapeHtml(calib.canonical_id)}</code>.`}</p>
<p>
<strong>${t("niah.calib.aggregate") || "RULER aggregate"} @ ${fmtN(result.T_eval)}:</strong>
<code>${calib.ruler_avg_pct}%</code>
<span class="subtle">(${t("niah.calib.interp") || "interpolated between"} ${calib.interp_anchor})</span>${extrapNote}
</p>
<table class="arena-table" style="margin-top:0.5em;">
<thead><tr>
<th></th>
<th>${t("niah.calib.col.heuristic") || "Heuristic"}</th>
<th>${t("niah.calib.col.calibrated") || "RULER-calibrated"}</th>
<th>${t("niah.calib.col.delta") || "Δ"}</th>
</tr></thead>
<tbody>
<tr>
<td><strong>NIAH</strong></td>
<td>${fmtPct(result.niah_rate)}</td>
<td><strong>${fmtPct(calib.niah_calibrated)}</strong></td>
<td>${fmtDelta(calib.delta_niah)}</td>
</tr>
<tr>
<td><strong>${t("niah.label.reasoning") || "Reasoning"}</strong></td>
<td>${fmtPct(result.reasoning_rate)}</td>
<td><strong>${fmtPct(calib.reasoning_calibrated)}</strong></td>
<td>${fmtDelta(calib.delta_reasoning)}</td>
</tr>
</tbody>
</table>
<p class="recipe-desc subtle" style="font-size:0.82em;">
${t("niah.calib.factors") || "Per-task factors from RULER paper Appendix Tables 13-16:"}
retrieval = ${calib.retrieval_factor}× aggregate,
reasoning = ${calib.reasoning_factor}× aggregate
(${t("niah.calib.factors_caveat") || "honest range: retrieval 0.95-1.10×, reasoning 0.60-0.85×"}).
</p>
<p class="recipe-desc subtle" style="font-size:0.82em;">
${t("niah.calib.claimed_vs_effective") || "Paper-reported"}:
${t("niah.calib.claimed") || "claimed"} ${fmtN(calib.claimed_context)} /
${t("niah.calib.effective") || "effective"} ${fmtN(calib.effective_context)}.
${t("niah.calib.source") || "Source"}:
<a href="${calib.source_url}" target="_blank" rel="noopener noreferrer">RULER paper (Hsieh et al., COLM 2024)</a>
</p>
</details>
`;
} else if (modelId) {
// KB miss — explicitly state we're heuristic-only.
calibBlock = `
<p class="recipe-desc subtle" style="font-size:0.85em;margin-top:0.5em;">
💡 ${t("niah.calib.miss") || "RULER calibration unavailable for this model — using architectural heuristic only. Add to data/ruler_kb.json if you have measured numbers."}
</p>
`;
}
return `
<div class="unmask-result">
<div class="unmask-hero" style="border-color: ${color};">
<div class="unmask-verdict" style="color: ${color};">${verdictLabel}</div>
<div class="unmask-model"><code>${escapeHtml(modelId)}</code> @ <code>${fmtN(result.T_eval)}</code> tokens</div>
<div class="unmask-numbers">
<div><span class="unmask-num-label">${t("niah.label.niah") || "NIAH pass rate"}</span><span class="unmask-num-val">${(result.niah_rate * 100).toFixed(0)}%</span></div>
<div><span class="unmask-num-label">${t("niah.label.reasoning") || "Reasoning pass rate"}</span><span class="unmask-num-val">${(result.reasoning_rate * 100).toFixed(0)}%</span></div>
<div><span class="unmask-num-label">${t("niah.label.gap") || "Gap"}</span><span class="unmask-num-val">${(result.gap * 100).toFixed(0)} pts</span></div>
</div>
</div>
<div class="unmask-details">
${calibBlock}
<details class="unmask-panel" open>
<summary class="unmask-panel-title">${t("niah.section.breakdown") || "Architecture breakdown"}</summary>
<ul>
<li><strong>γ_Padé @ T_eval:</strong> ${result.gamma_pade}</li>
<li><strong>${t("niah.field.dhorizon") || "d_horizon (effective)"}:</strong> ${fmtN(result.d_horizon)} tokens</li>
<li><strong>${t("niah.field.ratio") || "T_eval / d_horizon"}:</strong> ${result.horizon_ratio}×</li>
<li><strong>${t("niah.field.arch_pressure") || "Arch pressure (small d_head + GQA + SWA)"}:</strong> ×${result.arch_pressure}</li>
<li><strong>${t("niah.field.theta") || "RoPE θ"}:</strong> ${fmtN(result.theta)}</li>
<li><strong>${t("niah.field.t_train") || "T_train (claimed)"}:</strong> ${fmtN(result.T_train)}</li>
</ul>
</details>
<details class="unmask-panel" open>
<summary class="unmask-panel-title">${t("niah.section.reco") || "Recommendation"}</summary>
<p class="unmask-reco">${reco}</p>
<p class="unmask-reco"><strong>${t("niah.label.safe_ctx") || "Safe reasoning context"}:</strong> ${safeText}</p>
</details>
</div>
</div>
`;
}
/**
 * Build the HTML for a sweep of NIAH vs. reasoning pass rates across
 * context lengths.
 *
 * The returned markup is assigned to `innerHTML` by the caller, so the model
 * id is HTML-escaped before being substituted into the translated summary
 * line (previously the local escaper was defined but never applied).
 *
 * @param {Array<object>} rows - One entry per context length; each row is read
 *   for `T_eval`, `niah_rate`, `reasoning_rate`, `gap` and `verdict`.
 * @param {string} modelId - Identifier of the model the sweep was run for.
 * @returns {string} Markup with a summary banner followed by the sweep table.
 */
function renderNIAHSweep(rows, modelId) {
  const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c =>
    ({"&":"&amp;","<":"&lt;",">":"&gt;",'"':"&quot;","'":"&#39;"}[c]));
  // Null/undefined context lengths render as an em dash instead of "NaN".
  const fmtN = (x) => x === null || x === undefined ? "—" : Number(x).toLocaleString();
  let body = "";
  for (const r of rows) {
    // Unknown verdicts fall back to a neutral grey and to the raw verdict key.
    const color = NIAH_VERDICT_COLOR[r.verdict] || "#8b949e";
    const label = t(`niah.verdict.${r.verdict}`) || r.verdict;
    body += `<tr>
<td><strong>${fmtN(r.T_eval)}</strong></td>
<td class="arena-elo">${(r.niah_rate * 100).toFixed(0)}%</td>
<td class="arena-elo">${(r.reasoning_rate * 100).toFixed(0)}%</td>
<td class="arena-spread">${(r.gap * 100).toFixed(0)} pts</td>
<td style="color: ${color};"><strong>${label}</strong></td>
</tr>`;
  }
  // Escape before substitution: the summary ends up inside innerHTML.
  const summary = tFmt("niah.summary.sweep", { modelId: escapeHtml(modelId) });
  return `
<div class="arena-result">
<div class="unmask-hero" style="border-color: #58a6ff;">
<div class="unmask-verdict" style="color: #58a6ff;">${summary}</div>
</div>
<div class="unmask-details">
<details class="unmask-panel" open>
<summary class="unmask-panel-title">${t("niah.section.sweep") || "Pass rate sweep across context lengths"}</summary>
<table class="arena-table">
<thead><tr>
<th>${t("niah.col.context") || "T_eval"}</th>
<th>${t("niah.col.niah") || "NIAH"}</th>
<th>${t("niah.col.reasoning") || "Reasoning"}</th>
<th>${t("niah.col.gap") || "Gap"}</th>
<th>${t("niah.col.verdict") || "Verdict"}</th>
</tr></thead>
<tbody>${body}</tbody>
</table>
</details>
</div>
</div>
`;
}
/**
 * Run a single NIAH/reasoning prediction at the context length typed into
 * `niah-teval` and render the resulting card.
 *
 * Reuses the last fetched config when there is one, otherwise fetches it.
 * Bails out silently when no config is available, and with a status message
 * when the target length is missing or below 512.
 */
async function runNIAHPredict() {
  const config = __niahLastConfig || await niahFetchConfig();
  if (!config) {
    return;
  }
  const targetLen = parseInt($("niah-teval").value, 10);
  const isUsable = !Number.isNaN(targetLen) && targetLen >= 512;
  if (!isUsable) {
    $("niah-status").textContent = t("niah.status.bad_teval") || "⚠ Enter a target context (≥512).";
    return;
  }
  const prediction = predictNIAHReasoning(config, targetLen);
  // The RULER KB loader is awaited on every run before calibrating.
  await loadRulerKB();
  // Calibrate the heuristic against published RULER numbers when available.
  const calibration = calibrateNIAH(__niahLastModelId, targetLen, prediction);
  $("niah-output").innerHTML = renderNIAHCard(prediction, __niahLastModelId, calibration);
  const asPercent = (rate) => (rate * 100).toFixed(0);
  $("niah-status").textContent = tFmt("niah.status.done", {
    verdict: t(`niah.verdict.${prediction.verdict}`) || prediction.verdict,
    niah: asPercent(prediction.niah_rate),
    reasoning: asPercent(prediction.reasoning_rate),
  });
}
/**
 * Run the context-length sweep for the current model and show the table.
 *
 * Reuses the last fetched config when there is one, otherwise fetches it;
 * does nothing when no config could be obtained.
 */
async function runNIAHSweep() {
  const config = __niahLastConfig || await niahFetchConfig();
  if (!config) {
    return;
  }
  const sweepRows = sweepContextLengths(config);
  $("niah-output").innerHTML = renderNIAHSweep(sweepRows, __niahLastModelId);
  const rowCount = sweepRows.length;
  $("niah-status").textContent = tFmt("niah.status.sweep_done", { n: rowCount });
}
// Wire the NIAH panel controls. Every lookup is optional so a page without
// the panel simply skips the wiring.
for (const [buttonId, onClick] of [
  ["niah-fetch-btn", niahFetchConfig],
  ["niah-run-btn", runNIAHPredict],
  ["niah-sweep-btn", runNIAHSweep],
]) {
  $(buttonId)?.addEventListener("click", onClick);
}
// Pressing Enter in the model-id box triggers a config fetch.
$("niah-id")?.addEventListener("keydown", (event) => {
  if (event.key !== "Enter") return;
  event.preventDefault();
  niahFetchConfig();
});
/**
 * Translate a Hugging Face style `config.json` object into the flat preset
 * the profiler form expects.
 *
 * Several alternative key spellings are accepted for each quantity
 * (e.g. `num_attention_heads` / `n_head`). Missing values fall back to 0,
 * except `theta` (10000) and `T_train` (2048) in the returned preset.
 *
 * @param {object} cfg - Parsed model configuration.
 * @param {string} modelId - Identifier stored on the preset as `_model_id`.
 * @returns {object} Preset with `theta`, `T_train`, head/layer counts,
 *   `n_params`, `has_SWA`, plus `_family` and `_model_id`.
 */
function configToPreset(cfg, modelId) {
  const attnHeads = cfg.num_attention_heads || cfg.n_head || 0;
  const kvHeads = cfg.num_key_value_heads || attnHeads;
  const hiddenSize = cfg.hidden_size || cfg.d_model || cfg.n_embd || 0;

  let headDim = cfg.head_dim;
  if (!headDim) {
    headDim = attnHeads > 0 ? Math.floor(hiddenSize / attnHeads) : 0;
  }

  // No explicit RoPE base: ALiBi and absolute-position models get null,
  // everything else is assumed to use the 10000 default.
  let ropeBase = cfg.rope_theta || cfg.rotary_emb_base;
  if (!ropeBase) {
    const lacksRope = cfg.alibi || cfg.position_embedding_type === "absolute";
    ropeBase = lacksRope ? null : 10000;
  }

  const trainContext = cfg.max_position_embeddings || cfg.max_sequence_length ||
    cfg.n_positions || cfg.n_ctx || 0;
  const layerCount = cfg.num_hidden_layers || cfg.n_layer || 0;
  const usesSlidingWindow = Boolean(cfg.sliding_window || cfg.use_sliding_window);

  // First matching rule wins; plain multi-head RoPE is the default.
  const pickFamily = () => {
    if (cfg.alibi) return "alibi";
    if (["mamba", "mamba2"].includes(cfg.model_type)) return "ssm";
    if (ropeBase == null) return "abspe";
    if (kvHeads < attnHeads) return "rope-gqa";
    return "rope-mha";
  };
  const family = pickFamily();
  const paramEstimate = estimateParams(cfg);

  return {
    theta: ropeBase || 10000,
    T_train: trainContext || 2048,
    n_attention_heads: attnHeads,
    n_kv_heads: kvHeads,
    d_head: headDim,
    n_layers: layerCount,
    n_params: paramEstimate,
    has_SWA: usesSlidingWindow,
    _family: family,
    _model_id: modelId,
  };
}
/**
 * Rough parameter-count estimate from a model config:
 * `12 * h^2 * L + 2 * V * h`, rounded to an integer.
 *
 * Accepts the same hidden-size spellings as `configToPreset`
 * (`hidden_size`, `d_model`, `n_embd`), so configs that only declare
 * `n_embd` no longer produce a zero estimate.
 *
 * @param {object} cfg - Parsed model configuration.
 * @returns {number} Estimated parameter count; 0 when no width is declared.
 */
function estimateParams(cfg) {
  const h = cfg.hidden_size || cfg.d_model || cfg.n_embd || 0;
  const L = cfg.num_hidden_layers || cfg.n_layer || 0;
  // Vocabulary defaults to 32000 when the config does not declare one.
  const V = cfg.vocab_size || 32000;
  return Math.round(12 * h * h * L + 2 * V * h);
}
// ════════════════════════════════════════════════════════════════════
// Run recipe (manual mode)
// ════════════════════════════════════════════════════════════════════
// Manual mode: run the currently selected recipe with the values typed into
// its parameter inputs.
$("run-btn").addEventListener("click", async () => {
  const recipe = state.currentRecipe;
  if (!recipe) {
    alert("Select a recipe first.");
    return;
  }
  await runAndDisplay(recipe.id, collectParams(recipe.params));
});
/**
 * Read the `param_<name>` inputs for a recipe and coerce their text values.
 *
 * "true"/"false" become booleans, numeric-looking text becomes a number, and
 * anything else stays a string. Inputs that are missing or empty are omitted.
 *
 * @param {string[]} paramNames - Parameter names declared by the recipe.
 * @returns {object} Map from parameter name to its coerced value.
 */
function collectParams(paramNames) {
  const coerce = (text) => {
    if (text === "true") return true;
    if (text === "false") return false;
    const asNumber = parseFloat(text);
    // isFinite(text) rejects inputs with trailing junk such as "12abc".
    const looksNumeric = !isNaN(asNumber) && isFinite(text);
    return looksNumeric ? asNumber : text;
  };
  const collected = {};
  paramNames.forEach((name) => {
    const field = $("param_" + name);
    if (field && field.value !== "") {
      collected[name] = coerce(field.value);
    }
  });
  return collected;
}
// ════════════════════════════════════════════════════════════════════
// Ask mode (free-form question via router)
// ════════════════════════════════════════════════════════════════════
// Ask mode: let the in-browser LLM choose a recipe for a free-form question,
// then run it. The button stays disabled while a request is in flight.
$("ask-btn").addEventListener("click", async () => {
  const question = $("question").value.trim();
  if (question === "") {
    alert("Please type a question.");
    return;
  }

  // Shows the routing error both in the status line and in the verdict box.
  const showRoutingFailure = (err) => {
    setStatus(`❌ Routing failed: ${err.message}`);
    $("output-section").style.display = "block";
    $("verdict-box").className = "verdict-no";
    $("verdict-box").innerHTML = `<strong>Could not route question.</strong><br>${escapeHtml(err.message)}<br><br>Try the Recipe mode for full manual control.`;
  };

  $("ask-btn").disabled = true;
  setStatus("🤔 Asking the in-browser LLM to pick a recipe...");
  try {
    const route = await routeQuestion(question);
    setStatus(`📋 Selected recipe ${route.recipe_id}. Running...`);
    await runAndDisplay(route.recipe_id, route.params, question);
  } catch (err) {
    showRoutingFailure(err);
  } finally {
    $("ask-btn").disabled = false;
  }
});
// Fill the question box with a randomly chosen example prompt.
$("example-btn").addEventListener("click", () => {
  const pick = Math.floor(Math.random() * EXAMPLES.length);
  $("question").value = EXAMPLES[pick];
});
/**
 * Ask the in-browser LLM to map a free-form question onto a recipe id plus
 * parameter values.
 *
 * The model is instructed to answer with a bare JSON object. If the reply is
 * not valid JSON as-is, the first `{...}` span in it is parsed instead.
 *
 * @param {string} question - The user's free-form question.
 * @returns {Promise<{recipe_id: string, params: object}>} The parsed route.
 *   `params` is always a plain object: a missing, null or non-object value
 *   from the model is replaced by `{}` so callers can spread it safely.
 * @throws {Error} When the reply contains no JSON object, or names a recipe
 *   that is not in `state.recipesById`.
 */
async function routeQuestion(question) {
  const engine = await loadWebLLM();
  const recipesDesc = state.recipes.map(r =>
    ` ${r.id}: ${r.name}${r.description}\n params: ${r.params.join(", ")}`
  ).join("\n");
  const systemPrompt = `You are a routing function. Given a user's free-form question
about transformer LLM viability, you MUST output a single JSON object with two fields:
- recipe_id: one of [${state.recipes.map(r => r.id).join(", ")}]
- params: an object with parameter values inferred from the question
Available recipes:
${recipesDesc}
Common model facts you may use:
Meta-Llama-3-8B: theta=500000, T_train=8192, n_attention_heads=32, n_kv_heads=8, d_head=128, n_layers=32, n_params=8e9
Mistral-7B-v0.1: theta=10000, T_train=8192, n_attention_heads=32, n_kv_heads=8, d_head=128, n_layers=32, n_params=7e9, has_SWA=true
Qwen2.5-7B: theta=1000000, T_train=32768, n_attention_heads=28, n_kv_heads=4, d_head=128, n_layers=28, n_params=7.6e9
Llama-3.3-70B-Instruct: theta=500000, T_train=131072, n_attention_heads=64, n_kv_heads=8, d_head=128, n_layers=80, n_params=70e9
Respond with ONLY the JSON object. No prose, no markdown fences, no explanation.`;
  const reply = await engine.chat.completions.create({
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: question },
    ],
    max_tokens: 400,
    temperature: 0.0,
    response_format: { type: "json_object" },
  });
  // A null/absent content is treated as an empty reply rather than crashing
  // on `.trim()`; it then fails below with the regular non-JSON error.
  const raw = String(reply.choices[0].message.content ?? "").trim();
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (e) {
    // Try extracting JSON from markdown fences
    const m = raw.match(/\{[\s\S]*\}/);
    if (!m) throw new Error(`LLM returned non-JSON: ${raw.slice(0, 200)}`);
    parsed = JSON.parse(m[0]);
  }
  if (!parsed || !parsed.recipe_id || !state.recipesById[parsed.recipe_id]) {
    throw new Error(`Unknown recipe: ${parsed?.recipe_id}`);
  }
  // The model sometimes omits `params`; normalise so the caller can always
  // pass it on as keyword arguments.
  const hasParamObject = parsed.params !== null
    && typeof parsed.params === "object"
    && !Array.isArray(parsed.params);
  if (!hasParamObject) {
    parsed.params = {};
  }
  return parsed;
}
// ════════════════════════════════════════════════════════════════════
// Run + display + synthesize
// ════════════════════════════════════════════════════════════════════
/**
 * Execute one recipe in Pyodide, render its result, and (when WebLLM is
 * enabled) ask the LLM for a plain-English summary.
 *
 * @param {string} recipeId - Recipe identifier handed to Python `run_recipe`.
 * @param {object} params - Keyword arguments for the recipe.
 * @param {?string} [originalQuestion=null] - Free-form question that led to
 *   this run, if any; stored on the result as `_original_question`.
 */
async function runAndDisplay(recipeId, params, originalQuestion=null) {
  setStatus("🧮 Computing TAF chain...");

  // Hand the inputs to Python through two module-level globals.
  const py = state.pyodide;
  py.globals.set("__rid", recipeId);
  py.globals.set("__params", py.toPy(params));
  const serialized = py.runPython(`
import json
result = run_recipe(__rid, **__params)
json.dumps(result)
`);

  const outcome = JSON.parse(serialized);
  outcome._original_question = originalQuestion;
  renderResult(outcome);

  // Show the single-recipe panel and hide the other two output panels.
  $("output-section").style.display = "block";
  for (const hiddenId of ["profile-output", "compare-output"]) {
    $(hiddenId).style.display = "none";
  }

  state.lastResult = { type: "recipe", recipeId, params };
  state.lastFullResult = outcome;
  setStatus("✅ Done. Numbers below.");

  if (!ENABLE_WEBLLM) {
    return;
  }
  await synthesizeAnswer(outcome);
}
/**
 * Render a recipe result into the verdict box and the step-by-step chain box.
 *
 * The verdict box is looked up and null-checked once, before either the
 * error branch or the normal branch touches it (the error branch used to
 * dereference it before the check). A missing chain box or a result without
 * a `chain` array is tolerated: the verdict is still shown.
 *
 * @param {object} r - Result object. Read fields: `error`, `verdict`,
 *   `recipe_id`, `recipe_name`, `reason`, `mitigation`, and `chain` (array of
 *   steps with `step`, `name`, `section`, `formula`, `inputs`, `result`,
 *   `interpretation`).
 */
function renderResult(r) {
  console.log("[TAF] renderResult called with:", r);
  const vBox = $("verdict-box");
  if (!vBox) {
    console.error("[TAF] verdict-box element not found!");
    return;
  }
  const cBox = $("chain-box");
  if (r.error) {
    vBox.className = "verdict-no";
    vBox.innerHTML = `<strong>Error</strong>: ${escapeHtml(r.error)}`;
    if (cBox) cBox.innerHTML = "";
    return;
  }
  const verdictStr = String(r.verdict || "UNKNOWN");
  // Map the verdict text onto one of three styling classes; anything that is
  // neither clearly positive nor clearly negative counts as degraded.
  let vClass = "";
  if (verdictStr.startsWith("YES") || verdictStr === "GO" || verdictStr.startsWith("USE SOFT")) vClass = "verdict-yes";
  else if (verdictStr.startsWith("NO") || verdictStr.startsWith("MEMORY") || verdictStr === "TINY-MODEL") vClass = "verdict-no";
  else vClass = "verdict-degraded";
  vBox.className = vClass;
  const verdictEmoji = vClass === "verdict-yes" ? "✅" : (vClass === "verdict-no" ? "❌" : "⚠");
  // The "Action" line is suppressed for the two no-op mitigation texts.
  vBox.innerHTML = `
<div style="display:flex; justify-content:space-between; align-items:center; margin-bottom:0.75rem; gap:1rem; flex-wrap:wrap;">
<div style="font-size:1.6rem; font-weight:800;">${verdictEmoji} ${escapeHtml(verdictStr)}</div>
<div class="recipe-tag">${escapeHtml(r.recipe_id || "")}${escapeHtml(r.recipe_name || "")}</div>
</div>
<div style="margin-bottom:0.5rem;"><strong>Reason:</strong> ${escapeHtml(r.reason || "(none)")}</div>
${r.mitigation && r.mitigation !== "None required." && r.mitigation !== "None — proceed with Chinchilla-optimal recipe."
? `<div><strong>Action:</strong> ${escapeHtml(r.mitigation)}</div>`
: ""}
`;
  console.log("[TAF] verdict-box populated with class:", vClass, "verdict:", verdictStr);
  if (!cBox) {
    console.error("[TAF] chain-box element not found!");
    return;
  }
  cBox.innerHTML = "";
  const steps = Array.isArray(r.chain) ? r.chain : [];
  steps.forEach(step => {
    // One collapsible <details> element per computation step.
    const div = document.createElement("details");
    div.className = "chain-step";
    div.innerHTML = `
<summary>
<span><strong>Step ${step.step}</strong> — ${escapeHtml(step.name)}</span>
<span class="step-section">${escapeHtml(step.section)}</span>
</summary>
<div class="step-formula">${escapeHtml(step.formula)}</div>
<div><strong>Inputs:</strong> ${escapeHtml(JSON.stringify(step.inputs))}</div>
<div class="step-result"><strong>Result:</strong> ${formatResult(step.result)}</div>
${step.interpretation ? `<div class="step-interp">${escapeHtml(step.interpretation)}</div>` : ""}
`;
    cBox.appendChild(div);
  });
}
/**
 * Format a chain-step result as an HTML fragment.
 *
 * The return value is interpolated into `innerHTML`, so every branch that
 * can carry arbitrary text is escaped: objects (already escaped before) and
 * now plain strings and other scalars as well.
 *
 * @param {*} r - The step result.
 * @returns {string} "n/a (not applicable)" for null/undefined, a localized
 *   number with at most 4 fraction digits, a `<pre>` block of pretty-printed
 *   JSON for objects, or the escaped string form otherwise.
 */
function formatResult(r) {
  if (r === null || r === undefined) return "n/a (not applicable)";
  if (typeof r === "number") return r.toLocaleString(undefined, { maximumFractionDigits: 4 });
  if (typeof r === "object") return `<pre>${escapeHtml(JSON.stringify(r, null, 2))}</pre>`;
  return escapeHtml(String(r));
}
/**
 * Escape the five HTML-significant characters (`& < > " '`) so a value can be
 * interpolated into markup safely.
 *
 * @param {*} s - Value to escape; coerced with `String()` first.
 * @returns {string} The escaped text.
 */
function escapeHtml(s) {
  const entityFor = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;",
  };
  // Single pass over the text; each special character maps to its entity.
  return String(s).replace(/[&<>"']/g, (ch) => entityFor[ch]);
}
// ════════════════════════════════════════════════════════════════════
// WebLLM (synthesis + router)
// ════════════════════════════════════════════════════════════════════
/**
 * Lazily create the WebLLM engine and cache it on `state.webllm`.
 *
 * Tries `WEBLLM_MODEL` first. If that fails with an error whose text mentions
 * "QuotaExceeded" or "storage", retries once with `WEBLLM_FALLBACK`; any
 * other error is rethrown unchanged.
 *
 * @returns {Promise<object>} The engine instance.
 * @throws {Error} When both the primary and the fallback model fail to load.
 */
async function loadWebLLM() {
  if (state.webllm) {
    return state.webllm;
  }

  // Request persistent storage to avoid quota issues with cached model weights
  if (navigator.storage && navigator.storage.persist) {
    try {
      const granted = await navigator.storage.persist();
      console.log(granted ? "Persistent storage granted" : "Persistent storage denied");
    } catch (e) {
      console.warn("storage.persist() failed:", e);
    }
  }

  setStatus(`⏳ Loading WebLLM library + ${WEBLLM_MODEL.split("-")[0]} (~350MB first time, cached after)...`);
  const { CreateMLCEngine } = await import("https://esm.run/@mlc-ai/web-llm");

  // Engine factory that mirrors download progress into the status line.
  const createEngine = async (modelId) => {
    const onProgress = (info) => setStatus(`⏳ ${info.text || "Loading model..."}`);
    return await CreateMLCEngine(modelId, { initProgressCallback: onProgress });
  };

  try {
    state.webllm = await createEngine(WEBLLM_MODEL);
  } catch (primaryErr) {
    const errText = String(primaryErr);
    const looksLikeQuota = errText.includes("QuotaExceeded") || errText.includes("storage");
    if (!looksLikeQuota) {
      throw primaryErr;
    }
    setStatus(`⚠ Quota exceeded for ${WEBLLM_MODEL}. Trying smaller fallback ${WEBLLM_FALLBACK}...`);
    try {
      state.webllm = await createEngine(WEBLLM_FALLBACK);
    } catch (fallbackErr) {
      throw new Error(
        `Both models failed. Browser storage too constrained. ` +
        `Try: (1) Settings → Privacy → Site settings → allow more storage for this site, ` +
        `(2) clear browser cache, (3) use Chrome/Edge in non-incognito mode. ` +
        `Original error: ${fallbackErr.message || fallbackErr}`
      );
    }
  }
  return state.webllm;
}
/**
 * Ask the in-browser LLM for a plain-English summary of a recipe result and
 * show it in the answer box.
 *
 * Failures to load the engine or to generate text are reported inside the
 * answer box; they never throw to the caller.
 *
 * @param {object} result - Recipe result passed to `buildSynthesisPrompt`.
 */
async function synthesizeAnswer(result) {
  const setAnswerHtml = (html) => {
    $("answer-box").innerHTML = html;
  };

  $("answer-header").style.display = "block";
  $("answer-box").style.display = "block";
  setAnswerHtml('<em style="color:var(--fg-dim);">Generating plain-English summary...</em>');

  let engine;
  try {
    engine = await loadWebLLM();
  } catch (err) {
    setAnswerHtml(`<em style="color:var(--warning);">⚠ WebLLM failed: ${escapeHtml(String(err))}<br>Numbers above are still correct.</em>`);
    return;
  }

  const userPrompt = buildSynthesisPrompt(result);
  let summaryText = "";
  try {
    const completion = await engine.chat.completions.create({
      messages: [
        { role: "system", content: t("synthesis.system") },
        { role: "user", content: userPrompt },
      ],
      max_tokens: 400,
      temperature: 0.2,
    });
    summaryText = completion.choices[0].message.content;
  } catch (err) {
    setAnswerHtml(`<em style="color:var(--warning);">⚠ Synthesis failed: ${escapeHtml(String(err))}</em>`);
    return;
  }

  setAnswerHtml(`
<div style="white-space:pre-wrap; line-height:1.7;">${escapeHtml(summaryText)}</div>
<div style="margin-top:0.75rem; font-size:0.85rem; color:var(--fg-dim);">
↑ Synthesised by Llama-3.2-1B in your browser. Numbers are deterministic Python.
</div>
`);
  setStatus("✅ Done.");
}
/**
 * Assemble the user prompt for the summarising LLM from a recipe result.
 *
 * The prompt lists the recipe, the original question (when there is one),
 * one line per chain step, then verdict/reason/action and the summarising
 * instruction.
 *
 * @param {object} r - Recipe result with `recipe_id`, `recipe_name`, `chain`,
 *   `verdict`, `reason`, `mitigation` and optional `_original_question`.
 * @returns {string} The newline-separated prompt text.
 */
function buildSynthesisPrompt(r) {
  const stepLines = [];
  for (const s of r.chain) {
    const note = s.interpretation || "";
    stepLines.push(`Step ${s.step} (${s.section}) ${s.name}: ${formatResultPlain(s.result)}${note}`);
  }
  // The question line carries its own trailing newline, which leaves a blank
  // line before "Computed chain:"; without a question that slot is empty.
  const questionLine = r._original_question
    ? `User question: "${r._original_question}"\n`
    : "";
  return [
    `Recipe: ${r.recipe_id}${r.recipe_name}`,
    questionLine,
    "Computed chain:",
    stepLines.join("\n"),
    `Verdict: ${r.verdict}`,
    `Reason: ${r.reason}`,
    `Action: ${r.mitigation}`,
    "Summarize for non-technical user in 4-6 sentences. Cite section numbers (§X.Y). Mention verdict and most important action.",
  ].join("\n");
}
/**
 * Plain-text rendering of a chain-step result, used when building LLM prompts.
 *
 * @param {*} r - The step result.
 * @returns {string} "n/a" for null/undefined, a localized number with at most
 *   4 fraction digits, compact JSON for objects, or `String(r)` otherwise.
 */
function formatResultPlain(r) {
  if (r == null) {
    return "n/a";
  }
  switch (typeof r) {
    case "number":
      return r.toLocaleString(undefined, { maximumFractionDigits: 4 });
    case "object":
      return JSON.stringify(r);
    default:
      return String(r);
  }
}
// ════════════════════════════════════════════════════════════════════
// INSPECTOR mode (paste raw config.json)
// ════════════════════════════════════════════════════════════════════
// Inspector mode: parse a pasted config.json, derive a preset from it and run
// the full profile at the requested evaluation length (the preset's T_train
// when none is given).
$("inspector-btn").addEventListener("click", async () => {
  const pasted = $("inspector-json").value.trim();
  if (pasted === "") {
    $("inspector-status").textContent = "⚠ Paste a config.json first";
    return;
  }

  let cfg;
  try {
    cfg = JSON.parse(pasted);
  } catch (e) {
    $("inspector-status").textContent = `❌ Invalid JSON: ${e.message}`;
    return;
  }

  $("inspector-status").textContent = "⏳ Parsing + profiling...";
  $("inspector-btn").disabled = true;
  try {
    const label = cfg.model_type ? `<inspector:${cfg.model_type}>` : "<inspector>";
    const preset = configToPreset(cfg, label);
    state.lastModelId = preset._model_id || "<inspected>";

    const T_eval = parseInt($("inspector-T_eval").value) || preset.T_train;
    const {
      theta, T_train, n_attention_heads, n_kv_heads,
      d_head, n_layers, n_params, has_SWA,
    } = preset;
    const params = {
      theta, T_train, T_eval,
      n_attention_heads, n_kv_heads,
      d_head, n_layers,
      n_params, has_SWA,
    };

    state.pyodide.globals.set("__pp", state.pyodide.toPy(params));
    const serialized = state.pyodide.runPython(`
import json
result = profile_model(**__pp)
json.dumps(result)
`);
    const profile = JSON.parse(serialized);
    renderProfile(profile, params);
    state.lastResult = { type: "profile", params };
    state.lastFullResult = profile;
    $("inspector-status").innerHTML = `✅ Profiled: <strong>${preset._family}</strong> (${preset.n_params.toExponential(2)} params)`;
  } catch (err) {
    $("inspector-status").textContent = `❌ ${err.message}`;
    console.error(err);
  } finally {
    $("inspector-btn").disabled = false;
  }
});
// ════════════════════════════════════════════════════════════════════
// What-if T_eval slider — interactive exploration
// ════════════════════════════════════════════════════════════════════
/**
 * Render the "what-if" T_eval slider into `targetEl` and wire it up.
 *
 * Moving the slider recomputes γ_Padé, a corrected γ, d_horizon, the NIAH
 * ceiling and a verdict entirely in JS. The "recompute" button copies the
 * slider value into the `profile-T_eval` input and clicks `profile-btn`.
 *
 * @param {object} profile - Only checked for truthiness here.
 * @param {object} params - Read fields: `T_eval`, `theta`, `n_kv_heads`,
 *   `n_attention_heads`, `has_SWA`, `n_params`.
 * @param {Element} targetEl - Element whose `innerHTML` is replaced.
 */
function renderWhatIfSlider(profile, params, targetEl) {
if (!profile || !params) return;
// Slider range: 256 up to the larger of 4× the current T_eval and 200000.
const minL = 256;
const maxL = Math.max(params.T_eval * 4, 200000);
const initialL = params.T_eval;
targetEl.innerHTML = `
<h3 data-i18n="whatif.title">🎚 What-if: drag T_eval to see γ change live</h3>
<p class="subtle" data-i18n="whatif.desc">Pure JS recompute (no Pyodide call). Shows the geometric γ_Padé and d_horizon as you slide. The full chain re-runs on click.</p>
<input type="range" id="whatif-slider" class="whatif-slider"
min="${minL}" max="${maxL}" step="${Math.round(maxL/200)}" value="${initialL}" />
<div class="whatif-row"><span data-i18n="whatif.T_eval"><strong>T_eval</strong></span><span id="whatif-T_eval">${initialL.toLocaleString()}</span></div>
<div class="whatif-row"><span data-i18n="whatif.gamma_pade"><strong>γ_Padé</strong></span><span id="whatif-gamma">—</span></div>
<div class="whatif-row"><span data-i18n="whatif.d_horizon"><strong>d_horizon</strong></span><span id="whatif-dh">—</span></div>
<div class="whatif-row"><span data-i18n="whatif.l_niah"><strong>L_NIAH ceiling</strong></span><span id="whatif-niah">—</span></div>
<div class="whatif-row"><span data-i18n="whatif.predicted"><strong>Predicted geometric verdict</strong></span><span id="whatif-verdict" class="verdict-text">—</span></div>
<button id="whatif-rerun" class="secondary" type="button" style="margin-top:0.5rem;" data-i18n="whatif.rerun">↻ Recompute full chain at this T_eval</button>
`;
// Re-apply translations to the freshly inserted markup when the hook exists.
if (window.__taf_applyTranslations) window.__taf_applyTranslations();
// Recompute every readout from the current slider position.
const update = () => {
const T = parseInt($("whatif-slider").value);
const sqrt2 = Math.SQRT2;
// γ_Padé = (2θ − T√2) / (2θ + T√2)
const g_pade = (2 * params.theta - T * sqrt2) / (2 * params.theta + T * sqrt2);
// Apply same decomposition as Python
// Offsets: +0.11 when n_kv_heads < n_attention_heads, −0.21 with SWA,
// −0.15 when n_params ≥ 4e8.
const g_corr = g_pade
+ (params.n_kv_heads < params.n_attention_heads ? 0.11 : 0)
+ (params.has_SWA ? -0.21 : 0)
+ (params.n_params >= 4e8 ? -0.15 : 0);
let dh = null, niah = null, verdict, vClass;
// d_horizon and the NIAH ceiling are only computed for 0 < γ_corr < 1;
// outside that interval the verdict is "NO (Phase B)".
if (g_corr > 0 && g_corr < 1) {
dh = params.theta * (1 - g_corr) * sqrt2 / (1 + g_corr);
niah = 2 * dh;
// YES below d_horizon, DEGRADED up to twice d_horizon, NO beyond that.
if (T < dh) { verdict = `✅ YES (margin ${((1 - T / dh) * 100).toFixed(0)}%)`; vClass = "yes"; }
else if (T < niah) { verdict = `⚠ DEGRADED`; vClass = "deg"; }
else { verdict = `❌ NO (past NIAH ceiling)`; vClass = "no"; }
} else {
verdict = `❌ NO (Phase B)`; vClass = "no";
}
$("whatif-T_eval").textContent = T.toLocaleString();
// Show "raw → corrected" only when a correction was actually applied.
$("whatif-gamma").textContent = g_pade.toFixed(4) + (g_corr !== g_pade ? ` → ${g_corr.toFixed(4)}` : "");
$("whatif-dh").textContent = dh !== null ? Math.round(dh).toLocaleString() : "n/a (Phase B)";
$("whatif-niah").textContent = niah !== null ? Math.round(niah).toLocaleString() : "n/a";
const vEl = $("whatif-verdict");
vEl.textContent = verdict;
vEl.className = "verdict-text " + vClass;
};
$("whatif-slider").addEventListener("input", update);
$("whatif-rerun").addEventListener("click", () => {
const T = parseInt($("whatif-slider").value);
// Update params and trigger full re-profile
$("profile-T_eval").value = T;
$("profile-btn").click();
});
// Populate the readouts once for the initial slider position.
update();
}
// ════════════════════════════════════════════════════════════════════
// FALSIFICATION dashboard inline
// ════════════════════════════════════════════════════════════════════
// Status table of the falsifiable predictions shown in the dashboard.
// Each entry has:
//   id       — short label rendered in the first column
//   claim    — the prediction text
//   status   — "confirmed", "partial" or "refuted" in the entries below
//              (the renderer also tallies an "untested" bucket)
//   evidence — short pointer to the supporting or refuting data
// Consumed by renderFalsificationDashboard().
const FALSIFICATION_STATUS = [
{ id: "F1", claim: "γ_Padé MAE < 5% on non-anomalous Phase A models", status: "confirmed", evidence: "n=9, paper Tab. 4" },
{ id: "F2", claim: "d_horizon predicts NIAH collapse within 1% (pythia-70m)", status: "confirmed", evidence: "predicted 4078, observed 4096" },
{ id: "F3", claim: "Fisher info predicts forward-hook recovery within 0.2%", status: "confirmed", evidence: "12.5% predicted vs 12.3% observed" },
{ id: "F4", claim: "Layer asymmetry early/late ratio ≈ 13.5× (pythia-70m)", status: "confirmed", evidence: "F2 thermostat experiment" },
{ id: "F5", claim: "Area law S_γ = O(log N) for all γ > 0", status: "confirmed", evidence: "n=56, r=-0.954" },
{ id: "F6", claim: "KV truncation at D_f gives ΔPPL ≤ 0 in γ ∈ [0.65, 0.85]", status: "confirmed", evidence: "pythia-2.8b ΔPPL=-0.51" },
{ id: "F7", claim: "Linear pruning cost: ΔPPL ≈ 0.18 × %Q/K_pruned", status: "confirmed", evidence: "pythia-1b 0.17, 2.8b 0.18" },
{ id: "F8", claim: "Padé saturates at [1,1] in LLM regime z<<1", status: "confirmed", evidence: "sage round 4" },
{ id: "F9", claim: "RoPE attention is Euclidean fractional (d_eff=1/γ), not hyperbolic", status: "confirmed", evidence: "EXP-METRIC-RoPE sage" },
{ id: "F10", claim: "Δγ < -0.1 in models ≥ 400M ⇒ GQA / induction-head dominance", status: "confirmed", evidence: "n=20+ models" },
{ id: "F11", claim: "Δγ > +0.3 ⇒ alternating SWA (Gemma family signature)", status: "confirmed", evidence: "Gemma-2-9b Δγ=+0.51" },
{ id: "F12", claim: "Mamba L_crit = 45, α = 0.703", status: "confirmed", evidence: "3 seeds" },
{ id: "F13", claim: "Phase boundary at γ = 1 (Hagedorn)", status: "confirmed", evidence: "χ → ∞" },
{ id: "F14", claim: "RLHF Δγ shift ≤ 0.072 (recipe-specific)", status: "partial", evidence: "n=8 recipe-locked" },
{ id: "F15", claim: "R_c boundary at R_c★ ≈ 1.68", status: "refuted", evidence: "overlap zone [0.92, 3.08] n=9" },
{ id: "F16", claim: "Holographic pruning: alive bands in ℓ > L_crit ΔPPL ≈ 0", status: "refuted", evidence: "linear cost law instead" },
{ id: "F17", claim: "Soft d_horizon decay beats hard in regime d_h ≳ T_train/2", status: "partial", evidence: "n=2/3 (pythia-1b refuted)" },
{ id: "F18", claim: "Mittag-Leffler prefactor 1/Γ(1-γ) governs A_0", status: "refuted", evidence: "n=39, ratio 0.23" },
{ id: "F19", claim: "γ_Padé predicts γ_obs across-model variance", status: "partial", evidence: "centroid OK, ~0.1% var explained, see §sec:gamma_decomposition" },
{ id: "F20", claim: "β-flow exactly equivalent to logistic ODE", status: "confirmed", evidence: "sage symbolic check" },
{ id: "F21", claim: "tanh trajectory γ(t)~tanh(log step) on pythia-1b checkpoints", status: "refuted", evidence: "R²=0.15 on 4 checkpoints" },
{ id: "F22", claim: "χ(z*) = (5+√17)/4 closed form at Cayley fixed point", status: "confirmed", evidence: "sage symbolic, minimal poly 2y²-5y+1" },
{ id: "F23", claim: "T ↔ d_horizon involution: θ_design ∘ γ_Padé = id", status: "confirmed", evidence: "sage symbolic" },
];
/**
 * Render the falsification dashboard (summary counts plus one table row per
 * prediction) into the `falsification-table` element.
 *
 * Does nothing when that element is absent from the page.
 */
function renderFalsificationDashboard() {
  const container = $("falsification-table");
  if (!container) return;

  // Tally how many predictions fall into each status bucket.
  const tally = { confirmed: 0, partial: 0, refuted: 0, untested: 0 };
  for (const entry of FALSIFICATION_STATUS) {
    tally[entry.status]++;
  }

  const headline = `<p class="subtle">
✅ <strong>${tally.confirmed}</strong> confirmed ·
⚠ <strong>${tally.partial}</strong> partial ·
❌ <strong>${tally.refuted}</strong> refuted ·
⏳ <strong>${tally.untested}</strong> untested
(out of ${FALSIFICATION_STATUS.length} total predictions)
</p>`;

  const statusIcon = { confirmed: "✅", partial: "⚠", refuted: "❌", untested: "⏳" };
  const rowsHtml = FALSIFICATION_STATUS.map((entry) => `<tr>
<td><code>${entry.id}</code></td>
<td>${escapeHtml(entry.claim)}</td>
<td class="fal-status ${entry.status}">${statusIcon[entry.status]} ${entry.status}</td>
<td class="subtle">${escapeHtml(entry.evidence)}</td>
</tr>`).join("");

  const tableHtml = `<table class="falsification-table"><thead>
<tr><th>ID</th><th>Claim</th><th>Status</th><th>Evidence</th></tr>
</thead><tbody>` + rowsHtml + "</tbody></table>";

  container.innerHTML = headline + tableHtml;
}
// ════════════════════════════════════════════════════════════════════
// Browse community submissions (live from GitHub Issues API)
// ════════════════════════════════════════════════════════════════════
/**
 * Fetch the 15 most recent open issues of the registry repo from the GitHub
 * API and list them in the `community-feed` element.
 *
 * Shows a dedicated hint for a 404 (registry repo not created yet) and for an
 * empty list; any other failure is reported inline. Links that open in a new
 * tab carry `rel="noopener noreferrer"`, matching the other external links in
 * this file, so the opened page gets no handle on this window.
 *
 * Does nothing when the `community-feed` element is absent.
 */
async function loadCommunityFeed() {
  const target = $("community-feed");
  if (!target) return;
  try {
    const resp = await fetch(`https://api.github.com/repos/${REGISTRY_REPO}/issues?state=open&per_page=15&sort=created&direction=desc`);
    if (!resp.ok) {
      if (resp.status === 404) {
        target.innerHTML = `<em>The registry repo isn't created yet. Once <a href="https://github.com/${REGISTRY_REPO}" target="_blank" rel="noopener noreferrer"><code>${REGISTRY_REPO}</code></a> exists with submissions, they'll appear here live.</em>`;
        return;
      }
      throw new Error(`HTTP ${resp.status}`);
    }
    const issues = await resp.json();
    if (!issues || issues.length === 0) {
      target.innerHTML = `<em>No submissions yet. Be the first — generate a Profile and click <strong>📤 Submit to registry</strong>.</em>`;
      return;
    }
    // Issue titles and URLs come from a remote API, so both are escaped.
    const html = issues.map(issue => {
      const verdict = extractVerdictFromTitle(issue.title);
      const vClass = verdictClass(verdict);
      const time = relativeTime(new Date(issue.created_at));
      return `<div class="community-item">
<span class="verdict-badge ${vClass}">${escapeHtml(verdict)}</span>
<a href="${escapeHtml(issue.html_url)}" target="_blank" rel="noopener noreferrer">${escapeHtml(issue.title)}</a>
<span class="item-time">${time}</span>
</div>`;
    }).join("");
    target.innerHTML = html;
  } catch (err) {
    target.innerHTML = `<em>⚠ Couldn't load community feed: ${escapeHtml(err.message)}</em>`;
  }
}
/**
 * Derive a short verdict badge from a submission title.
 *
 * A "→ <token>" marker in the title wins; otherwise the first keyword found
 * (in the order listed below) decides, and "?" is the fallback.
 *
 * @param {string} title - Issue title.
 * @returns {string} Badge text.
 */
function extractVerdictFromTitle(title) {
  const arrow = title.match(/→\s*(\S+)/);
  if (arrow) {
    return arrow[1];
  }
  // Order matters: earlier keywords take precedence over later ones.
  const keywordBadges = [
    ["YES", "YES"],
    ["NO", "NO"],
    ["DEGRADED", "DEG"],
    ["Profile", "📇"],
    ["Compare", "🆚"],
  ];
  const hit = keywordBadges.find(([keyword]) => title.includes(keyword));
  return hit ? hit[1] : "?";
}
/**
 * Map a verdict badge onto its CSS modifier class.
 *
 * @param {string} v - Verdict text.
 * @returns {string} "yes", "no", "deg", or "" when nothing matches.
 */
function verdictClass(v) {
  const isPositive = v.startsWith("YES") || v === "GO";
  if (isPositive) {
    return "yes";
  }
  if (v.startsWith("NO")) {
    return "no";
  }
  return ["DEG", "DEGRADED"].includes(v) ? "deg" : "";
}
/**
 * Human-readable age of a timestamp, e.g. "42s ago", "5m ago", "3h ago",
 * "2d ago".
 *
 * Timestamps ahead of the local clock (clock skew between the server that
 * produced them and this browser) are clamped to "0s ago" instead of showing
 * a negative age.
 *
 * @param {Date} d - The moment to describe.
 * @returns {string} Age rounded down to the largest fitting unit.
 */
function relativeTime(d) {
  const elapsed = Math.floor((Date.now() - d.getTime()) / 1000);
  // Never report a negative age for timestamps slightly in the future.
  const sec = Math.max(0, elapsed);
  if (sec < 60) return `${sec}s ago`;
  if (sec < 3600) return `${Math.floor(sec / 60)}m ago`;
  if (sec < 86400) return `${Math.floor(sec / 3600)}h ago`;
  return `${Math.floor(sec / 86400)}d ago`;
}
// ════════════════════════════════════════════════════════════════════
// PROFILE mode
// ════════════════════════════════════════════════════════════════════
// Preset dropdown: remember the chosen model id, mirror it into the HF id
// input, and copy the preset's numbers into the profile form.
$("profile-preset").addEventListener("change", (e) => {
  const modelId = e.target.value;
  if (!modelId) return;
  state.lastModelId = modelId; // remember for filename/hash

  // Preset keys ARE valid HF model ids (e.g. "meta-llama/Llama-3.2-1B"), so
  // the HF id input is pre-filled and the user can click Fetch to refresh
  // from the Hub without retyping. The status hint names the loaded preset.
  if ($("profile-hf-id")) {
    $("profile-hf-id").value = modelId;
    const statusEl = $("profile-hf-status");
    if (statusEl) {
      statusEl.textContent = tFmt("profile.preset_loaded", { id: modelId });
    }
  }

  const raw = state.pyodide.runPython(`get_preset(${JSON.stringify(modelId)})`);
  // Convert to a plain object when the returned value offers toJs().
  let preset = raw;
  if (raw.toJs) {
    preset = raw.toJs({ dict_converter: Object.fromEntries });
  }
  const isEmpty = !preset || Object.keys(preset).length === 0;
  if (isEmpty) return;

  // Fields copied verbatim, in form order.
  const directFields = [
    ["profile-theta", "theta"],
    ["profile-T_train", "T_train"],
    ["profile-n_attn", "n_attention_heads"],
    ["profile-n_kv", "n_kv_heads"],
    ["profile-d_head", "d_head"],
    ["profile-n_layers", "n_layers"],
  ];
  for (const [elementId, presetKey] of directFields) {
    $(elementId).value = preset[presetKey];
  }
  $("profile-n_params").value = preset.n_params.toExponential(2);
  $("profile-has_swa").value = String(preset.has_SWA);
});
// Fetch button: download the model's config from the HF Hub, convert it to a
// preset, and copy the numbers into the profile form.
// The model id is typed by the user and ends up inside innerHTML on success,
// so it is HTML-escaped there (the error paths use textContent).
$("profile-fetch-btn").addEventListener("click", async () => {
  const id = $("profile-hf-id").value.trim();
  if (!id) { $("profile-hf-status").textContent = "⚠ Enter a model id"; return; }
  $("profile-hf-status").textContent = `⏳ Fetching ${id}...`;
  $("profile-fetch-btn").disabled = true;
  state.lastModelId = id; // remember for filename/hash
  try {
    const cfg = await fetchHfConfig(id);
    const p = configToPreset(cfg, id);
    $("profile-theta").value = p.theta;
    $("profile-T_train").value = p.T_train;
    $("profile-n_attn").value = p.n_attention_heads;
    $("profile-n_kv").value = p.n_kv_heads;
    $("profile-d_head").value = p.d_head;
    $("profile-n_layers").value = p.n_layers;
    $("profile-n_params").value = p.n_params.toExponential(2);
    $("profile-has_swa").value = String(p.has_SWA);
    $("profile-hf-status").innerHTML = `✅ <strong>${escapeHtml(id)}</strong> (${escapeHtml(p._family)})`;
  } catch (err) {
    $("profile-hf-status").textContent = `❌ ${err.message}`;
  } finally {
    // Re-enable the button whether the fetch succeeded or failed.
    $("profile-fetch-btn").disabled = false;
  }
});
// Profile button: read the form, run `profile_model` in Pyodide and render
// the resulting profile. The button is disabled while the run is in flight.
$("profile-btn").addEventListener("click", async () => {
  const intField = (id) => parseInt($(id).value);
  const floatField = (id) => parseFloat($(id).value);
  const params = {
    theta: floatField("profile-theta"),
    T_train: intField("profile-T_train"),
    T_eval: intField("profile-T_eval"),
    n_attention_heads: intField("profile-n_attn"),
    n_kv_heads: intField("profile-n_kv"),
    d_head: intField("profile-d_head"),
    n_layers: intField("profile-n_layers"),
    n_params: floatField("profile-n_params"),
    has_SWA: $("profile-has_swa").value === "true",
  };

  setStatus("🧮 Profiling — running all 5 recipes...");
  $("profile-btn").disabled = true;
  try {
    const py = state.pyodide;
    py.globals.set("__pp", py.toPy(params));
    const serialized = py.runPython(`
import json
result = profile_model(**__pp)
json.dumps(result)
`);
    const profile = JSON.parse(serialized);
    renderProfile(profile, params);
    state.lastResult = { type: "profile", params };
    state.lastFullResult = profile;
    setStatus("✅ Profile ready.");
  } catch (err) {
    setStatus(`❌ ${err.message}`);
    console.error(err);
  } finally {
    $("profile-btn").disabled = false;
  }
});
/**
 * Render the profile card into #profile-box and wire every control inside it.
 *
 * Shows #profile-output (hiding the single-recipe and compare outputs), builds
 * the hero, recipes, diagnostics, verification and share sections as one HTML
 * string, then attaches listeners for share/download/submit, the interactive
 * γ predicted-vs-observed panel and the algebraic-consistency button.
 *
 * @param {object} p - Profile result; reads `model_summary`, `key_numbers`,
 *   `recipes` and the optional `falsification_status`.
 * @param {object} params - Inputs the profile was computed from; `theta` and
 *   `T_eval` feed the γ check, and the whole object goes into the share link.
 */
function renderProfile(p, params) {
  $("profile-output").style.display = "block";
  // Hide other outputs
  $("output-section").style.display = "none";
  $("compare-output").style.display = "none";
  const verdictClass = (v) => {
    if (v.startsWith("YES") || v === "GO" || v.startsWith("USE SOFT")) return "v-yes";
    if (v.startsWith("NO") || v.startsWith("MEMORY") || v === "TINY-MODEL") return "v-no";
    return "v-deg";
  };
  const verdictEmoji = (v) => verdictClass(v) === "v-yes" ? "✅"
    : verdictClass(v) === "v-no" ? "❌" : "⚠";
  const ms = p.model_summary;
  const kn = p.key_numbers;
  // Display-only formatter: the output is locale-dependent, so it must not be
  // used for values that are parsed back as numbers.
  const formatN = (x) => x === null || x === undefined ? "n/a"
    : (typeof x === "number" ? x.toLocaleString(undefined, { maximumFractionDigits: 4 }) : String(x));
  const recipesHtml = Object.entries(p.recipes).map(([rid, r]) => `
<div class="taf-recipe-tile ${verdictClass(r.verdict)}">
<div class="tile-header">
<span>${escapeHtml(rid)} — <span class="tile-name">${escapeHtml(r.name)}</span></span>
<span class="tile-verdict">${verdictEmoji(r.verdict)} ${escapeHtml(r.verdict)}</span>
</div>
<div class="tile-reason">${escapeHtml(r.reason || "")}</div>
${r.mitigation && r.mitigation !== "None required." && r.mitigation !== "None — proceed with Chinchilla-optimal recipe."
? `<div class="tile-reason" style="margin-top:0.4rem; color:var(--fg-dim);"><strong>Action:</strong> ${escapeHtml(r.mitigation)}</div>`
: ""}
</div>
`).join("");
  // Reusable tooltip helper — keeps tooltip pattern uniform across the card
  const ttip = (key, fallback) =>
    `<span class="info"><span class="tooltip" data-i18n="${key}">${fallback}</span></span>`;
  const numbersHtml = `
<div class="num-row"><span class="num-label">γ_Padé(T_eval) ${ttip("tooltip.gamma_pade", "Closed-form prediction (2−z)/(2+z), z = T√2/θ. Paper §sec:gamma_decomposition.")}</span><span class="num-value">${formatN(kn.gamma_pade)}</span></div>
<div class="num-row"><span class="num-label">γ_decomposed ${ttip("tooltip.gamma_decomposed", "γ from full architectural decomposition: Padé baseline + GQA shift + SWA shift + post-IH shift.")}</span><span class="num-value">${formatN(kn.gamma_decomposed)}</span></div>
<div class="num-row"><span class="num-label">d_horizon ${ttip("tooltip.d_horizon", "Effective attention horizon at T_eval. Beyond this, attention scores fall below the noise floor (paper §26).")}</span><span class="num-value">${formatN(kn.d_horizon)}</span></div>
<div class="num-row"><span class="num-label">L_NIAH ceiling ${ttip("tooltip.L_NIAH", "Predicted ceiling for needle-in-a-haystack retrieval reliability at the current d_horizon.")}</span><span class="num-value">${formatN(kn.L_NIAH_ceiling)}</span></div>
<div class="num-row"><span class="num-label">χ susceptibility ${ttip("tooltip.chi", "Susceptibility exponent χ = 1/(1−γ). Diverges at the Hagedorn line γ=1.")}</span><span class="num-value">${formatN(kn.chi_susceptibility)}</span></div>
<div class="num-row"><span class="num-label">KV memory @ T_eval (BF16) ${ttip("tooltip.kv_memory", "Per-request KV cache memory at T_eval in BF16 = 2 · n_layers · n_kv_heads · d_head · T_eval bytes.")}</span><span class="num-value">${formatN(kn.kv_memory_per_request_GB)} GB</span></div>
`;
  const falsHtml = (p.falsification_status || []).map(f =>
    `<div class="taf-falsification"><strong>${escapeHtml(f.id)}</strong> — ${escapeHtml(f.claim)}: ${escapeHtml(f.status)}</div>`
  ).join("");
  // Per-verdict count breakdown — recipes test orthogonal axes (long-context,
  // budget, hardware, custom-vs-API, KV-compression). Worst-of-N would conflate
  // a "use API" recommendation with a long-context failure, so we show counts.
  const verdictCounts = Object.values(p.recipes).reduce((acc, r) => {
    const c = verdictClass(r.verdict);
    acc[c] = (acc[c] || 0) + 1;
    return acc;
  }, {});
  const nYes = verdictCounts["v-yes"] || 0;
  const nDeg = verdictCounts["v-deg"] || 0;
  const nNo = verdictCounts["v-no"] || 0;
  const breakdownCls = nNo ? "v-no" : nDeg ? "v-deg" : "v-yes";
  const gammaForPill = kn.gamma_decomposed ?? kn.gamma_pade;
  // Seed value for the <input type="number"> below. A number input only accepts
  // a plain decimal string ("." separator, no grouping), so formatN() cannot be
  // used here: in comma-decimal locales, or for a missing γ ("n/a"), the browser
  // would discard the value and leave the field blank. Four decimals matches the
  // input's step of 0.0001.
  const gammaInputValue = typeof gammaForPill === "number" && Number.isFinite(gammaForPill)
    ? String(Number(gammaForPill.toFixed(4)))
    : "";
  const recipeCount = Object.keys(p.recipes).length;
  $("profile-box").innerHTML = `
<div class="taf-card">
<div class="taf-hero">
<div class="hero-arch">${escapeHtml(ms.architecture_class)}</div>
<div class="hero-meta">
n_params=${formatN(ms.n_params)} ·
T_train=${ms.T_train} · T_eval=${ms.T_eval} ·
θ=${formatN(ms.rope_theta)} ·
${ms.has_GQA ? "GQA" : "MHA"}${ms.has_SWA ? " + SWA" : ""}
</div>
<div class="hero-row">
<span class="hero-pill ${breakdownCls}">✅ ${nYes} · ⚠ ${nDeg} · ❌ ${nNo} ${ttip("tooltip.verdict_breakdown", "Per-recipe breakdown across the orthogonal axes (long-context, budget, hardware, custom-vs-API, KV-compression). Recipes are independent decisions — a ❌ on X-1 means \"use API\" not \"model fails\". Open the Recipes section for per-axis verdict.")}</span>
${gammaForPill !== null && gammaForPill !== undefined
? `<span class="hero-pill">γ = ${formatN(gammaForPill)} ${ttip("tooltip.gamma_pill", "γ_decomposed (full architectural decomposition) or γ_Padé as fallback. Range (0,1) = Phase A (anti-Ising). γ ≥ 1 = Hagedorn / Phase B.")}</span>`
: ''}
${gammaForPill > 0 && gammaForPill < 1
? `<span class="hero-pill" style="background:rgba(110,80,200,0.15); border-color:rgba(110,80,200,0.45);"><span data-i18n="v05.antiising.badge">🧲 Anti-Ising (β=γ−1&lt;0, machine-verified)</span> ${ttip("tooltip.anti_ising", "Phase A class: β = γ−1 &lt; 0 (anti-Ising). Machine-verified by Sage Groebner basis + Lean Mathlib4. See §35 v0.5.")} ${badgesForUiBinding("anti_ising_pill")}</span>`
: ''}
</div>
</div>
<details class="taf-section" open>
<summary>
<span data-i18n="tafcard.recipes_title">📋 Recipes — verdict per dimension</span>
<span class="section-count">${recipeCount} ${t("tafcard.recipes_count_label", "dimensions")}</span>
</summary>
<div class="taf-section-body">
<div class="taf-recipes-grid">${recipesHtml}</div>
</div>
</details>
<details class="taf-section">
<summary>
<span data-i18n="tafcard.diag_title">🔬 Diagnostics — numbers + γ check + what-if</span>
</summary>
<div class="taf-section-body">
<h4 style="margin-top:0.3em;" data-i18n="tafcard.numbers_title">🔢 Key numbers (paper §26)</h4>
<div class="taf-key-numbers">${numbersHtml}</div>
<h4 style="margin-top:1.2em;" data-i18n="gamma_check.title">🔍 γ predicted vs observed</h4>
<div class="recipe-desc" data-i18n="gamma_check.desc">
Enter your empirically measured γ. Tool detects regime: fraud (θ inflated) / compressed / over-Padé / SWA-random / normal.
</div>
<div class="form-grid" style="margin:0.5em 0 0.6em;">
<div class="form-field">
<label><span data-i18n="gamma_check.gobs_label">γ_observed</span>
<span class="info"><span class="tooltip" data-i18n="gamma_check.gobs_tip">Empirically measured γ from your model's attention scores. Use the Diagnose CLI to obtain this from real weights.</span></span>
</label>
<input type="number" id="gc-gobs" step="0.0001" value="${gammaInputValue}" />
</div>
<div class="form-field">
<label><span data-i18n="gamma_check.random_label">Random corpus?</span>
<span class="info"><span class="tooltip" data-i18n="gamma_check.random_tip">Tick if γ_observed was measured on random/unstructured tokens. Distinguishes SWA signature (γ_obs &gt; 1) from anomaly.</span></span>
</label>
<select id="gc-random">
<option value="false" selected data-i18n="common.no">No</option>
<option value="true" data-i18n="common.yes">Yes</option>
</select>
</div>
</div>
<div id="gamma-check-results"></div>
<h4 style="margin-top:1.2em;" data-i18n="tafcard.whatif_title">🎚️ What-if explorer</h4>
<div id="whatif-container" class="whatif-box"></div>
</div>
</details>
<details class="taf-section">
<summary>
<span data-i18n="tafcard.verify_title">✓ Verification — Lean + Sage + falsification</span>
</summary>
<div class="taf-section-body">
<h4 style="margin-top:0.3em;" data-i18n="lean.table.title">📑 Lean+Mathlib theorem table</h4>
<div style="margin-bottom: 0.6em; opacity: 0.85; font-size: 0.92em;" data-i18n="lean.table.desc">
Every entry below is machine-proven against Lean 4 + Mathlib4. Click any L# link to jump to the source line on GitHub. The table is grouped by topic; click a header to expand.
</div>
<div id="lean-table-host"></div>
<h4 style="margin-top:1.2em;" data-i18n="v05.consistency.title">🔬 Algebraic consistency (Sage + Lean v0.5)</h4>
<div style="margin-bottom: 0.6em; opacity: 0.85; font-size: 0.92em;" data-i18n="v05.consistency.desc">
Verifies 12 D-SAGE algebraic identities of TAF critical exponents (machine-proof Sage Groebner basis + Lean Mathlib4). Pass = framework intact. Fail = bf16 outlier / quantization artifact.
</div>
<div class="lean-badges-row">${badgesForUiBinding("algebraic_consistency_check")}</div>
<button class="secondary" id="verify-consistency-btn" data-i18n="v05.consistency.btn">
🔬 Verify algebraic consistency
</button>
<div id="consistency-result" style="margin-top: 0.8em;"></div>
<h4 style="margin-top:1.2em;" data-i18n="tafcard.fals_title">🔬 Falsification status (F1-F23)</h4>
${falsHtml || '<div class="subtle" data-i18n="tafcard.fals_none">No falsifications applicable.</div>'}
</div>
</details>
<details class="taf-section">
<summary>
<span data-i18n="tafcard.share_title">📂 Provenance & share</span>
</summary>
<div class="taf-section-body">
<details style="margin:0.4em 0 0.8em; padding:0.6em 0.8em; border:1px solid rgba(241,196,15,0.5); border-radius:6px; background:rgba(241,196,15,0.07); font-size:0.88em;">
<summary style="cursor:pointer; font-weight:600;" data-i18n="v053.calibration.title">🔬 v0.5.3 — Calibration audit (2026-05-02)</summary>
<div style="margin-top:0.5em; line-height:1.45;" data-i18n="v053.calibration.note"></div>
</details>
<div class="share-bar">
<button class="secondary" id="profile-share-btn" data-i18n="share.btn">🔗 Copy share link</button>
<button class="secondary" id="profile-download-btn" data-i18n="share.download">💾 Download JSON</button>
<button class="secondary" id="profile-download-md-btn" data-i18n="share.download_md">📝 Markdown</button>
<button class="secondary" id="profile-download-tex-btn" data-i18n="share.download_tex">📜 LaTeX</button>
<button class="secondary" id="profile-submit-btn" data-i18n="share.submit">📤 Submit to registry</button>
<span id="profile-share-status" class="subtle"></span>
</div>
</div>
</details>
</div>
`;
  // Render the what-if slider for interactive exploration
  renderWhatIfSlider(p, params, $("whatif-container"));
  // Render Lean+Mathlib theorem table (graceful no-op if manifest missed).
  // Loaded async at bootstrap; if Profile clicked before fetch resolves we
  // wait once and then render.
  const renderLeanTable = () => {
    const host = $("lean-table-host");
    if (!host) return;
    if (getManifest()) {
      host.innerHTML = renderTheoremTable();
      if (window.__taf_applyTranslations) window.__taf_applyTranslations();
    } else {
      host.innerHTML = `<div class="subtle" data-i18n="lean.manifest.loading">Loading Lean manifest…</div>`;
      loadLeanManifest()
        .then(() => { host.innerHTML = renderTheoremTable(); if (window.__taf_applyTranslations) window.__taf_applyTranslations(); })
        // The error text is inserted as markup, so escape it first.
        .catch(err => { host.innerHTML = `<div class="subtle" data-i18n="lean.manifest.error">Lean manifest unavailable: ${escapeHtml(err.message || String(err))}</div>`; });
    }
  };
  renderLeanTable();
  // Re-apply translations to dynamically inserted buttons
  if (window.__taf_applyTranslations) window.__taf_applyTranslations();
  // Wire share/download/submit buttons
  $("profile-share-btn").addEventListener("click", () => copyShareLink("profile", params));
  $("profile-download-btn").addEventListener("click", async () => {
    const filename = await makeFilename("profile", p);
    const data = await exportableData("profile", p);
    downloadJSON(filename, data);
    $("profile-share-status").textContent = `✅ Downloaded ${filename}`;
    setTimeout(() => $("profile-share-status").textContent = "", 5000);
  });
  $("profile-download-md-btn").addEventListener("click", async () => {
    const hash = await inputHash("profile", p);
    const base = (await makeFilename("profile", p)).replace(/\.json$/, "");
    downloadText(`${base}.md`, profileToMarkdown(p, hash), "text/markdown;charset=utf-8");
    $("profile-share-status").textContent = `✅ Downloaded ${base}.md`;
    setTimeout(() => $("profile-share-status").textContent = "", 5000);
  });
  $("profile-download-tex-btn").addEventListener("click", async () => {
    const hash = await inputHash("profile", p);
    const base = (await makeFilename("profile", p)).replace(/\.json$/, "");
    downloadText(`${base}.tex`, profileToLatex(p, hash), "application/x-tex;charset=utf-8");
    $("profile-share-status").textContent = `✅ Downloaded ${base}.tex`;
    setTimeout(() => $("profile-share-status").textContent = "", 5000);
  });
  $("profile-submit-btn").addEventListener("click", async () => {
    await submitToRegistry("profile", p, $("profile-share-status"));
    setTimeout(() => $("profile-share-status").textContent = "", 8000);
  });
  // v0.6: γ predicted-vs-observed panel — interactive
  const updateGammaCheck = () => {
    const gObs = parseFloat($("gc-gobs").value);
    const isRandom = $("gc-random").value === "true";
    const r = gammaCheckAll({ theta: params.theta, T: params.T_eval, gObs, isRandom });
    const meta = REGIME_META[r.regime] || REGIME_META.unknown;
    const fmt = (x, d=4) => (x === null || x === undefined || Number.isNaN(x))
      ? "n/a"
      : (!Number.isFinite(x) ? "∞" : Number(x).toLocaleString(undefined, { maximumFractionDigits: d }));
    $("gamma-check-results").innerHTML = `
<div class="taf-key-numbers">
<div class="num-row"><span class="num-label">γ_Padé(T_eval) ${ttip("tooltip.gamma_pade", "Closed-form prediction (2−z)/(2+z), z = T√2/θ.")}</span><span class="num-value">${fmt(r.gammaPade)}</span></div>
<div class="num-row"><span class="num-label">θ_eff (observed) ${ttip("tooltip.theta_eff_obs", "Effective θ implied by your γ_observed: T√2 / (1 − γ_obs).")}</span><span class="num-value">${fmt(r.thetaEffObs, 1)}</span></div>
<div class="num-row"><span class="num-label">θ_eff (Padé) ${ttip("tooltip.theta_eff_pade", "Effective θ predicted by closed-form: θ + T/√2.")}</span><span class="num-value">${fmt(r.thetaEffPade, 1)}</span></div>
<div class="num-row"><span class="num-label">η = θ_eff_obs / θ_eff_Padé ${ttip("tooltip.efficiency", "Efficiency ratio. ≈1 = normal · &lt;0.01 = fraud · &lt;0.5 = compressed · &gt;1.5 = over-Padé.")}</span><span class="num-value">${fmt(r.efficiency)}</span></div>
<div class="num-row"><span class="num-label">ΔH_Cardy = log(θ_eff_obs / θ_nominal) ${ttip("tooltip.delta_h_cardy", "Cardy entropy shift. Negative = compression entropy. ~0 = nominal match.")}</span><span class="num-value">${fmt(r.deltaHCardy)}</span></div>
</div>
<div class="taf-recipe-tile ${meta.cls}" style="margin-top:0.6em;">
<div class="tile-header">
<span data-i18n="gamma_check.regime">Regime</span>
<span class="tile-verdict">${meta.emoji} <span data-i18n="gamma_check.regime.${r.regime}">${r.regime}</span></span>
</div>
<div class="tile-reason" data-i18n="gamma_check.regime.${r.regime}.desc"></div>
</div>
<details style="margin-top:0.6em;">
<summary style="cursor:pointer; font-weight:600;" data-i18n="gamma_check.glossary.title">ⓘ What do these mean?</summary>
<ul class="gc-glossary" style="margin:0.5em 0 0 1.2em; line-height:1.55;">
<li data-i18n="gamma_check.glossary.gamma_pade"></li>
<li data-i18n="gamma_check.glossary.gamma_obs"></li>
<li data-i18n="gamma_check.glossary.theta_eff_obs"></li>
<li data-i18n="gamma_check.glossary.theta_eff_pade"></li>
<li data-i18n="gamma_check.glossary.efficiency"></li>
<li data-i18n="gamma_check.glossary.delta_h"></li>
<li data-i18n="gamma_check.glossary.regime"></li>
</ul>
</details>
`;
    if (window.__taf_applyTranslations) window.__taf_applyTranslations();
  };
  $("gc-gobs").addEventListener("input", updateGammaCheck);
  $("gc-random").addEventListener("change", updateGammaCheck);
  updateGammaCheck();
  // v0.5.1: Algebraic consistency check button
  $("verify-consistency-btn").addEventListener("click", () => {
    const gammaVal = kn.gamma_decomposed ?? kn.gamma_pade;
    if (gammaVal === null || gammaVal === undefined) {
      $("consistency-result").innerHTML = `<div class="subtle">⚠ No γ value available for verification.</div>`;
      return;
    }
    if (gammaVal <= 0 || gammaVal >= 1) {
      $("consistency-result").innerHTML = `
<div style="padding:0.6em; border-left:3px solid #d29922; background:rgba(210,153,34,0.08);">
⚠ <strong>γ = ${gammaVal.toFixed(4)} out of Phase A</strong> — verification requires γ ∈ (0, 1).
${gammaVal >= 1 ? "Hagedorn boundary reached." : "Phase B / negative regime."}
</div>`;
      return;
    }
    try {
      const json = state.pyodide.runPython(`
import json
result = verify_algebraic_consistency(${gammaVal})
json.dumps(result)
`);
      const r = JSON.parse(json);
      const passed = r.n_checks_passed;
      const total = r.n_checks_total;
      const allOk = r.all_consistent;
      // t() returning the key unchanged is treated as "no translation available".
      const tooltipText = (id) => {
        const key = `v05.tooltip.${id.replace(/[^a-zA-Z0-9]/g, '_')}`;
        const tip = t(key);
        return (tip === key) ? '' : tip;
      };
      const checksRows = Object.entries(r.checks).map(([id, c]) => {
        const tip = tooltipText(id);
        return `<div class="num-row" style="padding:0.25em 0;" ${tip ? `title="${escapeHtml(tip)}"` : ''}>
<span class="num-label" style="font-family:monospace;font-size:0.85em;${tip ? 'cursor:help;border-bottom:1px dotted rgba(110,180,255,0.5);' : ''}">${escapeHtml(id)}: ${escapeHtml(c.claim)}</span>
<span class="num-value" style="color:${c.passes ? "#3fb950" : "#f85149"};">${c.passes ? "✓" : "✗"}</span>
</div>`;
      }).join("");
      $("consistency-result").innerHTML = `
<div style="padding:0.7em; border-left:3px solid ${allOk ? "#3fb950" : "#f85149"}; background:rgba(${allOk ? "63,185,80" : "248,81,73"},0.08); margin-bottom:0.5em;">
<strong>${allOk ? "✅" : "❌"} ${passed}/${total} D-SAGE identities ${allOk ? "consistent" : "FAILED"}</strong>
<div style="font-size:0.9em; opacity:0.85; margin-top:0.3em;">${escapeHtml(r.interpretation)}</div>
<div style="font-size:0.82em; opacity:0.75; margin-top:0.3em; font-style:italic;">Verified by: ${escapeHtml(r.framework_verified_by)}</div>
</div>
<details style="margin-top:0.4em;">
<summary style="cursor:pointer; font-size:0.9em;">🔍 Per-identity details (${total} checks)</summary>
<div style="padding:0.5em 0;">${checksRows}</div>
</details>
`;
    } catch (err) {
      $("consistency-result").innerHTML = `<div style="color:#f85149;">❌ Error: ${escapeHtml(err.message || String(err))}</div>`;
      console.error(err);
    }
  });
}
// ════════════════════════════════════════════════════════════════════
// COMPARE mode
// ════════════════════════════════════════════════════════════════════
// The Compare button stays disabled until a recipe is selected.
$("compare-recipe").addEventListener("change", () => {
  const hasRecipe = Boolean($("compare-recipe").value);
  $("compare-btn").disabled = !hasRecipe;
});
// Picking a preset copies its value into the model-id field of the same slot;
// choosing the empty option leaves the field untouched.
for (const presetSelect of document.querySelectorAll(".compare-preset")) {
  presetSelect.addEventListener("change", ({ target }) => {
    const owningSlot = target.closest(".compare-slot");
    if (!target.value) return;
    owningSlot.querySelector(".compare-hf-id").value = target.value;
  });
}
// Compare mode — "Compare" button: resolve every filled slot to a preset
// (built-in via get_preset(), otherwise fetched with fetchHfConfig()), run
// compare_models() inside Pyodide and render the table. Slots that fail to
// resolve are skipped; at least two resolved models are required.
$("compare-btn").addEventListener("click", async () => {
  const recipeId = $("compare-recipe").value;
  if (!recipeId) { alert("Pick a recipe first."); return; }
  const T_eval = Number.parseInt($("compare-T_eval").value, 10);
  const specs = [];
  setStatus("⏳ Fetching configs for compared models...");
  $("compare-btn").disabled = true;
  // One try/finally around the whole flow so the button is re-enabled on every
  // exit path, including the "fewer than 2 models" early return.
  try {
    for (const slot of document.querySelectorAll(".compare-slot")) {
      const id = slot.querySelector(".compare-hf-id").value.trim();
      if (!id) continue;
      try {
        let preset = null;
        const presetProxy = state.pyodide.runPython(`get_preset(${JSON.stringify(id)})`);
        // Optional chaining: if get_preset() yields no object at all (e.g. a
        // Python None arrives as undefined), fall through to the remote fetch
        // instead of throwing and skipping the model.
        const p = presetProxy?.toJs ? presetProxy.toJs({ dict_converter: Object.fromEntries }) : presetProxy;
        if (p && Object.keys(p).length > 0) {
          preset = p;
        } else {
          const cfg = await fetchHfConfig(id);
          preset = configToPreset(cfg, id);
        }
        specs.push({ ...preset, label: id.split("/").pop() });
      } catch (err) {
        console.error("compare fetch fail for", id, err);
        setStatus(`⚠ Skipped ${id}: ${err.message}`);
      }
    }
    if (specs.length < 2) {
      setStatus("❌ Need at least 2 models to compare.");
      return;
    }
    setStatus(`🧮 Comparing ${specs.length} models on ${recipeId}...`);
    state.pyodide.globals.set("__cspecs", state.pyodide.toPy(specs));
    state.pyodide.globals.set("__crid", recipeId);
    state.pyodide.globals.set("__cshared", state.pyodide.toPy({ T_eval }));
    // The values were converted with toPy() above, so on the Python side they
    // are expected to already be native containers, which have no .to_py().
    // Only call it when the object actually provides it, so both a native
    // list/dict and a JS proxy are accepted.
    const json = state.pyodide.runPython(`
import json
result = compare_models(
    __cspecs.to_py() if hasattr(__cspecs, "to_py") else __cspecs,
    __crid,
    __cshared.to_py() if hasattr(__cshared, "to_py") else __cshared,
)
json.dumps(result)
`);
    const cmp = JSON.parse(json);
    renderCompare(cmp);
    state.lastResult = { type: "compare", recipeId, T_eval, specs };
    state.lastFullResult = cmp;
    setStatus("✅ Comparison ready.");
  } catch (err) {
    setStatus(`❌ ${err.message}`);
    console.error(err);
  } finally {
    $("compare-btn").disabled = false;
  }
});
/**
 * Render the side-by-side comparison table into #compare-box and wire the
 * share/download/submit buttons underneath it.
 *
 * Shows #compare-output and hides the single-recipe and profile outputs. The
 * table has one row per entry of `cmp.rows` and one extra column per distinct
 * key found in any row's `key_numbers`.
 *
 * @param {object} cmp - Comparison result; reads `recipe_id`, `recipe_name`,
 *   `shared_params` and `rows` (each with `label`, `verdict`, `reason` and an
 *   optional `key_numbers` object).
 */
function renderCompare(cmp) {
  $("compare-output").style.display = "block";
  $("output-section").style.display = "none";
  $("profile-output").style.display = "none";
  // Same verdict → CSS class mapping as the profile card, including the
  // "TINY-MODEL" verdict, so a verdict is coloured identically in both views.
  const verdictClass = (v) => {
    if (v.startsWith("YES") || v === "GO" || v.startsWith("USE SOFT")) return "v-yes";
    if (v.startsWith("NO") || v.startsWith("MEMORY") || v === "TINY-MODEL") return "v-no";
    return "v-deg";
  };
  // Collect all unique key_numbers across rows
  const allKeys = new Set();
  cmp.rows.forEach(r => Object.keys(r.key_numbers || {}).forEach(k => allKeys.add(k)));
  // Recipe id and name are separated by " — " (as on the profile tiles) so the
  // two values do not run together.
  let html = `
<p class="recipe-desc"><strong>Recipe:</strong> ${escapeHtml(cmp.recipe_id)} — ${escapeHtml(cmp.recipe_name)}</p>
<p class="recipe-desc"><strong>Shared params:</strong> ${escapeHtml(JSON.stringify(cmp.shared_params))}</p>
<table class="compare-table">
<thead>
<tr><th>Model</th><th>Verdict</th><th>Reason</th>
`;
  allKeys.forEach(k => html += `<th>${escapeHtml(k)}</th>`);
  html += "</tr></thead><tbody>";
  cmp.rows.forEach(r => {
    const cls = verdictClass(r.verdict);
    html += `<tr><td><strong>${escapeHtml(r.label)}</strong></td>`;
    html += `<td class="${cls}">${escapeHtml(r.verdict)}</td>`;
    html += `<td>${escapeHtml(r.reason)}</td>`;
    allKeys.forEach(k => {
      const v = r.key_numbers ? r.key_numbers[k] : null;
      // Missing values render as a dash; numbers are locale-formatted.
      html += `<td>${v === undefined || v === null ? "—" : (typeof v === "number" ? v.toLocaleString(undefined, { maximumFractionDigits: 2 }) : escapeHtml(String(v)))}</td>`;
    });
    html += "</tr>";
  });
  html += `</tbody></table>
<div class="share-bar">
<button class="secondary" id="compare-share-btn" data-i18n="share.btn">🔗 Copy share link</button>
<button class="secondary" id="compare-download-btn" data-i18n="share.download">💾 Download JSON</button>
<button class="secondary" id="compare-download-md-btn" data-i18n="share.download_md">📝 Markdown</button>
<button class="secondary" id="compare-download-tex-btn" data-i18n="share.download_tex">📜 LaTeX</button>
<button class="secondary" id="compare-submit-btn" data-i18n="share.submit">📤 Submit to registry</button>
<span id="compare-share-status" class="subtle"></span>
</div>
`;
  $("compare-box").innerHTML = html;
  if (window.__taf_applyTranslations) window.__taf_applyTranslations();
  $("compare-share-btn").addEventListener("click", () => {
    const params = { recipeId: cmp.recipe_id, T_eval: cmp.shared_params.T_eval,
      models: cmp.rows.map(r => r.label) };
    copyShareLink("compare", params);
  });
  $("compare-download-btn").addEventListener("click", async () => {
    const filename = await makeFilename("compare", cmp);
    const data = await exportableData("compare", cmp);
    downloadJSON(filename, data);
    $("compare-share-status").textContent = `✅ Downloaded ${filename}`;
    setTimeout(() => $("compare-share-status").textContent = "", 5000);
  });
  $("compare-download-md-btn").addEventListener("click", async () => {
    const hash = await inputHash("compare", cmp);
    const base = (await makeFilename("compare", cmp)).replace(/\.json$/, "");
    downloadText(`${base}.md`, compareToMarkdown(cmp, hash), "text/markdown;charset=utf-8");
    $("compare-share-status").textContent = `✅ Downloaded ${base}.md`;
    setTimeout(() => $("compare-share-status").textContent = "", 5000);
  });
  $("compare-download-tex-btn").addEventListener("click", async () => {
    const hash = await inputHash("compare", cmp);
    const base = (await makeFilename("compare", cmp)).replace(/\.json$/, "");
    downloadText(`${base}.tex`, compareToLatex(cmp, hash), "application/x-tex;charset=utf-8");
    $("compare-share-status").textContent = `✅ Downloaded ${base}.tex`;
    setTimeout(() => $("compare-share-status").textContent = "", 5000);
  });
  $("compare-submit-btn").addEventListener("click", async () => {
    await submitToRegistry("compare", cmp, $("compare-share-status"));
    setTimeout(() => $("compare-share-status").textContent = "", 8000);
  });
}
// ════════════════════════════════════════════════════════════════════
// SHARE — encode current state to URL
// ════════════════════════════════════════════════════════════════════
/**
 * Build a shareable URL for the current state and copy it to the clipboard.
 *
 * The URL is the current page address without its query string, plus
 * `mode=<mode>` and `p=<base64 of the JSON-encoded params>`. On success a
 * short confirmation is shown in the status element of the originating mode;
 * if copying is not possible the link is shown in an alert instead.
 *
 * @param {string} mode - "profile", "compare" or another mode name.
 * @param {object} params - JSON-serialisable state to embed in the link.
 */
function copyShareLink(mode, params) {
  const url = new URL(window.location.href.split("?")[0]);
  url.searchParams.set("mode", mode);
  // btoa() only accepts Latin-1 and throws on anything else, so rewrite every
  // non-ASCII UTF-16 unit as a JSON \uXXXX escape first. The text stays valid
  // JSON, so a reader using atob() + JSON.parse() gets the same object back.
  const asciiJson = JSON.stringify(params ?? null).replace(
    /[\u0080-\uffff]/g,
    (ch) => `\\u${ch.charCodeAt(0).toString(16).padStart(4, "0")}`
  );
  url.searchParams.set("p", btoa(asciiJson));
  const link = url.toString();
  const copyFailed = () => alert("Copy failed. Manually copy: " + link);
  // navigator.clipboard can be missing altogether; treat that like a rejected
  // write rather than throwing.
  if (!navigator.clipboard?.writeText) {
    copyFailed();
    return;
  }
  navigator.clipboard.writeText(link).then(
    () => {
      // Prefer the status line that belongs to the mode being shared; the
      // generic #share-status sits in the single-recipe section, which is
      // hidden while the profile or compare view is displayed.
      const modeStatusIds = { profile: "profile-share-status", compare: "compare-share-status" };
      const preferredId = Object.hasOwn(modeStatusIds, mode) ? modeStatusIds[mode] : null;
      const tgt = (preferredId && $(preferredId))
        || $("share-status") || $("profile-share-status") || $("compare-share-status");
      if (tgt) {
        tgt.textContent = "✅ Copied to clipboard!";
        setTimeout(() => tgt.textContent = "", 3000);
      }
    },
    copyFailed
  );
}
/**
 * Restore UI state from a share link (`?mode=…&p=<base64 JSON>`).
 *
 * Does nothing unless both query parameters are present. Clicks the matching
 * mode tab and, for "profile", copies the decoded values into the profile form
 * and then clicks the Profile button. A payload that cannot be decoded is
 * logged with console.warn and otherwise ignored.
 */
function parseUrlState() {
  const query = new URLSearchParams(window.location.search);
  const mode = query.get("mode");
  const encoded = query.get("p");
  if (!mode || !encoded) return;
  // Share-link key → id of the profile form field that receives it.
  const profileFieldIds = {
    theta: "profile-theta",
    T_train: "profile-T_train",
    T_eval: "profile-T_eval",
    n_attention_heads: "profile-n_attn",
    n_kv_heads: "profile-n_kv",
    d_head: "profile-d_head",
    n_layers: "profile-n_layers",
    n_params: "profile-n_params",
    has_SWA: "profile-has_swa",
  };
  try {
    const decoded = JSON.parse(atob(encoded));
    // Switch to right mode tab
    document.querySelector(`.mode-btn[data-mode="${mode}"]`)?.click();
    // Wait a tick for tab to render
    setTimeout(() => {
      // Other modes: future
      if (mode !== "profile") return;
      for (const [key, value] of Object.entries(decoded)) {
        const fieldId = profileFieldIds[key];
        if (fieldId && $(fieldId)) $(fieldId).value = String(value);
      }
      setTimeout(() => $("profile-btn").click(), 200);
    }, 200);
  } catch (e) {
    console.warn("Failed to parse URL state:", e);
  }
}
// Wire single-recipe share/download/submit buttons
{
  // Blank the shared status line after `delayMs` milliseconds.
  const clearShareStatusAfter = (delayMs) =>
    setTimeout(() => $("share-status").textContent = "", delayMs);

  // Share: needs a previous run; falls back to type "recipe" and empty params.
  $("share-btn").addEventListener("click", () => {
    if (!state.lastResult) return;
    const { type, params } = state.lastResult;
    copyShareLink(type || "recipe", params || {});
  });

  // JSON export of the last full result.
  $("recipe-download-btn").addEventListener("click", async () => {
    if (!state.lastFullResult) return;
    const filename = await makeFilename("recipe", state.lastFullResult);
    const data = await exportableData("recipe", state.lastFullResult);
    downloadJSON(filename, data);
    $("share-status").textContent = `✅ Downloaded ${filename}`;
    clearShareStatusAfter(5000);
  });

  // Markdown export; the file name reuses the JSON name without its extension.
  $("recipe-download-md-btn").addEventListener("click", async () => {
    if (!state.lastFullResult) return;
    const r = state.lastFullResult;
    const hash = await inputHash("recipe", r);
    const jsonName = await makeFilename("recipe", r);
    const base = jsonName.replace(/\.json$/, "");
    downloadText(`${base}.md`, recipeToMarkdown(r, hash), "text/markdown;charset=utf-8");
    $("share-status").textContent = `✅ Downloaded ${base}.md`;
    clearShareStatusAfter(5000);
  });

  // LaTeX export, same naming scheme as the Markdown export.
  $("recipe-download-tex-btn").addEventListener("click", async () => {
    if (!state.lastFullResult) return;
    const r = state.lastFullResult;
    const hash = await inputHash("recipe", r);
    const jsonName = await makeFilename("recipe", r);
    const base = jsonName.replace(/\.json$/, "");
    downloadText(`${base}.tex`, recipeToLatex(r, hash), "application/x-tex;charset=utf-8");
    $("share-status").textContent = `✅ Downloaded ${base}.tex`;
    clearShareStatusAfter(5000);
  });

  // Registry submission; the status line is cleared 8 s after it finishes.
  $("recipe-submit-btn").addEventListener("click", async () => {
    if (!state.lastFullResult) return;
    await submitToRegistry("recipe", state.lastFullResult, $("share-status"));
    clearShareStatusAfter(8000);
  });
}
// ════════════════════════════════════════════════════════════════════
// Help modal
// ════════════════════════════════════════════════════════════════════
// a11y: focus trap + restore + Esc handling, generalized to any modal that follows
// the [role="dialog"] + .open pattern. Each call to wireModal() returns { open, close }
// and registers the modal so the global keyboard handler can find the active one.
// modal id → close function, so code outside wireModal() can close a modal by id.
const __modalCloseFns = new Map();

/**
 * Attach open/close behaviour to one modal dialog.
 *
 * Opening adds the "open" class, sets aria-hidden="false", remembers the
 * element that had focus and moves focus to the close button on the next tick.
 * Closing reverses this and returns focus to the remembered element. A click
 * whose target is the modal element itself also closes it.
 *
 * @param {string} modalId - id of the modal element.
 * @param {string} btnId - id of the button that opens the modal.
 * @param {string} closeId - id of the button that closes the modal.
 * @returns {{open: Function, close: Function} | null} null when the modal
 *   element does not exist.
 */
function wireModal(modalId, btnId, closeId) {
  const dialog = $(modalId);
  if (!dialog) return null;

  let previousFocus = null;

  function open() {
    previousFocus = document.activeElement;
    dialog.classList.add("open");
    dialog.setAttribute("aria-hidden", "false");
    setTimeout(() => {
      const closeButton = $(closeId);
      if (closeButton) closeButton.focus();
    }, 0);
  }

  function close() {
    dialog.classList.remove("open");
    dialog.setAttribute("aria-hidden", "true");
    const canRestoreFocus = previousFocus && typeof previousFocus.focus === "function";
    if (canRestoreFocus) previousFocus.focus();
    previousFocus = null;
  }

  const opener = $(btnId);
  if (opener) opener.addEventListener("click", open);
  const closer = $(closeId);
  if (closer) closer.addEventListener("click", close);
  dialog.addEventListener("click", (event) => {
    if (event.target.id !== modalId) return;
    close();
  });

  __modalCloseFns.set(modalId, close);
  return { open, close };
}
// Register the three modals: [modal id, open-button id, close-button id].
[
  ["help-modal", "help-btn", "help-close"],
  ["quickstart-modal", "quickstart-btn", "quickstart-close"],
  ["inventory-modal", "inventory-btn", "inventory-close"],
].forEach(([modalId, btnId, closeId]) => wireModal(modalId, btnId, closeId));
// Quick-start modal "↓ Start now" link should also close the modal so user lands on mode-section.
$("qs-start-link")?.addEventListener("click", () => {
  const closeQuickstart = __modalCloseFns.get("quickstart-modal");
  closeQuickstart?.();
});
// Esc closes whichever modal is open; Tab cycles within it.
document.addEventListener("keydown", (event) => {
  const activeModal = document.querySelector('[role="dialog"].open');
  if (!activeModal) return;
  switch (event.key) {
    case "Escape": {
      event.preventDefault();
      __modalCloseFns.get(activeModal.id)?.();
      return;
    }
    case "Tab": {
      const tabStops = activeModal.querySelectorAll(
        'a[href], button:not([disabled]), input:not([disabled]), select:not([disabled]), textarea:not([disabled]), [tabindex]:not([tabindex="-1"])'
      );
      if (tabStops.length === 0) return;
      const firstStop = tabStops[0];
      const lastStop = tabStops[tabStops.length - 1];
      const current = document.activeElement;
      // Wrap around at either end so focus cannot leave the dialog.
      if (event.shiftKey) {
        if (current === firstStop) { event.preventDefault(); lastStop.focus(); }
      } else if (current === lastStop) {
        event.preventDefault();
        firstStop.focus();
      }
      return;
    }
    default:
      return;
  }
});
// ════════════════════════════════════════════════════════════════════
// SHARING — Download / Upload / Submit to registry
// ════════════════════════════════════════════════════════════════════
// Registry repository in "owner/name" form. Presumably consumed by the
// submit-to-registry flow — its use is not visible in this part of the file.
const REGISTRY_REPO = "karlesmarin/tafagent-registry";
/**
 * Offer `data` to the user as a downloadable, pretty-printed JSON file.
 *
 * Delegates the Blob/anchor handling to downloadText() so both download
 * helpers share one implementation.
 *
 * @param {string} filename - Suggested file name for the download.
 * @param {*} data - Value to serialise with 2-space indentation.
 */
function downloadJSON(filename, data) {
  downloadText(filename, JSON.stringify(data, null, 2), "application/json");
}
/**
 * Offer a string to the user as a downloadable file.
 *
 * Wraps the text in a Blob, points a temporary <a download> element at its
 * object URL, clicks it, and 100 ms later removes the element and revokes
 * the URL.
 *
 * @param {string} filename - Suggested file name for the download.
 * @param {string} text - File contents.
 * @param {string} [mime="text/plain;charset=utf-8"] - MIME type of the Blob.
 */
function downloadText(filename, text, mime = "text/plain;charset=utf-8") {
  const objectUrl = URL.createObjectURL(new Blob([text], { type: mime }));
  const link = Object.assign(document.createElement("a"), {
    href: objectUrl,
    download: filename,
  });
  document.body.appendChild(link);
  link.click();
  const cleanup = () => {
    document.body.removeChild(link);
    URL.revokeObjectURL(objectUrl);
  };
  setTimeout(cleanup, 100);
}
/**
 * LaTeX-escape a plain string for inclusion in a tabular cell.
 *
 * `# $ % & _ { }` get a leading backslash; `\ ~ ^ < >` become their
 * `\text…{}` commands. null/undefined yield an empty string and any other
 * value is stringified first.
 *
 * All substitutions happen in a single pass. Chained replaces are not
 * equivalent here: the braces of an inserted `\textbackslash{}` would be
 * escaped again by a later brace replacement, producing `\textbackslash\{\}`.
 *
 * @param {*} s - Value to escape.
 * @returns {string} Text safe to place in LaTeX running text.
 */
function latexEscape(s) {
  const namedCommands = {
    "\\": "\\textbackslash{}",
    "~": "\\textasciitilde{}",
    "^": "\\textasciicircum{}",
    "<": "\\textless{}",
    ">": "\\textgreater{}",
  };
  return String(s ?? "").replace(
    /[\\#$%&_{}~^<>]/g,
    (ch) => namedCommands[ch] ?? `\\${ch}`
  );
}
/**
 * Serialise a profile result as a LaTeX `table` followed by `%` comments.
 *
 * The table has one row per recipe (id, verdict, reason cut to 80 characters
 * before escaping). Every entry of `key_numbers` is appended as a comment
 * line, with object values JSON-encoded.
 *
 * @param {object} p - Profile result (`model_summary`, `key_numbers`, `recipes`).
 * @param {string} [hash=""] - Input hash; when non-empty it is added as a
 *   comment line and to the caption.
 * @returns {string} LaTeX source.
 */
function profileToLatex(p, hash = "") {
  const summary = p.model_summary || {};
  const keyNumbers = p.key_numbers || {};
  const captionHash = hash ? ` (\\#${latexEscape(hash)})` : "";

  const recipeRows = Object.entries(p.recipes || {}).map(([rid, recipe]) => {
    const shortReason = (recipe.reason || "").slice(0, 80);
    return `${latexEscape(rid)} & ${latexEscape(recipe.verdict || "")} & ${latexEscape(shortReason)} \\\\\n`;
  });
  const keyNumberComments = Object.entries(keyNumbers).map(([key, value]) => {
    const shown = typeof value === "object" ? JSON.stringify(value) : value;
    return `% ${key} = ${shown}\n`;
  });

  return [
    `% TAF Profile — auto-generated by TAF Agent\n`,
    hash ? `% input hash: #${hash}\n` : "",
    `\\begin{table}[ht]\n\\centering\n`,
    `\\caption{TAF Profile — ${latexEscape(summary.architecture_class || "?")}${captionHash}}\n`,
    `\\begin{tabular}{lll}\n\\toprule\nRecipe & Verdict & Reason \\\\\n\\midrule\n`,
    ...recipeRows,
    `\\bottomrule\n\\end{tabular}\n\\end{table}\n\n`,
    `% Key numbers (JSON):\n`,
    ...keyNumberComments,
  ].join("");
}
/**
 * Render a multi-model comparison as a LaTeX `table`, one row per model.
 *
 * @param {object} c - Comparison result; reads `recipe_id`, `recipe_name`, `rows`.
 * @param {string} [hash=""] - Input hash; when non-empty it is added to a
 *   header comment and to the caption.
 * @returns {string} LaTeX source ending in a newline.
 */
function compareToLatex(c, hash = "") {
  const lines = [`% TAF Comparison — ${c.recipe_id} (${c.recipe_name})`];
  if (hash) lines.push(`% input hash: #${hash}`);
  const captionHash = hash ? ` (\\#${latexEscape(hash)})` : "";
  lines.push(
    "\\begin{table}[ht]",
    "\\centering",
    `\\caption{TAF Comparison — ${latexEscape(c.recipe_id)} ${latexEscape(c.recipe_name || "")}${captionHash}}`,
    "\\begin{tabular}{lll}",
    "\\toprule",
    "Model & Verdict & Reason \\\\",
    "\\midrule"
  );
  c.rows.forEach((row) => {
    // Reasons are truncated to 80 characters before escaping.
    const shortReason = (row.reason || "").slice(0, 80);
    lines.push(`${latexEscape(row.label)} & ${latexEscape(row.verdict)} & ${latexEscape(shortReason)} \\\\`);
  });
  lines.push("\\bottomrule", "\\end{tabular}", "\\end{table}");
  return `${lines.join("\n")}\n`;
}
/**
 * Render a single recipe result as a LaTeX `table` of its computation chain,
 * followed by the reason (and mitigation, if any) as `%` comment lines.
 *
 * @param {object} r - Recipe result; reads `recipe_id`, `recipe_name`,
 *   `verdict`, `chain`, `reason`, `mitigation`.
 * @param {string} [hash=""] - Input hash; when non-empty it is added to a
 *   header comment and to the caption.
 * @returns {string} LaTeX source ending in a newline.
 */
function recipeToLatex(r, hash = "") {
  // A LaTeX comment ends at the first newline, so multi-line text must be
  // flattened or its tail would be typeset as document body.
  const oneLine = (s) => String(s).replace(/\s*[\r\n]+\s*/g, " ");
  // Join id and name with a visible separator; skip whichever is missing.
  const title = [r.recipe_id, r.recipe_name].filter(Boolean).join(" — ");
  let tex = `% TAF Recipe ${oneLine(title)}\n`;
  if (hash) tex += `% input hash: #${hash}\n`;
  tex += `\\begin{table}[ht]\n\\centering\n`;
  tex += `\\caption{TAF Recipe \\texttt{${latexEscape(r.recipe_id)}} — verdict: ${latexEscape(r.verdict)}${hash ? ` (\\#${latexEscape(hash)})` : ""}}\n`;
  tex += `\\begin{tabular}{rll}\n\\toprule\nStep & Formula & Result \\\\\n\\midrule\n`;
  (r.chain || []).forEach((s) => {
    tex += `${latexEscape(s.step)} & \\texttt{${latexEscape(s.formula || "")}} & ${latexEscape(formatResultPlain(s.result))} \\\\\n`;
  });
  tex += `\\bottomrule\n\\end{tabular}\n\\end{table}\n\n`;
  tex += `% Reason: ${oneLine(latexEscape(r.reason || ""))}\n`;
  if (r.mitigation) tex += `% Mitigation: ${oneLine(latexEscape(r.mitigation))}\n`;
  return tex;
}
/**
 * Return a deep copy of `o` with object keys sorted recursively, so that
 * JSON.stringify of the result is deterministic.
 *
 * Arrays keep their element order; non-object values are returned as-is.
 * The copy is built with Object.fromEntries so that an own "__proto__" key
 * (which JSON.parse can produce) stays an ordinary property instead of
 * replacing the copy's prototype and disappearing from the output.
 *
 * @param {*} o - Any JSON-like value.
 * @returns {*} Key-sorted copy of `o`.
 */
function sortKeys(o) {
  if (Array.isArray(o)) return o.map(sortKeys);
  if (o && typeof o === "object") {
    return Object.fromEntries(
      Object.keys(o).sort().map((k) => [k, sortKeys(o[k])])
    );
  }
  return o;
}
/**
 * Compute an 8-character hex fingerprint of a result's canonical inputs.
 *
 * The canonical form is a key-sorted JSON object whose fields depend on
 * `type`; it is hashed with SHA-256 and the first 4 bytes are kept.
 * Identical canonical inputs always give the identical hash. Because only
 * 32 bits are kept, distinct inputs can occasionally collide.
 *
 * @param {string} type - "profile", "compare", or anything else (treated as a recipe).
 * @param {object} data - Result object of that type.
 * @returns {Promise<string>} 8 lowercase hex characters.
 */
async function inputHash(type, data) {
  let fields;
  switch (type) {
    case "profile": {
      // Accept either a full profile or a bare model summary.
      const ms = data.model_summary || data;
      fields = {
        type: "profile",
        theta: ms.rope_theta ?? ms.theta,
        T_train: ms.T_train,
        T_eval: ms.T_eval,
        n_attn: ms.n_attention_heads ?? ms.n_attn,
        n_kv: ms.n_kv_heads ?? ms.n_kv,
        d_head: ms.d_head,
        n_layers: ms.n_layers,
        n_params: ms.n_params,
        has_SWA: ms.has_SWA,
      };
      break;
    }
    case "compare":
      fields = {
        type: "compare",
        recipe: data.recipe_id,
        T_eval: (data.shared_params || {}).T_eval,
        // Labels are sorted so the row order does not affect the hash.
        models: (data.rows || []).map((row) => row.label).sort(),
      };
      break;
    default:
      fields = {
        type: "recipe",
        recipe: data.recipe_id,
        inputs: data.inputs || {},
      };
  }
  const encoded = new TextEncoder().encode(JSON.stringify(sortKeys(fields)));
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", encoded));
  let hex = "";
  for (let i = 0; i < 4; i += 1) {
    hex += digest[i].toString(16).padStart(2, "0");
  }
  return hex;
}
/**
 * Turn an arbitrary value into a short string usable inside a file name.
 *
 * Each of / \ ? % * : | " < > becomes "-", leading and trailing dashes are
 * dropped, and the result is cut to at most 60 characters.
 *
 * @param {*} s - Value to sanitize (stringified first).
 * @returns {string} Sanitized text, possibly empty.
 */
function safeFilename(s) {
  const dashed = String(s).replace(/[/\\?%*:|"<>]/g, "-");
  const trimmed = dashed.replace(/^-+|-+$/g, "");
  return trimmed.slice(0, 60);
}
/**
 * Pick a short, file-name-safe label for the model behind `data`.
 *
 * Preference order: the last model id stored in `state`, then a label built
 * from the profile's model summary, then one built from recipe inputs, and
 * finally `fallback`.
 *
 * @param {object} [data] - Result object (profile or recipe shaped).
 * @param {string} [fallback="model"] - Returned when nothing else is available.
 * @returns {string} Sanitized label.
 */
function modelShortName(data, fallback = "model") {
  if (state.lastModelId) return safeFilename(state.lastModelId);
  const summary = data?.model_summary;
  if (summary) {
    return safeFilename(`m${summary.n_params || 0}${summary.rope_theta || 0}`);
  }
  const inputs = data?.inputs;
  if (inputs) {
    return safeFilename(`m${inputs.n_params || ""}${inputs.theta || ""}`);
  }
  return fallback;
}
/**
 * Wrap a result in the TAF export envelope used for downloaded files.
 *
 * @param {string} type - Result kind stored as `_taf_type`.
 * @param {object} data - Result object stored as `payload`.
 * @returns {Promise<object>} Envelope with marker, type, version, input hash,
 *   ISO timestamp and payload.
 */
async function exportableData(type, data) {
  const envelope = {
    _taf_export: true,
    _taf_type: type,
    _taf_version: "0.2",
  };
  // Identical inputs produce an identical hash.
  envelope._taf_input_hash = await inputHash(type, data);
  envelope._taf_timestamp = new Date().toISOString();
  envelope.payload = data;
  return envelope;
}
/**
 * Build the download file name for a result:
 * `taf-<type>-<model>-<suffix>-<hash>.json`.
 *
 * The suffix is `T<T_eval>` when the result of the given type carries a
 * truthy T_eval, otherwise the recipe id, otherwise "result".
 *
 * @param {string} type - "profile", "compare" or "recipe".
 * @param {object} data - Result object of that type.
 * @returns {Promise<string>} File name.
 */
async function makeFilename(type, data) {
  const hash = await inputHash(type, data);
  const name = modelShortName(data);
  // Each result type keeps its evaluation length in a different place.
  let evalLength;
  switch (type) {
    case "profile":
      evalLength = data.model_summary?.T_eval;
      break;
    case "compare":
      evalLength = data.shared_params?.T_eval;
      break;
    case "recipe":
      evalLength = data.inputs?.T_eval;
      break;
    default:
      evalLength = undefined;
  }
  const suffix = evalLength ? `T${evalLength}` : (data.recipe_id || "result");
  return `taf-${type}-${name}-${suffix}-${hash}.json`;
}
// v0.6 privacy fix: previously placed full JSON body in URL params → GH server logs +
// referer headers captured user data. Now copy body to clipboard, open issue page
// with title only, user pastes body manually. Title is non-sensitive (model name +
// hash). On clipboard failure, fall back to console log so user can grab body.
/**
 * Start a registry submission: copy the Markdown issue body to the clipboard
 * and open GitHub's "new issue" page pre-filled with the title only.
 *
 * @param {string} type - "profile", "compare", or anything else (treated as a recipe).
 * @param {object} data - Result object of that type.
 * @param {{textContent: string}|null} [statusEl] - Optional element that
 *   receives a one-line outcome message.
 * @returns {Promise<void>}
 */
async function submitToRegistry(type, data, statusEl) {
  const hash = await inputHash(type, data);
  const modelName = modelShortName(data, "model");
  let title, body;
  if (type === "profile") {
    const ms = data.model_summary || {};
    title = `[TAF Profile] ${modelName} @ T=${ms.T_eval || "?"} #${hash}`;
    body = profileToMarkdown(data, hash);
  } else if (type === "compare") {
    // Tolerate a comparison without rows, as inputHash() does.
    title = `[TAF Compare] ${data.recipe_id} × ${(data.rows || []).length} models #${hash}`;
    body = compareToMarkdown(data, hash);
  } else {
    // Keep model name and verdict visually separate in the issue title.
    const subject = [modelName, data.verdict].filter(Boolean).join(" — ");
    title = `[TAF ${data.recipe_id}] ${subject} #${hash}`;
    body = recipeToMarkdown(data, hash);
  }
  const dedupNote = `\n\n> **Input hash**: \`#${hash}\` — search this hash in registry issues to find independent verifications. Same inputs always produce the same hash.`;
  const fullBody = body + dedupNote + "\n\n---\n*Submitted via [TAF Agent](https://karlesmarin.github.io/tafagent)*";
  let clipboardOk = false;
  try {
    await navigator.clipboard.writeText(fullBody);
    clipboardOk = true;
  } catch (e) {
    console.warn("Clipboard write failed; body logged below:", e);
    console.log("[TAF Agent] Issue body to paste:\n\n" + fullBody);
  }
  // Title-only URL — body intentionally omitted to avoid leaking via GH server logs / referer.
  const params = new URLSearchParams({ title });
  // noopener/noreferrer: the new tab gets no handle on this page and no
  // Referer header, matching the rel="noopener noreferrer" used on links.
  window.open(
    `https://github.com/${REGISTRY_REPO}/issues/new?${params.toString()}`,
    "_blank",
    "noopener,noreferrer"
  );
  if (statusEl) {
    statusEl.textContent = clipboardOk
      ? (t("share.submit_clip_ok") || "↗ Opened GitHub. Body copied to clipboard — paste it into the issue body.")
      : (t("share.submit_clip_fail") || "↗ Opened GitHub. Clipboard blocked — body logged in browser console (F12).");
  }
}
/**
 * Render a profile result as a Markdown issue body: model summary, one
 * bullet per recipe, the key numbers, and the full data in a collapsed block.
 *
 * @param {object} p - Profile result; reads `model_summary`, `key_numbers`, `recipes`.
 * @param {string} [hash=""] - Input hash shown next to the heading when non-empty.
 * @returns {string} Markdown text.
 */
function profileToMarkdown(p, hash = "") {
  const ms = p.model_summary || {};
  const kn = p.key_numbers || {};
  let md = `## TAF Profile`;
  if (hash) md += ` \`#${hash}\``;
  md += `\n\n`;
  md += `**Architecture**: ${ms.architecture_class || "?"}\n`;
  md += `**Params**: ${ms.n_params}, **T_train**: ${ms.T_train}, **T_eval**: ${ms.T_eval}\n`;
  md += `**θ**: ${ms.rope_theta}, GQA=${ms.has_GQA}, SWA=${ms.has_SWA}\n\n`;
  md += `### Recipes\n\n`;
  Object.entries(p.recipes || {}).forEach(([rid, r]) => {
    // Separate verdict and reason visibly; drop whichever is missing so the
    // bullet never shows the literal text "undefined".
    const outcome = [r.verdict, r.reason].filter((part) => part != null && part !== "").join(" — ");
    md += `- **${rid}** (${r.name || ""}): ${outcome}\n`;
  });
  md += `\n### Key numbers\n\n\`\`\`json\n${JSON.stringify(kn, null, 2)}\n\`\`\`\n`;
  md += `\n### Full data\n\n<details><summary>Click to expand</summary>\n\n\`\`\`json\n${JSON.stringify(p, null, 2)}\n\`\`\`\n\n</details>\n`;
  return md;
}
/**
 * Render a multi-model comparison as a Markdown issue body: shared
 * parameters, one table row per model, and the full data in a collapsed block.
 *
 * @param {object} c - Comparison result; reads `recipe_id`, `recipe_name`,
 *   `shared_params`, `rows`.
 * @param {string} [hash=""] - Input hash shown next to the heading when non-empty.
 * @returns {string} Markdown text.
 */
function compareToMarkdown(c, hash = "") {
  // A raw "|" or line break inside a cell would split the table row.
  const cell = (value) => String(value ?? "").replace(/\r?\n/g, " ").replace(/\|/g, "\\|");
  let md = `## TAF Comparison — ${c.recipe_id} (${c.recipe_name})`;
  if (hash) md += ` \`#${hash}\``;
  md += `\n\n`;
  md += `**Shared params**: \`${JSON.stringify(c.shared_params)}\`\n\n`;
  md += `| Model | Verdict | Reason |\n|-------|---------|--------|\n`;
  (c.rows || []).forEach((r) => {
    // A row may have no reason; treat it as empty instead of throwing.
    const reason = String(r.reason ?? "");
    const shortReason = reason.length > 80 ? `${reason.slice(0, 80)}...` : reason;
    md += `| ${cell(r.label)} | ${cell(r.verdict)} | ${cell(shortReason)} |\n`;
  });
  md += `\n<details><summary>Full data</summary>\n\n\`\`\`json\n${JSON.stringify(c, null, 2)}\n\`\`\`\n\n</details>\n`;
  return md;
}
/**
 * Render a single recipe result as a Markdown issue body: verdict, reason,
 * optional action, inputs, the computation chain, and the full data in a
 * collapsed block.
 *
 * @param {object} r - Recipe result; reads `recipe_id`, `recipe_name`,
 *   `verdict`, `reason`, `mitigation`, `inputs`, `chain`.
 * @param {string} [hash=""] - Input hash shown next to the heading when non-empty.
 * @returns {string} Markdown text.
 */
function recipeToMarkdown(r, hash = "") {
  // Join id and name with a visible separator; skip whichever is missing.
  const title = [r.recipe_id, r.recipe_name].filter(Boolean).join(" — ");
  let md = `## TAF Recipe ${title}`;
  if (hash) md += ` \`#${hash}\``;
  md += `\n\n`;
  md += `**Verdict**: ${r.verdict}\n`;
  md += `**Reason**: ${r.reason}\n`;
  if (r.mitigation) md += `**Action**: ${r.mitigation}\n`;
  md += `\n### Inputs\n\n\`\`\`json\n${JSON.stringify(r.inputs, null, 2)}\n\`\`\`\n`;
  md += `\n### Computation chain\n\n`;
  (r.chain || []).forEach((s) => {
    // Steps without a section label should not print "undefined".
    const stepLabel = [`Step ${s.step}`, s.section].filter(Boolean).join(" ");
    md += `**${stepLabel}** — ${s.name}: \`${s.formula}\` → ${formatResultPlain(s.result)}\n`;
  });
  md += `\n<details><summary>Full data</summary>\n\n\`\`\`json\n${JSON.stringify(r, null, 2)}\n\`\`\`\n\n</details>\n`;
  return md;
}
/**
 * Read a previously exported TAF file and render its payload.
 *
 * The file must be JSON with a truthy `_taf_export` marker; `_taf_type`
 * selects the renderer ("profile", "compare" or "recipe").
 *
 * Status messages are written with `textContent`, never `innerHTML`: the
 * type, timestamp and error text all come from the imported file and must not
 * be interpreted as markup.
 *
 * @param {Blob} file - File chosen by the user.
 * @param {{textContent: string}|null} statusEl - Element that receives the
 *   outcome message; may be null, in which case messages are dropped.
 */
function importJSON(file, statusEl) {
  const setStatus = (message) => {
    if (statusEl) statusEl.textContent = message;
  };
  const reader = new FileReader();
  reader.onerror = () => {
    setStatus(`❌ Failed to read file: ${reader.error?.message || "unknown error"}`);
  };
  reader.onload = (e) => {
    try {
      const data = JSON.parse(e.target.result);
      // `!data` also covers a file whose whole content is the JSON value null.
      if (!data || !data._taf_export) {
        setStatus("❌ Not a TAF export file (missing _taf_export marker).");
        return;
      }
      const type = data._taf_type;
      const payload = data.payload;
      const stamp = data._taf_timestamp || "?";
      if (type === "profile") {
        renderProfile(payload, payload.model_summary || {});
        setStatus(`✅ Profile loaded (${stamp})`);
      } else if (type === "compare") {
        renderCompare(payload);
        setStatus(`✅ Comparison loaded (${stamp})`);
      } else if (type === "recipe") {
        renderResult(payload);
        $("output-section").style.display = "block";
        setStatus(`✅ Recipe result loaded (${stamp})`);
      } else {
        setStatus(`❌ Unknown TAF type: ${type}`);
      }
    } catch (err) {
      // Also reached when a renderer throws on a malformed payload.
      setStatus(`❌ Failed to parse JSON: ${err.message}`);
    }
  };
  reader.readAsText(file);
}
// Once the DOM is parsed: hook up the import controls (when both exist) and
// start loading the Lean manifest.
document.addEventListener("DOMContentLoaded", () => {
  const trigger = document.getElementById("import-btn");
  const picker = document.getElementById("import-file");
  if (trigger && picker) {
    // The button forwards its click to the file input.
    trigger.addEventListener("click", () => picker.click());
    picker.addEventListener("change", (event) => {
      const chosen = event.target.files[0];
      if (!chosen) return;
      importJSON(chosen, document.getElementById("import-status"));
    });
  }
  // Lean+Mathlib manifest — loaded without blocking anything else; a failure
  // is only logged, so the app stays usable without it.
  loadLeanManifest().catch((reason) => console.warn("Lean manifest unavailable:", reason));
});
// ════════════════════════════════════════════════════════════════════
// Language switcher
// ════════════════════════════════════════════════════════════════════
// Every .lang-btn switches the UI language to the code in its data-lang attribute.
for (const langButton of document.querySelectorAll(".lang-btn")) {
  langButton.addEventListener("click", () => setLang(langButton.dataset.lang));
}
// ════════════════════════════════════════════════════════════════════
// 📈 Benchmark Saturation Detector (v0.8.0 anti-bullshit pack #6)
// ════════════════════════════════════════════════════════════════════
// Text/border colour for each saturation verdict code. Frozen so the shared
// lookup table cannot be modified by accident.
const SATURATION_VERDICT_COLOR = Object.freeze({
  saturated: "#f85149",
  near_saturated: "#d29922",
  discriminative: "#3fb950",
  sparse_data: "#8b949e",
  unknown_benchmark: "#8b949e",
});
// True once initSaturation() has started (and not failed); makes it idempotent.
let __saturationInited = false;
/**
 * One-time setup of the saturation detector: load the knowledge base, fill
 * the benchmark <select>, then try a live data fetch in the background.
 *
 * If the knowledge base fails to load, the guard flag is cleared again so a
 * later call can retry instead of leaving the mode permanently broken.
 *
 * @returns {Promise<void>} Resolves after the KB is loaded and the select is
 *   populated; the live fetch is not awaited.
 */
async function initSaturation() {
  if (__saturationInited) return;
  // Set before awaiting so overlapping calls do not start a second load.
  __saturationInited = true;
  const setStatus = (message) => {
    const statusEl = $("saturation-status");
    if (statusEl) statusEl.textContent = message;
  };
  try {
    await loadSaturationKB();
  } catch (e) {
    __saturationInited = false;
    setStatus((t("saturation.status.kb_fail") || "⚠ Could not load saturation KB.") + " " + (e?.message || e));
    return;
  }
  const sel = $("saturation-select");
  if (sel) {
    sel.innerHTML = "";
    const allOpt = document.createElement("option");
    allOpt.value = "__all__";
    allOpt.textContent = t("saturation.select.all") || "— show all benchmarks —";
    sel.appendChild(allOpt);
    listBenchmarks().forEach((name) => {
      const opt = document.createElement("option");
      opt.value = name;
      opt.textContent = name;
      sel.appendChild(opt);
    });
  }
  // Try live fetch in the background; if it yields nothing — or rejects —
  // the status says the baked snapshot is in use.
  const bakedMessage = () => t("saturation.status.baked") || "ℹ Using baked snapshot (live fetch unavailable).";
  tryFetchLive()
    .then((live) => {
      setStatus(
        live
          ? tFmt("saturation.status.live", { count: live.model_count || (live.models?.length ?? 0) })
          : bakedMessage()
      );
    })
    .catch((err) => {
      console.warn("Saturation live fetch failed:", err);
      setStatus(bakedMessage());
    });
}
/**
 * Build the HTML card for one classified benchmark: verdict header with
 * top-3 spread/mean/model count, an optional borderline warning, the top-3
 * score table, recommended alternatives and notes.
 *
 * Model names and the benchmark name are HTML-escaped because the scores may
 * come from a live fetch rather than the bundled snapshot.
 *
 * @param {object} result - Classification result; reads `code`, `params`,
 *   `top3`, `recommendations`, `borderline`, `source`, `note`.
 * @returns {string} HTML fragment.
 */
function renderSaturationCard(result) {
  if (result.code === "unknown_benchmark") {
    return `<div class="recipe-desc">${t("saturation.unknown") || "Unknown benchmark."}</div>`;
  }
  const color = SATURATION_VERDICT_COLOR[result.code] || "#8b949e";
  const verdictLabel = t(`saturation.verdict.${result.code}`) || result.code;
  const benchName = escapeHtml(String(result.params.name ?? ""));
  const top3Rows = (result.top3 || [])
    .filter((x) => typeof x.score === "number")
    .map((x, i) => `<tr><td>${i + 1}</td><td>${escapeHtml(String(x.model ?? ""))}</td><td class="arena-elo">${x.score.toFixed(1)}</td></tr>`)
    .join("");
  // NOTE(review): recommendations and note are inserted as-is; presumably
  // they come from the bundled KB and may carry markup — confirm before escaping.
  const recoItems = (result.recommendations || [])
    .map((r) => `<li>${r}</li>`)
    .join("");
  const borderlineNote = result.borderline
    ? `<p class="recipe-desc" style="color:#d29922; font-size:0.9em;">⚠ ${t("saturation.borderline") || "Borderline — within ±1pp of a threshold cutoff. Treat verdict as 'check carefully'."}</p>`
    : "";
  const sourceTag = result.source === "live"
    ? `<span class="badge" style="background:#0969da;">live</span>`
    : (result.source === "baked_consensus"
      ? `<span class="badge" style="background:#6e7781;">consensus</span>`
      : `<span class="badge" style="background:#8b949e;">baked</span>`);
  const spreadStr = result.params.spread != null ? `${result.params.spread.toFixed(1)} pp` : "n/a";
  const meanStr = result.params.mean != null ? `${result.params.mean.toFixed(1)}%` : "n/a";
  return `
<div class="arena-result">
<div class="unmask-hero" style="border-color: ${color};">
<div class="unmask-verdict" style="color: ${color};">${benchName} — ${verdictLabel} ${sourceTag}</div>
<div class="unmask-num-grid">
<div><span class="unmask-num-label">${t("saturation.col.spread") || "Top-3 spread"}</span><span class="unmask-num-val">${spreadStr}</span></div>
<div><span class="unmask-num-label">${t("saturation.col.mean") || "Top-3 mean"}</span><span class="unmask-num-val">${meanStr}</span></div>
<div><span class="unmask-num-label">${t("saturation.col.n") || "Models"}</span><span class="unmask-num-val">${result.params.n || 0}</span></div>
</div>
</div>
${borderlineNote}
<div class="unmask-details">
${top3Rows ? `<details class="unmask-panel" open>
<summary class="unmask-panel-title">${t("saturation.section.top3") || "Top-3 frontier scores"}</summary>
<table class="arena-table">
<thead><tr>
<th>#</th>
<th>${t("saturation.col.model") || "Model"}</th>
<th>${t("saturation.col.score") || "Score"}</th>
</tr></thead>
<tbody>${top3Rows}</tbody>
</table>
</details>` : ""}
${recoItems ? `<details class="unmask-panel" open>
<summary class="unmask-panel-title">${t("saturation.section.recommendations") || "Recommended alternatives"}</summary>
<ul>${recoItems}</ul>
</details>` : ""}
${result.note ? `<details class="unmask-panel">
<summary class="unmask-panel-title">${t("saturation.section.note") || "Notes"}</summary>
<p class="recipe-desc">${result.note}</p>
</details>` : ""}
</div>
</div>
`;
}
/**
 * Build the HTML overview table of all classified benchmarks, one row each.
 * Results with code "unknown_benchmark" contribute no row.
 *
 * @param {object[]} results - Classification results; each row reads `code`,
 *   `params`, `recommendations`, `borderline`.
 * @returns {string} HTML fragment.
 */
function renderSaturationAll(results) {
  // One decimal plus unit, or an em dash when the figure is missing.
  const oneDecimal = (value, unit) => (value != null ? value.toFixed(1) + unit : "—");
  const bodyRows = results
    .filter((entry) => entry.code !== "unknown_benchmark")
    .map((entry) => {
      const color = SATURATION_VERDICT_COLOR[entry.code] || "#8b949e";
      const verdictLabel = t(`saturation.verdict.${entry.code}`) || entry.code;
      const spread = oneDecimal(entry.params.spread, " pp");
      const mean = oneDecimal(entry.params.mean, "%");
      // Only the first two recommendations fit the overview column.
      const reco = (entry.recommendations || []).slice(0, 2).join(", ") || "—";
      const borderlineMark = entry.borderline ? " ⚠" : "";
      return [
        "<tr>",
        `<td><strong>${entry.params.name}</strong></td>`,
        `<td>${spread}</td>`,
        `<td>${mean}</td>`,
        `<td style="color:${color};"><strong>${verdictLabel}${borderlineMark}</strong></td>`,
        `<td>${reco}</td>`,
        "</tr>",
      ].join("\n");
    })
    .join("");
  return [
    "",
    '<div class="arena-result">',
    '<div class="unmask-details">',
    '<details class="unmask-panel" open>',
    `<summary class="unmask-panel-title">${t("saturation.section.all") || "All tracked benchmarks"}</summary>`,
    '<table class="arena-table">',
    "<thead><tr>",
    `<th>${t("saturation.col.bench") || "Benchmark"}</th>`,
    `<th>${t("saturation.col.spread") || "Spread"}</th>`,
    `<th>${t("saturation.col.mean") || "Mean"}</th>`,
    `<th>${t("saturation.col.verdict") || "Verdict"}</th>`,
    `<th>${t("saturation.col.reco") || "Top reco"}</th>`,
    "</tr></thead>",
    `<tbody>${bodyRows}</tbody>`,
    "</table>",
    "</details>",
    "</div>",
    "</div>",
    "",
  ].join("\n");
}
/**
 * Classify the benchmark currently chosen in the saturation <select> and
 * show its card. With no selection, or the "__all__" entry, the overview of
 * all benchmarks is shown instead.
 */
function runSaturationOne() {
  const name = $("saturation-select")?.value;
  const wantsAll = !name || name === "__all__";
  if (wantsAll) {
    runSaturationAll();
    return;
  }
  const result = classifyBenchmark(name);
  const verdict = t(`saturation.verdict.${result.code}`) || result.code;
  $("saturation-output").innerHTML = renderSaturationCard(result);
  $("saturation-status").textContent = tFmt("saturation.status.done", { name, verdict });
}
/**
 * Classify every tracked benchmark, show the overview table, and report how
 * many results were produced.
 */
function runSaturationAll() {
  const classified = classifyAll();
  const overviewHtml = renderSaturationAll(classified);
  $("saturation-output").innerHTML = overviewHtml;
  const summary = tFmt("saturation.status.all_done", { n: classified.length });
  $("saturation-status").textContent = summary;
}
// Saturation buttons: "run" classifies the selected benchmark, "all" shows
// the overview. A button missing from the page is skipped.
for (const [buttonId, onClick] of [
  ["saturation-run-btn", runSaturationOne],
  ["saturation-all-btn", runSaturationAll],
]) {
  $(buttonId)?.addEventListener("click", onClick);
}
// ════════════════════════════════════════════════════════════════════
// 🧭 Solutions Hub (v0.8.1) — integrator portal
// ════════════════════════════════════════════════════════════════════
// Icon shown next to an external tool, keyed by the tool's `type`. Frozen so
// the shared lookup table cannot be modified by accident.
const HUB_TYPE_BADGE = Object.freeze({
  tool: "🔧",
  leaderboard: "📊",
  paper: "📄",
  article: "📝",
  docs: "📘",
  issue: "🐛",
  spec: "📐",
  benchmark: "🧪",
});
// True once initHub() has started (and not failed); makes it idempotent.
let __hubInited = false;
/**
 * One-time setup of the Solutions Hub: load its data, show the stats line
 * and render every category.
 *
 * If loading fails, the guard flag is cleared again so a later call can
 * retry instead of leaving the hub permanently empty.
 *
 * @returns {Promise<void>}
 */
async function initHub() {
  if (__hubInited) return;
  // Set before awaiting so overlapping calls do not start a second load.
  __hubInited = true;
  const setStatus = (message) => {
    const statusEl = $("hub-status");
    if (statusEl) statusEl.textContent = message;
  };
  try {
    await loadHub();
  } catch (e) {
    __hubInited = false;
    setStatus((t("hub.status.fail") || "⚠ Could not load Solutions Hub.") + " " + (e?.message || e));
    return;
  }
  const stats = hubStats();
  setStatus(tFmt("hub.status.loaded", stats));
  renderHubAll();
}
/**
 * Build the collapsible HTML panel for one Solutions Hub entry: the pain
 * point with a mode badge, "best for" / "not for" notes, and the list of
 * external tools.
 *
 * @param {object} e - Hub entry; reads `pain`, `tafagent_mode`,
 *   `tafagent_planned_mode`, `external_tools`, `best_for`, `not_for`.
 * @returns {string} HTML fragment.
 */
function renderEntry(e) {
  // Badge: green for an existing mode, amber for a planned one, grey otherwise.
  let modeBadge;
  if (e.tafagent_mode) {
    modeBadge = `<span class="badge" style="background:#3fb950;color:#fff;border-color:#3fb950;">${e.tafagent_mode}</span>`;
  } else if (e.tafagent_planned_mode) {
    modeBadge = `<span class="badge" style="background:#d29922;color:#1a1a1a;border-color:#d29922;">${t("hub.planned") || "planned:"} ${e.tafagent_planned_mode}</span>`;
  } else {
    modeBadge = `<span class="badge" style="background:#6e7781;color:#fff;border-color:#6e7781;">${t("hub.no_mode") || "external"}</span>`;
  }
  const toolItems = [];
  (e.external_tools || []).forEach((tool) => {
    const icon = HUB_TYPE_BADGE[tool.type] || "🔗";
    toolItems.push(
      `<li>${icon} <a href="${tool.url}" target="_blank" rel="noopener noreferrer">${tool.name}</a> <span class="subtle" style="font-size:0.82em;">(${tool.type})</span></li>`
    );
  });
  const tools = toolItems.join("");
  const bestFor = e.best_for ? `<p><strong>${t("hub.best_for") || "Best for"}:</strong> ${e.best_for}</p>` : "";
  const notFor = e.not_for ? `<p><strong>${t("hub.not_for") || "Not for"}:</strong> ${e.not_for}</p>` : "";
  const toolsBlock = tools
    ? `<p><strong>${t("hub.tools") || "External tools"}:</strong></p><ul>${tools}</ul>`
    : "";
  return [
    "",
    '<details class="unmask-panel" style="margin: 0.5em 0;">',
    `<summary class="unmask-panel-title">${e.pain} ${modeBadge}</summary>`,
    bestFor,
    notFor,
    toolsBlock,
    "</details>",
    "",
  ].join("\n");
}
/**
 * Render the whole Solutions Hub into #hub-output: one open panel per
 * category with its entries. Categories without entries are skipped.
 */
function renderHubAll() {
  const sections = [];
  for (const category of listCategories()) {
    const entries = listEntries(category.key);
    if (entries.length === 0) continue;
    const inner = entries.map(renderEntry).join("");
    sections.push(
      [
        "",
        '<details class="unmask-panel" open style="margin-top: 1em;">',
        '<summary class="unmask-panel-title" style="font-size:1.05em;">',
        `${category.icon} ${category.label} <span class="subtle" style="font-size:0.85em;">(${category.count})</span>`,
        "</summary>",
        `<p class="recipe-desc" style="font-style:italic;">${category.description}</p>`,
        inner,
        "</details>",
        "",
      ].join("\n")
    );
  }
  $("hub-output").innerHTML = `<div class="arena-result">${sections.join("")}</div>`;
}
/**
 * Render the Solutions Hub entries matching `query` into #hub-output, or an
 * "empty" message when nothing matches.
 *
 * The query is typed by the user and ends up inside innerHTML, so it is
 * HTML-escaped before being placed in the message.
 * NOTE(review): assumes tFmt() substitutes parameters verbatim without
 * escaping them itself — confirm against its definition.
 *
 * @param {string} query - Raw search text.
 */
function renderHubSearch(query) {
  const matches = searchEntries(query);
  const shownQuery = escapeHtml(query);
  if (matches.length === 0) {
    $("hub-output").innerHTML = `<p class="recipe-desc">${tFmt("hub.search.empty", { query: shownQuery })}</p>`;
    return;
  }
  const html = matches.map(renderEntry).join("");
  $("hub-output").innerHTML = `<div class="arena-result">
<p class="recipe-desc">${tFmt("hub.search.results", { n: matches.length, query: shownQuery })}</p>
${html}
</div>`;
}
// Handle of the pending debounced hub search, if any.
let __hubSearchTimer = null;
// Search box: re-render 200 ms after the last keystroke; a blank query shows
// the full hub again.
$("hub-search")?.addEventListener("input", (e) => {
  clearTimeout(__hubSearchTimer);
  const q = e.target.value;
  __hubSearchTimer = setTimeout(() => {
    if (!q.trim()) renderHubAll();
    else renderHubSearch(q);
  }, 200);
});
// Clear button: empty the box and show everything. Any search still waiting
// in the debounce window is cancelled first, otherwise it would fire after
// the reset and replace the full view with stale results.
$("hub-clear-btn")?.addEventListener("click", () => {
  clearTimeout(__hubSearchTimer);
  __hubSearchTimer = null;
  $("hub-search").value = "";
  renderHubAll();
});
// ════════════════════════════════════════════════════════════════════
// 📋 JSON CoT-aware Linter (v0.8.2 anti-bullshit pack #8)
// ════════════════════════════════════════════════════════════════════
// Icon for each classified field type in the CoT linter's field table.
// Frozen so the shared lookup table cannot be modified by accident.
const COT_FIELD_TYPE_BADGE = Object.freeze({
  reasoning: "🧠",
  answer: "🎯",
  other: "·",
});
// Badge background colour for each CoT linter verdict code (frozen).
const COT_VERDICT_BADGE_BG = Object.freeze({
  good_order: "#3fb950", // green
  anti_pattern: "#f85149", // red
  missing_reasoning: "#d29922", // amber
  missing_answer: "#d29922", // amber
  no_cot_fields: "#8b949e", // gray
  non_object: "#8b949e",
  empty_fields: "#8b949e",
  invalid_json: "#f85149", // red
});
// Set by the first initCot() call.
let __cotInited = false;
/**
 * Mark the CoT linter mode as initialised. The mode has no async data to
 * load; the function exists for symmetry with the other modes' init hooks.
 */
function initCot() {
  if (!__cotInited) {
    __cotInited = true;
  }
}
/**
 * Build the HTML report for a JSON CoT lint result: verdict badge, explainer,
 * table of classified fields, a suggested reordering (for the "anti_pattern"
 * verdict) and source links.
 *
 * @param {object} result - Lint result; reads `code` and `params`
 *   (`error`, `fields`, `field_count`, `suggested_order`).
 * @param {string} originalText - The linted JSON text, used to build the
 *   suggested fix.
 * @returns {string} HTML fragment.
 */
function renderCotResult(result, originalText) {
  const verdict = t(`cot.verdict.${result.code}`) || result.code;
  const verdictBg = COT_VERDICT_BADGE_BG[result.code] || "#8b949e";
  const verdictBadge = `<span class="badge" style="background:${verdictBg};">${verdict}</span>`;
  // Failure cases short-circuit: just show the verdict + reason.
  if (result.code === "invalid_json") {
    const reason = result.params?.error || "";
    return `<div class="arena-result">
<p style="font-size:1.1em;">${verdictBadge}</p>
<pre style="background:#21262d;padding:0.75em;border-radius:4px;color:#f0883e;">${escapeHtml(reason)}</pre>
</div>`;
  }
  if (result.code === "empty_fields" || result.code === "non_object") {
    return `<div class="arena-result">
<p style="font-size:1.1em;">${verdictBadge}</p>
<p class="recipe-desc">${t(`cot.hint.${result.code}`) || ""}</p>
</div>`;
  }
  const fields = result.params?.fields || [];
  const fieldRows = fields.map((f) => {
    const icon = COT_FIELD_TYPE_BADGE[f.type] || "·";
    const typeLabel = t(`cot.field.${f.type}`) || f.type;
    const color = f.type === "reasoning" ? "#3fb950"
      : f.type === "answer" ? "#f0883e"
      : "#8b949e";
    return `<tr>
<td style="text-align:right;color:#8b949e;">${f.idx}</td>
<td><code>${escapeHtml(f.name)}</code></td>
<td><span style="color:${color};">${icon} ${typeLabel}</span></td>
</tr>`;
  }).join("");
  const fieldTable = `
<table class="lean-table" style="margin-top:0.5em;">
<thead><tr>
<th>#</th>
<th data-i18n="cot.col.field">Field</th>
<th data-i18n="cot.col.type">Type</th>
</tr></thead>
<tbody>${fieldRows}</tbody>
</table>
`;
  // Suggested-fix block — only when there's a meaningful reorder.
  let fixBlock = "";
  if (result.code === "anti_pattern") {
    const suggested = result.params?.suggested_order || [];
    const fixed = reorderJsonText(originalText, suggested);
    if (fixed) {
      // The button labels are translations. They are carried in data-*
      // attributes and read back through `dataset`, so an apostrophe or quote
      // in a translation cannot break the inline handler's JavaScript.
      const attrEscape = (s) => String(s)
        .replace(/&/g, "&amp;")
        .replace(/"/g, "&quot;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
      const copyLabel = t("cot.suggested_fix.copy") || "📋 Copy";
      const copiedLabel = t("cot.suggested_fix.copied") || "✓ Copied";
      fixBlock = `
<details open style="margin-top:1em;">
<summary style="cursor:pointer;color:#3fb950;">
<strong>${t("cot.suggested_fix.title") || "✓ Suggested fix"}</strong>
</summary>
<p class="recipe-desc">${t("cot.suggested_fix.desc") || ""}</p>
<pre style="background:#0d1117;padding:0.75em;border-radius:4px;overflow-x:auto;"><code>${escapeHtml(fixed)}</code></pre>
<button type="button" class="secondary" data-copy-label="${attrEscape(copyLabel)}" data-copied-label="${attrEscape(copiedLabel)}" onclick="navigator.clipboard.writeText(this.previousElementSibling.textContent).then(()=>{this.textContent=this.dataset.copiedLabel;setTimeout(()=>{this.textContent=this.dataset.copyLabel;},1500);})">${copyLabel}</button>
</details>
`;
    }
  }
  // Verdict explainer
  const explainer = t(`cot.explain.${result.code}`) || "";
  const explainerBlock = explainer
    ? `<p class="recipe-desc">${explainer}</p>`
    : "";
  // Source attribution footer
  const attribution = `
<p class="recipe-desc subtle" style="font-size:0.82em;margin-top:1em;">
${t("cot.attribution") || ""}
<a href="https://collinwilkins.com/articles/structured-output" target="_blank" rel="noopener noreferrer">collinwilkins.com</a> ·
<a href="https://github.com/guidance-ai/jsonschemabench" target="_blank" rel="noopener noreferrer">JSONSchemaBench</a> ·
<a href="https://github.com/guidance-ai/llguidance" target="_blank" rel="noopener noreferrer">llguidance</a>
</p>
`;
  // Fall back to the number of listed fields when the result carries no count.
  const fieldCount = result.params?.field_count ?? fields.length;
  return `<div class="arena-result">
<p style="font-size:1.1em;">${verdictBadge}
<span class="subtle" style="font-size:0.9em;">(${tFmt("cot.field_count", { n: fieldCount }) || `${fieldCount} fields`})</span>
</p>
${explainerBlock}
${fieldTable}
${fixBlock}
${attribution}
</div>`;
}
/**
 * Lint the JSON currently in #cot-input, render the report into #cot-output,
 * and put the verdict in the status line.
 */
function runCotLint() {
  const source = $("cot-input")?.value || "";
  const lintResult = lintJsonCot(source);
  const reportHtml = renderCotResult(lintResult, source);
  $("cot-output").innerHTML = reportHtml;
  const verdict = t(`cot.verdict.${lintResult.code}`) || lintResult.code;
  $("cot-status").textContent = tFmt("cot.status.done", { verdict });
}
// Sample schema loaded by the "good" example button: the reasoning field is
// declared before the answer field.
const COT_EXAMPLE_GOOD = JSON.stringify({
type: "object",
properties: {
reasoning: {
type: "string",
description: "Step-by-step rationale before committing to an answer.",
},
answer: {
type: "string",
description: "Final answer, derived from the reasoning above.",
},
},
required: ["reasoning", "answer"],
}, null, 2);
// Sample schema loaded by the "bad" example button: the answer field is
// declared before the chain-of-thought field.
const COT_EXAMPLE_BAD = JSON.stringify({
type: "object",
properties: {
final_answer: {
type: "string",
description: "The model's final answer.",
},
chain_of_thought: {
type: "string",
description: "Justification for the answer above.",
},
},
required: ["final_answer", "chain_of_thought"],
}, null, 2);
// CoT linter buttons: "lint" checks the current input; each example button
// loads its sample schema into the input and lints it straight away.
$("cot-lint-btn")?.addEventListener("click", runCotLint);
for (const [buttonId, sample] of [
  ["cot-example-good-btn", COT_EXAMPLE_GOOD],
  ["cot-example-bad-btn", COT_EXAMPLE_BAD],
]) {
  $(buttonId)?.addEventListener("click", () => {
    $("cot-input").value = sample;
    runCotLint();
  });
}
// ════════════════════════════════════════════════════════════════════
// 🔧 PEFT Anti-Pattern Checker (v0.8.3 anti-bullshit pack #9)
// ════════════════════════════════════════════════════════════════════
// Badge background colour for each PEFT finding severity. Frozen so the
// shared lookup table cannot be modified by accident.
const PEFT_SEVERITY_BG = Object.freeze({
  error: "#f85149",
  warning: "#d29922",
  info: "#58a6ff",
});
// Badge background colour for each overall PEFT lint verdict code (frozen).
const PEFT_VERDICT_BG = Object.freeze({
  errors_found: "#f85149",
  warnings_only: "#d29922",
  info_only: "#58a6ff",
  clean: "#3fb950",
  no_peft_calls: "#8b949e",
  empty_input: "#8b949e",
});
// Set by the first initPeft() call.
let __peftInited = false;
/**
 * Mark the PEFT checker mode as initialised. The mode has no async data to
 * load; the function exists for symmetry with the other modes' init hooks.
 */
function initPeft() {
  if (!__peftInited) {
    __peftInited = true;
  }
}
/**
 * Build the open HTML panel for one PEFT lint finding: severity badge, rule
 * label, optional line number, explainer, rule-specific detail and fix hint.
 *
 * @param {object} f - Finding; reads `severity`, `rule`, `line`, `params`.
 * @returns {string} HTML fragment.
 */
function renderPeftFinding(f) {
  const sevBg = PEFT_SEVERITY_BG[f.severity] || "#8b949e";
  const sevBadge = `<span class="badge" style="background:${sevBg};">${f.severity.toUpperCase()}</span>`;
  const ruleLabel = t(`peft.rule.${f.rule}.label`) || f.rule;
  let lineLabel = "";
  if (f.line != null) {
    const lineText = tFmt("peft.line", { n: f.line }) || `line ${f.line}`;
    lineLabel = `<span class="subtle" style="font-size:0.85em;">${lineText}</span>`;
  }
  const explainer = t(`peft.rule.${f.rule}.explain`) || "";
  const fixHint = t(`peft.rule.${f.rule}.fix`) || "";
  // Per-rule rendering details; rules not listed here show no detail.
  let detail = "";
  switch (f.rule) {
    case "silent_base_load":
      detail = [
        `<p><code>${escapeHtml(f.params.checkpoint_hint)}</code> ${t("peft.detected_at_line") || "appears at line"} ${f.params.checkpoint_line}</p>`,
        `<p><strong>${t("peft.suggested_fix") || "Suggested:"}</strong> <code>${escapeHtml(f.params.fix)}</code></p>`,
      ].join("\n");
      break;
    case "qlora_order":
      detail = `<p>${tFmt("peft.qlora_order.detail", f.params) || `prepare_model_for_kbit_training (line ${f.params.prepare_line}) runs AFTER get_peft_model (line ${f.params.get_peft_model_line}). Reverse the order.`}</p>`;
      break;
    case "target_modules_mismatch":
      detail = [
        "",
        `<p><strong>${t("peft.detected_arch") || "Detected arch"}:</strong> <code>${escapeHtml(f.params.detected_arch)}</code> ${t("peft.from_model_id") || "(from model id"} <code>${escapeHtml(f.params.detected_from)}</code>)</p>`,
        `<p><strong>${t("peft.your_modules") || "Your target_modules"}:</strong> <code>${escapeHtml(f.params.user_modules.join(", "))}</code></p>`,
        `<p><strong>${t("peft.expected_modules") || "Expected for this arch"}:</strong> <code>${escapeHtml(f.params.expected_modules.join(", "))}</code></p>`,
        `<p class="subtle" style="font-size:0.85em;">${tFmt("peft.match_ratio", f.params) || `${f.params.hits} of ${f.params.total} match.`}</p>`,
        "",
      ].join("\n");
      break;
    case "alpha_not_2r":
      detail = `<p><code>r=${f.params.r}, lora_alpha=${f.params.lora_alpha}</code> → ${t("peft.ratio") || "ratio"} ${f.params.ratio}× (${t("peft.alpha.convention") || "convention is α=2r or α=r"})</p>`;
      break;
    case "no_peft_calls":
      detail = `<p>${t("peft.no_peft_calls.detail") || "No get_peft_model / PeftModel.from_pretrained / LoraConfig calls detected. Paste a PEFT/LoRA setup snippet."}</p>`;
      break;
    default:
      break;
  }
  const explainerBlock = explainer ? `<p>${explainer}</p>` : "";
  const fixBlock = fixHint ? `<p class="recipe-desc" style="margin-top:0.5em;">${fixHint}</p>` : "";
  return [
    "",
    '<details open class="unmask-panel" style="margin: 0.5em 0;">',
    '<summary class="unmask-panel-title">',
    `${sevBadge} <strong>${ruleLabel}</strong> ${lineLabel}`,
    "</summary>",
    explainerBlock,
    detail,
    fixBlock,
    "</details>",
    "",
  ].join("\n");
}
/**
 * Build the HTML report for a PEFT lint run: verdict badge, optional summary
 * line, one panel per finding, and reference links.
 *
 * @param {object} result - Lint result; reads `code`, `findings`, `summary`.
 * @returns {string} HTML fragment.
 */
function renderPeftResult(result) {
  const verdict = t(`peft.verdict.${result.code}`) || result.code;
  const verdictBg = PEFT_VERDICT_BG[result.code] || "#8b949e";
  const verdictBadge = `<span class="badge" style="background:${verdictBg};">${verdict}</span>`;
  const findingsHtml = (result.findings || []).map(renderPeftFinding).join("");
  let summary = "";
  if (result.summary) {
    const summaryText = tFmt("peft.summary", result.summary) || `${result.summary.total} finding(s)`;
    summary = `<p class="subtle" style="font-size:0.9em;">${summaryText}</p>`;
  }
  // Source attribution
  const attribution = [
    "",
    '<p class="recipe-desc subtle" style="font-size:0.82em;margin-top:1em;">',
    `${t("peft.attribution") || "Refs:"}`,
    '<a href="https://github.com/huggingface/peft/issues/2115" target="_blank" rel="noopener noreferrer">peft #2115</a> ·',
    '<a href="https://huggingface.co/docs/peft/main/en/developer_guides/troubleshooting" target="_blank" rel="noopener noreferrer">PEFT troubleshooting</a> ·',
    '<a href="https://huggingface.co/docs/peft/main/en/package_reference/peft_model" target="_blank" rel="noopener noreferrer">get_layer_status / get_model_status</a>',
    "</p>",
    "",
  ].join("\n");
  return [
    '<div class="arena-result">',
    `<p style="font-size:1.1em;">${verdictBadge}</p>`,
    summary,
    findingsHtml,
    attribution,
    "</div>",
  ].join("\n");
}
/**
 * Lint the code currently in #peft-input, render the report into
 * #peft-output, and put the verdict and finding count in the status line.
 */
function runPeftLint() {
  const source = $("peft-input")?.value || "";
  const lintResult = lintPeftCode(source);
  $("peft-output").innerHTML = renderPeftResult(lintResult);
  const verdict = t(`peft.verdict.${lintResult.code}`) || lintResult.code;
  const n = lintResult.findings?.length || 0;
  $("peft-status").textContent = tFmt("peft.status.done", { verdict, n });
}
// Sample snippets loaded into the PEFT checker input by the example buttons.
// The text inside each template literal is user-visible sample code — keep it
// byte-for-byte.

// "Bug" sample: after get_peft_model, a saved checkpoint is loaded with
// model.load_state_dict.
const PEFT_EXAMPLE_BUG = `from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM
base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3-8B")
config = LoraConfig(
r=16,
lora_alpha=32,
target_modules=["q_proj", "v_proj"],
)
model = get_peft_model(base, config)
# resume from saved checkpoint?
model.load_state_dict("./outputs/checkpoint-1000/adapter_model.bin")
`;
// "QLoRA" sample: prepare_model_for_kbit_training is called after
// get_peft_model (the wrong order, as the snippet's own comment says).
const PEFT_EXAMPLE_QLORA = `from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
bnb = BitsAndBytesConfig(load_in_4bit=True)
base = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-3-8B",
quantization_config=bnb,
)
config = LoraConfig(r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"])
model = get_peft_model(base, config)
# WRONG ORDER: prepare_model_for_kbit_training must come BEFORE get_peft_model
model = prepare_model_for_kbit_training(model)
`;
// "Clean" sample: resumes a saved adapter with PeftModel.from_pretrained.
const PEFT_EXAMPLE_CLEAN = `from peft import PeftModel
from transformers import AutoModelForCausalLM
base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3-8B")
# Resume from saved adapter — correct PEFT pattern.
model = PeftModel.from_pretrained(base, "./outputs/checkpoint-1000")
`;
// PEFT linter wiring: the Lint button runs the linter on whatever is in the
// textarea; each example button first loads its canned snippet, then lints.
$("peft-lint-btn")?.addEventListener("click", runPeftLint);
for (const [buttonId, snippet] of [
  ["peft-example-bug-btn", PEFT_EXAMPLE_BUG],
  ["peft-example-qlora-btn", PEFT_EXAMPLE_QLORA],
  ["peft-example-clean-btn", PEFT_EXAMPLE_CLEAN],
]) {
  $(buttonId)?.addEventListener("click", () => {
    $("peft-input").value = snippet;
    runPeftLint();
  });
}
// ════════════════════════════════════════════════════════════════════
// 🔁 Prompt-Cache Diff Predictor (v0.8.4 anti-bullshit pack #10)
// ════════════════════════════════════════════════════════════════════
// Badge background colour for each prompt-cache verdict code. Looked up by
// `result.code` when rendering; codes missing here fall back to grey
// (#8b949e) at the lookup site.
const CACHE_VERDICT_BG = {
identical: "#3fb950",
divergent_can_cache: "#d29922",
divergent_below_min: "#f0883e",
fully_divergent: "#f85149",
empty_input: "#8b949e",
};
// Set once the Prompt-Cache Diff tab has been initialised.
let __cacheInited = false;

/**
 * One-time initialiser for the Prompt-Cache Diff tab.
 *
 * There is nothing asynchronous to preload; the function only flips the
 * guard flag and exists for symmetry with the other init functions.
 */
function initCacheDiff() {
  if (!__cacheInited) {
    __cacheInited = true;
  }
}
/**
 * Format a dollar amount with precision that scales with its magnitude.
 *
 * - null / undefined / NaN / ±Infinity → "—"
 * - exactly zero → "$0"
 * - |n| < 0.01 → 6 decimals, |n| < 1 → 4 decimals, otherwise 2 decimals
 *
 * Precision is chosen from the absolute value and the sign is printed in
 * front of the dollar sign. Previously every negative amount fell into the
 * `< 0.01` branch and came out as e.g. "$-5.000000".
 *
 * @param {number|null|undefined} n Amount in USD.
 * @returns {string} Display string such as "$0.0042", "$12.50" or "-$5.00".
 */
function fmtUsd(n) {
  if (n == null) return "—";
  const amount = Number(n);
  if (!Number.isFinite(amount)) return "—";
  if (amount === 0) return "$0";

  const magnitude = Math.abs(amount);
  let digits = 2;
  if (magnitude < 0.01) digits = 6;
  else if (magnitude < 1) digits = 4;

  const sign = amount < 0 ? "-" : "";
  return `${sign}$${magnitude.toFixed(digits)}`;
}
/**
 * Format a 0..1 fraction as a whole-number percentage.
 *
 * @param {number|null|undefined} n Fraction (0.25 means 25%).
 * @returns {string} e.g. "25%", or "—" when the value is missing or NaN.
 */
function fmtPct(n) {
  const missing = n == null || isNaN(n);
  return missing ? "—" : `${Math.round(n * 100)}%`;
}
/**
 * Render the table row(s) for one provider in the prompt-cache result.
 *
 * Emits a main row (provider name with notes and TTL, hit ratio,
 * base → cached cost, savings) and, when the provider reports a positive
 * cache-write surcharge, a second full-width note row.
 *
 * Fix: the cost cell used to print the base and cached amounts back to back
 * with nothing between them (e.g. "$0.0123$0.0045"). They are now joined
 * with " → ", matching the "Base → cached" column header.
 *
 * @param {object} p Per-provider result. Fields read here: provider_name,
 *   reason, requires_explicit, min_cache_tokens, cache_ttl_seconds,
 *   hit_ratio, base_cost_usd, cached_cost_usd, savings_usd, savings_pct,
 *   cache_write_surcharge_usd.
 * @returns {string} HTML for one or two `<tr>` elements.
 */
function renderCacheProvider(p) {
  const belowMin = p.reason === "below_min";
  const bgRow = belowMin ? "#21262d" : "#161b22";

  // Notes shown under the provider name.
  const noteHtml = [];
  if (p.requires_explicit && !belowMin) {
    noteHtml.push(`<span class="subtle" style="font-size:0.8em;">${t("cache.note.requires_marker") || "(requires cache_control marker)"}</span>`);
  }
  if (belowMin) {
    const minTokens = p.min_cache_tokens.toLocaleString();
    noteHtml.push(`<span class="subtle" style="font-size:0.8em;color:#f0883e;">${tFmt("cache.note.below_min", { min: minTokens }) || `(prefix < ${minTokens} tokens — provider min)`}</span>`);
  }
  const noteCell = noteHtml.length ? `<br>${noteHtml.join(" ")}` : "";

  // TTL is shown in hours from one hour upwards, otherwise in minutes.
  const ttlMin = p.cache_ttl_seconds >= 3600
    ? `${Math.round(p.cache_ttl_seconds / 3600)}h`
    : `${Math.round(p.cache_ttl_seconds / 60)}min`;

  // Green for real savings, grey when a reason explains zero, amber otherwise.
  const savingsColor = p.savings_usd > 0 ? "#3fb950" : (p.reason ? "#8b949e" : "#d29922");

  const writeRow = p.cache_write_surcharge_usd && p.cache_write_surcharge_usd > 0
    ? `<tr style="background:${bgRow};"><td colspan="4" class="subtle" style="font-size:0.8em;padding-left:1em;">${tFmt("cache.write_surcharge", { cost: fmtUsd(p.cache_write_surcharge_usd) }) || `+ ${fmtUsd(p.cache_write_surcharge_usd)} cache-write surcharge first time (Anthropic)`}</td></tr>`
    : "";

  return `
<tr style="background:${bgRow};">
<td><strong>${escapeHtml(p.provider_name)}</strong>${noteCell}<br><span class="subtle" style="font-size:0.78em;">TTL ${ttlMin}</span></td>
<td style="text-align:right;">${fmtPct(p.hit_ratio)}</td>
<td style="text-align:right;">${fmtUsd(p.base_cost_usd)} → ${fmtUsd(p.cached_cost_usd)}</td>
<td style="text-align:right;color:${savingsColor};"><strong>${fmtUsd(p.savings_usd)}</strong> (${fmtPct(p.savings_pct ?? 0)})</td>
</tr>
${writeRow}
`;
}
/**
 * Build the collapsible "where the cache breaks" snippet.
 *
 * Shows at most the last 200 characters of the shared prefix (with a
 * leading ellipsis when it was truncated) followed by the first 200
 * characters of each prompt's diverging remainder, one line for the old
 * prompt and one for the new.
 *
 * @param {string} oldText Previous prompt.
 * @param {string} newText New prompt.
 * @param {number} lcpChars Length, in characters, of the common prefix.
 * @returns {string} HTML for a `<details>` element.
 */
function renderCacheDiffVisualization(oldText, newText, lcpChars) {
  const WINDOW = 200;
  const sharedFrom = Math.max(0, lcpChars - WINDOW);
  const ellipsis = sharedFrom > 0 ? "…" : "";

  // Escape each fragment once up front; the shared part appears on both lines.
  const sharedHtml = escapeHtml(ellipsis + oldText.slice(sharedFrom, lcpChars));
  const oldHtml = escapeHtml(oldText.slice(lcpChars).slice(0, WINDOW));
  const newHtml = escapeHtml(newText.slice(lcpChars).slice(0, WINDOW));

  const title = t("cache.diff.title") || "Where the cache breaks";
  const legend = t("cache.diff.legend") || "Green = shared prefix (cacheable). Red = first edit (everything from here is re-billed).";

  return `
<details style="margin-top:1em;">
<summary style="cursor:pointer;"><strong>${title}</strong></summary>
<div style="background:#0d1117;padding:0.75em;border-radius:4px;font-family:monospace;font-size:0.85em;line-height:1.4;overflow-x:auto;white-space:pre-wrap;">
<span style="color:#3fb950;">${sharedHtml}</span><span style="color:#f85149;text-decoration:underline;">${oldHtml}</span><span class="subtle"> ← old</span>
<span style="color:#3fb950;">${sharedHtml}</span><span style="color:#3fb950;text-decoration:underline;">${newHtml}</span><span class="subtle"> ← new</span>
</div>
<p class="subtle" style="font-size:0.82em;">${legend}</p>
</details>
`;
}
/**
 * Render the full Prompt-Cache Diff result panel.
 *
 * Layout: verdict badge, token summary, per-provider table (omitted when
 * there are no provider rows), the diff visualisation (omitted for the
 * "identical" verdict) and the attribution footer. The "empty_input"
 * verdict short-circuits to a badge plus a hint.
 *
 * @param {object} result Value returned by diffPromptCache; this function
 *   reads `code`, `params` (tokens_common, tokens_total, hit_ratio,
 *   diff_point.line, lcp_chars) and `providers`.
 * @param {string} oldText Previous prompt, forwarded to the diff view.
 * @param {string} newText New prompt, forwarded to the diff view.
 * @returns {string} HTML string.
 */
function renderCacheResult(result, oldText, newText) {
// Translated verdict text, falling back to the raw code; unknown codes get a grey badge.
const verdict = t(`cache.verdict.${result.code}`) || result.code;
const verdictBg = CACHE_VERDICT_BG[result.code] || "#8b949e";
const verdictBadge = `<span class="badge" style="background:${verdictBg};">${verdict}</span>`;
// Nothing to analyse: show only the badge and a hint.
if (result.code === "empty_input") {
return `<div class="arena-result">
<p style="font-size:1.1em;">${verdictBadge}</p>
<p class="recipe-desc">${t("cache.hint.empty") || "Paste two prompts, then Predict."}</p>
</div>`;
}
const p = result.params;
// Token summary: shared-prefix size vs. total, plus the line of the first difference.
const summary = `
<p class="recipe-desc">
${tFmt("cache.summary.tokens", { common: p.tokens_common.toLocaleString(), total: p.tokens_total.toLocaleString(), pct: Math.round(p.hit_ratio * 100) })
|| `Common prefix ${p.tokens_common.toLocaleString()} / ${p.tokens_total.toLocaleString()} tokens (${Math.round(p.hit_ratio * 100)}% theoretical hit ratio).`}
</p>
<p class="recipe-desc subtle">
${tFmt("cache.summary.diff_at", { line: p.diff_point.line }) || `First difference at line ${p.diff_point.line}.`}
</p>
`;
// One rendered row group per provider; the table is dropped entirely when there are none.
const rows = (result.providers || []).map(renderCacheProvider).join("");
const table = rows ? `
<table class="lean-table" style="margin-top:1em;width:100%;">
<thead><tr>
<th style="text-align:left;">${t("cache.col.provider") || "Provider"}</th>
<th style="text-align:right;">${t("cache.col.hit") || "Hit"}</th>
<th style="text-align:right;">${t("cache.col.cost") || "Base → cached"}</th>
<th style="text-align:right;">${t("cache.col.savings") || "Savings"}</th>
</tr></thead>
<tbody>${rows}</tbody>
</table>
` : "";
// Identical prompts have no divergence point to visualise.
const diffViz = result.code !== "identical"
? renderCacheDiffVisualization(oldText, newText, p.lcp_chars)
: "";
// Footer: provider documentation links plus the price-snapshot disclaimer.
const attribution = `
<p class="recipe-desc subtle" style="font-size:0.82em;margin-top:1em;">
${t("cache.attribution") || "Refs:"}
<a href="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching" target="_blank" rel="noopener noreferrer">Anthropic prompt caching</a> ·
<a href="https://platform.openai.com/docs/guides/prompt-caching" target="_blank" rel="noopener noreferrer">OpenAI prompt caching</a> ·
<a href="https://ai.google.dev/gemini-api/docs/caching" target="_blank" rel="noopener noreferrer">Gemini context caching</a>
<br><em>${t("cache.attribution.snapshot") || "Prices snapshot 2026-01; verify against current provider docs before acting on $."}</em>
</p>
`;
return `<div class="arena-result">
<p style="font-size:1.1em;">${verdictBadge}</p>
${summary}
${table}
${diffViz}
${attribution}
</div>`;
}
/**
 * Read the Prompt-Cache Diff form, run the prediction and publish it.
 *
 * Inputs come from `cache-old`, `cache-new`, `cache-profile` (default
 * "english") and `cache-output-tokens` (default 500). The rendered result
 * goes to `cache-output` and a one-line summary to `cache-status`.
 *
 * Fix: the output-token field used to be parsed with a bare `parseInt`, so
 * text such as "abc" passed NaN to the predictor and a negative number went
 * through unchanged. Anything that is not a non-negative integer now falls
 * back to the default of 500.
 */
function runCacheDiff() {
  const DEFAULT_OUTPUT_TOKENS = 500;

  const oldText = $("cache-old")?.value || "";
  const newText = $("cache-new")?.value || "";
  const profile = $("cache-profile")?.value || "english";

  const parsedTokens = Number.parseInt($("cache-output-tokens")?.value ?? "", 10);
  const outputTokens = Number.isFinite(parsedTokens) && parsedTokens >= 0
    ? parsedTokens
    : DEFAULT_OUTPUT_TOKENS;

  const result = diffPromptCache(oldText, newText, {
    profile,
    outputTokensEstimate: outputTokens,
  });

  $("cache-output").innerHTML = renderCacheResult(result, oldText, newText);
  $("cache-status").textContent = tFmt("cache.status.done", {
    verdict: t(`cache.verdict.${result.code}`) || result.code,
    hit: Math.round((result.params?.hit_ratio || 0) * 100),
  });
}
// Demo prompts for the Prompt-Cache Diff example buttons.

// A long synthetic system prompt: one opening sentence, three instruction
// sentences repeated 40 times each, then a short tool list.
const CACHE_LONG_SYS = "You are a helpful, harmless, and honest assistant. " +
"Always cite your sources. ".repeat(40) +
"Always show your reasoning step by step. ".repeat(40) +
"Be concise. Format code with backticks. ".repeat(40) +
"\n\nUser tools available:\n- search\n- calculator\n- code_runner\n";
// "Good" pair: identical system prompt; only the trailing user question differs.
const CACHE_EXAMPLE_GOOD_OLD = CACHE_LONG_SYS + "\nUser: What is 2 + 2?";
const CACHE_EXAMPLE_GOOD_NEW = CACHE_LONG_SYS + "\nUser: What is 2 + 3?";
// "Broken" pair: the old prompt has its first sentence reworded, so the two
// prompts diverge near the very start even though the question is the same.
const CACHE_EXAMPLE_BROKEN_OLD = CACHE_LONG_SYS.replace("helpful, harmless, and honest", "helpful AND honest")
+ "\nUser: What is 2 + 2?";
const CACHE_EXAMPLE_BROKEN_NEW = CACHE_LONG_SYS + "\nUser: What is 2 + 2?";
// "Below-min" pair: two very short prompts.
const CACHE_EXAMPLE_BELOWMIN_OLD = "Q: name 3 colors";
const CACHE_EXAMPLE_BELOWMIN_NEW = "Q: name 4 colors";
// Prompt-Cache Diff wiring: Predict runs on the current textareas; each
// example button loads its old/new prompt pair first, then runs.
$("cache-diff-btn")?.addEventListener("click", runCacheDiff);
for (const [buttonId, oldPrompt, newPrompt] of [
  ["cache-example-good-btn", CACHE_EXAMPLE_GOOD_OLD, CACHE_EXAMPLE_GOOD_NEW],
  ["cache-example-broken-btn", CACHE_EXAMPLE_BROKEN_OLD, CACHE_EXAMPLE_BROKEN_NEW],
  ["cache-example-belowmin-btn", CACHE_EXAMPLE_BELOWMIN_OLD, CACHE_EXAMPLE_BELOWMIN_NEW],
]) {
  $(buttonId)?.addEventListener("click", () => {
    $("cache-old").value = oldPrompt;
    $("cache-new").value = newPrompt;
    runCacheDiff();
  });
}
// ════════════════════════════════════════════════════════════════════
// 🔬 Speculative-Decode Compatibility (v0.8.5 anti-bullshit pack #11)
// ════════════════════════════════════════════════════════════════════
// Badge background colour for each speculative-decoding verdict code.
// Looked up by `result.code` when rendering; codes missing here fall back
// to grey (#8b949e) at the lookup site.
const SPEC_VERDICT_BG = {
compatible: "#3fb950",
compatible_with_caveats: "#3fb950",
partial_compatible: "#d29922",
type_mismatch: "#f85149",
vocab_size_mismatch: "#f85149",
incompatible: "#f85149",
fetch_failed: "#8b949e",
identical_models: "#58a6ff",
missing_input: "#8b949e",
};
// Set once the Speculative-Decode tab has been initialised.
let __specInited = false;

/**
 * One-time initialiser for the Speculative-Decode Compatibility tab.
 *
 * There is no asynchronous preload; the function only flips the guard flag
 * and exists for symmetry with the other init functions.
 */
function initSpeculative() {
  if (!__specInited) {
    __specInited = true;
  }
}
/**
 * Format a parameter count compactly.
 *
 * @param {number|null|undefined} p Parameter count.
 * @returns {string} "7.0B" for billions, "1.5M" for millions, a
 *   locale-formatted integer below one million, or "—" for any falsy input.
 */
function fmtParams(p) {
  if (!p) return "—";
  const scales = [
    [1e9, "B"],
    [1e6, "M"],
  ];
  for (const [unit, suffix] of scales) {
    if (p >= unit) return `${(p / unit).toFixed(1)}${suffix}`;
  }
  return p.toLocaleString();
}
/**
 * Render the Speculative-Decode Compatibility report.
 *
 * Short-circuits for "missing_input" / "identical_models" (badge + hint)
 * and "fetch_failed" (badge + per-side error list). Otherwise it renders
 * the mirror banner (if any), tokenizer types, vocab sizes, the sampled
 * token-id match, special/added token differences, the speedup band and
 * the attribution footer.
 *
 * Fixes over the previous version:
 *  - Tokens are truncated BEFORE escaping. Slicing the escaped string could
 *    cut an entity such as "&amp;" in half and emit broken markup.
 *  - Values that come from fetched tokenizer data or error objects
 *    (tokenizer type, special-token names, error codes, HTTP status, the
 *    raw verdict code) are escaped before being placed in HTML.
 *  - The match colour is chosen from the unrounded ratio. Previously the
 *    percent was rounded first, so 99.5% displayed as a green "100%".
 *    The label now keeps one decimal.
 *  - Missing vocab sizes or sample counts render as "—" instead of throwing.
 *
 * @param {object} result Value resolved by the compatibility check; reads
 *   `code`, `errors` and `params`.
 * @returns {string} HTML string.
 */
function renderSpecResult(result) {
  // Clip on the raw string, then escape, so entities are never split.
  const clipToken = (raw, max = 40) => escapeHtml(String(raw ?? "").slice(0, max));
  const fmtCount = (n) => (n == null ? "—" : n.toLocaleString());

  const verdict = t(`speculative.verdict.${result.code}`) || escapeHtml(String(result.code));
  const verdictBg = SPEC_VERDICT_BG[result.code] || "#8b949e";
  const verdictBadge = `<span class="badge" style="background:${verdictBg};">${verdict}</span>`;

  // Failure-mode short-circuits
  if (result.code === "missing_input" || result.code === "identical_models") {
    return `<div class="arena-result">
<p style="font-size:1.1em;">${verdictBadge}</p>
<p class="recipe-desc">${t(`speculative.hint.${result.code}`) || ""}</p>
</div>`;
  }
  if (result.code === "fetch_failed") {
    const errs = (result.errors || []).map((e) => {
      const sideLabel = e.side === "target"
        ? (t("speculative.side.target") || "Target")
        : (t("speculative.side.draft") || "Draft");
      const reason = t(`speculative.fetch_error.${e.error}`) || escapeHtml(String(e.error));
      const status = e.status ? ` (HTTP ${escapeHtml(String(e.status))})` : "";
      return `<li><strong>${sideLabel}</strong>: ${reason}${status}</li>`;
    }).join("");
    return `<div class="arena-result">
<p style="font-size:1.1em;">${verdictBadge}</p>
<ul>${errs}</ul>
<p class="recipe-desc subtle">${t("speculative.fetch_error.hint") || "Check the model id spelling. For gated models you'll need to view the tokenizer file via your HF account — this tool can't auth."}</p>
</div>`;
  }

  const p = result.params;

  // Mirror banner — when a gated model was fetched via an open mirror.
  let mirrorBanner = "";
  if (p.target_via_mirror || p.draft_via_mirror) {
    const lines = [];
    if (p.target_via_mirror) {
      lines.push(tFmt("speculative.mirror.target_used", {
        original: escapeHtml(p.targetId),
        mirror: escapeHtml(p.target_via_mirror),
      }) || `Target was gated; used mirror <code>${escapeHtml(p.target_via_mirror)}</code>.`);
    }
    if (p.draft_via_mirror) {
      lines.push(tFmt("speculative.mirror.draft_used", {
        original: escapeHtml(p.draftId),
        mirror: escapeHtml(p.draft_via_mirror),
      }) || `Draft was gated; used mirror <code>${escapeHtml(p.draft_via_mirror)}</code>.`);
    }
    mirrorBanner = `
<div style="margin-bottom:0.75em;padding:0.6em;background:#332b00;border-left:3px solid #d29922;border-radius:4px;font-size:0.92em;">
<strong>ℹ ${t("speculative.mirror.heading") || "Open-mirror fallback"}</strong>
${lines.map(l => `<br>${l}`).join("")}
<br><span class="subtle" style="font-size:0.85em;">${t("speculative.mirror.warn") || "Mirror tokenizers (e.g. unsloth/) are usually byte-identical to the gated original because quantization touches weights, not tokens. Verify chat-template if exact match is required."}</span>
</div>
`;
  }

  // Section 1 — vocab summary
  const typeBadge = (label, val, bg) =>
    `<span class="badge" style="background:${bg};">${label}: <code>${val == null ? "—" : escapeHtml(String(val))}</code></span>`;
  const typeBg = p.type_match ? "#3fb950" : "#f85149";
  const typeRow = `
${typeBadge(t("speculative.target_label_short") || "target", p.target_type, typeBg)}
${typeBadge(t("speculative.draft_label_short") || "draft", p.draft_type, typeBg)}
${p.type_match ? "" : `<span class="subtle"> ← ${t("speculative.type_mismatch_note") || "tokenizer types differ; spec-dec impossible"}</span>`}
`;
  const sizeRow = `
<strong>${t("speculative.vocab_size") || "Vocab size"}:</strong>
target = <code>${fmtCount(p.target_vocab_size)}</code>,
draft = <code>${fmtCount(p.draft_vocab_size)}</code>
${p.vocab_size_match ? "" : `<span style="color:#f85149;"> ← ${t("speculative.size_diff") || "differ — every reused id is a misalignment"}</span>`}
`;

  // Sampled match. Thresholds are applied to the unrounded percentage;
  // rounding is for display only (one decimal).
  const ratioPct = p.sampled_total > 0 ? p.sampled_match_ratio * 100 : 0;
  const matchPct = Math.round(ratioPct * 10) / 10;
  const matchColor = ratioPct >= 99.9 ? "#3fb950" : ratioPct >= 95 ? "#d29922" : "#f85149";
  const sampleRow = `
<strong>${t("speculative.sampled") || "Token-id sample match"}:</strong>
<span style="color:${matchColor};font-weight:600;">${matchPct}%</span>
<span class="subtle">(${fmtCount(p.sampled_match_count)} / ${fmtCount(p.sampled_total)} tokens)</span>
${p.first_mismatch ? `<br><span class="subtle">${t("speculative.first_mismatch") || "First mismatch"}: <code>${clipToken(p.first_mismatch.token)}</code> → target id ${p.first_mismatch.target_id ?? "—"}, draft id ${p.first_mismatch.draft_id ?? "—"}</span>` : ""}
`;

  // Special / added token diffs
  const specialDiff = p.special_tokens_diff || [];
  const specDiffRows = specialDiff.map(d =>
    `<li><code>${escapeHtml(String(d.name))}</code>: target=<code>${escapeHtml(String(d.target ?? "—"))}</code>, draft=<code>${escapeHtml(String(d.draft ?? "—"))}</code></li>`
  ).join("");
  const specDiffBlock = specDiffRows
    ? `<details style="margin-top:0.5em;"><summary>${t("speculative.special_diff") || "Special-token differences"} (${specialDiff.length})</summary><ul>${specDiffRows}</ul></details>`
    : "";
  const addedDiff = p.added_tokens_diff || [];
  // Only the first 12 added-token differences are listed.
  const addedDiffPreview = addedDiff.slice(0, 12).map(d =>
    `<li><span class="subtle">${d.side === "target_only" ? "target only" : "draft only"}:</span> <code>${clipToken(d.token)}</code></li>`
  ).join("");
  const addedDiffBlock = addedDiffPreview
    ? `<details style="margin-top:0.5em;"><summary>${t("speculative.added_diff") || "Added-token differences"} (${addedDiff.length})</summary><ul>${addedDiffPreview}${addedDiff.length > 12 ? `<li class="subtle">${t("speculative.added_diff_more") || "+ more …"}</li>` : ""}</ul></details>`
    : "";

  // Section 2 — speedup band (only when compatible-ish)
  let speedupBlock = "";
  if (p.speedup_expected != null) {
    const ratio = p.param_ratio ? `${(p.param_ratio * 100).toFixed(1)}%` : "—";
    speedupBlock = `
<div style="margin-top:1em;padding:0.75em;background:#161b22;border-left:3px solid #3fb950;border-radius:4px;">
<strong>${t("speculative.speedup.title") || "Estimated speedup band"}</strong><br>
<span class="subtle" style="font-size:0.85em;">${tFmt("speculative.speedup.params", { target: fmtParams(p.target_params), draft: fmtParams(p.draft_params), ratio }) || `target ${fmtParams(p.target_params)} / draft ${fmtParams(p.draft_params)} (param ratio ${ratio})`}</span>
<div style="margin-top:0.5em;display:flex;gap:1em;flex-wrap:wrap;">
<div>${t("speculative.speedup.low") || "Low (α=0.50)"}:<br><strong style="font-size:1.2em;">${p.speedup_low}×</strong></div>
<div>${t("speculative.speedup.expected") || "Expected (α=0.70)"}:<br><strong style="font-size:1.4em;color:#3fb950;">${p.speedup_expected}×</strong></div>
<div>${t("speculative.speedup.high") || "High (α=0.85)"}:<br><strong style="font-size:1.2em;">${p.speedup_high}×</strong></div>
</div>
<p class="subtle" style="font-size:0.78em;margin-top:0.5em;">${t("speculative.speedup.disclaimer") || "α = draft acceptance rate. Real speedup depends on prompt domain, lookahead K, and engine overhead. Bands assume ideal verifier batching."}</p>
</div>
`;
  } else if (p.target_params && p.draft_params && p.param_ratio >= 1) {
    speedupBlock = `<p class="recipe-desc" style="color:#f85149;margin-top:1em;">${t("speculative.speedup.draft_not_smaller") || "Draft is not smaller than target — spec-dec is misuse here."}</p>`;
  }

  // Attribution
  const attribution = `
<p class="recipe-desc subtle" style="font-size:0.82em;margin-top:1em;">
${t("speculative.attribution") || "Refs:"}
<a href="https://docs.vllm.ai/en/latest/serving/speculative_decoding.html" target="_blank" rel="noopener noreferrer">vLLM spec-dec docs</a> ·
<a href="https://docs.sglang.ai/router/router.html" target="_blank" rel="noopener noreferrer">SGLang</a> ·
<a href="https://huggingface.co/docs/transformers/main/en/llm_optims#speculative-decoding" target="_blank" rel="noopener noreferrer">transformers assistant_model</a> ·
<a href="https://arxiv.org/abs/2211.17192" target="_blank" rel="noopener noreferrer">Leviathan et al. 2022</a>
</p>
`;
  return `<div class="arena-result">
<p style="font-size:1.1em;">${verdictBadge}</p>
${mirrorBanner}
<p>${typeRow}</p>
<p>${sizeRow}</p>
<p>${sampleRow}</p>
${specDiffBlock}
${addedDiffBlock}
${speedupBlock}
${attribution}
</div>`;
}
/**
 * Run the speculative-decoding compatibility check for the two model ids
 * typed into the form and publish the outcome.
 *
 * Shows a "fetching" status and clears the output first; on success it
 * renders the report and a formatted "done" status, on failure it shows
 * the error message in the status line.
 */
async function runSpecCheck() {
  const readModelId = (elementId) => $(elementId)?.value?.trim() || "";
  const targetId = readModelId("spec-target-id");
  const draftId = readModelId("spec-draft-id");

  $("spec-status").textContent = t("speculative.status.fetching") || "🔄 Fetching tokenizer.json from HF Hub for both models…";
  $("spec-output").innerHTML = "";

  try {
    const report = await specCheckCompat(targetId, draftId);
    $("spec-output").innerHTML = renderSpecResult(report);
    const verdictText = t(`speculative.verdict.${report.code}`) || report.code;
    $("spec-status").textContent = tFmt("speculative.status.done", { verdict: verdictText });
  } catch (err) {
    const prefix = t("speculative.status.error") || "❌ Error";
    $("spec-status").textContent = prefix + " " + (err.message || err);
  }
}
$("spec-check-btn")?.addEventListener("click", runSpecCheck);
// Examples mix gated + open ids. The gated Llama pair triggers the
// open-mirror fallback (unsloth/...) for both sides, so the user sees the
// demo result AND the mirror-resolution mechanism. The open-weight
// cross-family pair (Qwen + Phi) needs no fallback and serves as the plain
// incompatibility demo.
for (const [buttonId, targetModel, draftModel] of [
  ["spec-example-good-btn", "meta-llama/Llama-3.1-70B-Instruct", "meta-llama/Llama-3.1-8B-Instruct"],
  ["spec-example-bad-btn", "Qwen/Qwen2.5-7B-Instruct", "microsoft/Phi-3.5-mini-instruct"],
]) {
  $(buttonId)?.addEventListener("click", () => {
    $("spec-target-id").value = targetModel;
    $("spec-draft-id").value = draftModel;
    runSpecCheck();
  });
}
// (HF autocomplete on spec-target-id / spec-draft-id is registered via
// the known-id list in hf_autocomplete.js; no extra wiring needed here.)
// ════════════════════════════════════════════════════════════════════
// 🌍 Multilingual Tokenizer Tax (v0.8.7 anti-bullshit pack #13)
// ════════════════════════════════════════════════════════════════════
// Set once the Multilingual Tokenizer Tax tab has been initialised.
let __taxInited = false;

/**
 * One-time initialiser for the Tokenizer Tax tab.
 *
 * Deliberately preloads nothing: transformers.js and the tokenizer files
 * are lazy-loaded on the first Tokenize click so that merely opening the
 * tab costs no download. The status line explains the wait at that point.
 */
function initTax() {
  if (!__taxInited) {
    __taxInited = true;
  }
}
/**
 * Summarise script-detector output as "60% latin · 35% cjk · 5% other".
 *
 * Blocks with a zero count are dropped and the rest are listed from
 * largest to smallest share.
 *
 * @param {{blocks: Object<string, number>, total_chars: number}} blocks
 *   Detector output.
 * @returns {string} Summary string, or "" when there is nothing to show.
 */
function fmtBlocks(blocks) {
  const counts = blocks?.blocks;
  const totalChars = blocks?.total_chars;
  if (!blocks || !counts || !totalChars) return "";

  const ranked = Object.entries(counts)
    .filter(([, count]) => count > 0)
    .sort((left, right) => right[1] - left[1]);

  const pieces = [];
  for (const [script, count] of ranked) {
    const share = Math.round((count / totalChars) * 100);
    pieces.push(`${share}% ${script}`);
  }
  return pieces.join(" · ");
}
/**
 * Render the Multilingual Tokenizer Tax comparison.
 *
 * Short-circuits for "empty_input" (hint) and "all_failed" (error list).
 * Otherwise shows the input size and script breakdown, an interpretation
 * line, one table row per tokenizer and the attribution footer.
 *
 * Fixes over the previous version:
 *  - The "all tokenizers within ±5%" message used to check only the upper
 *    bound (worst ratio ≤ 1.05). A tokenizer at 0.5× baseline still
 *    produced it. The lowest ratio must now be ≥ 0.95 as well.
 *  - The inline comment claimed a ≥1.5× warning threshold while the code
 *    uses 1.3×; the comment now matches the code.
 *  - Raw error codes shown when no translation exists are escaped.
 *  - The unused `baselineCount` local is removed.
 *
 * @param {object} res Tokenisation result; reads `code`, `results`,
 *   `baseline_id`, `chars`, `bytes`.
 * @param {Array<{id: string, label: string, family: string}>} presetMeta
 *   Display metadata for the tokenizer presets, matched by `id`.
 * @returns {string} HTML string.
 */
function renderTaxResult(res, presetMeta) {
  const metaFor = (modelId) => presetMeta.find((preset) => preset.id === modelId);
  const errorText = (code) => t(`tax.error.${code}`) || escapeHtml(String(code));

  if (res.code === "empty_input") {
    return `<div class="arena-result"><p>${t("tax.hint.empty") || "Paste some text and click Tokenize."}</p></div>`;
  }
  if (res.code === "all_failed") {
    const errLines = res.results.map((r) => {
      const meta = metaFor(r.modelId);
      return `<li><code>${escapeHtml(r.modelId)}</code> ${meta ? `<span class="subtle">(${escapeHtml(meta.label)})</span>` : ""}: ${errorText(r.error)}</li>`;
    }).join("");
    return `<div class="arena-result"><p style="color:#f85149;"><strong>❌ ${t("tax.all_failed") || "All tokenizers failed to load."}</strong></p><ul>${errLines}</ul></div>`;
  }

  // Script breakdown is computed from the text currently in the textarea.
  const blocks = detectLanguageBlocks($("tax-input").value);

  const ratioColor = (r) => {
    if (r == null) return "#8b949e";
    if (r >= 1.5) return "#f85149"; // big tax — red
    if (r >= 1.15) return "#f0883e"; // moderate
    if (r >= 0.85) return "#3fb950"; // about same
    return "#58a6ff"; // BETTER than baseline (rare)
  };
  const fmtRatio = (r) => (r == null ? "—" : `${r.toFixed(2)}×`);

  const rows = res.results.map((r) => {
    const meta = metaFor(r.modelId) || { label: r.modelId, family: "" };
    if (!r.ok) {
      return `<tr style="opacity:0.5;">
<td><strong>${escapeHtml(meta.label)}</strong><br><span class="subtle" style="font-size:0.8em;">${escapeHtml(meta.family)}</span></td>
<td colspan="3" style="color:#f0883e;">${errorText(r.error)}</td>
</tr>`;
    }
    const isBaseline = r.modelId === res.baseline_id;
    const baselineMark = isBaseline ? `<span class="subtle" style="font-size:0.8em;"> (baseline)</span>` : "";
    return `<tr ${isBaseline ? 'style="background:#1f2933;"' : ""}>
<td><strong>${escapeHtml(meta.label)}</strong>${baselineMark}<br><span class="subtle" style="font-size:0.8em;">${escapeHtml(meta.family)}</span></td>
<td style="text-align:right;font-family:monospace;"><strong>${r.token_count.toLocaleString()}</strong></td>
<td style="text-align:right;font-family:monospace;">${r.chars_per_token != null ? r.chars_per_token.toFixed(2) : "—"}</td>
<td style="text-align:right;font-family:monospace;color:${ratioColor(r.ratio_vs_baseline)};"><strong>${fmtRatio(r.ratio_vs_baseline)}</strong></td>
</tr>`;
  }).join("");

  // Interpretation: warn when the most expensive tokenizer is ≥1.3×
  // baseline; report "uniform" only when EVERY ratio is within ±5%.
  const scored = res.results.filter((r) => r.ok && r.ratio_vs_baseline != null);
  let worst = null;
  let lowest = null;
  for (const r of scored) {
    // Strict comparisons keep the first entry among ties.
    if (worst == null || r.ratio_vs_baseline > worst.ratio_vs_baseline) worst = r;
    if (lowest == null || r.ratio_vs_baseline < lowest.ratio_vs_baseline) lowest = r;
  }
  let interpretation = "";
  if (worst && worst.ratio_vs_baseline >= 1.3) {
    const meta = metaFor(worst.modelId);
    const label = meta?.label || worst.modelId;
    const pct = Math.round((worst.ratio_vs_baseline - 1) * 100);
    interpretation = `<p style="color:#f0883e;margin-top:0.5em;">⚠ <strong>${tFmt("tax.interp.worst", {
      label,
      pct,
    }) || `${label} costs ${pct}% more tokens than baseline for this text.`}</strong></p>`;
  } else if (worst && worst.ratio_vs_baseline <= 1.05 && lowest.ratio_vs_baseline >= 0.95) {
    interpretation = `<p style="color:#3fb950;margin-top:0.5em;">${t("tax.interp.uniform") || "✓ All tokenizers within ±5% — text is well-handled across vendors."}</p>`;
  }

  return `<div class="arena-result">
<p>
<strong>${tFmt("tax.summary.input", { chars: res.chars.toLocaleString(), bytes: res.bytes.toLocaleString() }) || `Input: ${res.chars.toLocaleString()} chars, ${res.bytes.toLocaleString()} bytes`}</strong>
${blocks.dominant ? `<span class="subtle"> · ${t("tax.script_breakdown") || "scripts"}: ${fmtBlocks(blocks)}</span>` : ""}
</p>
${interpretation}
<table class="lean-table" style="margin-top:0.5em;width:100%;">
<thead><tr>
<th style="text-align:left;">${t("tax.col.tokenizer") || "Tokenizer"}</th>
<th style="text-align:right;">${t("tax.col.tokens") || "Tokens"}</th>
<th style="text-align:right;">${t("tax.col.cpt") || "Chars/tok"}</th>
<th style="text-align:right;">${t("tax.col.ratio") || "Ratio"}</th>
</tr></thead>
<tbody>${rows}</tbody>
</table>
<p class="recipe-desc subtle" style="font-size:0.82em;margin-top:1em;">
${t("tax.attribution") || "Tokenizers via"}
<a href="https://github.com/huggingface/transformers.js" target="_blank" rel="noopener noreferrer">@huggingface/transformers</a>
(browser BPE runtime).
${t("tax.attribution.privacy") || "Text is tokenized locally — never leaves the browser."}
</p>
</div>`;
}
/**
 * Tokenize the textarea contents with every preset tokenizer and publish
 * the comparison.
 *
 * Empty input only updates the status line. Otherwise a loading status is
 * shown, the output is cleared, all presets are run, and the rendered table
 * plus a timing summary are written; any thrown error ends up in the
 * status line.
 */
async function runTaxTokenize() {
  const inputText = $("tax-input")?.value || "";
  if (!inputText) {
    $("tax-status").textContent = t("tax.hint.empty") || "⚠ Paste some text first.";
    return;
  }

  $("tax-status").textContent = t("tax.status.loading") || "⏳ Loading transformers.js + tokenizers (first run can take 5-15s)…";
  $("tax-output").innerHTML = "";

  const modelIds = TAX_PRESETS.map((preset) => preset.id);
  try {
    const startedAt = Date.now();
    const outcome = await tokenizeAll(modelIds, inputText);
    const elapsedMs = Date.now() - startedAt;

    $("tax-output").innerHTML = renderTaxResult(outcome, TAX_PRESETS);

    const succeeded = outcome.results.reduce((count, r) => (r.ok ? count + 1 : count), 0);
    $("tax-status").textContent = tFmt("tax.status.done", {
      n: succeeded, total: modelIds.length, ms: elapsedMs,
    }) || `✅ ${succeeded}/${modelIds.length} tokenizers ran in ${elapsedMs}ms`;
  } catch (err) {
    $("tax-status").textContent = `❌ ${err.message || err}`;
  }
}
// Tokenizer Tax wiring: Tokenize runs on the current textarea; each sample
// button loads the matching TAX_SAMPLES entry first, then runs. The sample
// text is looked up at click time.
$("tax-tokenize-btn")?.addEventListener("click", runTaxTokenize);
for (const [buttonId, sampleKey] of [
  ["tax-sample-en-btn", "english"],
  ["tax-sample-zh-btn", "chinese"],
  ["tax-sample-ar-btn", "arabic"],
  ["tax-sample-mixed-btn", "mixed"],
  ["tax-sample-code-btn", "code"],
]) {
  $(buttonId)?.addEventListener("click", () => {
    $("tax-input").value = TAX_SAMPLES[sampleKey];
    runTaxTokenize();
  });
}
// ════════════════════════════════════════════════════════════════════
// LongScore mode (v0.8.8 anti-bullshit pack #14)
// ════════════════════════════════════════════════════════════════════
// Set once the LongScore tab has been initialised.
let __longscoreInited = false;

/**
 * One-time initialiser for the LongScore tab.
 *
 * Starts loading the knowledge base right away so the first lookup is
 * instant (the KB is ~70KB, so eager loading has no real cost). A failed
 * preload is logged as a warning and not rethrown.
 */
function initLongscore() {
  if (__longscoreInited) return;
  __longscoreInited = true;

  const onPreloadFailure = (err) => {
    console.warn("longscore_kb preload failed", err);
  };
  loadLongscoreKB().catch(onPreloadFailure);
}
/**
 * Format a fraction as a percentage with one decimal place.
 *
 * @param {number|null|undefined} x Fraction (0.123 means 12.3%).
 * @param {boolean} [sign] When truthy, non-negative values get a leading "+".
 * @returns {string} e.g. "+12.3%", "-5.0%", or "—" when `x` is null/undefined.
 */
function lsFmtPct(x, sign) {
  if (x == null) return "—";
  const pct = x * 100;
  const prefix = sign && pct >= 0 ? "+" : "";
  return prefix + pct.toFixed(1) + "%";
}
/**
 * Pick the display colour for a long-context change value.
 *
 * @param {number|null|undefined} avg Relative change (negative = worse).
 * @returns {string} Hex colour: grey when missing, then green → light green
 *   → orange → red → dark red as the value falls through the -0.02, -0.10,
 *   -0.20 and -0.30 floors.
 */
function lcColor(avg) {
  if (avg == null) return "#8b949e";
  // Ordered from best to worst; the first floor the value clears wins.
  const bands = [
    [-0.02, "#3fb950"], // green: no degradation
    [-0.10, "#a5d36a"], // light green
    [-0.20, "#f0883e"], // orange
    [-0.30, "#f85149"], // red
  ];
  const band = bands.find(([floor]) => avg >= floor);
  return band ? band[1] : "#a01b1b"; // dark red: extreme
}
/**
 * Render the LongScore lookup result.
 *
 * A "miss" renders a not-found panel. Otherwise the panel shows the model
 * header, then either the LongScore with a per-context-length table (when
 * `ruler_long_score` is present) or a "no per-length data" notice, then the
 * HELMET breakdown when available, and finally the data-source footer.
 *
 * Fixes over the previous version:
 *  - `res.normalized_id` is derived from what the user typed and was
 *    placed into innerHTML unescaped on a miss; it is now escaped both in
 *    the translation parameters and in the fallback text.
 *  - External links opened with target="_blank" now carry
 *    rel="noopener noreferrer", like every other link in this file.
 *  - An unknown verdict code used as the label fallback is escaped.
 *
 * @param {object} res Lookup result; reads `code`, `normalized_id`,
 *   `n_kb_total`, `display_name`, `params_b`, `recipe_class`,
 *   `native_context_k`, `ruler_long_score`, `ruler_per_ctx`, `verdict`,
 *   `helmet`, `source`.
 * @returns {string} HTML string.
 */
function renderLongscoreResult(res) {
  if (res.code === "miss") {
    const safeId = escapeHtml(String(res.normalized_id ?? ""));
    return `<div class="arena-result">
<p style="color:#f0883e;"><strong>${t("longscore.miss.title") || "Model not found in KB"}</strong></p>
<p>${tFmt("longscore.miss.body", { id: safeId, n: res.n_kb_total }) || `Looked up <code>${safeId}</code>. KB has ${res.n_kb_total} models. Try a canonical HF id (e.g. <code>Qwen2.5-72B-Instruct</code>, <code>Llama-3.1-70B-Instruct</code>, <code>Jamba-1.5-Mini</code>).`}</p>
<p class="subtle" style="font-size:0.85em;">${t("longscore.miss.suggest") || "Check coverage at"} <a href="https://github.com/NVIDIA/RULER" target="_blank" rel="noopener noreferrer">RULER</a> · <a href="https://github.com/princeton-nlp/HELMET" target="_blank" rel="noopener noreferrer">HELMET</a>.</p>
</div>`;
  }
  const verdictMap = {
    no_degradation: { color: "#3fb950", label: t("longscore.verdict.no_degradation") || "✅ No degradation past short context" },
    mild: { color: "#a5d36a", label: t("longscore.verdict.mild") || "🟢 Mild degradation (<10%)" },
    moderate: { color: "#f0883e", label: t("longscore.verdict.moderate") || "🟠 Moderate degradation (10-20%)" },
    severe: { color: "#f85149", label: t("longscore.verdict.severe") || "🔴 Severe degradation (20-30%)" },
    extreme: { color: "#a01b1b", label: t("longscore.verdict.extreme") || "🚨 Extreme degradation (>30%)" },
  };
  let html = `<div class="arena-result">`;
  html += `<p><strong>${escapeHtml(res.display_name)}</strong>`;
  if (res.params_b) html += ` <span class="subtle">· ${res.params_b}B params</span>`;
  if (res.recipe_class) html += ` <span class="subtle">· ${escapeHtml(res.recipe_class)}</span>`;
  if (res.native_context_k) html += ` <span class="subtle">· native ctx ${res.native_context_k}K</span>`;
  html += `</p>`;
  // RULER per-length + LongScore
  if (res.ruler_long_score) {
    const ls = res.ruler_long_score;
    // Unknown verdict codes render in grey with the (escaped) raw code.
    const v = verdictMap[res.verdict] || { color: "#8b949e", label: escapeHtml(String(res.verdict)) };
    html += `<p style="margin-top:0.8em;font-size:1.1em;">
<strong>${t("longscore.score_label") || "LongScore"}:</strong>
<span style="color:${lcColor(ls.avg_lc)};font-family:monospace;font-size:1.2em;font-weight:bold;">${lsFmtPct(ls.avg_lc, true)}</span>
<span class="subtle">· Base = ${ls.base.toFixed(1)}% (mean of 4K, 8K)</span>
</p>`;
    html += `<p style="color:${v.color};font-weight:bold;">${v.label}</p>`;
    // Per-length bars
    html += `<table class="lean-table" style="margin-top:0.8em;width:100%;">
<thead><tr>
<th style="text-align:left;">${t("longscore.col.ctx") || "Context"}</th>
<th style="text-align:right;">${t("longscore.col.score") || "Score"}</th>
<th style="text-align:right;">${t("longscore.col.lc") || "LC"}</th>
</tr></thead><tbody>`;
    const ctxKeys = ["4k", "8k", "16k", "32k", "64k", "128k"];
    for (const k of ctxKeys) {
      const score = res.ruler_per_ctx?.[k];
      if (score == null) continue;
      // 4K and 8K are the short-context lengths the base is averaged from.
      const isShort = k === "4k" || k === "8k";
      const lc = ls.per_length_lc?.[k];
      html += `<tr ${isShort ? 'style="opacity:0.7;"' : ""}>
<td><strong>${k.toUpperCase()}</strong>${isShort ? ` <span class="subtle" style="font-size:0.8em;">(base)</span>` : ""}</td>
<td style="text-align:right;font-family:monospace;">${score.toFixed(1)}%</td>
<td style="text-align:right;font-family:monospace;color:${lcColor(lc)};">${lc != null ? lsFmtPct(lc, true) : "—"}</td>
</tr>`;
    }
    html += `</tbody></table>`;
  } else {
    // Helmet-only or partial
    html += `<p style="margin-top:0.8em;color:#f0883e;">${t("longscore.no_ruler") || "⚠ No per-length data — LongScore not computable. Showing HELMET aggregate at 128K instead."}</p>`;
  }
  // HELMET breakdown if available
  if (res.helmet) {
    html += `<details style="margin-top:1em;" open>
<summary><strong>${t("longscore.helmet_label") || "HELMET 7-task breakdown"} (at 128K)</strong></summary>
<table class="lean-table" style="margin-top:0.5em;width:100%;">
<thead><tr>
<th style="text-align:left;">${t("longscore.col.task") || "Task"}</th>
<th style="text-align:right;">${t("longscore.col.score") || "Score"}</th>
</tr></thead><tbody>`;
    if (res.helmet.overall != null) {
      html += `<tr style="background:#1f2933;"><td><strong>Overall</strong></td><td style="text-align:right;font-family:monospace;"><strong>${res.helmet.overall.toFixed(1)}</strong></td></tr>`;
    }
    if (res.helmet.categories) {
      for (const [task, score] of Object.entries(res.helmet.categories)) {
        html += `<tr><td>${escapeHtml(task)}</td><td style="text-align:right;font-family:monospace;">${score != null ? score.toFixed(1) : "—"}</td></tr>`;
      }
    }
    html += `</tbody></table></details>`;
  }
  html += `<p class="recipe-desc subtle" style="font-size:0.82em;margin-top:1em;">
${t("longscore.source_note") || "Data source"}: ${escapeHtml(res.source)} ·
<a href="https://arxiv.org/abs/2505.19293" target="_blank" rel="noopener noreferrer">LongScore metric</a>
</p>`;
  html += `</div>`;
  return html;
}
/**
 * Look up the model id typed into the LongScore form and publish the result.
 *
 * Empty input only updates the status line. Otherwise a "looking up" status
 * is shown, the output is cleared, and after the lookup the rendered panel
 * plus a status matching the result code (miss / RULER hit / anything else
 * is treated as HELMET-only) are written. Errors are shown in the status
 * line and logged to the console.
 */
async function runLongscoreLookup() {
  const modelId = $("longscore-input")?.value?.trim();
  if (!modelId) {
    $("longscore-status").textContent = t("longscore.hint.empty") || "⚠ Paste a model id first.";
    return;
  }

  $("longscore-status").textContent = t("longscore.status.lookup") || "⏳ Looking up…";
  $("longscore-output").innerHTML = "";

  try {
    const found = await longscoreLookup(modelId);
    $("longscore-output").innerHTML = renderLongscoreResult(found);

    let statusText;
    switch (found.code) {
      case "miss":
        statusText = t("longscore.status.miss") || "ℹ Model not in KB";
        break;
      case "ruler_hit":
        statusText = t("longscore.status.ruler_hit") || "✅ RULER per-length data found";
        break;
      default:
        statusText = t("longscore.status.helmet_only") || "ℹ HELMET aggregate only (no per-length data)";
    }
    $("longscore-status").textContent = statusText;
  } catch (err) {
    $("longscore-status").textContent = `❌ ${err.message || err}`;
    console.error(err);
  }
}
// LongScore wiring: the Lookup button and the Enter key in the input both
// run the lookup; each example button fills in a model id first.
$("longscore-lookup-btn")?.addEventListener("click", runLongscoreLookup);
$("longscore-input")?.addEventListener("keydown", (event) => {
  if (event.key !== "Enter") return;
  event.preventDefault();
  runLongscoreLookup();
});
for (const [buttonId, exampleId] of [
  ["longscore-example-good-btn", "Jamba-1.5-Large"],
  ["longscore-example-mid-btn", "Llama-3.1-70B-Instruct"],
  ["longscore-example-bad-btn", "dbrx"],
]) {
  $(buttonId)?.addEventListener("click", () => {
    $("longscore-input").value = exampleId;
    runLongscoreLookup();
  });
}
// ════════════════════════════════════════════════════════════════════
// Bootstrap
// ════════════════════════════════════════════════════════════════════
// Start i18n, then kick off the Pyodide + TAF load. A failed load is
// reported in the status line and logged to the console.
initI18n();
loadPyodideAndTaf().catch((bootError) => {
  const reason = bootError.message || bootError;
  setStatus(`❌ Failed to initialise: ${reason}`);
  console.error(bootError);
});