config.rope_theta. Higher = more long-range capacity. Typical: 10000 early models, 500000 Llama-3, 1000000 Qwen2.5.",
T_train: "Max context the model was trained on. From max_position_embeddings. The model has never seen positions beyond this; extrapolating much further usually fails.",
T_eval: "Your target inference context length. The key knob. The whole question is: will the model behave well at this length?",
n_attention_heads: "Number of query heads. From num_attention_heads.",
n_kv_heads: "Number of K/V heads. If < n_attention_heads → model uses GQA (Grouped Query Attention). Smaller = more memory-efficient KV cache but pushes γ toward Hagedorn boundary.",
d_head: "Per-head dimension. Typically hidden_size / n_attention_heads. Common: 64, 80, 128.",
n_layers: "Number of transformer layers. From num_hidden_layers.",
n_params: "Total parameter count. Use scientific notation: 8e9 for 8B. Threshold ~400M is the induction-head emergence boundary (sign-flip in Δγ).",
has_SWA: "Sliding Window Attention. true for Mistral, gemma-2, phi-3. SWA lowers γ_decomposition by ~0.21.",
N_params: "Same as n_params. Total parameter count, scientific notation (e.g. 8e9).",
D_tokens: "Number of training tokens. Leave empty to use Chinchilla 20:1 default (D = 20·N).",
gpu: "GPU model from the catalog. Options: H100 SXM, H100 PCIe, H200, B200, A100 80GB, A100 40GB, L40S, MI300X, RTX 4090, RTX 5090, RTX 5060Ti.",
n_gpus: "Number of GPUs in your training/serving cluster.",
mfu: "Model FLOPs Utilization. Realistic fraction of peak FLOPs achieved. Typical: 0.4-0.5 for well-tuned. Default 0.45.",
api_model: "Frontier API to compare against. Options: GPT-4o, GPT-4o-mini, Claude-Opus-4, Claude-Sonnet-4, Claude-Haiku-4, Gemini-1.5-Pro, DeepSeek-V3, Llama-3.3-70B (Together).",
monthly_tokens_M: "Expected monthly token volume in millions. e.g. 10 = 10 million tokens/month.",
USD_budget: "Your training budget in US dollars (no symbol). e.g. 5000 for $5K.",
bytes_per_weight: "Memory per parameter. BF16/FP16 = 2, INT8 = 1, INT4 = 0.5.",
target_tokens_per_day: "How many tokens/day you need to serve. e.g. 10000000 = 10M tokens/day.",
concurrent_users: "Simultaneous concurrent requests. Affects KV cache memory needed.",
};
/**
 * Starter parameter values for a recipe form.
 * @param {string} recipeId - recipe key such as "X-1"
 * @returns {object} default field values, or {} for an unknown id
 */
function getRecipeDefaults(recipeId) {
  const defaultsById = {
    "X-1": {
      N_params: "8e9", D_tokens: "", gpu: "H100 SXM", n_gpus: 8, mfu: 0.45,
      api_model: "GPT-4o", monthly_tokens_M: 10.0,
    },
    "X-2": {
      theta: 500000, T_train: 8192, T_eval: 32000,
      n_attention_heads: 32, n_kv_heads: 8, d_head: 128,
      n_layers: 32, n_params: "8e9", has_SWA: false,
    },
    "X-3": { USD_budget: 5000, gpu: "H100 SXM", mfu: 0.45, n_gpus: 1 },
    "X-5": {
      N_params: "8e9", T_eval: 4096, n_layers: 32, n_kv_heads: 8, d_head: 128,
      bytes_per_weight: 2.0, target_tokens_per_day: 10000000, concurrent_users: 1,
    },
    "X-19": {
      theta: 500000, T_train: 8192, T_eval: 8192,
      n_attention_heads: 32, n_kv_heads: 8, d_head: 128,
      n_layers: 32, n_params: "8e9", has_SWA: false,
    },
  };
  // Unknown recipe → empty object so the caller can spread it safely.
  return defaultsById[recipeId] ?? {};
}
// ════════════════════════════════════════════════════════════════════
// Preset autofill (works in recipe mode)
// ════════════════════════════════════════════════════════════════════
// Recipe-mode preset autofill: selecting a preset pulls its architecture
// numbers from the Python side (get_preset) and pushes them into the form.
$("preset").addEventListener("change", (e) => {
  const modelId = e.target.value;
  if (!modelId) return;
  state.lastModelId = modelId; // remember for filename/hash
  // Mirror behavior with profile-preset: also fill HF id input if present.
  const hfIdInput = $("hf-id");
  if (hfIdInput) {
    hfIdInput.value = modelId;
    const hfStatus = $("hf-status");
    if (hfStatus) hfStatus.textContent = tFmt("profile.preset_loaded", { id: modelId });
  }
  // get_preset runs in Pyodide; convert the PyProxy to a plain object when needed.
  const proxy = state.pyodide.runPython(`get_preset(${JSON.stringify(modelId)})`);
  const preset = proxy.toJs ? proxy.toJs({ dict_converter: Object.fromEntries }) : proxy;
  if (preset && Object.keys(preset).length > 0) {
    fillRecipeForm(preset);
  }
});
/**
 * Push preset values into whichever dynamic-form inputs exist.
 * Large numbers (and n_params specifically) are rendered in exponential
 * notation; everything else is stringified as-is.
 * @param {object} p - preset key/value pairs
 */
function fillRecipeForm(p) {
  // Known keys pass through this map (currently identity); unknown keys
  // fall back to the key itself.
  const FIELD_MAP = {
    theta: "theta", T_train: "T_train",
    n_attention_heads: "n_attention_heads", n_kv_heads: "n_kv_heads",
    d_head: "d_head", n_layers: "n_layers", n_params: "n_params",
    has_SWA: "has_SWA",
  };
  for (const [key, value] of Object.entries(p)) {
    const input = $("param_" + (FIELD_MAP[key] ?? key));
    if (input) {
      const useExponential = typeof value === "number" && (key === "n_params" || value > 1e6);
      input.value = useExponential ? value.toExponential(2) : String(value);
    }
    // Also fill N_params for cost recipes
    if (key === "n_params") {
      const nParamsInput = $("param_N_params");
      if (nParamsInput) {
        nParamsInput.value = typeof value === "number" ? value.toExponential(2) : String(value);
      }
    }
  }
}
// ════════════════════════════════════════════════════════════════════
// HF Hub fetch (any model)
// ════════════════════════════════════════════════════════════════════
// Build the same unsloth mirror candidates used in spec-decode. Lets us
// fetch config.json for gated families (Llama / Mistral / Gemma) without
// requiring HF auth — the unsloth redistributions are public and ship the
// original config.json verbatim (they only quantize weights, not metadata).
/**
 * Build candidate unsloth mirror repo ids for a (possibly gated) HF model.
 * Order matters: plain mirror first, then Meta-prefixed, then the
 * bnb-4bit quantized variants. The original id itself is excluded,
 * and the list is deduped while preserving order.
 * @param {string} modelId - e.g. "meta-llama/Meta-Llama-3-8B"
 * @returns {string[]} candidate ids to try in sequence
 */
function _hfMirrorCandidates(modelId) {
  const repoName = modelId.split("/").at(-1);
  if (!repoName) return [];
  const hasMetaPrefix = repoName.startsWith("Meta-");
  const candidates = [`unsloth/${repoName}`];
  if (!hasMetaPrefix) candidates.push(`unsloth/Meta-${repoName}`);
  candidates.push(`unsloth/${repoName}-bnb-4bit`);
  if (!hasMetaPrefix) candidates.push(`unsloth/Meta-${repoName}-bnb-4bit`);
  const unique = [];
  for (const cand of candidates) {
    if (cand !== modelId && !unique.includes(cand)) unique.push(cand);
  }
  return unique;
}
/**
 * Fetch and parse config.json for a single HF repo id.
 * Uses /resolve/main/ rather than /raw/main/ — /resolve follows LFS for
 * large files (irrelevant for config.json, which is always small, but
 * consistent & future-proof). CORS is granted on both.
 * @param {string} modelId - HF repo id
 * @returns {Promise<{ok: true, data: object} | {ok: false, status?: number, error?: string}>}
 */
async function _tryConfigUrl(modelId) {
  const url = `https://huggingface.co/${modelId}/resolve/main/config.json`;
  const resp = await fetch(url);
  if (!resp.ok) {
    return { ok: false, status: resp.status };
  }
  try {
    const parsed = await resp.json();
    return { ok: true, data: parsed };
  } catch {
    // Body was not valid JSON (e.g. an HTML error page).
    return { ok: false, error: "parse_failed" };
  }
}
/**
 * Fetch config.json for a model, falling back to open unsloth mirrors
 * when the original repo is gated (HTTP 401/403).
 * @param {string} modelId - user-supplied HF repo id
 * @returns {Promise<object>} parsed config (mirror hits are stamped with
 *   __via_mirror / __mirror_of so callers can surface the source)
 * @throws {Error} err.code === "gated" when gated and no mirror worked;
 *   a plain Error for 404/network/parse failures
 */
async function fetchHfConfig(modelId) {
  // 1. Try the user-pasted id directly.
  const direct = await _tryConfigUrl(modelId);
  if (direct.ok) return direct.data;
  // 2. Mirrors only help for auth failures; everything else surfaces now.
  const gated = direct.status === 401 || direct.status === 403;
  if (!gated) {
    throw new Error(`HTTP ${direct.status} — config.json not found at https://huggingface.co/${modelId}/resolve/main/config.json`);
  }
  for (const cand of _hfMirrorCandidates(modelId)) {
    const mirrored = await _tryConfigUrl(cand);
    if (!mirrored.ok) continue;
    // Stamp the mirror id so callers can surface a "fetched via mirror"
    // hint if they want; backwards-compatible with code that ignores it.
    mirrored.data.__via_mirror = cand;
    mirrored.data.__mirror_of = modelId;
    return mirrored.data;
  }
  const err = new Error(`🔒 ${modelId} is gated — accept license at https://huggingface.co/${modelId}`);
  err.code = "gated";
  err.modelId = modelId;
  throw err;
}
// Recipe-mode HF fetch: pull config.json for any pasted model id, convert
// it to a preset, and fill the recipe form. Button is disabled while in flight.
$("hf-fetch-btn").addEventListener("click", async () => {
  const statusEl = $("hf-status");
  const fetchBtn = $("hf-fetch-btn");
  const modelId = $("hf-id").value.trim();
  if (!modelId) {
    statusEl.textContent = "⚠ Enter a model id like 'Qwen/Qwen2.5-32B-Instruct'";
    return;
  }
  statusEl.textContent = `⏳ Fetching config.json from HF Hub for ${modelId}...`;
  fetchBtn.disabled = true;
  state.lastModelId = modelId; // remember for filename/hash
  try {
    const cfg = await fetchHfConfig(modelId);
    const preset = configToPreset(cfg, modelId);
    fillRecipeForm(preset);
    // NOTE(review): modelId / preset._family go into innerHTML unescaped —
    // confirm both are trusted before reuse elsewhere.
    statusEl.innerHTML = `✅ Config loaded for ${modelId} (family: ${preset._family}). Verify values, click Analyze.`;
  } catch (err) {
    statusEl.textContent = `❌ ${err.message}`;
  } finally {
    fetchBtn.disabled = false;
  }
});
// ════════════════════════════════════════════════════════════════════
// 🪟 Unmask mode (v0.7.0 anti-bullshit pack #1)
// ════════════════════════════════════════════════════════════════════
// Tiny string-template helper: tFmt(key, params) substitutes every
// {placeholder} occurrence in the i18n string for `key`.
// Falls back to the raw key when the i18n entry is missing so dev sees the gap.
/**
 * @param {string} key - i18n lookup key passed to t()
 * @param {object} [params] - placeholder name → value; null/undefined render
 *   as "—", numbers via toLocaleString(), everything else via String()
 * @returns {string} formatted string
 */
function tFmt(key, params = {}) {
  let s = t(key) || key;
  for (const [name, value] of Object.entries(params)) {
    const fmtVal = value === null || value === undefined ? "—"
      : (typeof value === "number" ? value.toLocaleString() : String(value));
    // split/join instead of `new RegExp(\`\\{${name}\\}\`, "g")`: placeholder
    // names need no regex escaping, and "$"-sequences in fmtVal are never
    // misinterpreted as regex replacement patterns.
    s = s.split(`{${name}}`).join(fmtVal);
  }
  return s;
}
// Hex color per unmask verdict. `unknown` doubles as the fallback for
// verdict strings with no entry (used via `VERDICT_COLOR[v] || VERDICT_COLOR.unknown`).
const VERDICT_COLOR = {
  honest: "#3fb950",            // green — declared context matches effective
  inflated: "#f1c40f",          // yellow — declared exceeds effective
  severely_inflated: "#f85149", // red
  yarn_extended: "#f1c40f",     // yellow — context reached via YaRN scaling
  unknown: "#8b949e",           // grey fallback
};
function renderUnmaskCard(result, modelId = "") {
const color = VERDICT_COLOR[result.verdict] || VERDICT_COLOR.unknown;
const ratioPct = (result.ratio * 100).toFixed(1);
const f = result.flags;
const fmtN = (x) => x === null || x === undefined ? "—" : Number(x).toLocaleString();
const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c =>
({"&":"&","<":"<",">":">",'"':""","'":"'"}[c]));
const verdictLabel = t(`unmask.verdict.${result.verdict}`) || result.verdict;
const labelDeclared = t("unmask.label.declared") || "Declared context";
const labelEffective = t("unmask.label.effective") || "Effective (estimate)";
const labelRatio = t("unmask.label.ratio") || "Ratio";
const sectionFlags = t("unmask.section.flags") || "Architecture flags";
const sectionWarn = t("unmask.section.warnings")|| "Warnings";
const sectionReco = t("unmask.section.reco") || "Recommendation";
// Architecture flags row labels
const flagSwa = t("unmask.flag.swa") || "SWA";
const flagRope = t("unmask.flag.rope") || "RoPE scaling";
const flagGqa = t("unmask.flag.gqa") || "GQA";
const flagLayers = t("unmask.flag.layers") || "Layers";
const flagDhead = t("unmask.flag.dhead") || "d_head";
const flagTheta = t("unmask.flag.theta") || "RoPE θ";
const flagYes = t("unmask.flag.yes") || "yes";
const flagNo = t("unmask.flag.no") || "no";
const swaText = f.hasSWA
? `${flagYes} (window = ${fmtN(f.swaWindow)})`
: flagNo;
const ropeText = f.hasYaRN
? `${f.ropeScalingType} (factor = ${f.yarnFactor}, original = ${fmtN(f.yarnOriginal)})`
: flagNo;
const gqaText = f.hasGQA
? `${flagYes} (${f.n_kv_heads} kv / ${f.n_attn_heads} attn heads)`
: (t("unmask.flag.full_mha") || "no (full MHA, {n} heads)").replace("{n}", f.n_attn_heads ?? "?");
const warningsHtml = result.warnings.length
? `${sectionWarn}
- ${result.warnings.map(w =>
`
- ${tFmt("unmask.warn." + w.code, w.params)} `).join("")}
${sectionReco}
${tFmt("unmask.reco." + result.recoCode, result.recoParams)}
${escapeHtml(modelId)}${sectionFlags}
- ${flagSwa}: ${swaText}
- ${flagRope}: ${ropeText}
- ${flagGqa}: ${gqaText}
- ${flagLayers}: ${fmtN(f.n_layers)} · ${flagDhead}: ${fmtN(f.d_head)} · ${flagTheta}: ${fmtN(f.rope_theta)}
${sectionWarn}
- ${result.warnings.map(w => `
- ${tFmt("template.warn." + w.code, w.params)} `).join("")}
${sectionCmd}
${escapeHtml(lmEvalCmd)}
${escapeHtml(vllmCmd)}
${escapeHtml(transformersCmd)}
${sectionRaw}
${escapeHtml(result.rawTemplate)}
${escapeHtml(modelId)}${escapeHtml(r.model)}${noTies}
`; } else { tiesHtml = `| ${t("arena.col.tie_pair") || "Pair"} | ${t("arena.col.tie_diff") || "Elo gap"} | ${t("arena.col.tie_overlap") || "CI overlap"} |
|---|---|---|
#${tieEntry.rank_a} ${escapeHtml(tieEntry.model_a)} vs #${tieEntry.rank_b} ${escapeHtml(tieEntry.model_b)} |
${fmtN(Math.round(tieEntry.elo_diff * 10) / 10)} Elo | ${fmtN(Math.round(tieEntry.overlap_elo * 10) / 10)} Elo |
- ${t("arena.summary.votes") || "Total votes"}: ${fmtN(s.total_votes)}
- ${t("arena.summary.models") || "Models"}: ${fmtN(s.n_models)}
- ${t("arena.summary.ties") || "Statistical ties"}: ${fmtN(s.n_ties)}
- ${t("arena.summary.bootstrap") || "Bootstrap iters"}: ${fmtN(s.bootstrap_iters)}
- ${t("arena.summary.ci_level") || "CI level"}: ${(s.ci_level * 100).toFixed(0)}%
${titleRanked}
| ${colRank} | ${colModel} | ${colElo} | ${colCi} | ${colSpread} | ${colMatches} | ${colWins} |
|---|
${titleTies} (${result.ties.length})
${tiesHtml}${titleSummary}
${summaryHtml}${t("contam.no_entries") || "(none in this category)"}
`; let body = ""; for (const r of group) { body += `| ${colBench} | ${colReleased} | ${colGap} | ${colPrior} | ${colCorpora} | ${colCategory} |
|---|
${titleHigh} (${high.length})
${adviceHigh}
${tableFor(high)}${titleMed} (${medium.length})
${adviceMedium}
${tableFor(medium)}${titleLow} (${low.length})
${adviceLow}
${tableFor(low)}${recoText}
`; } else { recoHtml = `${t("quant.reco.no_action") || "No action needed — quantization is safe for this architecture."}
`; } return `${escapeHtml(modelId)} + ${escapeHtml(result.scheme_label)}${t("quant.section.breakdown") || "Breakdown"}
- ${t("quant.field.scheme") || "Scheme"}: ${escapeHtml(result.scheme_label)} (${result.scheme_bits}-bit, ${result.scheme_calibrated ? (t("quant.field.calibrated") || "calibrated") : (t("quant.field.uncalibrated") || "uncalibrated")})
- ${t("quant.field.base_penalty") || "Base penalty"}: ${result.base_penalty.toFixed(3)}
- ${t("quant.field.arch_mult_full") || "Architecture multiplier"}: ×${result.arch_multiplier} (d_head, GQA, SWA, params)
- ${t("quant.field.gamma_shift") || "Predicted γ shift"}: +${result.gamma_shift.toFixed(3)}
- ${t("quant.field.ppl_band") || "ΔPPL band (est.)"}: ${result.delta_ppl.low.toFixed(2)} – ${result.delta_ppl.high.toFixed(2)}
- ${t("quant.field.params") || "Parameters"}: ${fmtN(result.n_params)}
${t("quant.section.reco") || "Recommendation"}
${recoHtml}${t("quant.section.compare") || "All schemes (sorted by safety)"}
| ${t("quant.col.scheme") || "Scheme"} | ${t("quant.col.bits") || "Bits"} | ${t("quant.col.gamma_shift") || "γ shift"} | ${t("quant.col.ppl_band") || "ΔPPL band"} | ${t("quant.col.regime") || "Regime"} |
|---|
${t("drift.dominant_cause") || "Dominant cause"}: ${causeText}
`; } const recoText = t(`drift.reco.${result.verdict}`) || ""; return `${t("drift.section.setups") || "Setups"}
| ${t("drift.setup_a") || "Setup A"} | ${t("drift.setup_b") || "Setup B"} | |
|---|---|---|
| ${t("drift.score") || "Score"} | ${a.score?.toFixed(2)} | ${b.score?.toFixed(2)} |
| ${t("drift.framework") || "Framework"} | ${escapeHtml(fwLabel(a.framework))} | ${escapeHtml(fwLabel(b.framework))} |
| ${t("drift.dtype") || "Dtype"} | ${escapeHtml(dtLabel(a.dtype))} | ${escapeHtml(dtLabel(b.dtype))} |
| ${t("drift.batch") || "Batch"} | ${a.batch} | ${b.batch} |
| ${t("drift.template") || "Chat-template"} | ${escapeHtml(t("drift.template." + a.chat_template) || a.chat_template)} | ${escapeHtml(t("drift.template." + b.chat_template) || b.chat_template)} |
${t("drift.section.breakdown") || "Drift contributors (numerical band)"}
- ${t("drift.contrib.dtype") || "Dtype mismatch"}: ${result.breakdown.dtype.toFixed(2)} pts
- ${t("drift.contrib.framework") || "Framework"}: ${result.breakdown.framework.toFixed(2)} pts
- ${t("drift.contrib.batch") || "Batch difference"}: ${result.breakdown.batch.toFixed(2)} pts ${result.breakdown.template_mismatch !== null ? `
- ${t("drift.contrib.template") || "Chat-template MISMATCH"}: ~${result.breakdown.template_mismatch.toFixed(0)} pts (dominant) ` : ""}
${t("drift.section.verdict") || "Verdict & recommendation"}
${causeHtml} ${recoText ? `${recoText}
` : ""}${cfg.__via_mirror})`;
} else {
$("niah-status").textContent = tFmt("niah.status.fetched", { modelId });
}
return cfg;
} catch (err) {
if (err.code === "gated") {
$("niah-status").innerHTML = `🔒 ${err.modelId} ${t("hf_auto.gated_msg") || "is gated. Accept the license here:"} huggingface.co/${err.modelId}`;
} else {
$("niah-status").textContent = `❌ ${err.message}`;
}
return null;
} finally {
$("niah-fetch-btn").disabled = false;
}
}
function renderNIAHCard(result, modelId, calib = null) {
const escapeHtml = (s) => String(s).replace(/[&<>"']/g, c =>
({"&":"&","<":"<",">":">",'"':""","'":"'"}[c]));
const fmtN = (x) => x === null || x === undefined ? "—" : Number(x).toLocaleString();
const color = NIAH_VERDICT_COLOR[result.verdict] || "#8b949e";
const verdictLabel = t(`niah.verdict.${result.verdict}`) || result.verdict;
const reco = t(`niah.reco.${result.verdict}`) || "";
const safeText = result.safe_context
? tFmt("niah.safe_context", { ctx: result.safe_context })
: (t("niah.safe_context_none") || "No safe context found below your target — model fails reasoning even at small contexts.");
// RULER calibration block — appears only when KB lookup hits.
// Shows measured RULER aggregate, derived NIAH/reasoning, and the
// delta vs the heuristic so users see when the predictor was off.
let calibBlock = "";
if (calib) {
const fmtPct = (v) => `${(v * 100).toFixed(0)}%`;
const fmtDelta = (d) => {
if (d == null) return "—";
const pp = Math.round(d * 100);
const sign = pp > 0 ? "+" : "";
const col = Math.abs(pp) >= 10 ? "#f0883e" : Math.abs(pp) >= 5 ? "#d29922" : "#8b949e";
return `${sign}${pp} pp`;
};
const extrapNote = calib.extrapolated
? ` ⚠ ${t("niah.calib.extrapolated") || "extrapolated outside RULER's measured range"}`
: "";
calibBlock = `
📊 ${t("niah.calib.heading") || "RULER-calibrated (NVIDIA published data)"}
${tFmt("niah.calib.matched", {
alias: escapeHtml(calib.matched_alias),
canonical: escapeHtml(calib.canonical_id),
}) || `Matched ${escapeHtml(calib.matched_alias)} → KB row ${escapeHtml(calib.canonical_id)}.`}
${t("niah.calib.aggregate") || "RULER aggregate"} @ ${fmtN(result.T_eval)}:
${calib.ruler_avg_pct}%
(${t("niah.calib.interp") || "interpolated between"} ${calib.interp_anchor})${extrapNote}
| ${t("niah.calib.col.heuristic") || "Heuristic"} | ${t("niah.calib.col.calibrated") || "RULER-calibrated"} | ${t("niah.calib.col.delta") || "Δ"} | |
|---|---|---|---|
| NIAH | ${fmtPct(result.niah_rate)} | ${fmtPct(calib.niah_calibrated)} | ${fmtDelta(calib.delta_niah)} |
| ${t("niah.label.reasoning") || "Reasoning"} | ${fmtPct(result.reasoning_rate)} | ${fmtPct(calib.reasoning_calibrated)} | ${fmtDelta(calib.delta_reasoning)} |
${t("niah.calib.factors") || "Per-task factors from RULER paper Appendix Tables 13-16:"} retrieval = ${calib.retrieval_factor}× aggregate, reasoning = ${calib.reasoning_factor}× aggregate (${t("niah.calib.factors_caveat") || "honest range: retrieval 0.95-1.10×, reasoning 0.60-0.85×"}).
${t("niah.calib.claimed_vs_effective") || "Paper-reported"}: ${t("niah.calib.claimed") || "claimed"} ${fmtN(calib.claimed_context)} / ${t("niah.calib.effective") || "effective"} ${fmtN(calib.effective_context)}. ${t("niah.calib.source") || "Source"}: RULER paper (Hsieh et al., COLM 2024)
💡 ${t("niah.calib.miss") || "RULER calibration unavailable for this model — using architectural heuristic only. Add to data/ruler_kb.json if you have measured numbers."}
`; } return `${escapeHtml(modelId)} @ ${fmtN(result.T_eval)} tokens${t("niah.section.breakdown") || "Architecture breakdown"}
- γ_Padé @ T_eval: ${result.gamma_pade}
- ${t("niah.field.dhorizon") || "d_horizon (effective)"}: ${fmtN(result.d_horizon)} tokens
- ${t("niah.field.ratio") || "T_eval / d_horizon"}: ${result.horizon_ratio}×
- ${t("niah.field.arch_pressure") || "Arch pressure (small d_head + GQA + SWA)"}: ×${result.arch_pressure}
- ${t("niah.field.theta") || "RoPE θ"}: ${fmtN(result.theta)}
- ${t("niah.field.t_train") || "T_train (claimed)"}: ${fmtN(result.T_train)}
${t("niah.section.reco") || "Recommendation"}
${reco}
${t("niah.label.safe_ctx") || "Safe reasoning context"}: ${safeText}
${t("niah.section.sweep") || "Pass rate sweep across context lengths"}
| ${t("niah.col.context") || "T_eval"} | ${t("niah.col.niah") || "NIAH"} | ${t("niah.col.reasoning") || "Reasoning"} | ${t("niah.col.gap") || "Gap"} | ${t("niah.col.verdict") || "Verdict"} |
|---|
${escapeHtml(err.message)}
Try the Recipe mode for full manual control.`; } finally { $("ask-btn").disabled = false; } }); $("example-btn").addEventListener("click", () => { const ex = EXAMPLES[Math.floor(Math.random() * EXAMPLES.length)]; $("question").value = ex; }); async function routeQuestion(question) { const engine = await loadWebLLM(); const recipesDesc = state.recipes.map(r => ` ${r.id}: ${r.name} — ${r.description}\n params: ${r.params.join(", ")}` ).join("\n"); const systemPrompt = `You are a routing function. Given a user's free-form question about transformer LLM viability, you MUST output a single JSON object with two fields: - recipe_id: one of [${state.recipes.map(r => r.id).join(", ")}] - params: an object with parameter values inferred from the question Available recipes: ${recipesDesc} Common model facts you may use: Meta-Llama-3-8B: theta=500000, T_train=8192, n_attention_heads=32, n_kv_heads=8, d_head=128, n_layers=32, n_params=8e9 Mistral-7B-v0.1: theta=10000, T_train=8192, n_attention_heads=32, n_kv_heads=8, d_head=128, n_layers=32, n_params=7e9, has_SWA=true Qwen2.5-7B: theta=1000000, T_train=32768, n_attention_heads=28, n_kv_heads=4, d_head=128, n_layers=28, n_params=7.6e9 Llama-3.3-70B-Instruct: theta=500000, T_train=131072, n_attention_heads=64, n_kv_heads=8, d_head=128, n_layers=80, n_params=70e9 Respond with ONLY the JSON object. 
No prose, no markdown fences, no explanation.`; const reply = await engine.chat.completions.create({ messages: [ { role: "system", content: systemPrompt }, { role: "user", content: question }, ], max_tokens: 400, temperature: 0.0, response_format: { type: "json_object" }, }); const raw = reply.choices[0].message.content.trim(); let parsed; try { parsed = JSON.parse(raw); } catch (e) { // Try extracting JSON from markdown fences const m = raw.match(/\{[\s\S]*\}/); if (!m) throw new Error(`LLM returned non-JSON: ${raw.slice(0, 200)}`); parsed = JSON.parse(m[0]); } if (!parsed.recipe_id || !state.recipesById[parsed.recipe_id]) { throw new Error(`Unknown recipe: ${parsed.recipe_id}`); } return parsed; } // ════════════════════════════════════════════════════════════════════ // Run + display + synthesize // ════════════════════════════════════════════════════════════════════ async function runAndDisplay(recipeId, params, originalQuestion=null) { setStatus("🧮 Computing TAF chain..."); state.pyodide.globals.set("__rid", recipeId); state.pyodide.globals.set("__params", state.pyodide.toPy(params)); const resultJSON = state.pyodide.runPython(` import json result = run_recipe(__rid, **__params) json.dumps(result) `); const result = JSON.parse(resultJSON); result._original_question = originalQuestion; renderResult(result); $("output-section").style.display = "block"; $("profile-output").style.display = "none"; $("compare-output").style.display = "none"; state.lastResult = { type: "recipe", recipeId, params }; state.lastFullResult = result; setStatus("✅ Done. 
Numbers below."); if (ENABLE_WEBLLM) { await synthesizeAnswer(result); } } function renderResult(r) { console.log("[TAF] renderResult called with:", r); if (r.error) { $("verdict-box").className = "verdict-no"; $("verdict-box").innerHTML = `Error: ${escapeHtml(r.error)}`; $("chain-box").innerHTML = ""; return; } const vBox = $("verdict-box"); if (!vBox) { console.error("[TAF] verdict-box element not found!"); return; } const verdictStr = String(r.verdict || "UNKNOWN"); let vClass = ""; if (verdictStr.startsWith("YES") || verdictStr === "GO" || verdictStr.startsWith("USE SOFT")) vClass = "verdict-yes"; else if (verdictStr.startsWith("NO") || verdictStr.startsWith("MEMORY") || verdictStr === "TINY-MODEL") vClass = "verdict-no"; else vClass = "verdict-degraded"; vBox.className = vClass; const verdictEmoji = vClass === "verdict-yes" ? "✅" : (vClass === "verdict-no" ? "❌" : "⚠"); vBox.innerHTML = `
${escapeHtml(JSON.stringify(r, null, 2))}`;
return String(r);
}
/**
 * Escape a value for safe interpolation into HTML.
 * The visible source had lost its entity text (no-op replacements and an
 * empty regex) — restored to the standard five-entity escape. Ampersand is
 * replaced first so earlier substitutions are not double-escaped.
 * @param {*} s - any value; coerced with String() first
 * @returns {string} HTML-safe text
 */
function escapeHtml(s) {
  return String(s)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
// ════════════════════════════════════════════════════════════════════
// WebLLM (synthesis + router)
// ════════════════════════════════════════════════════════════════════
/**
 * Lazily create (and memoize on state.webllm) the in-browser WebLLM engine.
 * Requests persistent storage first to reduce cache-eviction/quota issues
 * with the large model weights, then loads WEBLLM_MODEL, falling back to
 * WEBLLM_FALLBACK on quota/storage errors.
 * @returns {Promise<object>} the MLC engine instance
 * @throws {Error} when both model loads fail on storage constraints, or
 *   the original error for non-storage failures
 */
async function loadWebLLM() {
  if (state.webllm) return state.webllm;
  // Request persistent storage to avoid quota issues with cached model weights
  if (navigator.storage?.persist) {
    try {
      const granted = await navigator.storage.persist();
      if (granted) {
        console.log("Persistent storage granted");
      } else {
        console.log("Persistent storage denied");
      }
    } catch (e) {
      console.warn("storage.persist() failed:", e);
    }
  }
  setStatus(`⏳ Loading WebLLM library + ${WEBLLM_MODEL.split("-")[0]} (~350MB first time, cached after)...`);
  const { CreateMLCEngine } = await import("https://esm.run/@mlc-ai/web-llm");
  const tryLoad = (modelId) =>
    CreateMLCEngine(modelId, {
      initProgressCallback: (info) => setStatus(`⏳ ${info.text || "Loading model..."}`),
    });
  try {
    state.webllm = await tryLoad(WEBLLM_MODEL);
  } catch (err) {
    const isStorageIssue =
      String(err).includes("QuotaExceeded") || String(err).includes("storage");
    if (!isStorageIssue) throw err;
    setStatus(`⚠ Quota exceeded for ${WEBLLM_MODEL}. Trying smaller fallback ${WEBLLM_FALLBACK}...`);
    try {
      state.webllm = await tryLoad(WEBLLM_FALLBACK);
    } catch (err2) {
      throw new Error(
        `Both models failed. Browser storage too constrained. ` +
        `Try: (1) Settings → Privacy → Site settings → allow more storage for this site, ` +
        `(2) clear browser cache, (3) use Chrome/Edge in non-incognito mode. ` +
        `Original error: ${err2.message || err2}`
      );
    }
  }
  return state.webllm;
}
async function synthesizeAnswer(result) {
$("answer-header").style.display = "block";
$("answer-box").style.display = "block";
$("answer-box").innerHTML = 'Generating plain-English summary...';
let engine;
try {
engine = await loadWebLLM();
} catch (err) {
$("answer-box").innerHTML = `⚠ WebLLM failed: ${escapeHtml(String(err))}Numbers above are still correct.`; return; } const prompt = buildSynthesisPrompt(result); let answer = ""; try { const reply = await engine.chat.completions.create({ messages: [ { role: "system", content: t("synthesis.system") }, { role: "user", content: prompt }, ], max_tokens: 400, temperature: 0.2, }); answer = reply.choices[0].message.content; } catch (err) { $("answer-box").innerHTML = `⚠ Synthesis failed: ${escapeHtml(String(err))}`; return; } $("answer-box").innerHTML = `
🎚 What-if: drag T_eval to see γ change live
Pure JS recompute (no Pyodide call). Shows the geometric γ_Padé and d_horizon as you slide. The full chain re-runs on click.
✅ ${counts.confirmed} confirmed · ⚠ ${counts.partial} partial · ❌ ${counts.refuted} refuted · ⏳ ${counts.untested} untested (out of ${FALSIFICATION_STATUS.length} total predictions)
`; let table = `| ID | Claim | Status | Evidence |
|---|---|---|---|
${f.id} |
${escapeHtml(f.claim)} | ${icon} ${f.status} | ${escapeHtml(f.evidence)} |
${REGISTRY_REPO} exists with submissions, they'll appear here live.`;
return;
}
throw new Error(`HTTP ${resp.status}`);
}
const issues = await resp.json();
if (!issues || issues.length === 0) {
target.innerHTML = `No submissions yet. Be the first — generate a Profile and click 📤 Submit to registry.`;
return;
}
const html = issues.map(issue => {
const verdict = extractVerdictFromTitle(issue.title);
const vClass = verdictClass(verdict);
const time = relativeTime(new Date(issue.created_at));
return ``;
}).join("");
target.innerHTML = html;
} catch (err) {
target.innerHTML = `⚠ Couldn't load community feed: ${escapeHtml(err.message)}`;
}
}
/**
 * Pull a short verdict token out of a registry-issue title.
 * Prefers the token following "→"; otherwise falls back to keyword
 * markers checked in priority order; "?" when nothing matches.
 * @param {string} title - GitHub issue title
 * @returns {string} verdict token
 */
function extractVerdictFromTitle(title) {
  const arrowMatch = /→\s*(\S+)/.exec(title);
  if (arrowMatch) return arrowMatch[1];
  const markers = [
    ["YES", "YES"],
    ["NO", "NO"],
    ["DEGRADED", "DEG"],
    ["Profile", "📇"],
    ["Compare", "🆚"],
  ];
  for (const [needle, verdict] of markers) {
    if (title.includes(needle)) return verdict;
  }
  return "?";
}
/**
 * Map a verdict token to its CSS class suffix.
 * @param {string} v - verdict token (e.g. "YES", "GO", "NO-GO", "DEG")
 * @returns {string} "yes" | "no" | "deg" | "" (no styling)
 */
function verdictClass(v) {
  const positive = v.startsWith("YES") || v === "GO";
  if (positive) return "yes";
  if (v.startsWith("NO")) return "no";
  return v === "DEG" || v === "DEGRADED" ? "deg" : "";
}
/**
 * Human-friendly "N units ago" for a past timestamp.
 * @param {Date} d - timestamp in the past
 * @returns {string} e.g. "42s ago", "5m ago", "3h ago", "2d ago"
 */
function relativeTime(d) {
  const elapsedSec = Math.floor((Date.now() - d.getTime()) / 1000);
  // [upper bound in seconds, divisor, unit suffix] — checked in order.
  const buckets = [
    [60, 1, "s"],
    [3600, 60, "m"],
    [86400, 3600, "h"],
  ];
  for (const [limit, divisor, suffix] of buckets) {
    if (elapsedSec < limit) {
      return `${Math.floor(elapsedSec / divisor)}${suffix} ago`;
    }
  }
  return `${Math.floor(elapsedSec / 86400)}d ago`;
}
// ════════════════════════════════════════════════════════════════════
// PROFILE mode
// ════════════════════════════════════════════════════════════════════
// Profile-mode preset autofill: load the preset from Pyodide and populate
// every profile-* input.
$("profile-preset").addEventListener("change", (e) => {
  const modelId = e.target.value;
  if (!modelId) return;
  state.lastModelId = modelId; // remember for filename/hash
  // Preset keys ARE valid HF model ids (e.g. "meta-llama/Llama-3.2-1B"). Auto-fill
  // the HF id input so the user can also click 📥 Fetch to refresh from HF Hub
  // without retyping. Status hint clarifies the dual source of truth.
  const hfIdInput = $("profile-hf-id");
  if (hfIdInput) {
    hfIdInput.value = modelId;
    const hfStatus = $("profile-hf-status");
    if (hfStatus) {
      hfStatus.textContent = tFmt("profile.preset_loaded", { id: modelId });
    }
  }
  const proxy = state.pyodide.runPython(`get_preset(${JSON.stringify(modelId)})`);
  const p = proxy.toJs ? proxy.toJs({ dict_converter: Object.fromEntries }) : proxy;
  if (!p || Object.keys(p).length === 0) return;
  const fieldValues = {
    "profile-theta": p.theta,
    "profile-T_train": p.T_train,
    "profile-n_attn": p.n_attention_heads,
    "profile-n_kv": p.n_kv_heads,
    "profile-d_head": p.d_head,
    "profile-n_layers": p.n_layers,
    "profile-n_params": p.n_params.toExponential(2),
    "profile-has_swa": String(p.has_SWA),
  };
  for (const [fieldId, value] of Object.entries(fieldValues)) {
    $(fieldId).value = value;
  }
});
// Profile-mode HF fetch: pull config.json, convert to a preset, and fill
// the profile form. Button is disabled while the fetch is in flight.
$("profile-fetch-btn").addEventListener("click", async () => {
  const statusEl = $("profile-hf-status");
  const fetchBtn = $("profile-fetch-btn");
  const id = $("profile-hf-id").value.trim();
  if (!id) {
    statusEl.textContent = "⚠ Enter a model id";
    return;
  }
  statusEl.textContent = `⏳ Fetching ${id}...`;
  fetchBtn.disabled = true;
  state.lastModelId = id; // remember for filename/hash
  try {
    const cfg = await fetchHfConfig(id);
    const p = configToPreset(cfg, id);
    const fieldValues = {
      "profile-theta": p.theta,
      "profile-T_train": p.T_train,
      "profile-n_attn": p.n_attention_heads,
      "profile-n_kv": p.n_kv_heads,
      "profile-d_head": p.d_head,
      "profile-n_layers": p.n_layers,
      "profile-n_params": p.n_params.toExponential(2),
      "profile-has_swa": String(p.has_SWA),
    };
    for (const [fieldId, value] of Object.entries(fieldValues)) {
      $(fieldId).value = value;
    }
    statusEl.innerHTML = `✅ ${id} (${p._family})`;
  } catch (err) {
    statusEl.textContent = `❌ ${err.message}`;
  } finally {
    fetchBtn.disabled = false;
  }
});
// Profile-mode "Analyze": gather form params, run profile_model in Pyodide
// across all 5 recipes, and render the result. Button is disabled while running.
$("profile-btn").addEventListener("click", async () => {
  // parseInt gets an explicit radix of 10 so inputs like "08" can never
  // hit legacy non-decimal parsing paths.
  const params = {
    theta: parseFloat($("profile-theta").value),
    T_train: parseInt($("profile-T_train").value, 10),
    T_eval: parseInt($("profile-T_eval").value, 10),
    n_attention_heads: parseInt($("profile-n_attn").value, 10),
    n_kv_heads: parseInt($("profile-n_kv").value, 10),
    d_head: parseInt($("profile-d_head").value, 10),
    n_layers: parseInt($("profile-n_layers").value, 10),
    n_params: parseFloat($("profile-n_params").value),
    has_SWA: $("profile-has_swa").value === "true",
  };
  setStatus("🧮 Profiling — running all 5 recipes...");
  $("profile-btn").disabled = true;
  try {
    // Hand params to Python via a global; result comes back as a JSON
    // string (not a PyProxy) so no proxy lifetime management is needed.
    state.pyodide.globals.set("__pp", state.pyodide.toPy(params));
    const json = state.pyodide.runPython(`
import json
result = profile_model(**__pp)
json.dumps(result)
`);
    const profile = JSON.parse(json);
    renderProfile(profile, params);
    state.lastResult = { type: "profile", params };
    state.lastFullResult = profile;
    setStatus("✅ Profile ready.");
  } catch (err) {
    setStatus(`❌ ${err.message}`);
    console.error(err);
  } finally {
    $("profile-btn").disabled = false;
  }
});
function renderProfile(p, params) {
$("profile-output").style.display = "block";
// Hide other outputs
$("output-section").style.display = "none";
$("compare-output").style.display = "none";
const verdictClass = (v) => {
if (v.startsWith("YES") || v === "GO" || v.startsWith("USE SOFT")) return "v-yes";
if (v.startsWith("NO") || v.startsWith("MEMORY") || v === "TINY-MODEL") return "v-no";
return "v-deg";
};
const verdictEmoji = (v) => verdictClass(v) === "v-yes" ? "✅"
: verdictClass(v) === "v-no" ? "❌" : "⚠";
const ms = p.model_summary;
const kn = p.key_numbers;
const formatN = (x) => x === null || x === undefined ? "n/a"
: (typeof x === "number" ? x.toLocaleString(undefined, { maximumFractionDigits: 4 }) : String(x));
const recipesHtml = Object.entries(p.recipes).map(([rid, r]) => `
📋 Recipes — verdict per dimension ${recipeCount} ${t("tafcard.recipes_count_label", "dimensions")}
🔬 Diagnostics — numbers + γ check + what-if
🔢 Key numbers (paper §26)
🔍 γ predicted vs observed
🎚️ What-if explorer
✓ Verification — Lean + Sage + falsification
📑 Lean+Mathlib theorem table
🔬 Algebraic consistency (Sage + Lean v0.5)
🔬 Falsification status (F1-F23)
${falsHtml || '📂 Provenance & share
🔬 v0.5.3 — Calibration audit (2026-05-02)
ⓘ What do these mean?
🔍 Per-identity details (${total} checks)
Recipe: ${escapeHtml(cmp.recipe_id)} — ${escapeHtml(cmp.recipe_name)}
Shared params: ${escapeHtml(JSON.stringify(cmp.shared_params))}
| Model | Verdict | Reason | `; allKeys.forEach(k => html += `${escapeHtml(k)} | `); html += "
|---|---|---|---|
| ${escapeHtml(r.label)} | `; html += `${escapeHtml(r.verdict)} | `; html += `${escapeHtml(r.reason)} | `; allKeys.forEach(k => { const v = r.key_numbers ? r.key_numbers[k] : null; html += `${v === undefined || v === null ? "—" : (typeof v === "number" ? v.toLocaleString(undefined, { maximumFractionDigits: 2 }) : escapeHtml(String(v)))} | `; }); html += "
Click to expand
\n\n\`\`\`json\n${JSON.stringify(p, null, 2)}\n\`\`\`\n\nFull data
\n\n\`\`\`json\n${JSON.stringify(c, null, 2)}\n\`\`\`\n\nFull data
\n\n\`\`\`json\n${JSON.stringify(r, null, 2)}\n\`\`\`\n\n⚠ ${t("saturation.borderline") || "Borderline — within ±1pp of a threshold cutoff. Treat verdict as 'check carefully'."}
` : ""; const sourceTag = result.source === "live" ? `live` : (result.source === "baked_consensus" ? `consensus` : `baked`); const spreadStr = result.params.spread != null ? `${result.params.spread.toFixed(1)} pp` : "n/a"; const meanStr = result.params.mean != null ? `${result.params.mean.toFixed(1)}%` : "n/a"; return `${t("saturation.section.top3") || "Top-3 frontier scores"}
| # | ${t("saturation.col.model") || "Model"} | ${t("saturation.col.score") || "Score"} |
|---|
${t("saturation.section.recommendations") || "Recommended alternatives"}
- ${recoItems}
${t("saturation.section.note") || "Notes"}
${result.note}
${t("saturation.section.all") || "All tracked benchmarks"}
| ${t("saturation.col.bench") || "Benchmark"} | ${t("saturation.col.spread") || "Spread"} | ${t("saturation.col.mean") || "Mean"} | ${t("saturation.col.verdict") || "Verdict"} | ${t("saturation.col.reco") || "Top reco"} |
|---|
${t("hub.best_for") || "Best for"}: ${e.best_for}
` : ""; const notFor = e.not_for ? `${t("hub.not_for") || "Not for"}: ${e.not_for}
` : ""; return `${e.pain} ${modeBadge}
${bestFor} ${notFor} ${tools ? `${t("hub.tools") || "External tools"}:
- ${tools}
${c.icon} ${c.label} (${c.count})
${c.description}
${inner}${tFmt("hub.search.empty", { query })}
`; return; } const html = matches.map(renderEntry).join(""); $("hub-output").innerHTML = `${tFmt("hub.search.results", { n: matches.length, query })}
${html}${verdictBadge}
${escapeHtml(reason)}
${verdictBadge}
${t(`cot.hint.${result.code}`) || ""}
${escapeHtml(f.name)}| # | Field | Type |
|---|
${t("cot.suggested_fix.title") || "✓ Suggested fix"}
${t("cot.suggested_fix.desc") || ""}
${escapeHtml(fixed)}
${explainer}
` : ""; // Source attribution footer const attribution = `${t("cot.attribution") || ""} collinwilkins.com · JSONSchemaBench · llguidance
`; return `${verdictBadge} (${tFmt("cot.field_count", { n: result.params.field_count }) || `${result.params.field_count} fields`})
${explainerBlock} ${fieldTable} ${fixBlock} ${attribution}${escapeHtml(f.params.checkpoint_hint)} ${t("peft.detected_at_line") || "appears at line"} ${f.params.checkpoint_line}
${t("peft.suggested_fix") || "Suggested:"} ${escapeHtml(f.params.fix)}
${tFmt("peft.qlora_order.detail", f.params) || `prepare_model_for_kbit_training (line ${f.params.prepare_line}) runs AFTER get_peft_model (line ${f.params.get_peft_model_line}). Reverse the order.`}
`; } else if (f.rule === "target_modules_mismatch") { detail = `${t("peft.detected_arch") || "Detected arch"}: ${escapeHtml(f.params.detected_arch)} ${t("peft.from_model_id") || "(from model id"} ${escapeHtml(f.params.detected_from)})
${t("peft.your_modules") || "Your target_modules"}: ${escapeHtml(f.params.user_modules.join(", "))}
${t("peft.expected_modules") || "Expected for this arch"}: ${escapeHtml(f.params.expected_modules.join(", "))}
${tFmt("peft.match_ratio", f.params) || `${f.params.hits} of ${f.params.total} match.`}
`; } else if (f.rule === "alpha_not_2r") { detail = `r=${f.params.r}, lora_alpha=${f.params.lora_alpha} → ${t("peft.ratio") || "ratio"} ${f.params.ratio}× (${t("peft.alpha.convention") || "convention is α=2r or α=r"})
${t("peft.no_peft_calls.detail") || "No get_peft_model / PeftModel.from_pretrained / LoraConfig calls detected. Paste a PEFT/LoRA setup snippet."}
`; } return `${sevBadge} ${ruleLabel} ${lineLabel}
${explainer ? `${explainer}
` : ""} ${detail} ${fixHint ? `${fixHint}
` : ""}${tFmt("peft.summary", result.summary) || `${result.summary.total} finding(s)`}
` : ""; // Source attribution const attribution = `${t("peft.attribution") || "Refs:"} peft #2115 · PEFT troubleshooting · get_layer_status / get_model_status
`; return `${verdictBadge}
${summary} ${findingsHtml} ${attribution}${noteHtml.join(" ")}` : ""; const ttlMin = p.cache_ttl_seconds >= 3600 ? `${Math.round(p.cache_ttl_seconds / 3600)}h` : `${Math.round(p.cache_ttl_seconds / 60)}min`; const savingsColor = p.savings_usd > 0 ? "#3fb950" : (p.reason ? "#8b949e" : "#d29922"); const writeRow = p.cache_write_surcharge_usd && p.cache_write_surcharge_usd > 0 ? `
TTL ${ttlMin}
${t("cache.diff.title") || "Where the cache breaks"}
${t("cache.diff.legend") || "Green = shared prefix (cacheable). Red = first edit (everything from here is re-billed)."}
${verdictBadge}
${t("cache.hint.empty") || "Paste two prompts, then Predict."}
${tFmt("cache.summary.tokens", { common: p.tokens_common.toLocaleString(), total: p.tokens_total.toLocaleString(), pct: Math.round(p.hit_ratio * 100) }) || `Common prefix ${p.tokens_common.toLocaleString()} / ${p.tokens_total.toLocaleString()} tokens (${Math.round(p.hit_ratio * 100)}% theoretical hit ratio).`}
${tFmt("cache.summary.diff_at", { line: p.diff_point.line }) || `First difference at line ${p.diff_point.line}.`}
`; const rows = (result.providers || []).map(renderCacheProvider).join(""); const table = rows ? `| ${t("cache.col.provider") || "Provider"} | ${t("cache.col.hit") || "Hit"} | ${t("cache.col.cost") || "Base → cached"} | ${t("cache.col.savings") || "Savings"} |
|---|
${t("cache.attribution") || "Refs:"}
Anthropic prompt caching ·
OpenAI prompt caching ·
Gemini context caching
${t("cache.attribution.snapshot") || "Prices snapshot 2026-01; verify against current provider docs before acting on $."}
${verdictBadge}
${summary} ${table} ${diffViz} ${attribution}${verdictBadge}
${t(`speculative.hint.${result.code}`) || ""}
${verdictBadge}
- ${errs}
${t("speculative.fetch_error.hint") || "Check the model id spelling. For gated models you'll need to view the tokenizer file via your HF account — this tool can't auth."}
${escapeHtml(p.target_via_mirror)}.`);
}
if (p.draft_via_mirror) {
lines.push(tFmt("speculative.mirror.draft_used", {
original: escapeHtml(p.draftId),
mirror: escapeHtml(p.draft_via_mirror),
}) || `Draft was gated; used mirror ${escapeHtml(p.draft_via_mirror)}.`);
}
mirrorBanner = `
${l}`).join("")}
${t("speculative.mirror.warn") || "Mirror tokenizers (e.g. unsloth/) are usually byte-identical to the gated original because quantization touches weights, not tokens. Verify chat-template if exact match is required."}
${val ?? "—"}`;
const typeRow = `
${typeBadge(t("speculative.target_label_short") || "target", p.target_type, p.type_match ? "#3fb950" : "#f85149")}
${typeBadge(t("speculative.draft_label_short") || "draft", p.draft_type, p.type_match ? "#3fb950" : "#f85149")}
${p.type_match ? "" : ` ← ${t("speculative.type_mismatch_note") || "tokenizer types differ; spec-dec impossible"}`}
`;
const sizeRow = `
${t("speculative.vocab_size") || "Vocab size"}:
target = ${p.target_vocab_size.toLocaleString()},
draft = ${p.draft_vocab_size.toLocaleString()}
${p.vocab_size_match ? "" : ` ← ${t("speculative.size_diff") || "differ — every reused id is a misalignment"}`}
`;
// Sampled match
const matchPct = p.sampled_total > 0 ? Math.round(p.sampled_match_ratio * 100) : 0;
const matchColor = matchPct >= 99.9 ? "#3fb950" : matchPct >= 95 ? "#d29922" : "#f85149";
const sampleRow = `
${t("speculative.sampled") || "Token-id sample match"}:
${matchPct}%
(${p.sampled_match_count.toLocaleString()} / ${p.sampled_total.toLocaleString()} tokens)
${p.first_mismatch ? `${t("speculative.first_mismatch") || "First mismatch"}:
${escapeHtml(p.first_mismatch.token).slice(0, 40)} → target id ${p.first_mismatch.target_id ?? "—"}, draft id ${p.first_mismatch.draft_id ?? "—"}` : ""}
`;
// Special / added token diffs
const specDiffRows = (p.special_tokens_diff || []).map(d =>
`${d.name}: target=${escapeHtml(String(d.target ?? "—"))}, draft=${escapeHtml(String(d.draft ?? "—"))}${t("speculative.special_diff") || "Special-token differences"} (${p.special_tokens_diff.length})
- ${specDiffRows}
${escapeHtml(d.token).slice(0, 40)}${t("speculative.added_diff") || "Added-token differences"} (${(p.added_tokens_diff||[]).length})
- ${addedDiffPreview}${p.added_tokens_diff.length > 12 ? `
- ${t("speculative.added_diff_more") || "+ more …"} ` : ""}
${tFmt("speculative.speedup.params", { target: fmtParams(p.target_params), draft: fmtParams(p.draft_params), ratio }) || `target ${fmtParams(p.target_params)} / draft ${fmtParams(p.draft_params)} (param ratio ${ratio})`}
${p.speedup_low}×
${p.speedup_expected}×
${p.speedup_high}×
${t("speculative.speedup.disclaimer") || "α = draft acceptance rate. Real speedup depends on prompt domain, lookahead K, and engine overhead. Bands assume ideal verifier batching."}
${t("speculative.speedup.draft_not_smaller") || "Draft is not smaller than target — spec-dec is misuse here."}
`; } // Attribution const attribution = `${t("speculative.attribution") || "Refs:"} vLLM spec-dec docs · SGLang · transformers assistant_model · Leviathan et al. 2022
`; return `${verdictBadge}
${mirrorBanner}${typeRow}
${sizeRow}
${sampleRow}
${specDiffBlock} ${addedDiffBlock} ${speedupBlock} ${attribution}${t("tax.hint.empty") || "Paste some text and click Tokenize."}
${escapeHtml(r.modelId)} ${meta ? `(${escapeHtml(meta.label)})` : ""}: ${t(`tax.error.${r.error}`) || r.error}❌ ${t("tax.all_failed") || "All tokenizers failed to load."}
- ${errLines}
${escapeHtml(meta.family)}
${escapeHtml(meta.family)}
⚠ ${tFmt("tax.interp.worst", { label: meta?.label || worst.modelId, pct, }) || `${meta?.label || worst.modelId} costs ${pct}% more tokens than baseline for this text.`}
`; } else if (worst && worst.ratio_vs_baseline <= 1.05) { interpretation = `${t("tax.interp.uniform") || "✓ All tokenizers within ±5% — text is well-handled across vendors."}
`; } return `${tFmt("tax.summary.input", { chars: res.chars.toLocaleString(), bytes: res.bytes.toLocaleString() }) || `Input: ${res.chars.toLocaleString()} chars, ${res.bytes.toLocaleString()} bytes`} ${blocks.dominant ? ` · ${t("tax.script_breakdown") || "scripts"}: ${fmtBlocks(blocks)}` : ""}
${interpretation}| ${t("tax.col.tokenizer") || "Tokenizer"} | ${t("tax.col.tokens") || "Tokens"} | ${t("tax.col.cpt") || "Chars/tok"} | ${t("tax.col.ratio") || "Ratio"} |
|---|
${t("tax.attribution") || "Tokenizers via"} @huggingface/transformers (browser BPE runtime). ${t("tax.attribution.privacy") || "Text is tokenized locally — never leaves the browser."}
${t("longscore.miss.title") || "Model not found in KB"}
${tFmt("longscore.miss.body", { id: res.normalized_id, n: res.n_kb_total }) || `Looked up ${res.normalized_id}. KB has ${res.n_kb_total} models. Try a canonical HF id (e.g. Qwen2.5-72B-Instruct, Llama-3.1-70B-Instruct, Jamba-1.5-Mini).`}
${t("longscore.miss.suggest") || "Check coverage at"} RULER · HELMET.
${escapeHtml(res.display_name)}`; if (res.params_b) html += ` · ${res.params_b}B params`; if (res.recipe_class) html += ` · ${escapeHtml(res.recipe_class)}`; if (res.native_context_k) html += ` · native ctx ${res.native_context_k}K`; html += `
`; // RULER per-length + LongScore if (res.ruler_long_score) { const ls = res.ruler_long_score; const v = verdictMap[res.verdict] || { color: "#8b949e", label: res.verdict }; html += `${t("longscore.score_label") || "LongScore"}: ${lsFmtPct(ls.avg_lc, true)} · Base = ${ls.base.toFixed(1)}% (mean of 4K, 8K)
`; html += `${v.label}
`; // Per-length bars html += `| ${t("longscore.col.ctx") || "Context"} | ${t("longscore.col.score") || "Score"} | ${t("longscore.col.lc") || "LC"} |
|---|---|---|
| ${k.toUpperCase()}${isShort ? ` (base)` : ""} | ${score.toFixed(1)}% | ${lc != null ? lsFmtPct(lc, true) : "—"} |
${t("longscore.no_ruler") || "⚠ No per-length data — LongScore not computable. Showing HELMET aggregate at 128K instead."}
`; } // HELMET breakdown if available if (res.helmet) { html += `${t("longscore.helmet_label") || "HELMET 7-task breakdown"} (at 128K)
| ${t("longscore.col.task") || "Task"} | ${t("longscore.col.score") || "Score"} |
|---|---|
| Overall | ${res.helmet.overall.toFixed(1)} |
| ${escapeHtml(task)} | ${score != null ? score.toFixed(1) : "—"} |
${t("longscore.source_note") || "Data source"}: ${escapeHtml(res.source)} · LongScore metric
`; html += `