Spaces:
Running
v0.8.5+ spec-decode: open-mirror fallback for gated models (no token needed)
Browse files
User reported that the v0.8.5-fix examples (Qwen) avoided the 401 problem
but the underlying limitation remained — anyone pasting a Llama /
Mistral / Gemma id still got "gated, no auth" with no recourse.
Web research confirmed HF's official position: their own transformers.js
docs explicitly state browser-side tokens are NOT supported for
gated/private models ("possibility of leaking access tokens"). So
adding a token field would violate HF's recommendation AND put users
at risk.
Implemented Option B (open-mirror suggester) instead, no token needed:
- `fetchTokenizerWithMirrorFallback(modelId)` tries 4 patterns when
the original returns 401:
1. unsloth/{name} ← bare unsloth redistribution
2. unsloth/Meta-{name} ← Meta-prefixed mirror (Llama)
3. unsloth/{name}-bnb-4bit ← quantized variant
4. unsloth/Meta-{name}-bnb-4bit ← Meta-prefixed quantized
First success wins. Pattern coverage verified empirically:
Llama-3.1-8B → pattern 1 (unsloth/Llama-3.1-8B-Instruct = 200);
Llama-3.1-70B → pattern 2 (unsloth/Meta-Llama-3.1-70B-Instruct
= 200). 8 URLs probed via curl + browser fetch.
- Mirror tokenizers are typically byte-identical to the gated
original because quantization touches WEIGHTS, not the tokenizer
artifact (BPE vocab + merges). Caveat surfaced inline: unsloth
issue #880 documents occasional chat-template drift.
- UI gets a yellow "Open-mirror fallback" banner when either side
used a mirror, naming the original AND the resolved mirror id.
Users see exactly what was substituted and can click through to
verify chat-template if their use case demands exact match.
- Defaults updated to demonstrate the new path: "good" example is
now meta-llama/Llama-3.1-70B-Instruct vs meta-llama/Llama-3.1-8B-
Instruct — both gated, both auto-resolve to unsloth mirrors,
producing a "compatible_with_caveats" verdict + mirror banner.
- Bad example stays Qwen vs Phi-3.5 (open-weight cross-family
incompatibility, no fallback needed).
NOT implemented (Option A — HF token in browser): officially
discouraged by HuggingFace; would expose users to token theft via
any future XSS vector. Documented in the inline note.
Verified: direct logic test (3/3) confirms Llama-3.1-70B resolves
to unsloth/Meta-Llama-3.1-70B-Instruct, Qwen passes through with no
mirror, and a nonexistent id returns the underlying 401 (HF returns 401 for
non-existent too — can't be distinguished). 4-lang i18n updated for
the new mirror banner + reworded gated-note.
Refs:
- https://huggingface.co/docs/transformers.js/guides/private (HF
official: no browser tokens for gated models)
- https://github.com/unslothai/unsloth/issues/880 (chat-template
drift in some unsloth releases — surfaced as caveat in the UI)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- js/i18n.js +24 -8
- js/main.js +34 -6
- js/spec_decode_compat.js +84 -7
|
@@ -648,9 +648,13 @@ export const TRANSLATIONS = {
|
|
| 648 |
"speculative.target_label_short": "target",
|
| 649 |
"speculative.draft_label_short": "draft",
|
| 650 |
"speculative.check_btn": "🔍 Check compatibility",
|
| 651 |
-
"speculative.example_good_btn":"↳ Example:
|
| 652 |
"speculative.example_bad_btn": "↳ Example: cross-family (bad)",
|
| 653 |
-
"speculative.gated_note": "💡 <strong>Gated models</strong> (Llama, Mistral, Gemma)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 654 |
"speculative.status.fetching": "🔄 Fetching tokenizer.json from HF Hub for both models…",
|
| 655 |
"speculative.status.done": "✅ {verdict}",
|
| 656 |
"speculative.status.error": "❌ Error",
|
|
@@ -1802,9 +1806,13 @@ export const TRANSLATIONS = {
|
|
| 1802 |
"speculative.target_label_short": "target",
|
| 1803 |
"speculative.draft_label_short": "draft",
|
| 1804 |
"speculative.check_btn": "🔍 Verificar compatibilidad",
|
| 1805 |
-
"speculative.example_good_btn":"↳ Ejemplo:
|
| 1806 |
"speculative.example_bad_btn": "↳ Ejemplo: cross-family (malo)",
|
| 1807 |
-
"speculative.gated_note": "💡 <strong>Modelos gated</strong> (Llama, Mistral, Gemma)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1808 |
"speculative.status.fetching": "🔄 Haciendo fetch de tokenizer.json desde HF Hub para ambos modelos…",
|
| 1809 |
"speculative.status.done": "✅ {verdict}",
|
| 1810 |
"speculative.status.error": "❌ Error",
|
|
@@ -2820,9 +2828,13 @@ export const TRANSLATIONS = {
|
|
| 2820 |
"speculative.target_label_short": "target",
|
| 2821 |
"speculative.draft_label_short": "draft",
|
| 2822 |
"speculative.check_btn": "🔍 Vérifier compatibilité",
|
| 2823 |
-
"speculative.example_good_btn":"↳ Exemple :
|
| 2824 |
"speculative.example_bad_btn": "↳ Exemple : cross-family (mauvais)",
|
| 2825 |
-
"speculative.gated_note": "💡 <strong>Modèles gated</strong> (Llama, Mistral, Gemma)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2826 |
"speculative.status.fetching": "🔄 Récupération de tokenizer.json depuis HF Hub pour les deux modèles…",
|
| 2827 |
"speculative.status.done": "✅ {verdict}",
|
| 2828 |
"speculative.status.error": "❌ Erreur",
|
|
@@ -3838,9 +3850,13 @@ export const TRANSLATIONS = {
|
|
| 3838 |
"speculative.target_label_short": "target",
|
| 3839 |
"speculative.draft_label_short": "draft",
|
| 3840 |
"speculative.check_btn": "🔍 检查兼容性",
|
| 3841 |
-
"speculative.example_good_btn":"↳ 示例:
|
| 3842 |
"speculative.example_bad_btn": "↳ 示例:跨 family(坏)",
|
| 3843 |
-
"speculative.gated_note": "💡 <strong>受限模型</strong>(Llama、Mistral、Gemma)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3844 |
"speculative.status.fetching": "🔄 从 HF Hub 获取两个模型的 tokenizer.json…",
|
| 3845 |
"speculative.status.done": "✅ {verdict}",
|
| 3846 |
"speculative.status.error": "❌ 错误",
|
|
|
|
| 648 |
"speculative.target_label_short": "target",
|
| 649 |
"speculative.draft_label_short": "draft",
|
| 650 |
"speculative.check_btn": "🔍 Check compatibility",
|
| 651 |
+
"speculative.example_good_btn":"↳ Example: Llama-3.1 8B/70B (gated → mirror)",
|
| 652 |
"speculative.example_bad_btn": "↳ Example: cross-family (bad)",
|
| 653 |
+
"speculative.gated_note": "💡 <strong>Gated models</strong> (Llama, Mistral, Gemma) trigger an automatic open-mirror fallback (unsloth/...). HF officially discourages browser-side tokens, so the tool can't auth — but mirror tokenizers are typically byte-identical because quantization touches weights, not the tokenizer artifact.",
|
| 654 |
+
"speculative.mirror.heading": "Open-mirror fallback",
|
| 655 |
+
"speculative.mirror.target_used": "Target <code>{original}</code> was gated; used mirror <code>{mirror}</code>.",
|
| 656 |
+
"speculative.mirror.draft_used": "Draft <code>{original}</code> was gated; used mirror <code>{mirror}</code>.",
|
| 657 |
+
"speculative.mirror.warn": "Mirror tokenizers (e.g. unsloth/) are usually byte-identical to the gated original because quantization touches weights, not tokens. Verify chat-template if exact match is required (unsloth #880 documents occasional drift).",
|
| 658 |
"speculative.status.fetching": "🔄 Fetching tokenizer.json from HF Hub for both models…",
|
| 659 |
"speculative.status.done": "✅ {verdict}",
|
| 660 |
"speculative.status.error": "❌ Error",
|
|
|
|
| 1806 |
"speculative.target_label_short": "target",
|
| 1807 |
"speculative.draft_label_short": "draft",
|
| 1808 |
"speculative.check_btn": "🔍 Verificar compatibilidad",
|
| 1809 |
+
"speculative.example_good_btn":"↳ Ejemplo: Llama-3.1 8B/70B (gated → mirror)",
|
| 1810 |
"speculative.example_bad_btn": "↳ Ejemplo: cross-family (malo)",
|
| 1811 |
+
"speculative.gated_note": "💡 <strong>Modelos gated</strong> (Llama, Mistral, Gemma) disparan un fallback automático a mirror open (unsloth/...). HF desaconseja oficialmente tokens en browser, así que la tool no puede autenticar — pero los tokenizers de mirrors son típicamente byte-idénticos porque la cuantización toca weights, no el artefacto del tokenizer.",
|
| 1812 |
+
"speculative.mirror.heading": "Fallback a open-mirror",
|
| 1813 |
+
"speculative.mirror.target_used": "Target <code>{original}</code> estaba gated; se usó mirror <code>{mirror}</code>.",
|
| 1814 |
+
"speculative.mirror.draft_used": "Draft <code>{original}</code> estaba gated; se usó mirror <code>{mirror}</code>.",
|
| 1815 |
+
"speculative.mirror.warn": "Los tokenizers de mirror (ej. unsloth/) suelen ser byte-idénticos al original gated porque la cuantización toca weights, no tokens. Verifica chat-template si necesitas match exacto (unsloth #880 documenta drift ocasional).",
|
| 1816 |
"speculative.status.fetching": "🔄 Haciendo fetch de tokenizer.json desde HF Hub para ambos modelos…",
|
| 1817 |
"speculative.status.done": "✅ {verdict}",
|
| 1818 |
"speculative.status.error": "❌ Error",
|
|
|
|
| 2828 |
"speculative.target_label_short": "target",
|
| 2829 |
"speculative.draft_label_short": "draft",
|
| 2830 |
"speculative.check_btn": "🔍 Vérifier compatibilité",
|
| 2831 |
+
"speculative.example_good_btn":"↳ Exemple : Llama-3.1 8B/70B (gated → mirror)",
|
| 2832 |
"speculative.example_bad_btn": "↳ Exemple : cross-family (mauvais)",
|
| 2833 |
+
"speculative.gated_note": "💡 <strong>Modèles gated</strong> (Llama, Mistral, Gemma) déclenchent un fallback automatique vers un open-mirror (unsloth/...). HF déconseille officiellement les tokens côté navigateur, donc l'outil ne peut pas auth — mais les tokenizers des mirrors sont typiquement byte-identiques car la quantification touche les poids, pas l'artefact du tokenizer.",
|
| 2834 |
+
"speculative.mirror.heading": "Fallback open-mirror",
|
| 2835 |
+
"speculative.mirror.target_used": "Target <code>{original}</code> était gated ; utilisation du mirror <code>{mirror}</code>.",
|
| 2836 |
+
"speculative.mirror.draft_used": "Draft <code>{original}</code> était gated ; utilisation du mirror <code>{mirror}</code>.",
|
| 2837 |
+
"speculative.mirror.warn": "Les tokenizers mirror (ex. unsloth/) sont habituellement byte-identiques au gated original car la quantification touche les poids, pas les tokens. Vérifiez le chat-template si un match exact est requis (unsloth #880 documente une dérive occasionnelle).",
|
| 2838 |
"speculative.status.fetching": "🔄 Récupération de tokenizer.json depuis HF Hub pour les deux modèles…",
|
| 2839 |
"speculative.status.done": "✅ {verdict}",
|
| 2840 |
"speculative.status.error": "❌ Erreur",
|
|
|
|
| 3850 |
"speculative.target_label_short": "target",
|
| 3851 |
"speculative.draft_label_short": "draft",
|
| 3852 |
"speculative.check_btn": "🔍 检查兼容性",
|
| 3853 |
+
"speculative.example_good_btn":"↳ 示例:Llama-3.1 8B/70B(受限 → mirror)",
|
| 3854 |
"speculative.example_bad_btn": "↳ 示例:跨 family(坏)",
|
| 3855 |
+
"speculative.gated_note": "💡 <strong>受限模型</strong>(Llama、Mistral、Gemma)会触发自动 open-mirror 回退(unsloth/...)。HF 官方不推荐浏览器端 token,所以工具无法 auth——但 mirror 的 tokenizer 通常字节级等同,因为量化只影响权重,不影响 tokenizer 工件。",
|
| 3856 |
+
"speculative.mirror.heading": "Open-mirror 回退",
|
| 3857 |
+
"speculative.mirror.target_used": "Target <code>{original}</code> 受限;使用 mirror <code>{mirror}</code>。",
|
| 3858 |
+
"speculative.mirror.draft_used": "Draft <code>{original}</code> 受限;使用 mirror <code>{mirror}</code>。",
|
| 3859 |
+
"speculative.mirror.warn": "Mirror tokenizer(例如 unsloth/)通常与受限原版字节级等同,因为量化只影响权重而非 token。如需精确匹配,请验证 chat-template(unsloth #880 记录了偶发的漂移)。",
|
| 3860 |
"speculative.status.fetching": "🔄 从 HF Hub 获取两个模型的 tokenizer.json…",
|
| 3861 |
"speculative.status.done": "✅ {verdict}",
|
| 3862 |
"speculative.status.error": "❌ 错误",
|
|
@@ -3971,6 +3971,31 @@ function renderSpecResult(result) {
|
|
| 3971 |
|
| 3972 |
const p = result.params;
|
| 3973 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3974 |
// Section 1 — vocab summary
|
| 3975 |
const typeBadge = (label, val, bg) =>
|
| 3976 |
`<span class="badge" style="background:${bg};">${label}: <code>${val ?? "—"}</code></span>`;
|
|
@@ -4045,6 +4070,7 @@ function renderSpecResult(result) {
|
|
| 4045 |
|
| 4046 |
return `<div class="arena-result">
|
| 4047 |
<p style="font-size:1.1em;">${verdictBadge}</p>
|
|
|
|
| 4048 |
<p>${typeRow}</p>
|
| 4049 |
<p>${sizeRow}</p>
|
| 4050 |
<p>${sampleRow}</p>
|
|
@@ -4072,16 +4098,18 @@ async function runSpecCheck() {
|
|
| 4072 |
}
|
| 4073 |
|
| 4074 |
$("spec-check-btn")?.addEventListener("click", runSpecCheck);
|
| 4075 |
-
// Examples
|
| 4076 |
-
//
|
| 4077 |
-
//
|
| 4078 |
-
//
|
| 4079 |
$("spec-example-good-btn")?.addEventListener("click", () => {
|
| 4080 |
-
|
| 4081 |
-
$("spec-
|
|
|
|
| 4082 |
runSpecCheck();
|
| 4083 |
});
|
| 4084 |
$("spec-example-bad-btn")?.addEventListener("click", () => {
|
|
|
|
| 4085 |
$("spec-target-id").value = "Qwen/Qwen2.5-7B-Instruct";
|
| 4086 |
$("spec-draft-id").value = "microsoft/Phi-3.5-mini-instruct";
|
| 4087 |
runSpecCheck();
|
|
|
|
| 3971 |
|
| 3972 |
const p = result.params;
|
| 3973 |
|
| 3974 |
+
// Mirror banner — when a gated model was fetched via an open mirror.
|
| 3975 |
+
let mirrorBanner = "";
|
| 3976 |
+
if (p.target_via_mirror || p.draft_via_mirror) {
|
| 3977 |
+
const lines = [];
|
| 3978 |
+
if (p.target_via_mirror) {
|
| 3979 |
+
lines.push(tFmt("speculative.mirror.target_used", {
|
| 3980 |
+
original: escapeHtml(p.targetId),
|
| 3981 |
+
mirror: escapeHtml(p.target_via_mirror),
|
| 3982 |
+
}) || `Target was gated; used mirror <code>${escapeHtml(p.target_via_mirror)}</code>.`);
|
| 3983 |
+
}
|
| 3984 |
+
if (p.draft_via_mirror) {
|
| 3985 |
+
lines.push(tFmt("speculative.mirror.draft_used", {
|
| 3986 |
+
original: escapeHtml(p.draftId),
|
| 3987 |
+
mirror: escapeHtml(p.draft_via_mirror),
|
| 3988 |
+
}) || `Draft was gated; used mirror <code>${escapeHtml(p.draft_via_mirror)}</code>.`);
|
| 3989 |
+
}
|
| 3990 |
+
mirrorBanner = `
|
| 3991 |
+
<div style="margin-bottom:0.75em;padding:0.6em;background:#332b00;border-left:3px solid #d29922;border-radius:4px;font-size:0.92em;">
|
| 3992 |
+
<strong>ℹ ${t("speculative.mirror.heading") || "Open-mirror fallback"}</strong>
|
| 3993 |
+
${lines.map(l => `<br>${l}`).join("")}
|
| 3994 |
+
<br><span class="subtle" style="font-size:0.85em;">${t("speculative.mirror.warn") || "Mirror tokenizers (e.g. unsloth/) are usually byte-identical to the gated original because quantization touches weights, not tokens. Verify chat-template if exact match is required."}</span>
|
| 3995 |
+
</div>
|
| 3996 |
+
`;
|
| 3997 |
+
}
|
| 3998 |
+
|
| 3999 |
// Section 1 — vocab summary
|
| 4000 |
const typeBadge = (label, val, bg) =>
|
| 4001 |
`<span class="badge" style="background:${bg};">${label}: <code>${val ?? "—"}</code></span>`;
|
|
|
|
| 4070 |
|
| 4071 |
return `<div class="arena-result">
|
| 4072 |
<p style="font-size:1.1em;">${verdictBadge}</p>
|
| 4073 |
+
${mirrorBanner}
|
| 4074 |
<p>${typeRow}</p>
|
| 4075 |
<p>${sizeRow}</p>
|
| 4076 |
<p>${sampleRow}</p>
|
|
|
|
| 4098 |
}
|
| 4099 |
|
| 4100 |
$("spec-check-btn")?.addEventListener("click", runSpecCheck);
|
| 4101 |
+
// Examples mix gated + open: gated ids (Llama) trigger the open-mirror
|
| 4102 |
+
// fallback (unsloth/...) so the user sees both the demo result AND the
|
| 4103 |
+
// mirror-resolution mechanism. Pure open-weight pairs (Qwen + Phi)
|
| 4104 |
+
// stay as the "no fallback needed" path for the second example.
|
| 4105 |
$("spec-example-good-btn")?.addEventListener("click", () => {
|
| 4106 |
+
// Gated → triggers unsloth mirror fallback for both sides.
|
| 4107 |
+
$("spec-target-id").value = "meta-llama/Llama-3.1-70B-Instruct";
|
| 4108 |
+
$("spec-draft-id").value = "meta-llama/Llama-3.1-8B-Instruct";
|
| 4109 |
runSpecCheck();
|
| 4110 |
});
|
| 4111 |
$("spec-example-bad-btn")?.addEventListener("click", () => {
|
| 4112 |
+
// Open-weight cross-family → no fallback, plain incompatibility demo.
|
| 4113 |
$("spec-target-id").value = "Qwen/Qwen2.5-7B-Instruct";
|
| 4114 |
$("spec-draft-id").value = "microsoft/Phi-3.5-mini-instruct";
|
| 4115 |
runSpecCheck();
|
|
@@ -84,6 +84,77 @@ export async function fetchConfig(modelId) {
|
|
| 84 |
return await fetchHfJson(modelId, "config.json");
|
| 85 |
}
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
// =============================================================================
|
| 88 |
// Vocab extraction + comparison
|
| 89 |
// =============================================================================
|
|
@@ -312,20 +383,24 @@ export async function checkCompatibility(targetId, draftId) {
|
|
| 312 |
return { code: "identical_models", params: { targetId, draftId }, errors: [] };
|
| 313 |
}
|
| 314 |
|
| 315 |
-
const [tTok, dTok
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
fetchConfig(targetId),
|
| 319 |
-
fetchConfig(draftId),
|
| 320 |
]);
|
| 321 |
|
| 322 |
const errors = [];
|
| 323 |
-
if (!tTok.ok) errors.push({ side: "target", error: tTok.error, status: tTok.status });
|
| 324 |
-
if (!dTok.ok) errors.push({ side: "draft", error: dTok.error, status: dTok.status });
|
| 325 |
if (!tTok.ok || !dTok.ok) {
|
| 326 |
return { code: "fetch_failed", params: { targetId, draftId }, errors };
|
| 327 |
}
|
| 328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
const cmp = compareVocabs(tTok.data, dTok.data);
|
| 330 |
|
| 331 |
// Param ratio + speedup estimate
|
|
@@ -366,6 +441,8 @@ export async function checkCompatibility(targetId, draftId) {
|
|
| 366 |
speedup_high: speedup?.high ?? null,
|
| 367 |
target_source: tTok.source,
|
| 368 |
draft_source: dTok.source,
|
|
|
|
|
|
|
| 369 |
},
|
| 370 |
errors,
|
| 371 |
};
|
|
|
|
| 84 |
return await fetchHfJson(modelId, "config.json");
|
| 85 |
}
|
| 86 |
|
| 87 |
+
// =============================================================================
|
| 88 |
+
// Open-mirror fallback for gated models
|
| 89 |
+
// =============================================================================
|
| 90 |
+
//
|
| 91 |
+
// HF officially DISCOURAGES browser-side tokens (their own transformers.js
|
| 92 |
+
// docs: "we only support accessing private/gated models from server-side
|
| 93 |
+
// environments"). For client-only tools, the practical workaround for
|
| 94 |
+
// gated families (Llama, Mistral, Gemma) is to fall back to public mirrors
|
| 95 |
+
// that re-host the same tokenizer:
|
| 96 |
+
// - unsloth/{name} ← unsloth's open redistributions
|
| 97 |
+
// - unsloth/Meta-{name} ← Meta-prefixed Llama mirrors
|
| 98 |
+
// - unsloth/{name}-bnb-4bit ← quantized variants (tokenizer preserved)
|
| 99 |
+
//
|
| 100 |
+
// Tokenizer (BPE merges + vocab) is text — quantization touches weights,
|
| 101 |
+
// not the tokenizer artifact, so the mirror's tokenizer.json is usually
|
| 102 |
+
// byte-identical to the gated original. Caveat: some unsloth releases
|
| 103 |
+
// patch chat-template tokens (issue #880); we surface that in the UI
|
| 104 |
+
// with a "verify chat-template if exact match required" note.
|
| 105 |
+
|
| 106 |
+
const MIRROR_PATTERN_BUILDERS = [
|
| 107 |
+
(id) => {
|
| 108 |
+
const last = id.split("/").slice(-1)[0];
|
| 109 |
+
return `unsloth/${last}`;
|
| 110 |
+
},
|
| 111 |
+
(id) => {
|
| 112 |
+
const last = id.split("/").slice(-1)[0];
|
| 113 |
+
return last.startsWith("Meta-") ? `unsloth/${last}` : `unsloth/Meta-${last}`;
|
| 114 |
+
},
|
| 115 |
+
(id) => {
|
| 116 |
+
const last = id.split("/").slice(-1)[0];
|
| 117 |
+
return `unsloth/${last}-bnb-4bit`;
|
| 118 |
+
},
|
| 119 |
+
(id) => {
|
| 120 |
+
const last = id.split("/").slice(-1)[0];
|
| 121 |
+
return last.startsWith("Meta-") ? `unsloth/${last}-bnb-4bit` : `unsloth/Meta-${last}-bnb-4bit`;
|
| 122 |
+
},
|
| 123 |
+
];
|
| 124 |
+
|
| 125 |
+
export async function fetchTokenizerWithMirrorFallback(modelId) {
|
| 126 |
+
const original = await fetchTokenizer(modelId);
|
| 127 |
+
if (original.ok) return { ...original, viaMirror: null };
|
| 128 |
+
// Only attempt mirror fallback when the failure is gated/private.
|
| 129 |
+
// 404 / network / parse errors aren't fixable by trying a mirror.
|
| 130 |
+
if (original.error !== "gated_or_private") {
|
| 131 |
+
return { ...original, viaMirror: null };
|
| 132 |
+
}
|
| 133 |
+
const tried = new Set([modelId]);
|
| 134 |
+
for (const build of MIRROR_PATTERN_BUILDERS) {
|
| 135 |
+
let candidate;
|
| 136 |
+
try { candidate = build(modelId); }
|
| 137 |
+
catch { continue; }
|
| 138 |
+
if (!candidate || tried.has(candidate)) continue;
|
| 139 |
+
tried.add(candidate);
|
| 140 |
+
const r = await fetchTokenizer(candidate);
|
| 141 |
+
if (r.ok) return { ...r, viaMirror: candidate, mirrorOf: modelId };
|
| 142 |
+
}
|
| 143 |
+
return { ...original, viaMirror: null, triedMirrors: [...tried].slice(1) };
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
export async function fetchConfigWithMirrorFallback(modelId, mirrorId) {
|
| 147 |
+
// Prefer the mirror's config when one was used (param counts come from
|
| 148 |
+
// there), but also try the ORIGINAL config — some unsloth mirrors omit
|
| 149 |
+
// it. Falls back gracefully.
|
| 150 |
+
if (mirrorId) {
|
| 151 |
+
const m = await fetchConfig(mirrorId);
|
| 152 |
+
if (m.ok) return { ...m, viaMirror: mirrorId };
|
| 153 |
+
}
|
| 154 |
+
const o = await fetchConfig(modelId);
|
| 155 |
+
return { ...o, viaMirror: null };
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
// =============================================================================
|
| 159 |
// Vocab extraction + comparison
|
| 160 |
// =============================================================================
|
|
|
|
| 383 |
return { code: "identical_models", params: { targetId, draftId }, errors: [] };
|
| 384 |
}
|
| 385 |
|
| 386 |
+
const [tTok, dTok] = await Promise.all([
|
| 387 |
+
fetchTokenizerWithMirrorFallback(targetId),
|
| 388 |
+
fetchTokenizerWithMirrorFallback(draftId),
|
|
|
|
|
|
|
| 389 |
]);
|
| 390 |
|
| 391 |
const errors = [];
|
| 392 |
+
if (!tTok.ok) errors.push({ side: "target", error: tTok.error, status: tTok.status, triedMirrors: tTok.triedMirrors });
|
| 393 |
+
if (!dTok.ok) errors.push({ side: "draft", error: dTok.error, status: dTok.status, triedMirrors: dTok.triedMirrors });
|
| 394 |
if (!tTok.ok || !dTok.ok) {
|
| 395 |
return { code: "fetch_failed", params: { targetId, draftId }, errors };
|
| 396 |
}
|
| 397 |
|
| 398 |
+
// Fetch configs — prefer mirror when one was used.
|
| 399 |
+
const [tCfg, dCfg] = await Promise.all([
|
| 400 |
+
fetchConfigWithMirrorFallback(targetId, tTok.viaMirror),
|
| 401 |
+
fetchConfigWithMirrorFallback(draftId, dTok.viaMirror),
|
| 402 |
+
]);
|
| 403 |
+
|
| 404 |
const cmp = compareVocabs(tTok.data, dTok.data);
|
| 405 |
|
| 406 |
// Param ratio + speedup estimate
|
|
|
|
| 441 |
speedup_high: speedup?.high ?? null,
|
| 442 |
target_source: tTok.source,
|
| 443 |
draft_source: dTok.source,
|
| 444 |
+
target_via_mirror: tTok.viaMirror || null,
|
| 445 |
+
draft_via_mirror: dTok.viaMirror || null,
|
| 446 |
},
|
| 447 |
errors,
|
| 448 |
};
|