Spaces:
Running
v0.8.5+ spec-decode: fetch via /resolve/main/ (LFS pointer fix)
Browse files
Live HF Space test surfaced a silent failure: clicking the Llama
example showed a "❌ tokenizer types differ" verdict with vocab_size=0
on the draft side, even though the mirror banner appeared. Cause:
Llama-3.x tokenizer.json is ~17 MB and stored via Git-LFS. The
`/raw/main/` endpoint returns the LFS POINTER text (not the JSON):
version https://git-lfs.github.com/spec/v1
oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
size 17209920
JSON.parse cannot parse that pointer text as JSON, so my
extractVocab() ended up with an empty map, and the comparator concluded
"types differ / vocab_size=0".
Switched to `/resolve/main/` which redirects through HF's CDN for
LFS files (returning the actual content) and serves small files
unchanged. Both endpoints are CORS-enabled. Verified end-to-end:
meta-llama/Llama-3.1-70B-Instruct (gated)
→ unsloth/Meta-Llama-3.1-70B-Instruct (via mirror fallback)
→ vocab BPE, 128,000 tokens
meta-llama/Llama-3.1-8B-Instruct (gated)
→ unsloth/Llama-3.1-8B-Instruct (via mirror fallback)
→ vocab BPE, 128,000 tokens
Sampled match: 128,000 / 128,000 (100%)
Verdict: COMPATIBLE
Elapsed: 60ms
Bumped FETCH_TIMEOUT_MS from 8s to 15s — the first (cold-cache) fetch of a
17 MB tokenizer can take 3-8s, so the old ceiling was too tight on slower
links. The i18n timeout strings were updated in all 4 languages to reflect 15s.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- js/i18n.js +4 -4
- js/spec_decode_compat.js +12 -2
|
@@ -681,7 +681,7 @@ export const TRANSLATIONS = {
|
|
| 681 |
"speculative.fetch_error.not_found": "model id not found on HF Hub",
|
| 682 |
"speculative.fetch_error.fetch_failed": "fetch failed (HTTP error)",
|
| 683 |
"speculative.fetch_error.parse_failed": "JSON parse failed (file malformed)",
|
| 684 |
-
"speculative.fetch_error.timeout": "timeout (>
|
| 685 |
"speculative.fetch_error.network": "network error",
|
| 686 |
"speculative.fetch_error.hint": "Check the model id spelling. For gated models you'll need to view the tokenizer file via your HF account — this tool can't auth.",
|
| 687 |
"speculative.hint.missing_input": "Enter both target and draft model ids, then Check.",
|
|
@@ -1839,7 +1839,7 @@ export const TRANSLATIONS = {
|
|
| 1839 |
"speculative.fetch_error.not_found": "model id no encontrado en HF Hub",
|
| 1840 |
"speculative.fetch_error.fetch_failed": "fetch falló (error HTTP)",
|
| 1841 |
"speculative.fetch_error.parse_failed": "parse JSON falló (archivo malformado)",
|
| 1842 |
-
"speculative.fetch_error.timeout": "timeout (>
|
| 1843 |
"speculative.fetch_error.network": "error de red",
|
| 1844 |
"speculative.fetch_error.hint": "Verifica el spelling del model id. Para modelos gated necesitas ver el tokenizer vía tu cuenta HF — esta tool no puede autenticar.",
|
| 1845 |
"speculative.hint.missing_input": "Ingresa ambos model ids (target y draft), luego Verificar.",
|
|
@@ -2861,7 +2861,7 @@ export const TRANSLATIONS = {
|
|
| 2861 |
"speculative.fetch_error.not_found": "model id non trouvé sur HF Hub",
|
| 2862 |
"speculative.fetch_error.fetch_failed": "fetch échoué (erreur HTTP)",
|
| 2863 |
"speculative.fetch_error.parse_failed": "parse JSON échoué (fichier malformé)",
|
| 2864 |
-
"speculative.fetch_error.timeout": "timeout (>
|
| 2865 |
"speculative.fetch_error.network": "erreur réseau",
|
| 2866 |
"speculative.fetch_error.hint": "Vérifiez l'orthographe du model id. Pour les modèles gated, consultez le tokenizer via votre compte HF — cet outil ne peut pas auth.",
|
| 2867 |
"speculative.hint.missing_input": "Entrez les deux model ids (target et draft), puis Vérifier.",
|
|
@@ -3883,7 +3883,7 @@ export const TRANSLATIONS = {
|
|
| 3883 |
"speculative.fetch_error.not_found": "在 HF Hub 上找不到 model id",
|
| 3884 |
"speculative.fetch_error.fetch_failed": "获取失败(HTTP 错误)",
|
| 3885 |
"speculative.fetch_error.parse_failed": "JSON 解析失败(文件格式不正确)",
|
| 3886 |
-
"speculative.fetch_error.timeout": "超时(>
|
| 3887 |
"speculative.fetch_error.network": "网络错误",
|
| 3888 |
"speculative.fetch_error.hint": "检查 model id 拼写。受限模型需要通过你的 HF 账户查看 tokenizer 文件——这个工具无法 auth。",
|
| 3889 |
"speculative.hint.missing_input": "输入两个 model id(target 和 draft),然后检查。",
|
|
|
|
| 681 |
"speculative.fetch_error.not_found": "model id not found on HF Hub",
|
| 682 |
"speculative.fetch_error.fetch_failed": "fetch failed (HTTP error)",
|
| 683 |
"speculative.fetch_error.parse_failed": "JSON parse failed (file malformed)",
|
| 684 |
+
"speculative.fetch_error.timeout": "timeout (>15s, large tokenizer or slow connection)",
|
| 685 |
"speculative.fetch_error.network": "network error",
|
| 686 |
"speculative.fetch_error.hint": "Check the model id spelling. For gated models you'll need to view the tokenizer file via your HF account — this tool can't auth.",
|
| 687 |
"speculative.hint.missing_input": "Enter both target and draft model ids, then Check.",
|
|
|
|
| 1839 |
"speculative.fetch_error.not_found": "model id no encontrado en HF Hub",
|
| 1840 |
"speculative.fetch_error.fetch_failed": "fetch falló (error HTTP)",
|
| 1841 |
"speculative.fetch_error.parse_failed": "parse JSON falló (archivo malformado)",
|
| 1842 |
+
"speculative.fetch_error.timeout": "timeout (>15s, tokenizer grande o conexión lenta)",
|
| 1843 |
"speculative.fetch_error.network": "error de red",
|
| 1844 |
"speculative.fetch_error.hint": "Verifica el spelling del model id. Para modelos gated necesitas ver el tokenizer vía tu cuenta HF — esta tool no puede autenticar.",
|
| 1845 |
"speculative.hint.missing_input": "Ingresa ambos model ids (target y draft), luego Verificar.",
|
|
|
|
| 2861 |
"speculative.fetch_error.not_found": "model id non trouvé sur HF Hub",
|
| 2862 |
"speculative.fetch_error.fetch_failed": "fetch échoué (erreur HTTP)",
|
| 2863 |
"speculative.fetch_error.parse_failed": "parse JSON échoué (fichier malformé)",
|
| 2864 |
+
"speculative.fetch_error.timeout": "timeout (>15s, gros tokenizer ou connexion lente)",
|
| 2865 |
"speculative.fetch_error.network": "erreur réseau",
|
| 2866 |
"speculative.fetch_error.hint": "Vérifiez l'orthographe du model id. Pour les modèles gated, consultez le tokenizer via votre compte HF — cet outil ne peut pas auth.",
|
| 2867 |
"speculative.hint.missing_input": "Entrez les deux model ids (target et draft), puis Vérifier.",
|
|
|
|
| 3883 |
"speculative.fetch_error.not_found": "在 HF Hub 上找不到 model id",
|
| 3884 |
"speculative.fetch_error.fetch_failed": "获取失败(HTTP 错误)",
|
| 3885 |
"speculative.fetch_error.parse_failed": "JSON 解析失败(文件格式不正确)",
|
| 3886 |
+
"speculative.fetch_error.timeout": "超时(>15 秒,大 tokenizer 或慢速连接)",
|
| 3887 |
"speculative.fetch_error.network": "网络错误",
|
| 3888 |
"speculative.fetch_error.hint": "检查 model id 拼写。受限模型需要通过你的 HF 账户查看 tokenizer 文件——这个工具无法 auth。",
|
| 3889 |
"speculative.hint.missing_input": "输入两个 model id(target 和 draft),然后检查。",
|
|
@@ -32,13 +32,23 @@
|
|
| 32 |
// the UI doesn't hang on gated/private/missing models.
|
| 33 |
|
| 34 |
const HF_BASE = "https://huggingface.co";
|
| 35 |
-
|
|
|
|
|
|
|
| 36 |
|
| 37 |
async function fetchHfJson(modelId, fileName) {
|
| 38 |
if (typeof modelId !== "string" || !modelId.trim()) {
|
| 39 |
return { ok: false, error: "missing_model_id" };
|
| 40 |
}
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
const controller = new AbortController();
|
| 43 |
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
|
| 44 |
try {
|
|
|
|
| 32 |
// the UI doesn't hang on gated/private/missing models.
|
| 33 |
|
| 34 |
const HF_BASE = "https://huggingface.co";
|
| 35 |
+
// 15s timeout — Llama-3.x tokenizer.json is ~17 MB via LFS-CDN and the
|
| 36 |
+
// fetch can take 3-8s on first hit (cold cache). 8s was too tight.
|
| 37 |
+
const FETCH_TIMEOUT_MS = 15000;
|
| 38 |
|
| 39 |
async function fetchHfJson(modelId, fileName) {
|
| 40 |
if (typeof modelId !== "string" || !modelId.trim()) {
|
| 41 |
return { ok: false, error: "missing_model_id" };
|
| 42 |
}
|
| 43 |
+
// Use `/resolve/main/` (NOT `/raw/main/`) so we get the actual content
|
| 44 |
+
// for LFS-tracked artifacts. Llama-3.x tokenizer.json is ~17 MB and
|
| 45 |
+
// stored via Git-LFS — `/raw/main/` returns the LFS POINTER text
|
| 46 |
+
// ("version https://git-lfs.github.com/spec/v1\noid sha256:..."),
|
| 47 |
+
// which JSON.parse rejects, leaving the linter with empty vocabs and
|
| 48 |
+
// a silent false-fail. `/resolve/main/` redirects through HF's CDN
|
| 49 |
+
// for LFS files and serves small files (config.json) unchanged. CORS
|
| 50 |
+
// is granted for both via Access-Control-Allow-Origin headers.
|
| 51 |
+
const url = `${HF_BASE}/${encodeURI(modelId.trim())}/resolve/main/${fileName}`;
|
| 52 |
const controller = new AbortController();
|
| 53 |
const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
|
| 54 |
try {
|