Spaces:

karlexmarin
/

taf-agent

Running

karlexmarin Claude Opus 4.7 (1M context) committed 16 days ago

Commit

2288c3d

1 Parent(s): a6f36b3

v0.8.5+ spec-decode: fetch via /resolve/main/ (LFS pointer fix)

Live HF Space test surfaced a silent fail: clicking the Llama
example showed a "❌ tokenizer types differ" verdict with vocab_size=0
on the draft side, even though the mirror banner appeared. Cause:
Llama-3.x tokenizer.json is ~17 MB and stored via Git-LFS. The
`/raw/main/` endpoint returns the LFS POINTER text (not the JSON):

version https://git-lfs.github.com/spec/v1
oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
size 17209920

That pointer text is not valid JSON (JSON.parse rejects it), so my
extractVocab() returned an empty map, and the comparator concluded
"types differ / vocab_size=0".

Switched to `/resolve/main/` which redirects through HF's CDN for
LFS files (returning the actual content) and serves small files
unchanged. Both endpoints are CORS-enabled. Verified end-to-end:

meta-llama/Llama-3.1-70B-Instruct (gated)
→ unsloth/Meta-Llama-3.1-70B-Instruct (via mirror fallback)
→ vocab BPE, 128,000 tokens
meta-llama/Llama-3.1-8B-Instruct (gated)
→ unsloth/Llama-3.1-8B-Instruct (via mirror fallback)
→ vocab BPE, 128,000 tokens
Sampled match: 128,000 / 128,000 (100%)
Verdict: COMPATIBLE
Elapsed: 60ms

Bumped FETCH_TIMEOUT_MS from 8s to 15s — first-cold-cache fetch of a
17 MB tokenizer can hit 3-8s, the old ceiling was too tight on slower
links. i18n timeout strings updated in 4 langs to reflect 15s.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

js/i18n.js +4 -4
js/spec_decode_compat.js +12 -2

js/i18n.js CHANGED Viewed

@@ -681,7 +681,7 @@ export const TRANSLATIONS = {
     "speculative.fetch_error.not_found":        "model id not found on HF Hub",
     "speculative.fetch_error.fetch_failed":     "fetch failed (HTTP error)",
     "speculative.fetch_error.parse_failed":     "JSON parse failed (file malformed)",
-    "speculative.fetch_error.timeout":          "timeout (>8s, large tokenizer or slow connection)",
     "speculative.fetch_error.network":          "network error",
     "speculative.fetch_error.hint":             "Check the model id spelling. For gated models you'll need to view the tokenizer file via your HF account — this tool can't auth.",
     "speculative.hint.missing_input":  "Enter both target and draft model ids, then Check.",
@@ -1839,7 +1839,7 @@ export const TRANSLATIONS = {
     "speculative.fetch_error.not_found":        "model id no encontrado en HF Hub",
     "speculative.fetch_error.fetch_failed":     "fetch falló (error HTTP)",
     "speculative.fetch_error.parse_failed":     "parse JSON falló (archivo malformado)",
-    "speculative.fetch_error.timeout":          "timeout (>8s, tokenizer grande o conexión lenta)",
     "speculative.fetch_error.network":          "error de red",
     "speculative.fetch_error.hint":             "Verifica el spelling del model id. Para modelos gated necesitas ver el tokenizer vía tu cuenta HF — esta tool no puede autenticar.",
     "speculative.hint.missing_input":  "Ingresa ambos model ids (target y draft), luego Verificar.",
@@ -2861,7 +2861,7 @@ export const TRANSLATIONS = {
     "speculative.fetch_error.not_found":        "model id non trouvé sur HF Hub",
     "speculative.fetch_error.fetch_failed":     "fetch échoué (erreur HTTP)",
     "speculative.fetch_error.parse_failed":     "parse JSON échoué (fichier malformé)",
-    "speculative.fetch_error.timeout":          "timeout (>8s, gros tokenizer ou connexion lente)",
     "speculative.fetch_error.network":          "erreur réseau",
     "speculative.fetch_error.hint":             "Vérifiez l'orthographe du model id. Pour les modèles gated, consultez le tokenizer via votre compte HF — cet outil ne peut pas auth.",
     "speculative.hint.missing_input":  "Entrez les deux model ids (target et draft), puis Vérifier.",
@@ -3883,7 +3883,7 @@ export const TRANSLATIONS = {
     "speculative.fetch_error.not_found":        "在 HF Hub 上找不到 model id",
     "speculative.fetch_error.fetch_failed":     "获取失败（HTTP 错误）",
     "speculative.fetch_error.parse_failed":     "JSON 解析失败（文件格式不正确）",
-    "speculative.fetch_error.timeout":          "超时（>8 秒，大 tokenizer 或慢速连接）",
     "speculative.fetch_error.network":          "网络错误",
     "speculative.fetch_error.hint":             "检查 model id 拼写。受限模型需要通过你的 HF 账户查看 tokenizer 文件——这个工具无法 auth。",
     "speculative.hint.missing_input":  "输入两个 model id（target 和 draft），然后检查。",

     "speculative.fetch_error.not_found":        "model id not found on HF Hub",
     "speculative.fetch_error.fetch_failed":     "fetch failed (HTTP error)",
     "speculative.fetch_error.parse_failed":     "JSON parse failed (file malformed)",
+    "speculative.fetch_error.timeout":          "timeout (>15s, large tokenizer or slow connection)",
     "speculative.fetch_error.network":          "network error",
     "speculative.fetch_error.hint":             "Check the model id spelling. For gated models you'll need to view the tokenizer file via your HF account — this tool can't auth.",
     "speculative.hint.missing_input":  "Enter both target and draft model ids, then Check.",
     "speculative.fetch_error.not_found":        "model id no encontrado en HF Hub",
     "speculative.fetch_error.fetch_failed":     "fetch falló (error HTTP)",
     "speculative.fetch_error.parse_failed":     "parse JSON falló (archivo malformado)",
+    "speculative.fetch_error.timeout":          "timeout (>15s, tokenizer grande o conexión lenta)",
     "speculative.fetch_error.network":          "error de red",
     "speculative.fetch_error.hint":             "Verifica el spelling del model id. Para modelos gated necesitas ver el tokenizer vía tu cuenta HF — esta tool no puede autenticar.",
     "speculative.hint.missing_input":  "Ingresa ambos model ids (target y draft), luego Verificar.",
     "speculative.fetch_error.not_found":        "model id non trouvé sur HF Hub",
     "speculative.fetch_error.fetch_failed":     "fetch échoué (erreur HTTP)",
     "speculative.fetch_error.parse_failed":     "parse JSON échoué (fichier malformé)",
+    "speculative.fetch_error.timeout":          "timeout (>15s, gros tokenizer ou connexion lente)",
     "speculative.fetch_error.network":          "erreur réseau",
     "speculative.fetch_error.hint":             "Vérifiez l'orthographe du model id. Pour les modèles gated, consultez le tokenizer via votre compte HF — cet outil ne peut pas auth.",
     "speculative.hint.missing_input":  "Entrez les deux model ids (target et draft), puis Vérifier.",
     "speculative.fetch_error.not_found":        "在 HF Hub 上找不到 model id",
     "speculative.fetch_error.fetch_failed":     "获取失败（HTTP 错误）",
     "speculative.fetch_error.parse_failed":     "JSON 解析失败（文件格式不正确）",
+    "speculative.fetch_error.timeout":          "超时（>15 秒，大 tokenizer 或慢速连接）",
     "speculative.fetch_error.network":          "网络错误",
     "speculative.fetch_error.hint":             "检查 model id 拼写。受限模型需要通过你的 HF 账户查看 tokenizer 文件——这个工具无法 auth。",
     "speculative.hint.missing_input":  "输入两个 model id（target 和 draft），然后检查。",

js/spec_decode_compat.js CHANGED Viewed

@@ -32,13 +32,23 @@
 // the UI doesn't hang on gated/private/missing models.
 const HF_BASE = "https://huggingface.co";
-const FETCH_TIMEOUT_MS = 8000;
 async function fetchHfJson(modelId, fileName) {
   if (typeof modelId !== "string" || !modelId.trim()) {
     return { ok: false, error: "missing_model_id" };
   }
-  const url = `${HF_BASE}/${encodeURI(modelId.trim())}/raw/main/${fileName}`;
   const controller = new AbortController();
   const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
   try {

 // the UI doesn't hang on gated/private/missing models.
 const HF_BASE = "https://huggingface.co";
+// 15s timeout — Llama-3.x tokenizer.json is ~17 MB via LFS-CDN and the
+// fetch can take 3-8s on first hit (cold cache). 8s was too tight.
+const FETCH_TIMEOUT_MS = 15000;
 async function fetchHfJson(modelId, fileName) {
   if (typeof modelId !== "string" || !modelId.trim()) {
     return { ok: false, error: "missing_model_id" };
   }
+  // Use `/resolve/main/` (NOT `/raw/main/`) so we get the actual content
+  // for LFS-tracked artifacts. Llama-3.x tokenizer.json is ~17 MB and
+  // stored via Git-LFS — `/raw/main/` returns the LFS POINTER text
+  // ("version https://git-lfs.github.com/spec/v1\noid sha256:..."),
+  // which JSON.parse rejects, leaving the linter with empty vocabs and
+  // a silent false-fail. `/resolve/main/` redirects through HF's CDN
+  // for LFS files and serves small files (config.json) unchanged. CORS
+  // is granted for both via Access-Control-Allow-Origin headers.
+  const url = `${HF_BASE}/${encodeURI(modelId.trim())}/resolve/main/${fileName}`;
   const controller = new AbortController();
   const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
   try {