karlexmarin Claude Opus 4.7 (1M context) committed on
Commit
2288c3d
·
1 Parent(s): a6f36b3

v0.8.5+ spec-decode: fetch via /resolve/main/ (LFS pointer fix)

Browse files

Live HF Space test surfaced a silent fail: clicking the Llama
example showed a "❌ tokenizer types differ" verdict with vocab_size=0
on the draft side, even though the mirror banner appeared. Cause:
Llama-3.x tokenizer.json is ~17 MB and stored via Git-LFS. The
`/raw/main/` endpoint returns the LFS POINTER text (not the JSON):

version https://git-lfs.github.com/spec/v1
oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
size 17209920

JSON.parse cannot turn that pointer text into the tokenizer JSON, so my
extractVocab() ended up with an empty map, and the comparator concluded
"types differ / vocab_size=0".

Switched to `/resolve/main/` which redirects through HF's CDN for
LFS files (returning the actual content) and serves small files
unchanged. Both endpoints are CORS-enabled. Verified end-to-end:

meta-llama/Llama-3.1-70B-Instruct (gated)
→ unsloth/Meta-Llama-3.1-70B-Instruct (via mirror fallback)
→ vocab BPE, 128,000 tokens
meta-llama/Llama-3.1-8B-Instruct (gated)
→ unsloth/Llama-3.1-8B-Instruct (via mirror fallback)
→ vocab BPE, 128,000 tokens
Sampled match: 128,000 / 128,000 (100%)
Verdict: COMPATIBLE
Elapsed: 60ms

Bumped FETCH_TIMEOUT_MS from 8s to 15s — the first (cold-cache) fetch of a
17 MB tokenizer can take 3-8s, so the old ceiling was too tight on slower
links. i18n timeout strings updated in 4 langs to reflect 15s.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show
  1. js/i18n.js +4 -4
  2. js/spec_decode_compat.js +12 -2
js/i18n.js CHANGED
@@ -681,7 +681,7 @@ export const TRANSLATIONS = {
681
  "speculative.fetch_error.not_found": "model id not found on HF Hub",
682
  "speculative.fetch_error.fetch_failed": "fetch failed (HTTP error)",
683
  "speculative.fetch_error.parse_failed": "JSON parse failed (file malformed)",
684
- "speculative.fetch_error.timeout": "timeout (>8s, large tokenizer or slow connection)",
685
  "speculative.fetch_error.network": "network error",
686
  "speculative.fetch_error.hint": "Check the model id spelling. For gated models you'll need to view the tokenizer file via your HF account — this tool can't auth.",
687
  "speculative.hint.missing_input": "Enter both target and draft model ids, then Check.",
@@ -1839,7 +1839,7 @@ export const TRANSLATIONS = {
1839
  "speculative.fetch_error.not_found": "model id no encontrado en HF Hub",
1840
  "speculative.fetch_error.fetch_failed": "fetch falló (error HTTP)",
1841
  "speculative.fetch_error.parse_failed": "parse JSON falló (archivo malformado)",
1842
- "speculative.fetch_error.timeout": "timeout (>8s, tokenizer grande o conexión lenta)",
1843
  "speculative.fetch_error.network": "error de red",
1844
  "speculative.fetch_error.hint": "Verifica el spelling del model id. Para modelos gated necesitas ver el tokenizer vía tu cuenta HF — esta tool no puede autenticar.",
1845
  "speculative.hint.missing_input": "Ingresa ambos model ids (target y draft), luego Verificar.",
@@ -2861,7 +2861,7 @@ export const TRANSLATIONS = {
2861
  "speculative.fetch_error.not_found": "model id non trouvé sur HF Hub",
2862
  "speculative.fetch_error.fetch_failed": "fetch échoué (erreur HTTP)",
2863
  "speculative.fetch_error.parse_failed": "parse JSON échoué (fichier malformé)",
2864
- "speculative.fetch_error.timeout": "timeout (>8s, gros tokenizer ou connexion lente)",
2865
  "speculative.fetch_error.network": "erreur réseau",
2866
  "speculative.fetch_error.hint": "Vérifiez l'orthographe du model id. Pour les modèles gated, consultez le tokenizer via votre compte HF — cet outil ne peut pas auth.",
2867
  "speculative.hint.missing_input": "Entrez les deux model ids (target et draft), puis Vérifier.",
@@ -3883,7 +3883,7 @@ export const TRANSLATIONS = {
3883
  "speculative.fetch_error.not_found": "在 HF Hub 上找不到 model id",
3884
  "speculative.fetch_error.fetch_failed": "获取失败(HTTP 错误)",
3885
  "speculative.fetch_error.parse_failed": "JSON 解析失败(文件格式不正确)",
3886
- "speculative.fetch_error.timeout": "超时(>8 秒,大 tokenizer 或慢速连接)",
3887
  "speculative.fetch_error.network": "网络错误",
3888
  "speculative.fetch_error.hint": "检查 model id 拼写。受限模型需要通过你的 HF 账户查看 tokenizer 文件——这个工具无法 auth。",
3889
  "speculative.hint.missing_input": "输入两个 model id(target 和 draft),然后检查。",
 
681
  "speculative.fetch_error.not_found": "model id not found on HF Hub",
682
  "speculative.fetch_error.fetch_failed": "fetch failed (HTTP error)",
683
  "speculative.fetch_error.parse_failed": "JSON parse failed (file malformed)",
684
+ "speculative.fetch_error.timeout": "timeout (>15s, large tokenizer or slow connection)",
685
  "speculative.fetch_error.network": "network error",
686
  "speculative.fetch_error.hint": "Check the model id spelling. For gated models you'll need to view the tokenizer file via your HF account — this tool can't auth.",
687
  "speculative.hint.missing_input": "Enter both target and draft model ids, then Check.",
 
1839
  "speculative.fetch_error.not_found": "model id no encontrado en HF Hub",
1840
  "speculative.fetch_error.fetch_failed": "fetch falló (error HTTP)",
1841
  "speculative.fetch_error.parse_failed": "parse JSON falló (archivo malformado)",
1842
+ "speculative.fetch_error.timeout": "timeout (>15s, tokenizer grande o conexión lenta)",
1843
  "speculative.fetch_error.network": "error de red",
1844
  "speculative.fetch_error.hint": "Verifica el spelling del model id. Para modelos gated necesitas ver el tokenizer vía tu cuenta HF — esta tool no puede autenticar.",
1845
  "speculative.hint.missing_input": "Ingresa ambos model ids (target y draft), luego Verificar.",
 
2861
  "speculative.fetch_error.not_found": "model id non trouvé sur HF Hub",
2862
  "speculative.fetch_error.fetch_failed": "fetch échoué (erreur HTTP)",
2863
  "speculative.fetch_error.parse_failed": "parse JSON échoué (fichier malformé)",
2864
+ "speculative.fetch_error.timeout": "timeout (>15s, gros tokenizer ou connexion lente)",
2865
  "speculative.fetch_error.network": "erreur réseau",
2866
  "speculative.fetch_error.hint": "Vérifiez l'orthographe du model id. Pour les modèles gated, consultez le tokenizer via votre compte HF — cet outil ne peut pas auth.",
2867
  "speculative.hint.missing_input": "Entrez les deux model ids (target et draft), puis Vérifier.",
 
3883
  "speculative.fetch_error.not_found": "在 HF Hub 上找不到 model id",
3884
  "speculative.fetch_error.fetch_failed": "获取失败(HTTP 错误)",
3885
  "speculative.fetch_error.parse_failed": "JSON 解析失败(文件格式不正确)",
3886
+ "speculative.fetch_error.timeout": "超时(>15 秒,大 tokenizer 或慢速连接)",
3887
  "speculative.fetch_error.network": "网络错误",
3888
  "speculative.fetch_error.hint": "检查 model id 拼写。受限模型需要通过你的 HF 账户查看 tokenizer 文件——这个工具无法 auth。",
3889
  "speculative.hint.missing_input": "输入两个 model id(target 和 draft),然后检查。",
js/spec_decode_compat.js CHANGED
@@ -32,13 +32,23 @@
32
  // the UI doesn't hang on gated/private/missing models.
33
 
34
  const HF_BASE = "https://huggingface.co";
35
- const FETCH_TIMEOUT_MS = 8000;
 
 
36
 
37
  async function fetchHfJson(modelId, fileName) {
38
  if (typeof modelId !== "string" || !modelId.trim()) {
39
  return { ok: false, error: "missing_model_id" };
40
  }
41
- const url = `${HF_BASE}/${encodeURI(modelId.trim())}/raw/main/${fileName}`;
 
 
 
 
 
 
 
 
42
  const controller = new AbortController();
43
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
44
  try {
 
32
  // the UI doesn't hang on gated/private/missing models.
33
 
34
  const HF_BASE = "https://huggingface.co";
35
+ // 15s timeout — Llama-3.x tokenizer.json is ~17 MB via LFS-CDN and the
36
+ // fetch can take 3-8s on first hit (cold cache). 8s was too tight.
37
+ const FETCH_TIMEOUT_MS = 15000;
38
 
39
  async function fetchHfJson(modelId, fileName) {
40
  if (typeof modelId !== "string" || !modelId.trim()) {
41
  return { ok: false, error: "missing_model_id" };
42
  }
43
+ // Use `/resolve/main/` (NOT `/raw/main/`) so we get the actual content
44
+ // for LFS-tracked artifacts. Llama-3.x tokenizer.json is ~17 MB and
45
+ // stored via Git-LFS — `/raw/main/` returns the LFS POINTER text
46
+ // ("version https://git-lfs.github.com/spec/v1\noid sha256:..."),
47
+ // which JSON.parse rejects, leaving the linter with empty vocabs and
48
+ // a silent false-fail. `/resolve/main/` redirects through HF's CDN
49
+ // for LFS files and serves small files (config.json) unchanged. CORS
50
+ // is granted for both via Access-Control-Allow-Origin headers.
51
+ const url = `${HF_BASE}/${encodeURI(modelId.trim())}/resolve/main/${fileName}`;
52
  const controller = new AbortController();
53
  const timer = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS);
54
  try {