Spaces:
Running
Running
Commit ·
6ec910c
1
Parent(s): 910ed77
fix(webllm): use smaller Qwen2.5-0.5B (~350MB) + storage.persist() + fallback to SmolLM2-360M
Browse files
Resolves the QuotaExceededError that users hit when Llama-3.2-1B-q4f32 (~700MB)
exceeds default IndexedDB quota in some browsers/configurations.
- Default model: Qwen2.5-0.5B-Instruct-q4f16_1 (~350MB, capable enough for routing+synthesis)
- Request navigator.storage.persist() before model load
- Auto-fallback to SmolLM2-360M-Instruct-q4f16_1 if quota error
- Helpful error message with mitigation tips if both fail
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- js/main.js +41 -5
js/main.js
CHANGED
|
@@ -7,7 +7,9 @@
|
|
| 7 |
|
| 8 |
const TAF_BROWSER_URL = "python/taf_browser.py";
|
| 9 |
const ENABLE_WEBLLM = true;
|
| 10 |
-
|
|
|
|
|
|
|
| 11 |
|
| 12 |
const $ = (id) => document.getElementById(id);
|
| 13 |
|
|
@@ -463,11 +465,45 @@ function escapeHtml(s) {
|
|
| 463 |
// ════════════════════════════════════════════════════════════════════
|
| 464 |
async function loadWebLLM() {
|
| 465 |
if (state.webllm) return state.webllm;
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
const { CreateMLCEngine } = await import("https://esm.run/@mlc-ai/web-llm");
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 471 |
return state.webllm;
|
| 472 |
}
|
| 473 |
|
|
|
|
const TAF_BROWSER_URL = "python/taf_browser.py";
const ENABLE_WEBLLM = true;

// Qwen2.5-0.5B (~350MB) fits inside the default browser storage quota,
// unlike the previous Llama-1B build (~700MB) which overflowed it.
const WEBLLM_MODEL = "Qwen2.5-0.5B-Instruct-q4f16_1-MLC";
// Even smaller model, tried automatically if the primary hits a quota error.
const WEBLLM_FALLBACK = "SmolLM2-360M-Instruct-q4f16_1-MLC";

// Tiny DOM-lookup helper.
const $ = (id) => document.getElementById(id);
|
|
// ════════════════════════════════════════════════════════════════════
/**
 * Lazily create (and memoize on `state.webllm`) a WebLLM inference engine.
 *
 * Flow: request persistent storage (best-effort) → dynamically import the
 * web-llm library → load WEBLLM_MODEL; on a storage-quota failure, retry
 * once with the smaller WEBLLM_FALLBACK; if that also fails, throw an
 * actionable error describing mitigations.
 *
 * @returns {Promise<object>} the initialized MLC engine (cached on state.webllm)
 * @throws {Error} when both the primary and fallback models fail to load
 */
async function loadWebLLM() {
  if (state.webllm) return state.webllm;

  // Best-effort: persistent storage makes the browser far less likely to
  // evict the ~350MB of cached model weights (and raises the quota in some
  // browsers). Denial is fine — log and continue.
  if (navigator.storage && navigator.storage.persist) {
    try {
      const persistent = await navigator.storage.persist();
      console.log(persistent ? "Persistent storage granted" : "Persistent storage denied");
    } catch (e) {
      console.warn("storage.persist() failed:", e);
    }
  }

  setStatus(`⏳ Loading WebLLM library + ${WEBLLM_MODEL.split("-")[0]} (~350MB first time, cached after)...`);
  const { CreateMLCEngine } = await import("https://esm.run/@mlc-ai/web-llm");

  // Shared loader so primary and fallback report progress identically.
  const tryLoad = async (modelId) => {
    return await CreateMLCEngine(modelId, {
      initProgressCallback: (info) => setStatus(`⏳ ${info.text || "Loading model..."}`),
    });
  };

  // Robust quota detection: a storage failure surfaces as a DOMException
  // named "QuotaExceededError" — check the name first, then keep the
  // original (looser) substring matching as a fallback.
  const isQuotaError = (err) =>
    (err && err.name === "QuotaExceededError") ||
    String(err).includes("QuotaExceeded") ||
    String(err).includes("storage");

  try {
    state.webllm = await tryLoad(WEBLLM_MODEL);
  } catch (err) {
    if (isQuotaError(err)) {
      setStatus(`⚠ Quota exceeded for ${WEBLLM_MODEL}. Trying smaller fallback ${WEBLLM_FALLBACK}...`);
      try {
        state.webllm = await tryLoad(WEBLLM_FALLBACK);
      } catch (err2) {
        // Preserve the underlying failure via `cause` so its stack isn't lost.
        throw new Error(
          `Both models failed. Browser storage too constrained. ` +
            `Try: (1) Settings → Privacy → Site settings → allow more storage for this site, ` +
            `(2) clear browser cache, (3) use Chrome/Edge in non-incognito mode. ` +
            `Original error: ${err2.message || err2}`,
          { cause: err2 }
        );
      }
    } else {
      throw err;
    }
  }
  return state.webllm;
}