karlexmarin Claude Opus 4.7 (1M context) committed on
Commit
6ec910c
·
1 Parent(s): 910ed77

fix(webllm): use smaller Qwen2.5-0.5B (~350MB) + storage.persist() + fallback to SmolLM2-360M

Browse files

Resolves QuotaExceededError users hit when Llama-3.2-1B-q4f32 (~700MB)
exceeds default IndexedDB quota in some browsers/configurations.

- Default model: Qwen2.5-0.5B-Instruct-q4f16_1 (~350MB, capable enough for routing+synthesis)
- Request navigator.storage.persist() before model load
- Auto-fallback to SmolLM2-360M-Instruct-q4f16_1 if quota error
- Helpful error message with mitigation tips if both fail

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. js/main.js +41 -5
js/main.js CHANGED
@@ -7,7 +7,9 @@
7
 
8
  const TAF_BROWSER_URL = "python/taf_browser.py";
9
  const ENABLE_WEBLLM = true;
10
- const WEBLLM_MODEL = "Llama-3.2-1B-Instruct-q4f32_1-MLC";
 
 
11
 
12
  const $ = (id) => document.getElementById(id);
13
 
@@ -463,11 +465,45 @@ function escapeHtml(s) {
463
  // ════════════════════════════════════════════════════════════════════
464
  async function loadWebLLM() {
465
  if (state.webllm) return state.webllm;
466
- setStatus("⏳ Loading WebLLM library + Llama-3.2-1B (~700MB first time, cached after)...");
 
 
 
 
 
 
 
 
 
 
 
467
  const { CreateMLCEngine } = await import("https://esm.run/@mlc-ai/web-llm");
468
- state.webllm = await CreateMLCEngine(WEBLLM_MODEL, {
469
- initProgressCallback: (info) => setStatus(`⏳ ${info.text || "Loading model..."}`),
470
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  return state.webllm;
472
  }
473
 
 
7
 
8
const TAF_BROWSER_URL = "python/taf_browser.py";
const ENABLE_WEBLLM = true;
// Default model: ~350MB quantized build that fits within typical default
// browser storage quotas (the previous Llama-3.2-1B q4f32 build was ~700MB
// and triggered QuotaExceededError for some users — see commit message).
const WEBLLM_MODEL = "Qwen2.5-0.5B-Instruct-q4f16_1-MLC";
// Even smaller model, tried automatically when the default hits a quota error.
const WEBLLM_FALLBACK = "SmolLM2-360M-Instruct-q4f16_1-MLC";

// Shorthand for document.getElementById.
const $ = (id) => document.getElementById(id);
15
 
 
465
// ════════════════════════════════════════════════════════════════════
/**
 * Lazily create (and memoize) the in-browser WebLLM engine.
 *
 * Loads WEBLLM_MODEL first; if that fails with a storage-quota error,
 * automatically retries with the smaller WEBLLM_FALLBACK. The engine is
 * cached on `state.webllm`, so repeated calls return the same instance.
 *
 * @returns {Promise<object>} the MLC engine, also stored on `state.webllm`
 * @throws {Error} when both models fail to load; the message carries
 *   mitigation tips and the original failure is preserved as `cause`
 */
async function loadWebLLM() {
  if (state.webllm) return state.webllm;

  // Ask the browser for persistent storage so the cached model weights
  // (hundreds of MB) are less likely to be evicted or hit quota limits.
  // Best-effort: a denial or an exception here is not fatal.
  if (navigator.storage && navigator.storage.persist) {
    try {
      const persistent = await navigator.storage.persist();
      console.log(persistent ? "Persistent storage granted" : "Persistent storage denied");
    } catch (e) {
      console.warn("storage.persist() failed:", e);
    }
  }

  setStatus(`⏳ Loading WebLLM library + ${WEBLLM_MODEL.split("-")[0]} (~350MB first time, cached after)...`);
  const { CreateMLCEngine } = await import("https://esm.run/@mlc-ai/web-llm");

  // Load one model, streaming progress updates into the status line.
  const tryLoad = (modelId) =>
    CreateMLCEngine(modelId, {
      initProgressCallback: (info) => setStatus(`⏳ ${info.text || "Loading model..."}`),
    });

  // Quota failures surface as a DOMException with the stable name
  // "QuotaExceededError"; also match message text, since WebLLM sometimes
  // wraps the underlying storage error in a plain Error.
  const isQuotaError = (e) =>
    e?.name === "QuotaExceededError" ||
    String(e).includes("QuotaExceeded") ||
    String(e).includes("storage");

  try {
    state.webllm = await tryLoad(WEBLLM_MODEL);
  } catch (err) {
    if (!isQuotaError(err)) throw err; // unrelated failure — surface as-is
    setStatus(`⚠ Quota exceeded for ${WEBLLM_MODEL}. Trying smaller fallback ${WEBLLM_FALLBACK}...`);
    try {
      state.webllm = await tryLoad(WEBLLM_FALLBACK);
    } catch (err2) {
      // Keep the original failure attached as `cause` for debugging.
      throw new Error(
        `Both models failed. Browser storage too constrained. ` +
          `Try: (1) Settings → Privacy → Site settings → allow more storage for this site, ` +
          `(2) clear browser cache, (3) use Chrome/Edge in non-incognito mode. ` +
          `Original error: ${err2.message || err2}`,
        { cause: err2 }
      );
    }
  }
  return state.webllm;
}
509