Spaces:
Running
Fix re-index timeout: switch to SSE stream + add progress bar
Browse files
Re-index was using POST /ingest (sync) which times out for large repos
(600+ chunks take several minutes to re-embed). Fix:
Backend:
- Add force=true query param to GET /ingest/stream so it supports
full re-ingestion, not just initial index
- Update contextual_at timestamp when force=true on the stream path
Frontend:
- Rewrite handleReindex to use EventSource (SSE) with force=true,
same as the initial ingest flow — no more timeout failures
- Add per-repo progress % state (reindexPct) mapped from ingest steps:
fetching=10% → filtering=22% → chunking=38% → embedding=75% →
storing=90% → done=100%
- Add a 2px progress bar that fills across the bottom of the repo card
during re-indexing, with a glowing accent color and smooth easing
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- backend/routers/ingestion.py +8 -3
- ui/src/components/Sidebar.jsx +48 -12
- ui/src/index.css +17 -0
|
@@ -73,12 +73,14 @@ async def ingest_repo(
|
|
| 73 |
|
| 74 |
|
| 75 |
@router.get("/ingest/stream")
|
| 76 |
-
async def ingest_stream(repo: str, request: Request):
|
| 77 |
"""
|
| 78 |
Stream ingestion progress as Server-Sent Events (SSE).
|
| 79 |
|
| 80 |
Each event: { "step": "fetching|filtering|chunking|embedding|storing|done|error",
|
| 81 |
"detail": "human-readable message" }
|
|
|
|
|
|
|
| 82 |
"""
|
| 83 |
check_rate_limit(request)
|
| 84 |
|
|
@@ -90,12 +92,15 @@ async def ingest_stream(repo: str, request: Request):
|
|
| 90 |
|
| 91 |
async def _run():
|
| 92 |
try:
|
| 93 |
-
await asyncio.to_thread(services.ingestion.ingest, repo,
|
| 94 |
if services.diagram:
|
| 95 |
services.diagram.invalidate(repo)
|
| 96 |
if services.repo_map:
|
| 97 |
services.repo_map.invalidate(repo)
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
| 99 |
except Exception as e:
|
| 100 |
loop.call_soon_threadsafe(queue.put_nowait, {"step": "error", "detail": str(e)})
|
| 101 |
finally:
|
|
|
|
| 73 |
|
| 74 |
|
| 75 |
@router.get("/ingest/stream")
|
| 76 |
+
async def ingest_stream(repo: str, request: Request, force: bool = False):
|
| 77 |
"""
|
| 78 |
Stream ingestion progress as Server-Sent Events (SSE).
|
| 79 |
|
| 80 |
Each event: { "step": "fetching|filtering|chunking|embedding|storing|done|error",
|
| 81 |
"detail": "human-readable message" }
|
| 82 |
+
|
| 83 |
+
force=true deletes and re-ingests from scratch (used by the re-index button).
|
| 84 |
"""
|
| 85 |
check_rate_limit(request)
|
| 86 |
|
|
|
|
| 92 |
|
| 93 |
async def _run():
|
| 94 |
try:
|
| 95 |
+
await asyncio.to_thread(services.ingestion.ingest, repo, force, _progress)
|
| 96 |
if services.diagram:
|
| 97 |
services.diagram.invalidate(repo)
|
| 98 |
if services.repo_map:
|
| 99 |
services.repo_map.invalidate(repo)
|
| 100 |
+
now = datetime.now(timezone.utc).isoformat()
|
| 101 |
+
repo_indexed_at[repo] = now
|
| 102 |
+
if force:
|
| 103 |
+
repo_contextual_at[repo] = now
|
| 104 |
except Exception as e:
|
| 105 |
loop.call_soon_threadsafe(queue.put_nowait, {"step": "error", "detail": str(e)})
|
| 106 |
finally:
|
|
@@ -116,6 +116,7 @@ export default function Sidebar({ repos, reposLoading, activeRepo, onSelectRepo,
|
|
| 116 |
const [isIngesting, setIsIngesting] = useState(false);
|
| 117 |
const [reindexing, setReindexing] = useState(null); // slug currently re-indexing
|
| 118 |
const [reindexDone, setReindexDone] = useState({}); // slug → bool (just finished)
|
|
|
|
| 119 |
const [sessionSearch, setSessionSearch] = useState(""); // filter text for sessions list
|
| 120 |
|
| 121 |
// Load MCP status once on mount
|
|
@@ -187,23 +188,50 @@ export default function Sidebar({ repos, reposLoading, activeRepo, onSelectRepo,
|
|
| 187 |
}
|
| 188 |
}
|
| 189 |
|
| 190 |
-
|
| 191 |
e.stopPropagation();
|
| 192 |
if (reindexing) return;
|
| 193 |
setReindexing(slug);
|
| 194 |
setReindexDone(prev => ({ ...prev, [slug]: false }));
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
setReindexing(null);
|
| 206 |
-
|
|
|
|
|
|
|
| 207 |
}
|
| 208 |
|
| 209 |
const SEARCH_MODE_TITLES = {
|
|
@@ -454,11 +482,13 @@ export default function Sidebar({ repos, reposLoading, activeRepo, onSelectRepo,
|
|
| 454 |
const staleness = stalenessLevel(r.indexed_at);
|
| 455 |
const isReindexingThis = reindexing === r.slug;
|
| 456 |
const justDone = reindexDone[r.slug];
|
|
|
|
| 457 |
return (
|
| 458 |
<div
|
| 459 |
key={r.slug}
|
| 460 |
className={`repo-item ${activeRepo === r.slug ? "active" : ""}`}
|
| 461 |
onClick={() => onSelectRepo(activeRepo === r.slug ? null : r.slug)}
|
|
|
|
| 462 |
>
|
| 463 |
<div className="repo-item-main">
|
| 464 |
{/* GitHub mark — reinforces these are GitHub repos without taking space */}
|
|
@@ -532,6 +562,12 @@ export default function Sidebar({ repos, reposLoading, activeRepo, onSelectRepo,
|
|
| 532 |
>×</button>
|
| 533 |
)}
|
| 534 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
</div>
|
| 536 |
);
|
| 537 |
})}
|
|
|
|
| 116 |
const [isIngesting, setIsIngesting] = useState(false);
|
| 117 |
const [reindexing, setReindexing] = useState(null); // slug currently re-indexing
|
| 118 |
const [reindexDone, setReindexDone] = useState({}); // slug → bool (just finished)
|
| 119 |
+
const [reindexPct, setReindexPct] = useState({}); // slug → 0-100 progress %
|
| 120 |
const [sessionSearch, setSessionSearch] = useState(""); // filter text for sessions list
|
| 121 |
|
| 122 |
// Load MCP status once on mount
|
|
|
|
| 188 |
}
|
| 189 |
}
|
| 190 |
|
| 191 |
+
function handleReindex(e, slug) {
|
| 192 |
e.stopPropagation();
|
| 193 |
if (reindexing) return;
|
| 194 |
setReindexing(slug);
|
| 195 |
setReindexDone(prev => ({ ...prev, [slug]: false }));
|
| 196 |
+
setReindexPct(prev => ({ ...prev, [slug]: 5 }));
|
| 197 |
+
|
| 198 |
+
// Map ingestion steps to approximate % complete so the bar fills meaningfully.
|
| 199 |
+
// Embedding is the longest step (~60% of total time), so it gets the most range.
|
| 200 |
+
const STEP_PCT = { fetching: 10, filtering: 22, chunking: 38, embedding: 75, storing: 90, done: 100 };
|
| 201 |
+
|
| 202 |
+
// Use EventSource (GET SSE) instead of a POST fetch so the connection never
|
| 203 |
+
// times out — large repos take several minutes to re-embed. The backend sends
|
| 204 |
+
// keepalive pings every 15s to prevent proxy idle-disconnect.
|
| 205 |
+
const es = new EventSource(`${BASE}/ingest/stream?repo=${encodeURIComponent(`https://github.com/${slug}`)}&force=true`);
|
| 206 |
+
|
| 207 |
+
es.onmessage = (ev) => {
|
| 208 |
+
const event = JSON.parse(ev.data);
|
| 209 |
+
const pct = STEP_PCT[event.step] ?? null;
|
| 210 |
+
if (pct !== null) setReindexPct(prev => ({ ...prev, [slug]: pct }));
|
| 211 |
+
|
| 212 |
+
if (event.step === "done") {
|
| 213 |
+
es.close();
|
| 214 |
+
setReindexing(null);
|
| 215 |
+
setReindexDone(prev => ({ ...prev, [slug]: true }));
|
| 216 |
+
onReposChange();
|
| 217 |
+
setTimeout(() => {
|
| 218 |
+
setReindexDone(prev => { const n = {...prev}; delete n[slug]; return n; });
|
| 219 |
+
setReindexPct(prev => { const n = {...prev}; delete n[slug]; return n; });
|
| 220 |
+
}, 3000);
|
| 221 |
+
} else if (event.step === "error") {
|
| 222 |
+
es.close();
|
| 223 |
+
setReindexing(null);
|
| 224 |
+
setReindexPct(prev => { const n = {...prev}; delete n[slug]; return n; });
|
| 225 |
+
setStatus({ type: "error", text: `Re-index failed: ${event.detail}` });
|
| 226 |
+
}
|
| 227 |
+
};
|
| 228 |
+
|
| 229 |
+
es.onerror = () => {
|
| 230 |
+
es.close();
|
| 231 |
setReindexing(null);
|
| 232 |
+
setReindexPct(prev => { const n = {...prev}; delete n[slug]; return n; });
|
| 233 |
+
setStatus({ type: "error", text: "Re-index failed: connection lost" });
|
| 234 |
+
};
|
| 235 |
}
|
| 236 |
|
| 237 |
const SEARCH_MODE_TITLES = {
|
|
|
|
| 482 |
const staleness = stalenessLevel(r.indexed_at);
|
| 483 |
const isReindexingThis = reindexing === r.slug;
|
| 484 |
const justDone = reindexDone[r.slug];
|
| 485 |
+
const pct = reindexPct[r.slug] ?? null;
|
| 486 |
return (
|
| 487 |
<div
|
| 488 |
key={r.slug}
|
| 489 |
className={`repo-item ${activeRepo === r.slug ? "active" : ""}`}
|
| 490 |
onClick={() => onSelectRepo(activeRepo === r.slug ? null : r.slug)}
|
| 491 |
+
style={{ position: "relative", overflow: "hidden" }}
|
| 492 |
>
|
| 493 |
<div className="repo-item-main">
|
| 494 |
{/* GitHub mark — reinforces these are GitHub repos without taking space */}
|
|
|
|
| 562 |
>×</button>
|
| 563 |
)}
|
| 564 |
</div>
|
| 565 |
+
{/* Progress bar — shown while re-indexing, fills from left to right */}
|
| 566 |
+
{pct !== null && (
|
| 567 |
+
<div className="repo-reindex-progress">
|
| 568 |
+
<div className="repo-reindex-progress-bar" style={{ width: `${pct}%` }} />
|
| 569 |
+
</div>
|
| 570 |
+
)}
|
| 571 |
</div>
|
| 572 |
);
|
| 573 |
})}
|
|
@@ -3335,6 +3335,23 @@ textarea:focus-visible {
|
|
| 3335 |
to { transform: rotate(360deg); }
|
| 3336 |
}
|
| 3337 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3338 |
/* ──────────────────────────────────────────────────────────
|
| 3339 |
FEATURE 3: SESSION SEARCH + EDITABLE TITLE
|
| 3340 |
────────────────────────────────────────────────────────── */
|
|
|
|
| 3335 |
to { transform: rotate(360deg); }
|
| 3336 |
}
|
| 3337 |
|
| 3338 |
+
/* Re-index progress bar — sits flush at the bottom of the repo card */
|
| 3339 |
+
.repo-reindex-progress {
|
| 3340 |
+
position: absolute;
|
| 3341 |
+
bottom: 0;
|
| 3342 |
+
left: 0;
|
| 3343 |
+
right: 0;
|
| 3344 |
+
height: 2px;
|
| 3345 |
+
background: var(--surface-4);
|
| 3346 |
+
}
|
| 3347 |
+
.repo-reindex-progress-bar {
|
| 3348 |
+
height: 100%;
|
| 3349 |
+
background: var(--accent);
|
| 3350 |
+
border-radius: 0 1px 1px 0;
|
| 3351 |
+
transition: width 0.6s cubic-bezier(0.4, 0, 0.2, 1);
|
| 3352 |
+
box-shadow: 0 0 6px var(--accent);
|
| 3353 |
+
}
|
| 3354 |
+
|
| 3355 |
/* ──────────────────────────────────────────────────────────
|
| 3356 |
FEATURE 3: SESSION SEARCH + EDITABLE TITLE
|
| 3357 |
────────────────────────────────────────────────────────── */
|