Fix re-index stuck: emit per-batch progress during contextual retrieval
The progress bar was frozen at 38% (chunking) for the entire 10+ minute
contextual retrieval phase because _add_context had no progress callbacks.
Backend:
- Thread progress callback through to _add_context
- Emit "contextualizing" step every 20 chunks with "X / Y" in the detail
so the frontend can compute an exact percentage
Frontend:
- Parse "X / Y" from contextualizing detail to map 38-78% dynamically
- Adjust STEP_PCT: embedding=80%, storing=92% (shifted right to make room)
UI:
- Add a shimmer animation to the progress bar so it visually pulses
between batch events, making it clear the operation is still running
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- backend/services/ingestion_service.py +21 -5
- ui/src/components/Sidebar.jsx +16 -3
- ui/src/index.css +13 -0
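
For context on the protocol: the frontend consumes these events with an EventSource, so each progress update arrives as an SSE "data:" line whose JSON body carries the "step" and "detail" fields that Sidebar.jsx reads below. A hedged sketch of the stream during the new contextualizing phase (the two field names come from the diffs; the chunk total of 412, the elided details, and the exact framing are illustrative assumptions):

    data: {"step": "chunking", "detail": "..."}
    data: {"step": "contextualizing", "detail": "Adding context to chunks… 0 / 412"}
    data: {"step": "contextualizing", "detail": "Adding context to chunks… 20 / 412"}
    data: {"step": "contextualizing", "detail": "Adding context to chunks… 40 / 412"}
    data: {"step": "embedding", "detail": "..."}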
backend/services/ingestion_service.py
CHANGED

@@ -143,12 +143,21 @@ class IngestionService:
         # Only runs on force=True re-ingestion to avoid slowing down first-time indexing.
         if force and hasattr(self, '_gen') and self._gen is not None:
             top_n = settings.contextual_top_n  # 0 = all chunks
-
+            total_ctx = len(chunks) if top_n == 0 else min(top_n, len(chunks))
             _emit("contextualizing",
-                  f"
-            print(f"Contextual retrieval: enriching {
+                  f"Adding context to chunks… 0 / {total_ctx}")
+            print(f"Contextual retrieval: enriching {total_ctx} chunks with context...")
             now = datetime.now(timezone.utc).isoformat()
-
+
+            # Emit progress every 20 chunks so the UI bar advances visibly
+            def _ctx_progress(done: int, total: int) -> None:
+                _emit("contextualizing", f"Adding context to chunks… {done} / {total}")
+
+            chunks = _add_context(
+                chunks, file_dicts, self._gen,
+                top_n=top_n, contextual_at=now,
+                progress=_ctx_progress,
+            )
             n_enriched = sum(1 for c in chunks if c.get('_contextualised'))
             print(f"  Context added to {n_enriched} chunks")

@@ -259,6 +268,7 @@ def _add_context(
     gen,
     top_n: int = 0,
     contextual_at: str = None,
+    progress: callable = None,
 ) -> list[dict]:
     """
     Contextual Retrieval: prepend a short LLM-generated context sentence to

@@ -313,7 +323,9 @@ def _add_context(
         result[i] = dict(result[i])
         result[i]["contextual_at"] = contextual_at

-
+    _REPORT_EVERY = 20  # emit a progress event every N chunks to avoid SSE flood
+
+    for done, (idx, _chunk) in enumerate(ranked[:limit], start=1):
         # Use result[idx] (which already has contextual_at stamped) as the base
         chunk = result[idx]
         filepath = chunk.get("filepath", "")

@@ -340,4 +352,8 @@ def _add_context(
             print(f"  Context skipped for {filepath}:{chunk.get('name', '?')} → {e}")
             # Leave result[idx] unchanged → graceful fallback to raw chunk

+        # Report progress every N chunks so the UI bar advances visibly
+        if progress and (done % _REPORT_EVERY == 0 or done == limit):
+            progress(done, limit)
+
     return result
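Taken together, the backend change is a plain callback-throttling pattern. A minimal self-contained sketch of that pattern under stated assumptions — enrich_chunks and its fake workload are hypothetical stand-ins for _add_context; only the progress parameter and the every-20 cadence mirror the real code:

    # Hedged sketch: stand-in for _add_context's throttled progress reporting.
    from typing import Callable, Optional

    _REPORT_EVERY = 20  # mirrors the diff: one event per 20 chunks

    def enrich_chunks(chunks: list[dict],
                      progress: Optional[Callable[[int, int], None]] = None) -> list[dict]:
        limit = len(chunks)
        for done, chunk in enumerate(chunks, start=1):
            chunk["context"] = f"(context for {chunk['name']})"  # stand-in for the LLM call
            # Fire every N chunks, plus once on the final chunk so the last
            # event always reports done == limit.
            if progress and (done % _REPORT_EVERY == 0 or done == limit):
                progress(done, limit)
        return chunks

    if __name__ == "__main__":
        demo = [{"name": f"chunk{i}"} for i in range(45)]
        enrich_chunks(demo, progress=lambda d, t: print(f"{d} / {t}"))
        # Prints: 20 / 45, 40 / 45, 45 / 45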
ui/src/components/Sidebar.jsx
CHANGED

@@ -196,8 +196,8 @@ export default function Sidebar({ repos, reposLoading, activeRepo, onSelectRepo,
     setReindexPct(prev => ({ ...prev, [slug]: 5 }));

     // Map ingestion steps to approximate % complete so the bar fills meaningfully.
-    //
-    const STEP_PCT = { fetching: 10, filtering: 22, chunking: 38, embedding:
+    // "contextualizing" is dynamic; we compute it from the "X / Y" in the detail.
+    const STEP_PCT = { fetching: 10, filtering: 22, chunking: 38, embedding: 80, storing: 92, done: 100 };

     // Use EventSource (GET SSE) instead of a POST fetch so the connection never
     // times out; large repos take several minutes to re-embed. The backend sends

@@ -206,7 +206,20 @@ export default function Sidebar({ repos, reposLoading, activeRepo, onSelectRepo,

     es.onmessage = (ev) => {
       const event = JSON.parse(ev.data);
-
+      let pct = STEP_PCT[event.step] ?? null;
+
+      // Contextualizing fires many times with "X / Y" in the detail.
+      // Map it to the 38-78% range so the bar visibly advances during this long phase.
+      if (event.step === "contextualizing" && event.detail) {
+        const m = event.detail.match(/(\d+)\s*\/\s*(\d+)/);
+        if (m) {
+          const [done, total] = [parseInt(m[1]), parseInt(m[2])];
+          pct = Math.round(38 + (done / total) * 40);
+        } else {
+          pct = 40; // initial "contextualizing" event before first batch
+        }
+      }
+
       if (pct !== null) setReindexPct(prev => ({ ...prev, [slug]: pct }));

       if (event.step === "done") {
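To sanity-check the mapping: the backend's first event ("0 / N") lands exactly on the 38% chunking baseline, the final batch lands on 78%, and the next fixed step (embedding at 80%) picks up from there without a visible jump backwards. A small Python transcription of the JSX arithmetic, for verification only (the function name is invented; the 412-chunk totals are illustrative):

    import re

    def contextualizing_pct(detail: str) -> int:
        # Mirrors the Sidebar.jsx mapping of "X / Y" onto the 38-78% band
        m = re.search(r"(\d+)\s*/\s*(\d+)", detail)
        if not m:
            return 40  # initial event before the first batch
        done, total = int(m.group(1)), int(m.group(2))
        return round(38 + (done / total) * 40)

    assert contextualizing_pct("Adding context to chunks… 0 / 412") == 38
    assert contextualizing_pct("Adding context to chunks… 206 / 412") == 58
    assert contextualizing_pct("Adding context to chunks… 412 / 412") == 78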
ui/src/index.css
CHANGED

@@ -3350,6 +3350,19 @@ textarea:focus-visible {
   border-radius: 0 1px 1px 0;
   transition: width 0.6s cubic-bezier(0.4, 0, 0.2, 1);
   box-shadow: 0 0 6px var(--accent);
+  /* Shimmer shows the bar is alive even when progress is slow */
+  background-image: linear-gradient(
+    90deg,
+    var(--accent) 0%,
+    rgba(160, 120, 255, 0.9) 45%,
+    var(--accent) 100%
+  );
+  background-size: 200% 100%;
+  animation: reindex-shimmer 1.8s ease-in-out infinite;
+}
+@keyframes reindex-shimmer {
+  0% { background-position: 200% center; }
+  100% { background-position: -200% center; }
 }

 /* ──────────────────────────────────────────────────────────