umanggarg Claude Sonnet 4.6 committed on
Commit
34ba9cf
·
1 Parent(s): 1dd81ef

Fix re-index stuck: emit per-batch progress during contextual retrieval

Browse files

The progress bar was frozen at 38% (chunking) for the entire 10+ minute
contextual retrieval phase because _add_context had no progress callbacks.

Backend:
- Thread progress callback through to _add_context
- Emit "contextualizing" step every 20 chunks with "X / Y" in the detail
so the frontend can compute an exact percentage

Frontend:
- Parse "X / Y" from contextualizing detail to map 38-78% dynamically
- Adjust STEP_PCT: embedding=80%, storing=92% (shifted right to make room)

UI:
- Add a shimmer animation to the progress bar so it visually pulses
between batch events, making it clear the operation is still running

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

backend/services/ingestion_service.py CHANGED
@@ -143,12 +143,21 @@ class IngestionService:
143
  # Only runs on force=True re-ingestion to avoid slowing down first-time indexing.
144
  if force and hasattr(self, '_gen') and self._gen is not None:
145
  top_n = settings.contextual_top_n # 0 = all chunks
146
- limit_str = f"all {len(chunks)}" if top_n == 0 else f"top {top_n}"
147
  _emit("contextualizing",
148
- f"Contextual retrieval: adding AI-generated descriptions to {limit_str} chunks…")
149
- print(f"Contextual retrieval: enriching {limit_str} chunks with context...")
150
  now = datetime.now(timezone.utc).isoformat()
151
- chunks = _add_context(chunks, file_dicts, self._gen, top_n=top_n, contextual_at=now)
 
 
 
 
 
 
 
 
 
152
  n_enriched = sum(1 for c in chunks if c.get('_contextualised'))
153
  print(f" Context added to {n_enriched} chunks")
154
 
@@ -259,6 +268,7 @@ def _add_context(
259
  gen,
260
  top_n: int = 0,
261
  contextual_at: str = None,
 
262
  ) -> list[dict]:
263
  """
264
  Contextual Retrieval: prepend a short LLM-generated context sentence to
@@ -313,7 +323,9 @@ def _add_context(
313
  result[i] = dict(result[i])
314
  result[i]["contextual_at"] = contextual_at
315
 
316
- for idx, _chunk in ranked[:limit]:
 
 
317
  # Use result[idx] (which already has contextual_at stamped) as the base
318
  chunk = result[idx]
319
  filepath = chunk.get("filepath", "")
@@ -340,4 +352,8 @@ def _add_context(
340
  print(f" Context skipped for {filepath}:{chunk.get('name', '?')} — {e}")
341
  # Leave result[idx] unchanged — graceful fallback to raw chunk
342
 
 
 
 
 
343
  return result
 
143
  # Only runs on force=True re-ingestion to avoid slowing down first-time indexing.
144
  if force and hasattr(self, '_gen') and self._gen is not None:
145
  top_n = settings.contextual_top_n # 0 = all chunks
146
+ total_ctx = len(chunks) if top_n == 0 else min(top_n, len(chunks))
147
  _emit("contextualizing",
148
+ f"Adding context to chunks… 0 / {total_ctx}")
149
+ print(f"Contextual retrieval: enriching {total_ctx} chunks with context...")
150
  now = datetime.now(timezone.utc).isoformat()
151
+
152
+ # Emit progress every 20 chunks so the UI bar advances visibly
153
+ def _ctx_progress(done: int, total: int) -> None:
154
+ _emit("contextualizing", f"Adding context to chunks… {done} / {total}")
155
+
156
+ chunks = _add_context(
157
+ chunks, file_dicts, self._gen,
158
+ top_n=top_n, contextual_at=now,
159
+ progress=_ctx_progress,
160
+ )
161
  n_enriched = sum(1 for c in chunks if c.get('_contextualised'))
162
  print(f" Context added to {n_enriched} chunks")
163
 
 
268
  gen,
269
  top_n: int = 0,
270
  contextual_at: str = None,
271
+ progress: callable = None,
272
  ) -> list[dict]:
273
  """
274
  Contextual Retrieval: prepend a short LLM-generated context sentence to
 
323
  result[i] = dict(result[i])
324
  result[i]["contextual_at"] = contextual_at
325
 
326
+ _REPORT_EVERY = 20 # emit a progress event every N chunks to avoid SSE flood
327
+
328
+ for done, (idx, _chunk) in enumerate(ranked[:limit], start=1):
329
  # Use result[idx] (which already has contextual_at stamped) as the base
330
  chunk = result[idx]
331
  filepath = chunk.get("filepath", "")
 
352
  print(f" Context skipped for {filepath}:{chunk.get('name', '?')} — {e}")
353
  # Leave result[idx] unchanged — graceful fallback to raw chunk
354
 
355
+ # Report progress every N chunks so the UI bar advances visibly
356
+ if progress and (done % _REPORT_EVERY == 0 or done == limit):
357
+ progress(done, limit)
358
+
359
  return result
ui/src/components/Sidebar.jsx CHANGED
@@ -196,8 +196,8 @@ export default function Sidebar({ repos, reposLoading, activeRepo, onSelectRepo,
196
  setReindexPct(prev => ({ ...prev, [slug]: 5 }));
197
 
198
  // Map ingestion steps to approximate % complete so the bar fills meaningfully.
199
- // Embedding is the longest step (~60% of total time), so it gets the most range.
200
- const STEP_PCT = { fetching: 10, filtering: 22, chunking: 38, embedding: 75, storing: 90, done: 100 };
201
 
202
  // Use EventSource (GET SSE) instead of a POST fetch so the connection never
203
  // times out — large repos take several minutes to re-embed. The backend sends
@@ -206,7 +206,20 @@ export default function Sidebar({ repos, reposLoading, activeRepo, onSelectRepo,
206
 
207
  es.onmessage = (ev) => {
208
  const event = JSON.parse(ev.data);
209
- const pct = STEP_PCT[event.step] ?? null;
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  if (pct !== null) setReindexPct(prev => ({ ...prev, [slug]: pct }));
211
 
212
  if (event.step === "done") {
 
196
  setReindexPct(prev => ({ ...prev, [slug]: 5 }));
197
 
198
  // Map ingestion steps to approximate % complete so the bar fills meaningfully.
199
+ // "contextualizing" is dynamic — we compute it from the "X / Y" in the detail.
200
+ const STEP_PCT = { fetching: 10, filtering: 22, chunking: 38, embedding: 80, storing: 92, done: 100 };
201
 
202
  // Use EventSource (GET SSE) instead of a POST fetch so the connection never
203
  // times out β€” large repos take several minutes to re-embed. The backend sends
 
206
 
207
  es.onmessage = (ev) => {
208
  const event = JSON.parse(ev.data);
209
+ let pct = STEP_PCT[event.step] ?? null;
210
+
211
+ // Contextualizing fires many times with "X / Y" in the detail.
212
+ // Map it to 38–78% range so the bar visibly advances during this long phase.
213
+ if (event.step === "contextualizing" && event.detail) {
214
+ const m = event.detail.match(/(\d+)\s*\/\s*(\d+)/);
215
+ if (m) {
216
+ const [done, total] = [parseInt(m[1]), parseInt(m[2])];
217
+ pct = Math.round(38 + (done / total) * 40);
218
+ } else {
219
+ pct = 40; // initial "contextualizing" event before first batch
220
+ }
221
+ }
222
+
223
  if (pct !== null) setReindexPct(prev => ({ ...prev, [slug]: pct }));
224
 
225
  if (event.step === "done") {
ui/src/index.css CHANGED
@@ -3350,6 +3350,19 @@ textarea:focus-visible {
3350
  border-radius: 0 1px 1px 0;
3351
  transition: width 0.6s cubic-bezier(0.4, 0, 0.2, 1);
3352
  box-shadow: 0 0 6px var(--accent);
 
 
 
 
 
 
 
 
 
 
 
 
 
3353
  }
3354
 
3355
  /* ══════════════════════════════════════════════════════════
 
3350
  border-radius: 0 1px 1px 0;
3351
  transition: width 0.6s cubic-bezier(0.4, 0, 0.2, 1);
3352
  box-shadow: 0 0 6px var(--accent);
3353
+ /* Shimmer shows the bar is alive even when progress is slow */
3354
+ background-image: linear-gradient(
3355
+ 90deg,
3356
+ var(--accent) 0%,
3357
+ rgba(160, 120, 255, 0.9) 45%,
3358
+ var(--accent) 100%
3359
+ );
3360
+ background-size: 200% 100%;
3361
+ animation: reindex-shimmer 1.8s ease-in-out infinite;
3362
+ }
3363
+ @keyframes reindex-shimmer {
3364
+ 0% { background-position: 200% center; }
3365
+ 100% { background-position: -200% center; }
3366
  }
3367
 
3368
  /* ══════════════════════════════════════════════════════════