Commit ea8edd8 · 1 Parent(s): 9d3e9f6
Committed by umanggarg (Claude Sonnet 4.6)

Fix 5 bugs found in expert review

- ingestion_service.py: NameError on 'filtered' → use raw_files (would crash every successful ingest)
- retrieval.py: relevance_threshold was all-or-nothing → now filters per result
- generation.py: Groq/Anthropic client created per-request → cached as self._client
- main.py + api.js: double LLM call (POST /query + GET /query/stream) eliminated
- stream endpoint now emits 'event: meta' with sources+query_type before tokens
- frontend listens with es.addEventListener('meta', ...) instead of second fetch
- App.jsx: textarea auto-grows with content (ref + scrollHeight effect)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

backend/main.py CHANGED
@@ -300,9 +300,18 @@ async def query_stream(
     context = retrieval_svc.format_context(results)
 
     def token_stream():
+        import json
+        # First event: send sources + query_type as structured JSON so the
+        # frontend gets everything in one SSE connection (no second POST /query call).
+        meta = {
+            "sources": [CodeChunk(**r).model_dump() for r in results],
+            "query_type": query_type,
+        }
+        yield f"event: meta\ndata: {json.dumps(meta)}\n\n"
+
+        # Subsequent events: stream tokens
         for token in generation_svc.stream(question, context, query_type):
-            # Escape newlines: SSE uses \n\n as event delimiter.
-            # A bare newline in the token would split the event prematurely.
+            # Escape newlines: SSE uses \n\n as event delimiter
             safe_token = token.replace("\n", "\\n")
             yield f"data: {safe_token}\n\n"
         yield "data: [DONE]\n\n"
backend/services/generation.py CHANGED
@@ -140,13 +140,19 @@ class GenerationService:
         self.provider = self._init_provider()
 
     def _init_provider(self) -> str:
-        """Pick Groq or Anthropic based on which key is configured."""
+        """Pick Groq or Anthropic, and create the client once for reuse.
+
+        Creating a client per-request wastes resources — each instantiation
+        sets up an httpx session. We store it on self and reuse across all calls.
+        """
         if settings.groq_api_key:
-            import groq as groq_sdk  # noqa: F401 — check it's importable
+            from groq import Groq
+            self._client = Groq(api_key=settings.groq_api_key)
             print("Generation: using Groq (llama-3.3-70b-versatile)")
             return "groq"
         elif settings.anthropic_api_key:
-            import anthropic as anthropic_sdk  # noqa: F401
+            import anthropic
+            self._client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
             print("Generation: using Anthropic (claude-haiku-4-5)")
             return "anthropic"
         else:
@@ -200,9 +206,7 @@ class GenerationService:
     # ── Groq implementation ────────────────────────────────────────────────────
 
     def _groq_complete(self, system: str, prompt: str, params: dict) -> str:
-        from groq import Groq
-        client = Groq(api_key=settings.groq_api_key)
-        response = client.chat.completions.create(
+        response = self._client.chat.completions.create(
             model="llama-3.3-70b-versatile",
             messages=[
                 {"role": "system", "content": system},
@@ -214,9 +218,7 @@ class GenerationService:
         return response.choices[0].message.content
 
     def _groq_stream(self, system: str, prompt: str, params: dict) -> Iterator[str]:
-        from groq import Groq
-        client = Groq(api_key=settings.groq_api_key)
-        stream = client.chat.completions.create(
+        stream = self._client.chat.completions.create(
             model="llama-3.3-70b-versatile",
             messages=[
                 {"role": "system", "content": system},
@@ -234,9 +236,7 @@ class GenerationService:
     # ── Anthropic implementation ───────────────────────────────────────────────
 
     def _anthropic_complete(self, system: str, prompt: str, params: dict) -> str:
-        import anthropic
-        client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
-        response = client.messages.create(
+        response = self._client.messages.create(
             model="claude-haiku-4-5-20251001",
             system=system,
             messages=[{"role": "user", "content": prompt}],
@@ -246,9 +246,7 @@ class GenerationService:
         return response.content[0].text
 
     def _anthropic_stream(self, system: str, prompt: str, params: dict) -> Iterator[str]:
-        import anthropic
-        client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
-        with client.messages.stream(
+        with self._client.messages.stream(
            model="claude-haiku-4-5-20251001",
            system=system,
            messages=[{"role": "user", "content": prompt}],
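A quick sanity check for the cached client (hypothetical snippet, not in the repo; it assumes settings.groq_api_key is set, and the params keys are guesses at what the service forwards to the API):

svc = GenerationService()
client_before = svc._client               # created once in _init_provider()

svc._groq_complete(
    system="You are a code assistant.",
    prompt="ping",
    params={"temperature": 0.2, "max_tokens": 64},
)
assert svc._client is client_before       # same client object reused; no new httpx session per call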
backend/services/ingestion_service.py CHANGED
@@ -115,13 +115,13 @@ class IngestionService:
         total_stored = self.store.count(repo=repo_slug)
         message = (
             f"Ingested {repo_slug}: "
-            f"{len(filtered)} files → {len(chunks)} chunks → {total_stored} total stored"
+            f"{len(raw_files)} files → {len(chunks)} chunks → {total_stored} total stored"
         )
         print(f"\n✓ {message}")
 
         return {
             "repo": repo_slug,
-            "files_indexed": len(filtered),
+            "files_indexed": len(raw_files),
             "chunks_stored": len(chunks),
             "message": message,
         }
retrieval/retrieval.py CHANGED
@@ -105,10 +105,10 @@ class RetrievalService:
         else:
             results = self._hybrid_search(query, top_k, qdrant_filter)
 
-        # Relevance gate — skip when repo_filter is set (user explicitly chose a repo)
-        if relevance_threshold > 0 and not repo_filter and results:
-            if results[0]["score"] < relevance_threshold:
-                return []
+        # Per-result relevance gate — filter out low-scoring chunks individually.
+        # Only applied when there is no repo_filter (an explicitly chosen repo means any score is valid).
+        if relevance_threshold > 0 and not repo_filter:
+            results = [r for r in results if r["score"] >= relevance_threshold]
 
         return results
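To make the behavioral difference concrete, a small illustration with made-up scores and a threshold of 0.5 (the numbers are not from the repo):

results = [{"score": 0.82}, {"score": 0.61}, {"score": 0.34}]
relevance_threshold = 0.5

# Old gate: all-or-nothing on the top hit. Since 0.82 >= 0.5, every chunk is
# kept, including the weak 0.34 one; had the top hit scored below 0.5, all
# three would have been dropped.
old = [] if results and results[0]["score"] < relevance_threshold else results

# New gate: each result must clear the threshold on its own.
new = [r for r in results if r["score"] >= relevance_threshold]
# -> keeps the 0.82 and 0.61 chunks, drops 0.34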
 
ui/.gitignore CHANGED
@@ -22,3 +22,4 @@ dist-ssr
 *.njsproj
 *.sln
 *.sw?
+.vercel
ui/src/App.jsx CHANGED
@@ -11,9 +11,18 @@ export default function App() {
   const [input, setInput] = useState("");
   const [streaming, setStreaming] = useState(false);
 
-  const bottomRef = useRef(null);
-  const scrollRef = useRef(null);
-  const stopStream = useRef(null); // cleanup fn for active SSE
+  const bottomRef = useRef(null);
+  const scrollRef = useRef(null);
+  const textareaRef = useRef(null);
+  const stopStream = useRef(null); // cleanup fn for active SSE
+
+  // Auto-grow textarea as user types
+  useEffect(() => {
+    const el = textareaRef.current;
+    if (!el) return;
+    el.style.height = "auto";
+    el.style.height = `${el.scrollHeight}px`;
+  }, [input]);
 
   // Load repos on mount
   const loadRepos = useCallback(async () => {
@@ -159,6 +168,7 @@ export default function App() {
       {/* Input */}
       <div className="input-bar">
         <textarea
+          ref={textareaRef}
           rows={1}
           placeholder={placeholder}
           value={input}
ui/src/api.js CHANGED
@@ -28,8 +28,13 @@ export async function deleteRepo(slug) {
 
 /**
  * Stream a query response via SSE.
- * Calls onToken(token) for each chunk, onSources(sources) when done,
- * and onDone(queryType) at the end.
+ *
+ * The server sends two event types:
+ *   event: meta  → JSON with { sources, query_type } (arrives before tokens)
+ *   (default)    → token text, or "[DONE]" to signal completion
+ *
+ * This avoids the previous double-LLM-call pattern where we fired both
+ * POST /query and GET /query/stream simultaneously. Now one connection does both.
  */
 export function streamQuery({ question, repo, mode, onToken, onSources, onDone, onError }) {
   const params = new URLSearchParams({
@@ -39,34 +44,21 @@ export function streamQuery({ question, repo, mode, onToken, onSources, onDone, onError }) {
     ...(repo ? { repo } : {}),
   });
 
-  // First fetch sources via POST /query (non-streaming) to get structured data,
-  // then stream the answer via GET /query/stream for the text tokens.
-  // We run both in parallel — sources arrive slightly later but the stream starts immediately.
-
-  let queryType = "technical";
-
-  // Kick off the source fetch
-  fetch(`${BASE}/query`, {
-    method: "POST",
-    headers: { "Content-Type": "application/json" },
-    body: JSON.stringify({ question, repo: repo || null, mode: mode || "hybrid", top_k: 6 }),
-  })
-    .then((r) => r.json())
-    .then((data) => {
-      onSources(data.sources || [], data.query_type || "technical");
-    })
-    .catch(() => onSources([], "technical"));
-
-  // Stream the answer tokens
   const es = new EventSource(`${BASE}/query/stream?${params}`);
 
+  // Named event: sources + query_type arrive in the first frame
+  es.addEventListener("meta", (e) => {
+    const { sources, query_type } = JSON.parse(e.data);
+    onSources(sources || [], query_type || "technical");
+  });
+
+  // Default events: token text
   es.onmessage = (e) => {
     if (e.data === "[DONE]") {
       es.close();
-      onDone(queryType);
+      onDone();
       return;
     }
-    // Unescape newlines that were escaped server-side
     const token = e.data.replace(/\\n/g, "\n");
     onToken(token);
   };
@@ -76,5 +68,5 @@ export function streamQuery({ question, repo, mode, onToken, onSources, onDone, onError }) {
     onError("Connection lost");
   };
 
-  return () => es.close(); // return cleanup fn
+  return () => es.close();
 }