Fix 5 bugs found in expert review
- ingestion_service.py: NameError on 'filtered' → use raw_files (would crash every successful ingest)
- retrieval.py: relevance_threshold was all-or-nothing → now filters per result
- generation.py: Groq/Anthropic client created per-request → cached as self._client
- main.py + api.js: double LLM call (POST /query + GET /query/stream) eliminated
- stream endpoint now emits 'event: meta' with sources+query_type before tokens
- frontend listens with es.addEventListener('meta', ...) instead of second fetch
- App.jsx: textarea auto-grows with content (ref + scrollHeight effect)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- backend/main.py +11 -2
- backend/services/generation.py +13 -15
- backend/services/ingestion_service.py +2 -2
- retrieval/retrieval.py +4 -4
- ui/.gitignore +1 -0
- ui/src/App.jsx +13 -3
- ui/src/api.js +16 -24
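
Before the per-file diffs, a minimal end-to-end sketch of the new single-connection contract from the client's side. This is illustrative only: it assumes a server at http://localhost:8000, passes only the question query parameter, and simplifies the SSE parser to this server's one-data-line-per-event framing.

import json
import requests

def consume_stream(question: str, base: str = "http://localhost:8000"):
    resp = requests.get(f"{base}/query/stream", params={"question": question}, stream=True)
    event_type = "message"  # SSE default when no "event:" field precedes the data
    for raw in resp.iter_lines(decode_unicode=True):
        if raw.startswith("event: "):
            event_type = raw[len("event: "):]
        elif raw.startswith("data: "):
            data = raw[len("data: "):]
            if event_type == "meta":
                meta = json.loads(data)  # {"sources": [...], "query_type": "..."}
                print(f"{len(meta['sources'])} sources, type={meta['query_type']}")
            elif data == "[DONE]":
                break
            else:
                print(data.replace("\\n", "\n"), end="")  # undo server-side escaping
            event_type = "message"  # reset once the event's data is dispatched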
backend/main.py
CHANGED
@@ -300,9 +300,18 @@ async def query_stream(
     context = retrieval_svc.format_context(results)
 
     def token_stream():
+        import json
+        # First event: send sources + query_type as structured JSON so the
+        # frontend gets everything in one SSE connection (no second POST /query call).
+        meta = {
+            "sources": [CodeChunk(**r).model_dump() for r in results],
+            "query_type": query_type,
+        }
+        yield f"event: meta\ndata: {json.dumps(meta)}\n\n"
+
+        # Subsequent events: stream tokens
         for token in generation_svc.stream(question, context, query_type):
-            # Escape newlines
-            # A bare newline in the token would split the event prematurely.
+            # Escape newlines — SSE uses \n\n as event delimiter
             safe_token = token.replace("\n", "\\n")
             yield f"data: {safe_token}\n\n"
         yield "data: [DONE]\n\n"
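The escaping above is load-bearing: a blank line is the SSE event delimiter, so a bare newline inside a token would terminate the event early. A round-trip sketch of the scheme, matching the replace calls on both sides:

token = "def foo():\n    pass"
wire = token.replace("\n", "\\n")      # server side: token becomes one physical line
assert "\n" not in wire
restored = wire.replace("\\n", "\n")   # client side: api.js does the same via regex
assert restored == token

One known trade-off: a token that legitimately contains the two literal characters backslash-n is indistinguishable from an escaped newline after transport.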
backend/services/generation.py
CHANGED
@@ -140,13 +140,19 @@ class GenerationService:
         self.provider = self._init_provider()
 
     def _init_provider(self) -> str:
-        """Pick Groq or Anthropic"""
+        """Pick Groq or Anthropic, and create the client once for reuse.
+
+        Creating a client per-request wastes resources — each instantiation
+        sets up an httpx session. We store it on self and reuse across all calls.
+        """
         if settings.groq_api_key:
-
+            from groq import Groq
+            self._client = Groq(api_key=settings.groq_api_key)
             print("Generation: using Groq (llama-3.3-70b-versatile)")
             return "groq"
         elif settings.anthropic_api_key:
-            import anthropic
+            import anthropic
+            self._client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
             print("Generation: using Anthropic (claude-haiku-4-5)")
             return "anthropic"
         else:
@@ -200,9 +206,7 @@ class GenerationService:
     # ── Groq implementation ────────────────────────────────────────────────────
 
     def _groq_complete(self, system: str, prompt: str, params: dict) -> str:
-
-        client = Groq(api_key=settings.groq_api_key)
-        response = client.chat.completions.create(
+        response = self._client.chat.completions.create(
             model="llama-3.3-70b-versatile",
             messages=[
                 {"role": "system", "content": system},
@@ -214,9 +218,7 @@ class GenerationService:
         return response.choices[0].message.content
 
     def _groq_stream(self, system: str, prompt: str, params: dict) -> Iterator[str]:
-
-        client = Groq(api_key=settings.groq_api_key)
-        stream = client.chat.completions.create(
+        stream = self._client.chat.completions.create(
             model="llama-3.3-70b-versatile",
             messages=[
                 {"role": "system", "content": system},
@@ -234,9 +236,7 @@ class GenerationService:
     # ── Anthropic implementation ───────────────────────────────────────────────
 
     def _anthropic_complete(self, system: str, prompt: str, params: dict) -> str:
-
-        client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
-        response = client.messages.create(
+        response = self._client.messages.create(
             model="claude-haiku-4-5-20251001",
             system=system,
             messages=[{"role": "user", "content": prompt}],
@@ -246,9 +246,7 @@ class GenerationService:
         return response.content[0].text
 
     def _anthropic_stream(self, system: str, prompt: str, params: dict) -> Iterator[str]:
-
-        client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
-        with client.messages.stream(
+        with self._client.messages.stream(
             model="claude-haiku-4-5-20251001",
             system=system,
             messages=[{"role": "user", "content": prompt}],
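The docstring added above states the rationale; in miniature, the cost being avoided looks like this. An illustrative sketch only, since both SDKs wrap an httpx client under the hood; api.example.com is a placeholder, not a real endpoint.

import httpx

def per_request_call():
    # Old pattern: a fresh client, and therefore a fresh connection pool,
    # on every request, so each call pays TCP + TLS setup again.
    with httpx.Client() as client:
        return client.get("https://api.example.com/ping")

shared = httpx.Client()  # new pattern: one pool for the process lifetime

def cached_call():
    # Reuses keep-alive connections from the shared pool.
    return shared.get("https://api.example.com/ping")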
backend/services/ingestion_service.py
CHANGED
@@ -115,13 +115,13 @@ class IngestionService:
         total_stored = self.store.count(repo=repo_slug)
         message = (
             f"Ingested {repo_slug}: "
-            f"{len(filtered)} files → {len(chunks)} chunks → {total_stored} total stored"
+            f"{len(raw_files)} files → {len(chunks)} chunks → {total_stored} total stored"
         )
         print(f"\n✓ {message}")
 
         return {
             "repo": repo_slug,
-            "files_indexed": len(filtered),
+            "files_indexed": len(raw_files),
             "chunks_stored": len(chunks),
             "message": message,
         }
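Why this crashed only at the end of an ingest: f-string expressions are evaluated when the line runs, so the stale name passed import time and every earlier step. A tiny repro of the bug class, with illustrative names:

raw_files = ["a.py", "b.py"]
try:
    message = f"{len(filtered)} files"  # 'filtered' no longer exists at this point
except NameError as e:
    print(e)  # name 'filtered' is not defined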
retrieval/retrieval.py
CHANGED
@@ -105,10 +105,10 @@ class RetrievalService:
         else:
             results = self._hybrid_search(query, top_k, qdrant_filter)
 
-        #
-
-
-
+        # Per-result relevance gate — filter out low-scoring chunks individually.
+        # Only applied when no repo_filter (user explicitly chose a repo means any score is valid).
+        if relevance_threshold > 0 and not repo_filter:
+            results = [r for r in results if r["score"] >= relevance_threshold]
 
         return results
 
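The behavioral difference in miniature. Scores are invented, and the old gate is shown in one plausible all-or-nothing form, since only the new per-result form appears in the diff:

results = [{"score": 0.91}, {"score": 0.42}, {"score": 0.88}]
threshold = 0.5

# Old: one verdict for the whole batch, so a single weak chunk discards everything.
old = results if all(r["score"] >= threshold for r in results) else []

# New: each chunk stands on its own score.
new = [r for r in results if r["score"] >= threshold]

assert old == []      # the 0.42 chunk sank the strong ones too
assert len(new) == 2  # the two relevant chunks survive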
ui/.gitignore
CHANGED
@@ -22,3 +22,4 @@ dist-ssr
 *.njsproj
 *.sln
 *.sw?
+.vercel
ui/src/App.jsx
CHANGED
@@ -11,9 +11,18 @@ export default function App() {
   const [input, setInput] = useState("");
   const [streaming, setStreaming] = useState(false);
 
-  const bottomRef
-  const scrollRef
-  const
+  const bottomRef = useRef(null);
+  const scrollRef = useRef(null);
+  const textareaRef = useRef(null);
+  const stopStream = useRef(null); // cleanup fn for active SSE
+
+  // Auto-grow textarea as user types
+  useEffect(() => {
+    const el = textareaRef.current;
+    if (!el) return;
+    el.style.height = "auto";
+    el.style.height = `${el.scrollHeight}px`;
+  }, [input]);
 
   // Load repos on mount
   const loadRepos = useCallback(async () => {
@@ -159,6 +168,7 @@ export default function App() {
         {/* Input */}
         <div className="input-bar">
           <textarea
+            ref={textareaRef}
             rows={1}
             placeholder={placeholder}
             value={input}
ui/src/api.js
CHANGED
@@ -28,8 +28,13 @@ export async function deleteRepo(slug) {
 
 /**
  * Stream a query response via SSE.
- *
- *
+ *
+ * The server sends two event types:
+ *   event: meta  → JSON with { sources, query_type } (arrives before tokens)
+ *   (default)    → token text, or "[DONE]" to signal completion
+ *
+ * This avoids the previous double-LLM-call pattern where we fired both
+ * POST /query and GET /query/stream simultaneously. Now one connection does both.
  */
 export function streamQuery({ question, repo, mode, onToken, onSources, onDone, onError }) {
   const params = new URLSearchParams({
@@ -39,34 +44,21 @@ export function streamQuery({ question, repo, mode, onToken, onSources, onDone,
     ...(repo ? { repo } : {}),
   });
 
-  // First fetch sources via POST /query (non-streaming) to get structured data,
-  // then stream the answer via GET /query/stream for the text tokens.
-  // We run both in parallel — sources arrive slightly later but the stream starts immediately.
-
-  let queryType = "technical";
-
-  // Kick off the source fetch
-  fetch(`${BASE}/query`, {
-    method: "POST",
-    headers: { "Content-Type": "application/json" },
-    body: JSON.stringify({ question, repo: repo || null, mode: mode || "hybrid", top_k: 6 }),
-  })
-    .then((r) => r.json())
-    .then((data) => {
-      onSources(data.sources || [], data.query_type || "technical");
-    })
-    .catch(() => onSources([], "technical"));
-
-  // Stream the answer tokens
   const es = new EventSource(`${BASE}/query/stream?${params}`);
 
+  // Named event: sources + query_type arrive in the first frame
+  es.addEventListener("meta", (e) => {
+    const { sources, query_type } = JSON.parse(e.data);
+    onSources(sources || [], query_type || "technical");
+  });
+
+  // Default events: token text
   es.onmessage = (e) => {
     if (e.data === "[DONE]") {
       es.close();
-      onDone(
+      onDone();
       return;
     }
-    // Unescape newlines that were escaped server-side
     const token = e.data.replace(/\\n/g, "\n");
     onToken(token);
   };
@@ -76,5 +68,5 @@ export function streamQuery({ question, repo, mode, onToken, onSources, onDone,
     onError("Connection lost");
   };
 
-  return () => es.close();
+  return () => es.close();
 }