Spaces:
Running
Phase 5: MCP server + Agentic RAG (ReAct loop)
Browse filesBackend:
- backend/services/agent.py — AgentService with ReAct loop using Anthropic
tool use. Detailed docstrings explain agent vs plain RAG, tool use message
format, and stopping conditions. run() + stream() methods.
- backend/main.py — /agent/query (sync) and /agent/stream (SSE) endpoints.
AgentService initialised at startup only when ANTHROPIC_API_KEY is set.
- backend/models/schemas.py — AgentRequest, AgentResponse, AgentToolCall schemas.
MCP server:
- mcp_server/server.py — Full MCP server exposing RAG as MCP primitives.
Tools: search_code, list_repos, get_file_chunk, find_callers.
Resources: indexed repos as rag://repos/owner/name URIs.
Prompts: explain-function, repo-overview (slash commands in Claude Desktop).
Docstring explains MCP from scratch: primitives, transports, why it exists.
UI:
- Agent mode toggle in sidebar (RAG vs Agent …)
- Live tool-call trace: each search step appears as the agent runs, with spinner
on the in-flight call. Collapses to a toggle after completion.
- api.js — streamAgentQuery() with tool_call/tool_result/done SSE events.
- index.css — .agent-trace.live with accent border, .agent-step.pending/.done.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- backend/main.py +123 -1
- backend/models/schemas.py +53 -0
- backend/services/agent.py +400 -0
- mcp_server/__init__.py +0 -0
- mcp_server/server.py +499 -0
- requirements.txt +1 -0
- ui/src/App.jsx +98 -41
- ui/src/api.js +63 -0
- ui/src/components/Message.jsx +84 -4
- ui/src/components/Sidebar.jsx +36 -12
- ui/src/index.css +106 -0
|
@@ -30,6 +30,8 @@ Endpoints:
|
|
| 30 |
POST /search β retrieve chunks (no generation)
|
| 31 |
POST /query β RAG: retrieve + generate answer
|
| 32 |
GET /query/stream β RAG with streaming SSE response
|
|
|
|
|
|
|
| 33 |
"""
|
| 34 |
|
| 35 |
from contextlib import asynccontextmanager
|
|
@@ -44,10 +46,12 @@ from backend.models.schemas import (
|
|
| 44 |
SearchRequest, SearchResponse, CodeChunk,
|
| 45 |
QueryRequest, QueryResponse,
|
| 46 |
ReposResponse, RepoInfo,
|
|
|
|
| 47 |
)
|
| 48 |
from backend.config import settings
|
| 49 |
from backend.services.ingestion_service import IngestionService
|
| 50 |
from backend.services.generation import GenerationService, classify_query
|
|
|
|
| 51 |
from retrieval.retrieval import RetrievalService
|
| 52 |
|
| 53 |
|
|
@@ -58,6 +62,7 @@ from retrieval.retrieval import RetrievalService
|
|
| 58 |
_ingestion_service: IngestionService | None = None
|
| 59 |
_retrieval_service: RetrievalService | None = None
|
| 60 |
_generation_service: GenerationService | None = None
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
@asynccontextmanager
|
|
@@ -71,11 +76,18 @@ async def lifespan(app: FastAPI):
|
|
| 71 |
Loading models here (not at import time) means startup errors are visible
|
| 72 |
in the server log, not buried in a traceback from a module-level call.
|
| 73 |
"""
|
| 74 |
-
global _ingestion_service, _retrieval_service, _generation_service
|
| 75 |
print("Starting up β loading models and connecting to Qdrant...")
|
| 76 |
_ingestion_service = IngestionService()
|
| 77 |
_retrieval_service = RetrievalService()
|
| 78 |
_generation_service = GenerationService()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
print("All services ready.\n")
|
| 80 |
yield
|
| 81 |
# Cleanup on shutdown (if needed) goes here
|
|
@@ -130,6 +142,14 @@ def get_generation_service() -> GenerationService:
|
|
| 130 |
raise RuntimeError("GenerationService not initialised")
|
| 131 |
return _generation_service
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
# ββ Routes: Ingestion ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 135 |
|
|
@@ -319,6 +339,108 @@ async def query_stream(
|
|
| 319 |
return StreamingResponse(token_stream(), media_type="text/event-stream")
|
| 320 |
|
| 321 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
# ββ Health check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 323 |
|
| 324 |
@app.get("/health", tags=["meta"])
|
|
|
|
| 30 |
POST /search β retrieve chunks (no generation)
|
| 31 |
POST /query β RAG: retrieve + generate answer
|
| 32 |
GET /query/stream β RAG with streaming SSE response
|
| 33 |
+
POST /agent/query β Agentic RAG: ReAct loop (synchronous)
|
| 34 |
+
GET /agent/stream β Agentic RAG: ReAct loop with SSE progress stream
|
| 35 |
"""
|
| 36 |
|
| 37 |
from contextlib import asynccontextmanager
|
|
|
|
| 46 |
SearchRequest, SearchResponse, CodeChunk,
|
| 47 |
QueryRequest, QueryResponse,
|
| 48 |
ReposResponse, RepoInfo,
|
| 49 |
+
AgentRequest, AgentResponse, AgentToolCall,
|
| 50 |
)
|
| 51 |
from backend.config import settings
|
| 52 |
from backend.services.ingestion_service import IngestionService
|
| 53 |
from backend.services.generation import GenerationService, classify_query
|
| 54 |
+
from backend.services.agent import AgentService
|
| 55 |
from retrieval.retrieval import RetrievalService
|
| 56 |
|
| 57 |
|
|
|
|
| 62 |
_ingestion_service: IngestionService | None = None
|
| 63 |
_retrieval_service: RetrievalService | None = None
|
| 64 |
_generation_service: GenerationService | None = None
|
| 65 |
+
_agent_service: AgentService | None = None
|
| 66 |
|
| 67 |
|
| 68 |
@asynccontextmanager
|
|
|
|
| 76 |
Loading models here (not at import time) means startup errors are visible
|
| 77 |
in the server log, not buried in a traceback from a module-level call.
|
| 78 |
"""
|
| 79 |
+
global _ingestion_service, _retrieval_service, _generation_service, _agent_service
|
| 80 |
print("Starting up β loading models and connecting to Qdrant...")
|
| 81 |
_ingestion_service = IngestionService()
|
| 82 |
_retrieval_service = RetrievalService()
|
| 83 |
_generation_service = GenerationService()
|
| 84 |
+
# AgentService is optional β only initialised when ANTHROPIC_API_KEY is set.
|
| 85 |
+
# If no key, the /agent/* endpoints return a clear error rather than crashing.
|
| 86 |
+
if settings.anthropic_api_key:
|
| 87 |
+
_agent_service = AgentService(_retrieval_service)
|
| 88 |
+
print("AgentService ready (agentic RAG enabled).")
|
| 89 |
+
else:
|
| 90 |
+
print("No ANTHROPIC_API_KEY β /agent/* endpoints disabled.")
|
| 91 |
print("All services ready.\n")
|
| 92 |
yield
|
| 93 |
# Cleanup on shutdown (if needed) goes here
|
|
|
|
| 142 |
raise RuntimeError("GenerationService not initialised")
|
| 143 |
return _generation_service
|
| 144 |
|
| 145 |
+
def get_agent_service() -> AgentService:
|
| 146 |
+
if _agent_service is None:
|
| 147 |
+
raise HTTPException(
|
| 148 |
+
status_code=503,
|
| 149 |
+
detail="Agentic RAG requires ANTHROPIC_API_KEY β not configured on this server.",
|
| 150 |
+
)
|
| 151 |
+
return _agent_service
|
| 152 |
+
|
| 153 |
|
| 154 |
# ββ Routes: Ingestion ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 155 |
|
|
|
|
| 339 |
return StreamingResponse(token_stream(), media_type="text/event-stream")
|
| 340 |
|
| 341 |
|
| 342 |
+
# ββ Routes: Agentic RAG ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 343 |
+
#
|
| 344 |
+
# These endpoints wrap AgentService, which runs a ReAct loop:
|
| 345 |
+
# question β think β search β observe β think β search β ... β answer
|
| 346 |
+
#
|
| 347 |
+
# Why two endpoints?
|
| 348 |
+
# POST /agent/query β synchronous. Wait for the full answer, return JSON.
|
| 349 |
+
# Simple to integrate, but slow (the whole loop runs first).
|
| 350 |
+
# GET /agent/stream β streaming SSE. Watch the agent's thinking in real time.
|
| 351 |
+
# Shows each tool call as it happens (like watching AI think).
|
| 352 |
+
#
|
| 353 |
+
# The streaming endpoint is the "wow" version β users can see the agent reasoning
|
| 354 |
+
# live: "Searching for backward()... found engine.py... now looking for callers..."
|
| 355 |
+
|
| 356 |
+
@app.post("/agent/query", response_model=AgentResponse, tags=["agent"])
|
| 357 |
+
async def agent_query(
|
| 358 |
+
request: AgentRequest,
|
| 359 |
+
agent_svc: Annotated[AgentService, Depends(get_agent_service)],
|
| 360 |
+
):
|
| 361 |
+
"""
|
| 362 |
+
Run the agentic RAG loop synchronously.
|
| 363 |
+
|
| 364 |
+
The agent searches the codebase multiple times, from different angles,
|
| 365 |
+
until it has enough evidence to answer confidently. Returns the full
|
| 366 |
+
reasoning trace (tool_calls) alongside the answer.
|
| 367 |
+
|
| 368 |
+
Slower than /query but more thorough β the agent decides what to search,
|
| 369 |
+
not a fixed single retrieval. Best for complex multi-hop questions like
|
| 370 |
+
"how does the training loop interact with the optimizer?" that require
|
| 371 |
+
understanding how multiple pieces connect.
|
| 372 |
+
"""
|
| 373 |
+
try:
|
| 374 |
+
result = agent_svc.run(request.question, repo_filter=request.repo)
|
| 375 |
+
return AgentResponse(
|
| 376 |
+
answer=result["answer"],
|
| 377 |
+
tool_calls=[AgentToolCall(**tc) for tc in result["tool_calls"]],
|
| 378 |
+
iterations=result["iterations"],
|
| 379 |
+
)
|
| 380 |
+
except Exception as e:
|
| 381 |
+
raise HTTPException(status_code=500, detail=f"Agent error: {e}")
|
| 382 |
+
|
| 383 |
+
|
| 384 |
+
@app.get("/agent/stream", tags=["agent"])
|
| 385 |
+
async def agent_stream(
|
| 386 |
+
question: Annotated[str, Query(description="Question about the codebase")],
|
| 387 |
+
agent_svc: Annotated[AgentService, Depends(get_agent_service)],
|
| 388 |
+
repo: str | None = None,
|
| 389 |
+
):
|
| 390 |
+
"""
|
| 391 |
+
Run the agentic RAG loop with real-time SSE progress streaming.
|
| 392 |
+
|
| 393 |
+
Unlike /query/stream (which just streams tokens), this endpoint lets you
|
| 394 |
+
watch the agent's full reasoning process as it happens:
|
| 395 |
+
|
| 396 |
+
event: tool_call β agent is about to call a tool (shows name + args)
|
| 397 |
+
event: tool_result β tool returned, agent is reading the result
|
| 398 |
+
(default event) β text token of the final answer
|
| 399 |
+
event: done β agent finished (includes iteration count)
|
| 400 |
+
|
| 401 |
+
This is the "glass box" view of the agent β users can see exactly what
|
| 402 |
+
it searched for and what it found, not just the final answer. Critical
|
| 403 |
+
for trust and debugging in production RAG systems.
|
| 404 |
+
|
| 405 |
+
SSE event format for each type:
|
| 406 |
+
event: tool_call
|
| 407 |
+
data: {"tool": "search_code", "input": {"query": "backward pass"}}
|
| 408 |
+
|
| 409 |
+
event: tool_result
|
| 410 |
+
data: {"tool": "search_code", "output": "Source 1: engine.py..."}
|
| 411 |
+
|
| 412 |
+
(default)
|
| 413 |
+
data: According to the code...
|
| 414 |
+
|
| 415 |
+
event: done
|
| 416 |
+
data: {"iterations": 3}
|
| 417 |
+
"""
|
| 418 |
+
import json
|
| 419 |
+
|
| 420 |
+
def event_stream():
|
| 421 |
+
for event in agent_svc.stream(question, repo_filter=repo):
|
| 422 |
+
etype = event["type"]
|
| 423 |
+
|
| 424 |
+
if etype == "tool_call":
|
| 425 |
+
payload = json.dumps({"tool": event["tool"], "input": event["input"]})
|
| 426 |
+
yield f"event: tool_call\ndata: {payload}\n\n"
|
| 427 |
+
|
| 428 |
+
elif etype == "tool_result":
|
| 429 |
+
payload = json.dumps({"tool": event["tool"], "output": event["output"]})
|
| 430 |
+
yield f"event: tool_result\ndata: {payload}\n\n"
|
| 431 |
+
|
| 432 |
+
elif etype == "token":
|
| 433 |
+
safe = event["text"].replace("\n", "\\n")
|
| 434 |
+
yield f"data: {safe}\n\n"
|
| 435 |
+
|
| 436 |
+
elif etype == "done":
|
| 437 |
+
payload = json.dumps({"iterations": event["iterations"]})
|
| 438 |
+
yield f"event: done\ndata: {payload}\n\n"
|
| 439 |
+
yield "data: [DONE]\n\n"
|
| 440 |
+
|
| 441 |
+
return StreamingResponse(event_stream(), media_type="text/event-stream")
|
| 442 |
+
|
| 443 |
+
|
| 444 |
# ββ Health check βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 445 |
|
| 446 |
@app.get("/health", tags=["meta"])
|
|
@@ -133,3 +133,56 @@ class ReposResponse(BaseModel):
|
|
| 133 |
"""Response from GET /repos β list all indexed repos."""
|
| 134 |
repos: list[RepoInfo]
|
| 135 |
total_chunks: int
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
"""Response from GET /repos β list all indexed repos."""
|
| 134 |
repos: list[RepoInfo]
|
| 135 |
total_chunks: int
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# ββ Agent (Agentic RAG) βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 139 |
+
#
|
| 140 |
+
# These schemas describe the agent's inputs and outputs.
|
| 141 |
+
#
|
| 142 |
+
# The key difference from plain RAG:
|
| 143 |
+
# Plain RAG: one retrieval β one answer (deterministic, fast)
|
| 144 |
+
# Agentic RAG: N retrievals β N observations β one answer (adaptive, slower)
|
| 145 |
+
#
|
| 146 |
+
# The tool_calls list is the "trace" β a record of every search the agent made
|
| 147 |
+
# and what it found. This is the crucial insight that makes agents explainable:
|
| 148 |
+
# you can see exactly WHY the agent answered what it did, step by step.
|
| 149 |
+
|
| 150 |
+
class AgentToolCall(BaseModel):
|
| 151 |
+
"""
|
| 152 |
+
A single tool call made by the agent during its ReAct loop.
|
| 153 |
+
|
| 154 |
+
This is one step in the agent's reasoning trace:
|
| 155 |
+
- tool: which tool it called (search_code, get_file_chunk, find_callers)
|
| 156 |
+
- input: what arguments it passed (shows WHAT it was looking for)
|
| 157 |
+
- output: truncated result (shows WHAT it found)
|
| 158 |
+
|
| 159 |
+
The sequence of these calls tells the story of the agent's reasoning.
|
| 160 |
+
"""
|
| 161 |
+
tool: str # tool name
|
| 162 |
+
input: dict # arguments passed to the tool
|
| 163 |
+
output: str # first 500 chars of the result (truncated for display)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
class AgentRequest(BaseModel):
|
| 167 |
+
"""Request body for POST /agent/query β run the agentic RAG loop."""
|
| 168 |
+
question: str = Field(..., description="Question about the codebase")
|
| 169 |
+
repo: Optional[str] = Field(
|
| 170 |
+
default=None,
|
| 171 |
+
description="Restrict search to a specific repo slug (e.g. 'karpathy/micrograd')",
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
class AgentResponse(BaseModel):
|
| 176 |
+
"""
|
| 177 |
+
Response from POST /agent/query.
|
| 178 |
+
|
| 179 |
+
In addition to the answer, we return:
|
| 180 |
+
- tool_calls: the agent's full reasoning trace (what it searched + found)
|
| 181 |
+
- iterations: how many ReAct steps it took (capped at MAX_ITERATIONS=8)
|
| 182 |
+
|
| 183 |
+
This transparency is intentional β it shows users HOW the agent reasoned,
|
| 184 |
+
not just what it concluded. Makes debugging and trust much easier.
|
| 185 |
+
"""
|
| 186 |
+
answer: str
|
| 187 |
+
tool_calls: list[AgentToolCall]
|
| 188 |
+
iterations: int
|
|
@@ -0,0 +1,400 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
agent.py β Agentic RAG using Anthropic tool use.
|
| 3 |
+
|
| 4 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 5 |
+
WHAT IS AN AGENT? (vs plain RAG)
|
| 6 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 7 |
+
|
| 8 |
+
Plain RAG (what we had before):
|
| 9 |
+
Query β single retrieval β LLM β answer
|
| 10 |
+
|
| 11 |
+
The problem: one retrieval step may miss critical context.
|
| 12 |
+
"How does the training loop work?" retrieves train() but misses
|
| 13 |
+
the DataLoader, the gradient accumulation, the optimizer step.
|
| 14 |
+
One shot, then done.
|
| 15 |
+
|
| 16 |
+
Agentic RAG (what we're building):
|
| 17 |
+
Query β think β search β observe β think β search β observe β answer
|
| 18 |
+
|
| 19 |
+
The LLM DECIDES when it has enough information.
|
| 20 |
+
It can call tools multiple times, from different angles,
|
| 21 |
+
until it's confident in its answer.
|
| 22 |
+
|
| 23 |
+
This is called a ReAct loop (Reason + Act):
|
| 24 |
+
1. REASON: "I need to find the backward() implementation"
|
| 25 |
+
2. ACT: call search_code("backward implementation")
|
| 26 |
+
3. OBSERVE: "I see relu._backward, but not the main backward()"
|
| 27 |
+
4. REASON: "Let me search specifically for Value.backward"
|
| 28 |
+
5. ACT: call find_callers("backward")
|
| 29 |
+
6. OBSERVE: "Found it β it does topological sort first"
|
| 30 |
+
7. REASON: "I have enough to answer"
|
| 31 |
+
8. RESPOND: full answer with citations
|
| 32 |
+
|
| 33 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 34 |
+
HOW ANTHROPIC TOOL USE WORKS
|
| 35 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 36 |
+
|
| 37 |
+
Normal message:
|
| 38 |
+
You β [message] β Claude β [answer text]
|
| 39 |
+
|
| 40 |
+
With tools:
|
| 41 |
+
You β [message + tool_definitions] β Claude
|
| 42 |
+
β either: [answer text] (done, no tools needed)
|
| 43 |
+
β or: [tool_use block] (Claude wants to call a tool)
|
| 44 |
+
You run the tool β [tool_result] β Claude
|
| 45 |
+
β either: [answer text]
|
| 46 |
+
β or: [another tool_use block]
|
| 47 |
+
... repeat until Claude returns text
|
| 48 |
+
|
| 49 |
+
The conversation history grows:
|
| 50 |
+
messages = [
|
| 51 |
+
{"role": "user", "content": "How does backward() work?"},
|
| 52 |
+
{"role": "assistant", "content": [{"type": "tool_use", "name": "search_code", ...}]},
|
| 53 |
+
{"role": "user", "content": [{"type": "tool_result", "tool_use_id": "...", "content": "..."}]},
|
| 54 |
+
{"role": "assistant", "content": [{"type": "tool_use", "name": "find_callers", ...}]},
|
| 55 |
+
{"role": "user", "content": [{"type": "tool_result", "tool_use_id": "...", "content": "..."}]},
|
| 56 |
+
{"role": "assistant", "content": "According to Source 4, backward() works by..."},
|
| 57 |
+
]
|
| 58 |
+
|
| 59 |
+
The key insight: tool results are fed back as "user" messages.
|
| 60 |
+
The model never "runs" the tool β YOU do, and report back.
|
| 61 |
+
|
| 62 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 63 |
+
STOPPING CONDITIONS
|
| 64 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 65 |
+
|
| 66 |
+
The loop ends when:
|
| 67 |
+
1. Claude returns stop_reason="end_turn" (it's satisfied)
|
| 68 |
+
2. We hit max_iterations (safety cap β prevents infinite loops)
|
| 69 |
+
3. Claude returns text with no tool calls (it has its answer)
|
| 70 |
+
|
| 71 |
+
We cap at 8 iterations. Each iteration is one Claude API call + one
|
| 72 |
+
tool execution. This bounds cost and latency while allowing real
|
| 73 |
+
multi-hop reasoning (most questions need 2β4 hops).
|
| 74 |
+
"""
|
| 75 |
+
|
| 76 |
+
import json
|
| 77 |
+
from pathlib import Path
|
| 78 |
+
from typing import Iterator
|
| 79 |
+
import sys
|
| 80 |
+
|
| 81 |
+
import requests as http_requests
|
| 82 |
+
|
| 83 |
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
| 84 |
+
from backend.config import settings
|
| 85 |
+
from retrieval.retrieval import RetrievalService
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# ββ Tool definitions (Anthropic format) βββββββββββββββββββββββββββββββββββββββ
|
| 89 |
+
# These are the same tools as the MCP server but defined in Anthropic's
|
| 90 |
+
# tool schema format. Same capabilities, different wire format.
|
| 91 |
+
#
|
| 92 |
+
# Notice the pattern: name, description (LLM reads this!), input_schema.
|
| 93 |
+
# The description tells the LLM WHEN to use the tool. Write it like a
|
| 94 |
+
# docstring for the model's benefit, not yours.
|
| 95 |
+
|
| 96 |
+
TOOLS = [
|
| 97 |
+
{
|
| 98 |
+
"name": "search_code",
|
| 99 |
+
"description": (
|
| 100 |
+
"Search the indexed GitHub repositories for code relevant to a query. "
|
| 101 |
+
"Uses hybrid BM25 + semantic search. Returns ranked code chunks with "
|
| 102 |
+
"file paths, function names, and line numbers. "
|
| 103 |
+
"Call this first when answering any question about the codebase. "
|
| 104 |
+
"You can call it multiple times with different queries to explore different aspects."
|
| 105 |
+
),
|
| 106 |
+
"input_schema": {
|
| 107 |
+
"type": "object",
|
| 108 |
+
"properties": {
|
| 109 |
+
"query": {"type": "string", "description": "What to search for"},
|
| 110 |
+
"repo": {"type": "string", "description": "Optional: 'owner/repo' to restrict search"},
|
| 111 |
+
"mode": {
|
| 112 |
+
"type": "string",
|
| 113 |
+
"enum": ["hybrid", "semantic", "keyword"],
|
| 114 |
+
"description": "hybrid=default, keyword=exact identifiers, semantic=concepts",
|
| 115 |
+
},
|
| 116 |
+
"top_k": {"type": "integer", "description": "Number of results (default 5)"},
|
| 117 |
+
},
|
| 118 |
+
"required": ["query"],
|
| 119 |
+
},
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"name": "get_file_chunk",
|
| 123 |
+
"description": (
|
| 124 |
+
"Fetch the raw content of a specific section of a file from GitHub. "
|
| 125 |
+
"Use this when a search result shows a function but you need more context: "
|
| 126 |
+
"the lines above (docstring, decorators) or below (what comes after). "
|
| 127 |
+
"Also useful to see the full class when search only returned one method."
|
| 128 |
+
),
|
| 129 |
+
"input_schema": {
|
| 130 |
+
"type": "object",
|
| 131 |
+
"properties": {
|
| 132 |
+
"repo": {"type": "string", "description": "'owner/repo'"},
|
| 133 |
+
"filepath": {"type": "string", "description": "path within the repo"},
|
| 134 |
+
"start_line": {"type": "integer"},
|
| 135 |
+
"end_line": {"type": "integer"},
|
| 136 |
+
},
|
| 137 |
+
"required": ["repo", "filepath", "start_line", "end_line"],
|
| 138 |
+
},
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"name": "find_callers",
|
| 142 |
+
"description": (
|
| 143 |
+
"Find all places in the codebase that call a specific function or class. "
|
| 144 |
+
"Essential for understanding HOW something is used, not just what it does. "
|
| 145 |
+
"Example: after finding the definition of Value.__mul__, call find_callers "
|
| 146 |
+
"to see where multiplication is actually performed in training code."
|
| 147 |
+
),
|
| 148 |
+
"input_schema": {
|
| 149 |
+
"type": "object",
|
| 150 |
+
"properties": {
|
| 151 |
+
"function_name": {"type": "string"},
|
| 152 |
+
"repo": {"type": "string", "description": "Optional: restrict to one repo"},
|
| 153 |
+
},
|
| 154 |
+
"required": ["function_name"],
|
| 155 |
+
},
|
| 156 |
+
},
|
| 157 |
+
]
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class AgentService:
|
| 161 |
+
"""
|
| 162 |
+
Runs a ReAct (Reason + Act) loop using Anthropic tool use.
|
| 163 |
+
|
| 164 |
+
The agent has access to three tools: search_code, get_file_chunk, find_callers.
|
| 165 |
+
It runs until either it produces an answer or hits max_iterations.
|
| 166 |
+
|
| 167 |
+
Each call to `run()` returns a structured result including:
|
| 168 |
+
- The final answer
|
| 169 |
+
- The tool call trace (what it searched, what it found)
|
| 170 |
+
- The sources actually used in the answer
|
| 171 |
+
"""
|
| 172 |
+
|
| 173 |
+
MAX_ITERATIONS = 8
|
| 174 |
+
|
| 175 |
+
SYSTEM_PROMPT = """You are an expert code assistant with access to a searchable index of GitHub repositories.
|
| 176 |
+
|
| 177 |
+
When answering questions about code:
|
| 178 |
+
1. Start by calling search_code to find relevant code
|
| 179 |
+
2. If the initial results don't fully answer the question, search again with a different query
|
| 180 |
+
3. Use get_file_chunk to see more context around a result (e.g., the full class or surrounding code)
|
| 181 |
+
4. Use find_callers to understand how functions are used, not just defined
|
| 182 |
+
5. Only answer when you have enough evidence from the actual code
|
| 183 |
+
|
| 184 |
+
Always cite your sources: mention the file path and line numbers.
|
| 185 |
+
Be precise β if the code doesn't show what you're looking for, say so rather than guessing."""
|
| 186 |
+
|
| 187 |
+
def __init__(self, retrieval_service: RetrievalService):
|
| 188 |
+
self.retrieval = retrieval_service
|
| 189 |
+
if not settings.anthropic_api_key:
|
| 190 |
+
raise ValueError("ANTHROPIC_API_KEY required for agentic queries")
|
| 191 |
+
import anthropic
|
| 192 |
+
self._client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
|
| 193 |
+
|
| 194 |
+
def run(self, question: str, repo_filter: str | None = None) -> dict:
|
| 195 |
+
"""
|
| 196 |
+
Run the agent loop synchronously.
|
| 197 |
+
|
| 198 |
+
Returns:
|
| 199 |
+
{
|
| 200 |
+
"answer": str, # final LLM answer
|
| 201 |
+
"tool_calls": list[dict], # trace: [{tool, input, output}, ...]
|
| 202 |
+
"iterations": int, # how many reasoning steps it took
|
| 203 |
+
}
|
| 204 |
+
"""
|
| 205 |
+
# The conversation starts with just the user question.
|
| 206 |
+
# Tool results will be appended as the loop progresses.
|
| 207 |
+
messages = [{"role": "user", "content": question}]
|
| 208 |
+
|
| 209 |
+
# If the user selected a specific repo, hint the agent
|
| 210 |
+
if repo_filter:
|
| 211 |
+
messages[0]["content"] += f"\n\n(Search in repo: {repo_filter})"
|
| 212 |
+
|
| 213 |
+
tool_trace = []
|
| 214 |
+
|
| 215 |
+
for iteration in range(self.MAX_ITERATIONS):
|
| 216 |
+
# ββ Ask Claude (with tools available) βββββββββββββββββββββββββββββ
|
| 217 |
+
response = self._client.messages.create(
|
| 218 |
+
model="claude-haiku-4-5-20251001",
|
| 219 |
+
max_tokens=2048,
|
| 220 |
+
system=self.SYSTEM_PROMPT,
|
| 221 |
+
tools=TOOLS,
|
| 222 |
+
messages=messages,
|
| 223 |
+
)
|
| 224 |
+
|
| 225 |
+
# ββ Did Claude give us a final answer? ββββββββββββββββββββββββββββ
|
| 226 |
+
# stop_reason="end_turn" means Claude is done β no more tool calls.
|
| 227 |
+
if response.stop_reason == "end_turn":
|
| 228 |
+
answer = ""
|
| 229 |
+
for block in response.content:
|
| 230 |
+
if hasattr(block, "text"):
|
| 231 |
+
answer += block.text
|
| 232 |
+
return {
|
| 233 |
+
"answer": answer,
|
| 234 |
+
"tool_calls": tool_trace,
|
| 235 |
+
"iterations": iteration + 1,
|
| 236 |
+
}
|
| 237 |
+
|
| 238 |
+
# ββ Claude wants to call tools ββββββββββββββββββββββββββββββββββββ
|
| 239 |
+
# The response content may have multiple blocks:
|
| 240 |
+
# - TextContent blocks (thinking out loud)
|
| 241 |
+
# - ToolUseContent blocks (actual tool calls)
|
| 242 |
+
|
| 243 |
+
# Append Claude's response to the conversation history
|
| 244 |
+
messages.append({"role": "assistant", "content": response.content})
|
| 245 |
+
|
| 246 |
+
# Process each tool call
|
| 247 |
+
tool_results = []
|
| 248 |
+
for block in response.content:
|
| 249 |
+
if block.type != "tool_use":
|
| 250 |
+
continue
|
| 251 |
+
|
| 252 |
+
tool_name = block.name
|
| 253 |
+
tool_input = block.input
|
| 254 |
+
tool_use_id = block.id
|
| 255 |
+
|
| 256 |
+
# ββ Execute the tool ββββββββββββββββββββββββββββββββββββββββββ
|
| 257 |
+
try:
|
| 258 |
+
result = self._execute_tool(tool_name, tool_input)
|
| 259 |
+
except Exception as e:
|
| 260 |
+
result = f"Tool error: {e}"
|
| 261 |
+
|
| 262 |
+
# Record for the trace
|
| 263 |
+
tool_trace.append({
|
| 264 |
+
"tool": tool_name,
|
| 265 |
+
"input": tool_input,
|
| 266 |
+
"output": result[:500] + "..." if len(result) > 500 else result,
|
| 267 |
+
})
|
| 268 |
+
|
| 269 |
+
# ββ Build the tool_result message βββββββββββββββββββββββββββββ
|
| 270 |
+
# This goes back to Claude as a "user" turn.
|
| 271 |
+
# Claude reads these results and decides what to do next.
|
| 272 |
+
tool_results.append({
|
| 273 |
+
"type": "tool_result",
|
| 274 |
+
"tool_use_id": tool_use_id,
|
| 275 |
+
"content": result,
|
| 276 |
+
})
|
| 277 |
+
|
| 278 |
+
# Add all tool results to the conversation
|
| 279 |
+
messages.append({"role": "user", "content": tool_results})
|
| 280 |
+
|
| 281 |
+
# Hit max iterations β return what we have
|
| 282 |
+
return {
|
| 283 |
+
"answer": "I was unable to fully answer this question within the allowed reasoning steps.",
|
| 284 |
+
"tool_calls": tool_trace,
|
| 285 |
+
"iterations": self.MAX_ITERATIONS,
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
    def stream(self, question: str, repo_filter: str | None = None) -> Iterator[dict]:
        """
        Run the ReAct loop and stream progress events as they happen.

        Unlike run(), which returns a single final dict, this generator yields
        one event per step so the UI can render a live tool-call trace:

            {"type": "tool_call", "tool": "search_code", "input": {...}}
            {"type": "tool_result","tool": "search_code", "output": "..."}
            {"type": "token", "text": "According..."}
            {"type": "done", "iterations": 3}

        Args:
            question: The user's natural-language question.
            repo_filter: Optional repo slug appended to the question so the
                model scopes its searches.

        Yields:
            Event dicts as described above; a "done" event is always last,
            whether the model finished or MAX_ITERATIONS was exhausted.
        """
        messages = [{"role": "user", "content": question}]
        if repo_filter:
            # The scope hint rides inside the user turn (not the system prompt)
            # so it applies to this question only.
            messages[0]["content"] += f"\n\n(Search in repo: {repo_filter})"

        for iteration in range(self.MAX_ITERATIONS):
            # Non-streaming API call per reasoning step; token streaming for
            # the final answer is simulated below.
            response = self._client.messages.create(
                model="claude-haiku-4-5-20251001",
                max_tokens=2048,
                system=self.SYSTEM_PROMPT,
                tools=TOOLS,
                messages=messages,
            )

            if response.stop_reason == "end_turn":
                # Model produced its final answer (no further tool calls).
                # Stream the final answer token by token
                for block in response.content:
                    if hasattr(block, "text"):
                        # Yield word-by-word for a streaming feel
                        for word in block.text.split(" "):
                            yield {"type": "token", "text": word + " "}
                yield {"type": "done", "iterations": iteration + 1}
                return

            # Tool-use turn: the assistant content (including tool_use blocks)
            # must be echoed back into history before tool results are sent.
            messages.append({"role": "assistant", "content": response.content})

            tool_results = []
            for block in response.content:
                if block.type != "tool_use":
                    continue

                yield {"type": "tool_call", "tool": block.name, "input": block.input}

                try:
                    result = self._execute_tool(block.name, block.input)
                except Exception as e:
                    # Report tool failures to the model instead of crashing;
                    # it can retry with different arguments.
                    result = f"Tool error: {e}"

                # Truncated preview for the UI trace; the model gets the full result.
                yield {"type": "tool_result", "tool": block.name, "output": result[:300]}

                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": result,
                })

            # Tool results go back to the model as a "user" turn.
            messages.append({"role": "user", "content": tool_results})

        # Loop exhausted without an end_turn — report how far we got.
        yield {"type": "done", "iterations": self.MAX_ITERATIONS}
|
| 346 |
+
|
| 347 |
+
# ββ Tool execution βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 348 |
+
|
| 349 |
+
def _execute_tool(self, name: str, args: dict) -> str:
|
| 350 |
+
if name == "search_code":
|
| 351 |
+
return self._tool_search_code(args)
|
| 352 |
+
elif name == "get_file_chunk":
|
| 353 |
+
return self._tool_get_file_chunk(args)
|
| 354 |
+
elif name == "find_callers":
|
| 355 |
+
return self._tool_find_callers(args)
|
| 356 |
+
return f"Unknown tool: {name}"
|
| 357 |
+
|
| 358 |
+
def _tool_search_code(self, args: dict) -> str:
|
| 359 |
+
results = self.retrieval.search(
|
| 360 |
+
query=args["query"],
|
| 361 |
+
top_k=args.get("top_k", 5),
|
| 362 |
+
repo_filter=args.get("repo"),
|
| 363 |
+
mode=args.get("mode", "hybrid"),
|
| 364 |
+
)
|
| 365 |
+
if not results:
|
| 366 |
+
return "No results found."
|
| 367 |
+
return self.retrieval.format_context(results)
|
| 368 |
+
|
| 369 |
+
def _tool_get_file_chunk(self, args: dict) -> str:
|
| 370 |
+
repo = args["repo"]
|
| 371 |
+
filepath = args["filepath"]
|
| 372 |
+
start = args["start_line"]
|
| 373 |
+
end = args["end_line"]
|
| 374 |
+
owner, name = repo.split("/", 1)
|
| 375 |
+
url = f"https://api.github.com/repos/{owner}/{name}/contents/{filepath}"
|
| 376 |
+
headers = {"Accept": "application/vnd.github.v3.raw"}
|
| 377 |
+
if settings.github_token:
|
| 378 |
+
headers["Authorization"] = f"token {settings.github_token}"
|
| 379 |
+
resp = http_requests.get(url, headers=headers, timeout=15)
|
| 380 |
+
if resp.status_code == 404:
|
| 381 |
+
return f"File not found: {filepath}"
|
| 382 |
+
resp.raise_for_status()
|
| 383 |
+
lines = resp.text.splitlines()
|
| 384 |
+
start = max(1, start)
|
| 385 |
+
end = min(len(lines), end)
|
| 386 |
+
chunk = "\n".join(f"{i+start}: {line}" for i, line in enumerate(lines[start-1:end]))
|
| 387 |
+
return f"# {repo} β {filepath} (lines {start}β{end})\n\n{chunk}"
|
| 388 |
+
|
| 389 |
+
def _tool_find_callers(self, args: dict) -> str:
|
| 390 |
+
name = args["function_name"]
|
| 391 |
+
results = self.retrieval.search(
|
| 392 |
+
query=name,
|
| 393 |
+
top_k=8,
|
| 394 |
+
repo_filter=args.get("repo"),
|
| 395 |
+
mode="keyword",
|
| 396 |
+
)
|
| 397 |
+
callers = [r for r in results if name in r["text"]]
|
| 398 |
+
if not callers:
|
| 399 |
+
return f"No call sites found for '{name}'."
|
| 400 |
+
return self.retrieval.format_context(callers)
|
|
File without changes
|
|
@@ -0,0 +1,499 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
mcp_server/server.py β Our GitHub RAG Copilot as an MCP server.
|
| 3 |
+
|
| 4 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 5 |
+
WHAT IS MCP?
|
| 6 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 7 |
+
|
| 8 |
+
MCP (Model Context Protocol) is an open standard created by Anthropic that
|
| 9 |
+
defines HOW AI models connect to external tools and data sources.
|
| 10 |
+
|
| 11 |
+
Think of it like USB-C for AI:
|
| 12 |
+
Before USB-C, every device had a different charging port.
|
| 13 |
+
Before MCP, every AI application built its own custom tool integration.
|
| 14 |
+
|
| 15 |
+
With MCP:
|
| 16 |
+
- You build a server ONCE exposing your capabilities
|
| 17 |
+
- ANY MCP client (Claude Desktop, Cursor, your custom app) can use it
|
| 18 |
+
- The AI model gets a consistent interface regardless of the tool
|
| 19 |
+
|
| 20 |
+
Without MCP (what we had before):
|
| 21 |
+
our_app β hardcoded API calls β specific tools
|
| 22 |
+
|
| 23 |
+
With MCP:
|
| 24 |
+
our_app ββ MCP protocol ββ ANY tools
|
| 25 |
+
Claude Desktop ββ MCP protocol ββ our RAG server
|
| 26 |
+
Cursor ββ MCP protocol ββ our RAG server
|
| 27 |
+
|
| 28 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 29 |
+
MCP'S THREE PRIMITIVES
|
| 30 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 31 |
+
|
| 32 |
+
MCP defines exactly three things a server can expose:
|
| 33 |
+
|
| 34 |
+
1. TOOLS β functions the LLM can call
|
| 35 |
+
"search for code", "read a file", "run a query"
|
| 36 |
+
β LLM decides when to call them (autonomous)
|
| 37 |
+
|
| 38 |
+
2. RESOURCES β data the LLM can read (like files or DB records)
|
| 39 |
+
"here is the list of indexed repos"
|
| 40 |
+
β Client controls when to read them (not LLM)
|
| 41 |
+
|
| 42 |
+
3. PROMPTS β reusable prompt templates with arguments
|
| 43 |
+
"explain this function: {code}"
|
| 44 |
+
β User triggers these (shown as slash commands in Claude Desktop)
|
| 45 |
+
|
| 46 |
+
Each primitive has a different actor:
|
| 47 |
+
Tools β LLM-driven (model decides to call them mid-reasoning)
|
| 48 |
+
Resources β Client-driven (app fetches them at context-building time)
|
| 49 |
+
Prompts β User-driven (user picks them from a menu)
|
| 50 |
+
|
| 51 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 52 |
+
TWO TRANSPORT MODES
|
| 53 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 54 |
+
|
| 55 |
+
MCP servers communicate over one of two transports:
|
| 56 |
+
|
| 57 |
+
STDIO (standard input/output):
|
| 58 |
+
- Claude Desktop spawns your server as a subprocess
|
| 59 |
+
- Communication happens over stdin/stdout pipes
|
| 60 |
+
- Simpler, no network configuration needed
|
| 61 |
+
- Best for: local tools, Claude Desktop integration
|
| 62 |
+
|
| 63 |
+
HTTP + SSE (Server-Sent Events):
|
| 64 |
+
- Your server runs as a web service
|
| 65 |
+
- LLM connects over the network
|
| 66 |
+
- Supports multiple concurrent clients
|
| 67 |
+
- Best for: deployed services, shared team tools
|
| 68 |
+
|
| 69 |
+
This server supports BOTH β stdio for local dev, HTTP for production.
|
| 70 |
+
|
| 71 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 72 |
+
TOOLS WE EXPOSE
|
| 73 |
+
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 74 |
+
|
| 75 |
+
search_code(query, repo?, language?, mode?, top_k?)
|
| 76 |
+
β Hybrid BM25 + semantic search over indexed repos
|
| 77 |
+
β Returns ranked code chunks with filepath + line numbers
|
| 78 |
+
|
| 79 |
+
list_repos()
|
| 80 |
+
β Returns all repos currently in the index
|
| 81 |
+
|
| 82 |
+
get_file_chunk(repo, filepath, start_line, end_line)
|
| 83 |
+
β Fetches a specific range of lines from GitHub
|
| 84 |
+
β Used for follow-up: "show me more of that function"
|
| 85 |
+
|
| 86 |
+
find_callers(function_name, repo)
|
| 87 |
+
β Searches for all call sites of a function
|
| 88 |
+
β Enables "who calls this?" multi-hop reasoning
|
| 89 |
+
"""
|
| 90 |
+
|
| 91 |
+
from pathlib import Path
|
| 92 |
+
import sys
|
| 93 |
+
import json
|
| 94 |
+
|
| 95 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 96 |
+
|
| 97 |
+
from mcp.server import Server
|
| 98 |
+
from mcp.server.stdio import stdio_server
|
| 99 |
+
from mcp import types
|
| 100 |
+
|
| 101 |
+
from retrieval.retrieval import RetrievalService
|
| 102 |
+
from ingestion.qdrant_store import QdrantStore
|
| 103 |
+
from ingestion.repo_fetcher import fetch_repo_files, parse_github_url
|
| 104 |
+
from backend.config import settings
|
| 105 |
+
|
| 106 |
+
# ── Server init ──────────────────────────────────────────────────────────────
|
| 107 |
+
# The Server object is the MCP server. It handles the protocol:
|
| 108 |
+
# - responding to tool/resource/prompt list requests
|
| 109 |
+
# - dispatching tool calls to our handlers
|
| 110 |
+
# - serialising results back in MCP format
|
| 111 |
+
|
| 112 |
+
app = Server("github-rag-copilot")
|
| 113 |
+
|
| 114 |
+
# Services loaded once β same pattern as FastAPI lifespan
|
| 115 |
+
_retrieval: RetrievalService | None = None
|
| 116 |
+
_store: QdrantStore | None = None
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def get_retrieval() -> RetrievalService:
    """Return the process-wide RetrievalService, constructing it on first use."""
    global _retrieval
    if _retrieval is not None:
        return _retrieval
    _retrieval = RetrievalService()
    return _retrieval
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def get_store() -> QdrantStore:
    """Return the process-wide QdrantStore, constructing it on first use."""
    global _store
    if _store is not None:
        return _store
    _store = QdrantStore()
    return _store
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 134 |
+
# TOOLS
|
| 135 |
+
# Every tool needs:
|
| 136 |
+
# 1. A name (what the LLM calls)
|
| 137 |
+
# 2. A description (what the LLM reads to decide whether to call it)
|
| 138 |
+
# 3. An inputSchema (JSON Schema β the LLM fills this in)
|
| 139 |
+
# 4. A handler (@app.call_tool)
|
| 140 |
+
#
|
| 141 |
+
# The description is CRITICAL β it's the only thing the LLM reads when
|
| 142 |
+
# deciding which tool to use. Write it like documentation for a smart
|
| 143 |
+
# person who can't see your code.
|
| 144 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 145 |
+
|
| 146 |
+
@app.list_tools()
async def list_tools() -> list[types.Tool]:
    """Called by MCP clients to discover what this server can do.

    Returns the four tool declarations. The description strings are the ONLY
    information the LLM has when deciding which tool to call, so they are
    written as user-facing documentation; inputSchema is plain JSON Schema
    that the model fills in.
    """
    return [
        types.Tool(
            name="search_code",
            description=(
                "Search for code chunks relevant to a query using hybrid BM25 + semantic search. "
                "Returns ranked code snippets with file paths and line numbers. "
                "Use this to find function definitions, class implementations, usage examples, "
                "or any code related to a concept. "
                "Specify repo to restrict search to a single repository."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Natural language question or code identifier to search for",
                    },
                    "repo": {
                        "type": "string",
                        "description": "Optional: restrict to a repo slug like 'karpathy/micrograd'",
                    },
                    "language": {
                        "type": "string",
                        "description": "Optional: filter by language like 'python', 'typescript'",
                    },
                    "mode": {
                        "type": "string",
                        "enum": ["hybrid", "semantic", "keyword"],
                        "description": "Search strategy. hybrid (default) combines semantic + BM25.",
                    },
                    "top_k": {
                        "type": "integer",
                        "description": "Number of results to return (default 5)",
                    },
                },
                "required": ["query"],
            },
        ),
        types.Tool(
            name="list_repos",
            description=(
                "List all GitHub repositories currently indexed and available for search. "
                "Returns repo slugs (owner/name) and chunk counts. "
                "Call this first to know which repos are available before searching."
            ),
            inputSchema={
                # No arguments — empty object schema.
                "type": "object",
                "properties": {},
                "required": [],
            },
        ),
        types.Tool(
            name="get_file_chunk",
            description=(
                "Fetch the raw content of a specific file section from GitHub. "
                "Use this to see more context around a search result — for example, "
                "if search returns lines 45–52 but you need the full function including "
                "its docstring at lines 38–44. "
                "Requires the repo to be publicly accessible on GitHub."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "repo": {
                        "type": "string",
                        "description": "Repository slug like 'karpathy/micrograd'",
                    },
                    "filepath": {
                        "type": "string",
                        "description": "File path within the repo like 'micrograd/engine.py'",
                    },
                    "start_line": {
                        "type": "integer",
                        "description": "First line to fetch (1-indexed)",
                    },
                    "end_line": {
                        "type": "integer",
                        "description": "Last line to fetch (inclusive)",
                    },
                },
                "required": ["repo", "filepath", "start_line", "end_line"],
            },
        ),
        types.Tool(
            name="find_callers",
            description=(
                "Find all places in the indexed code that call a specific function or class. "
                "Use this for multi-hop reasoning: after finding a function definition, "
                "call this to understand how it's used and in what context. "
                "Returns code chunks containing calls to the specified name."
            ),
            inputSchema={
                "type": "object",
                "properties": {
                    "function_name": {
                        "type": "string",
                        "description": "Function or class name to search for call sites",
                    },
                    "repo": {
                        "type": "string",
                        "description": "Optional: restrict to a specific repository",
                    },
                },
                "required": ["function_name"],
            },
        ),
    ]
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 259 |
+
# TOOL HANDLERS
|
| 260 |
+
# @app.call_tool() receives the tool name + arguments from the LLM.
|
| 261 |
+
# Returns a list of content blocks (text, image, or resource).
|
| 262 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 263 |
+
|
| 264 |
+
@app.call_tool()
async def call_tool(name: str, arguments: dict) -> list[types.TextContent]:
    """Dispatch an MCP tool invocation to the matching handler coroutine.

    Unknown tool names yield an error text block rather than an exception, so
    the client sees a well-formed MCP response.
    """
    handlers = {
        "search_code": _handle_search_code,
        "list_repos": _handle_list_repos,
        "get_file_chunk": _handle_get_file_chunk,
        "find_callers": _handle_find_callers,
    }
    handler = handlers.get(name)
    if handler is None:
        return [types.TextContent(type="text", text=f"Unknown tool: {name}")]
    return await handler(arguments)
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
async def _handle_search_code(args: dict) -> list[types.TextContent]:
    """Run hybrid retrieval for the query and return one formatted text block."""
    svc = get_retrieval()
    hits = svc.search(
        query=args["query"],
        top_k=args.get("top_k", 5),
        repo_filter=args.get("repo"),
        language_filter=args.get("language"),
        mode=args.get("mode", "hybrid"),
    )
    body = svc.format_context(hits) if hits else "No results found."
    return [types.TextContent(type="text", text=body)]
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
async def _handle_list_repos(args: dict) -> list[types.TextContent]:
    """Enumerate indexed repos with their chunk counts as a bullet list."""
    store = get_store()
    repos = store.list_repos()
    if not repos:
        return [types.TextContent(type="text", text="No repositories indexed yet.")]
    bullet_lines = []
    for slug in repos:
        bullet_lines.append(f"- {slug} ({store.count(repo=slug)} chunks)")
    body = "Indexed repositories:\n" + "\n".join(bullet_lines)
    return [types.TextContent(type="text", text=body)]
|
| 303 |
+
|
| 304 |
+
|
| 305 |
+
async def _handle_get_file_chunk(args: dict) -> list[types.TextContent]:
    """Fetch a file from GitHub and return the requested 1-indexed line range.

    Recoverable problems (malformed repo slug, missing file, range entirely
    outside the file) are reported as explanatory text blocks rather than
    exceptions, so the MCP client always receives a valid response.
    """
    import requests
    repo = args["repo"]
    filepath = args["filepath"]
    start = args["start_line"]
    end = args["end_line"]

    # Guard: a slug without "/" would make split() raise an opaque ValueError.
    if "/" not in repo:
        return [types.TextContent(
            type="text",
            text=f"Invalid repo slug (expected 'owner/name'): {repo}",
        )]
    owner, name = repo.split("/", 1)
    url = f"https://api.github.com/repos/{owner}/{name}/contents/{filepath}"
    # The "raw" media type returns file bytes directly instead of base64 JSON.
    headers = {"Accept": "application/vnd.github.v3.raw"}
    if settings.github_token:
        headers["Authorization"] = f"token {settings.github_token}"

    response = requests.get(url, headers=headers, timeout=15)
    if response.status_code == 404:
        return [types.TextContent(type="text", text=f"File not found: {filepath}")]
    response.raise_for_status()

    lines = response.text.splitlines()
    # Clamp to actual file length
    start = max(1, start)
    end = min(len(lines), end)
    # Guard: previously a range beyond EOF (or inverted) produced a header
    # with an empty body, which silently misled the model.
    if start > end:
        return [types.TextContent(
            type="text",
            text=f"Requested lines are out of range: {filepath} has {len(lines)} lines.",
        )]
    chunk = "\n".join(f"{i+start}: {line}" for i, line in enumerate(lines[start-1:end]))
    return [types.TextContent(
        type="text",
        text=f"# {repo} — {filepath} (lines {start}–{end})\n\n{chunk}",
    )]
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
async def _handle_find_callers(args: dict) -> list[types.TextContent]:
    """Locate call sites of a function/class via exact-token keyword search."""
    symbol = args["function_name"]
    svc = get_retrieval()
    # BM25 keyword mode matches identifiers far better than embeddings do.
    hits = svc.search(
        query=symbol,
        top_k=8,
        repo_filter=args.get("repo"),
        mode="keyword",
    )
    # Keyword scoring can rank chunks that only share sub-tokens with the
    # symbol; keep only chunks that literally contain it.
    matching = [hit for hit in hits if symbol in hit["text"]]
    if not matching:
        return [types.TextContent(type="text", text=f"No call sites found for '{symbol}'.")]
    return [types.TextContent(type="text", text=svc.format_context(matching))]
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 358 |
+
# RESOURCES
|
| 359 |
+
# Resources are read-only data the LLM (or client) can browse.
|
| 360 |
+
# Unlike tools (LLM calls them to act), resources are like open tabs β
|
| 361 |
+
# the client can read them to build up context.
|
| 362 |
+
#
|
| 363 |
+
# We expose each indexed repo as a resource with a custom URI scheme:
|
| 364 |
+
# rag://repos/karpathy/micrograd
|
| 365 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 366 |
+
|
| 367 |
+
@app.list_resources()
async def list_resources() -> list[types.Resource]:
    """Expose each indexed repo as a browsable rag://repos/<slug> resource."""
    store = get_store()
    resources = []
    for slug in store.list_repos():
        resources.append(
            types.Resource(
                uri=f"rag://repos/{slug}",
                name=slug,
                description=f"Indexed code from {slug} ({store.count(repo=slug)} chunks)",
                mimeType="text/plain",
            )
        )
    return resources
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
@app.read_resource()
async def read_resource(uri: str) -> str:
    """Return a plain-text summary for a rag://repos/<owner>/<name> resource."""
    # The MCP SDK passes the URI as a pydantic AnyUrl, not a plain str, so
    # calling .removeprefix() directly on it raises AttributeError —
    # normalise to str before parsing rag://repos/owner/name.
    slug = str(uri).removeprefix("rag://repos/")
    store = get_store()
    count = store.count(repo=slug)
    return f"Repository: {slug}\nIndexed chunks: {count}\n\nUse the search_code tool to query this repo."
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 394 |
+
# PROMPTS
|
| 395 |
+
# Prompts are reusable templates shown to the user as slash commands in
|
| 396 |
+
# Claude Desktop. The user picks a prompt, fills in arguments, and Claude
|
| 397 |
+
# executes it with the template expanded.
|
| 398 |
+
#
|
| 399 |
+
# This is different from tools (which the LLM calls) and resources (which
|
| 400 |
+
# the client reads). Prompts are USER-INITIATED templates.
|
| 401 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 402 |
+
|
| 403 |
+
@app.list_prompts()
async def list_prompts() -> list[types.Prompt]:
    """Declare the reusable prompt templates this server offers.

    Clients such as Claude Desktop surface these to the user (e.g. as slash
    commands); the user fills in the declared arguments and get_prompt()
    expands the template.
    """
    return [
        types.Prompt(
            name="explain-function",
            description="Retrieve and explain a specific function from the indexed repos",
            arguments=[
                types.PromptArgument(
                    name="function_name",
                    description="Name of the function to explain",
                    required=True,
                ),
                types.PromptArgument(
                    name="repo",
                    description="Repository slug (optional, e.g. karpathy/micrograd)",
                    required=False,
                ),
            ],
        ),
        types.Prompt(
            name="repo-overview",
            description="Generate an architectural overview of an indexed repository",
            arguments=[
                types.PromptArgument(
                    name="repo",
                    description="Repository slug like 'karpathy/micrograd'",
                    required=True,
                ),
            ],
        ),
    ]
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
@app.get_prompt()
async def get_prompt(name: str, arguments: dict) -> types.GetPromptResult:
    """Expand a declared prompt template into concrete user messages.

    Args:
        name: One of the prompt names declared by list_prompts().
        arguments: User-supplied values for the template's declared arguments.

    Raises:
        ValueError: if *name* does not match a known prompt.
    """
    if name == "explain-function":
        fn = arguments.get("function_name", "")
        repo = arguments.get("repo", "")
        # Only mention the repo in the instruction when the user provided one.
        repo_clause = f" in {repo}" if repo else ""
        return types.GetPromptResult(
            description=f"Explain {fn}",
            messages=[
                types.PromptMessage(
                    role="user",
                    content=types.TextContent(
                        type="text",
                        text=(
                            f"Use the search_code tool to find the implementation of `{fn}`{repo_clause}. "
                            f"Then explain what it does, its parameters, return value, and any important "
                            f"implementation details. Cite the source file and line numbers."
                        ),
                    ),
                )
            ],
        )
    elif name == "repo-overview":
        repo = arguments.get("repo", "")
        return types.GetPromptResult(
            description=f"Overview of {repo}",
            messages=[
                types.PromptMessage(
                    role="user",
                    content=types.TextContent(
                        type="text",
                        text=(
                            f"Use search_code with repo='{repo}' to explore the codebase. "
                            f"Search for: main entry points, core data structures, key abstractions. "
                            f"Then write a structured architectural overview covering: "
                            f"1) What the project does, 2) Main modules and their responsibilities, "
                            f"3) Key data flow, 4) Important design patterns used."
                        ),
                    ),
                )
            ],
        )
    raise ValueError(f"Unknown prompt: {name}")
|
| 479 |
+
|
| 480 |
+
|
| 481 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 482 |
+
# ENTRY POINT
|
| 483 |
+
# Run as stdio server (for Claude Desktop) or imported for HTTP mode.
|
| 484 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 485 |
+
|
| 486 |
+
async def main():
    """Run the MCP server over stdio (for Claude Desktop integration)."""
    # IMPORTANT: in stdio transport, stdout IS the JSON-RPC message channel.
    # Printing the startup banner to stdout corrupts the protocol stream and
    # makes clients like Claude Desktop fail to connect — log to stderr.
    print("Starting GitHub RAG MCP server...", file=sys.stderr, flush=True)
    async with stdio_server() as (read_stream, write_stream):
        await app.run(
            read_stream,
            write_stream,
            app.create_initialization_options(),
        )
|
| 495 |
+
|
| 496 |
+
|
| 497 |
+
if __name__ == "__main__":
|
| 498 |
+
import asyncio
|
| 499 |
+
asyncio.run(main())
|
|
@@ -24,3 +24,4 @@ anthropic
|
|
| 24 |
# Utilities
|
| 25 |
python-dotenv
|
| 26 |
pydantic
|
|
|
|
|
|
| 24 |
# Utilities
|
| 25 |
python-dotenv
|
| 26 |
pydantic
|
| 27 |
+
mcp
|
|
@@ -1,12 +1,13 @@
|
|
| 1 |
import { useState, useEffect, useRef, useCallback } from "react";
|
| 2 |
import Sidebar from "./components/Sidebar";
|
| 3 |
import Message from "./components/Message";
|
| 4 |
-
import { fetchRepos, streamQuery } from "./api";
|
| 5 |
|
| 6 |
export default function App() {
|
| 7 |
const [repos, setRepos] = useState([]);
|
| 8 |
const [activeRepo, setActiveRepo] = useState(null);
|
| 9 |
const [mode, setMode] = useState("hybrid");
|
|
|
|
| 10 |
const [messages, setMessages] = useState([]);
|
| 11 |
const [input, setInput] = useState("");
|
| 12 |
const [streaming, setStreaming] = useState(false);
|
|
@@ -54,55 +55,109 @@ export default function App() {
|
|
| 54 |
if (!question || streaming) return;
|
| 55 |
setInput("");
|
| 56 |
|
| 57 |
-
// Add user message
|
| 58 |
const userMsg = { role: "user", content: question };
|
| 59 |
-
// Add placeholder assistant message
|
| 60 |
const assistantId = Date.now();
|
| 61 |
const assistantMsg = {
|
| 62 |
id: assistantId, role: "assistant",
|
| 63 |
content: "", sources: [], queryType: null, streaming: true,
|
|
|
|
|
|
|
| 64 |
};
|
| 65 |
setMessages((prev) => [...prev, userMsg, assistantMsg]);
|
| 66 |
setStreaming(true);
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
)
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
stopStream.current = stop;
|
| 108 |
}
|
|
@@ -133,6 +188,8 @@ export default function App() {
|
|
| 133 |
onReposChange={loadRepos}
|
| 134 |
mode={mode}
|
| 135 |
onModeChange={setMode}
|
|
|
|
|
|
|
| 136 |
/>
|
| 137 |
|
| 138 |
<div className="main">
|
|
|
|
| 1 |
import { useState, useEffect, useRef, useCallback } from "react";
|
| 2 |
import Sidebar from "./components/Sidebar";
|
| 3 |
import Message from "./components/Message";
|
| 4 |
+
import { fetchRepos, streamQuery, streamAgentQuery } from "./api";
|
| 5 |
|
| 6 |
export default function App() {
|
| 7 |
const [repos, setRepos] = useState([]);
|
| 8 |
const [activeRepo, setActiveRepo] = useState(null);
|
| 9 |
const [mode, setMode] = useState("hybrid");
|
| 10 |
+
const [agentMode, setAgentMode] = useState(false);
|
| 11 |
const [messages, setMessages] = useState([]);
|
| 12 |
const [input, setInput] = useState("");
|
| 13 |
const [streaming, setStreaming] = useState(false);
|
|
|
|
| 55 |
if (!question || streaming) return;
|
| 56 |
setInput("");
|
| 57 |
|
| 58 |
+
// Add user message + placeholder assistant message
|
| 59 |
const userMsg = { role: "user", content: question };
|
|
|
|
| 60 |
const assistantId = Date.now();
|
| 61 |
const assistantMsg = {
|
| 62 |
id: assistantId, role: "assistant",
|
| 63 |
content: "", sources: [], queryType: null, streaming: true,
|
| 64 |
+
// Agent-mode extras:
|
| 65 |
+
toolCalls: [], currentTool: null, iterations: null,
|
| 66 |
};
|
| 67 |
setMessages((prev) => [...prev, userMsg, assistantMsg]);
|
| 68 |
setStreaming(true);
|
| 69 |
|
| 70 |
+
// ββ Common callbacks ββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 71 |
+
const onToken = (token) =>
|
| 72 |
+
setMessages((prev) =>
|
| 73 |
+
prev.map((m) => m.id === assistantId ? { ...m, content: m.content + token } : m)
|
| 74 |
+
);
|
| 75 |
+
|
| 76 |
+
const onError = (err) => {
|
| 77 |
+
setMessages((prev) =>
|
| 78 |
+
prev.map((m) => m.id === assistantId
|
| 79 |
+
? { ...m, content: `Error: ${err}`, streaming: false }
|
| 80 |
+
: m
|
| 81 |
+
)
|
| 82 |
+
);
|
| 83 |
+
setStreaming(false);
|
| 84 |
+
stopStream.current = null;
|
| 85 |
+
};
|
| 86 |
+
|
| 87 |
+
let stop;
|
| 88 |
+
|
| 89 |
+
if (agentMode) {
|
| 90 |
+
// ββ Agent mode: ReAct loop with live tool-call trace ββββββββββββββββββ
|
| 91 |
+
stop = streamAgentQuery({
|
| 92 |
+
question,
|
| 93 |
+
repo: activeRepo,
|
| 94 |
+
onToolCall: (tool, input) => {
|
| 95 |
+
// Show spinner with tool name while agent is calling
|
| 96 |
+
setMessages((prev) =>
|
| 97 |
+
prev.map((m) => m.id === assistantId
|
| 98 |
+
? { ...m, currentTool: tool }
|
| 99 |
+
: m
|
| 100 |
+
)
|
| 101 |
+
);
|
| 102 |
+
// Append to the tool call trace (output will be filled by onToolResult)
|
| 103 |
+
setMessages((prev) =>
|
| 104 |
+
prev.map((m) => m.id === assistantId
|
| 105 |
+
? { ...m, toolCalls: [...m.toolCalls, { tool, input, output: "" }] }
|
| 106 |
+
: m
|
| 107 |
+
)
|
| 108 |
+
);
|
| 109 |
+
},
|
| 110 |
+
onToolResult: (tool, output) => {
|
| 111 |
+
// Fill in the output of the last tool call in the trace
|
| 112 |
+
setMessages((prev) =>
|
| 113 |
+
prev.map((m) => {
|
| 114 |
+
if (m.id !== assistantId) return m;
|
| 115 |
+
const calls = [...m.toolCalls];
|
| 116 |
+
// Find last call for this tool (most recent) and fill its output
|
| 117 |
+
for (let i = calls.length - 1; i >= 0; i--) {
|
| 118 |
+
if (calls[i].tool === tool && !calls[i].output) {
|
| 119 |
+
calls[i] = { ...calls[i], output };
|
| 120 |
+
break;
|
| 121 |
+
}
|
| 122 |
+
}
|
| 123 |
+
return { ...m, toolCalls: calls, currentTool: "thinking" };
|
| 124 |
+
})
|
| 125 |
+
);
|
| 126 |
+
},
|
| 127 |
+
onToken,
|
| 128 |
+
onDone: (iterations) => {
|
| 129 |
+
setMessages((prev) =>
|
| 130 |
+
prev.map((m) => m.id === assistantId
|
| 131 |
+
? { ...m, streaming: false, currentTool: null, iterations }
|
| 132 |
+
: m
|
| 133 |
+
)
|
| 134 |
+
);
|
| 135 |
+
setStreaming(false);
|
| 136 |
+
stopStream.current = null;
|
| 137 |
+
},
|
| 138 |
+
onError,
|
| 139 |
+
});
|
| 140 |
+
} else {
|
| 141 |
+
// ββ Plain RAG mode: single retrieval β stream tokens ββββββββββββββββββ
|
| 142 |
+
stop = streamQuery({
|
| 143 |
+
question,
|
| 144 |
+
repo: activeRepo,
|
| 145 |
+
mode,
|
| 146 |
+
onToken,
|
| 147 |
+
onSources: (sources, queryType) =>
|
| 148 |
+
setMessages((prev) =>
|
| 149 |
+
prev.map((m) => m.id === assistantId ? { ...m, sources, queryType } : m)
|
| 150 |
+
),
|
| 151 |
+
onDone: () => {
|
| 152 |
+
setMessages((prev) =>
|
| 153 |
+
prev.map((m) => m.id === assistantId ? { ...m, streaming: false } : m)
|
| 154 |
+
);
|
| 155 |
+
setStreaming(false);
|
| 156 |
+
stopStream.current = null;
|
| 157 |
+
},
|
| 158 |
+
onError,
|
| 159 |
+
});
|
| 160 |
+
}
|
| 161 |
|
| 162 |
stopStream.current = stop;
|
| 163 |
}
|
|
|
|
| 188 |
onReposChange={loadRepos}
|
| 189 |
mode={mode}
|
| 190 |
onModeChange={setMode}
|
| 191 |
+
agentMode={agentMode}
|
| 192 |
+
onAgentModeChange={setAgentMode}
|
| 193 |
/>
|
| 194 |
|
| 195 |
<div className="main">
|
|
@@ -70,3 +70,66 @@ export function streamQuery({ question, repo, mode, onToken, onSources, onDone,
|
|
| 70 |
|
| 71 |
return () => es.close();
|
| 72 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
return () => es.close();
|
| 72 |
}
|
| 73 |
+
|
| 74 |
+
/**
 * Stream the agentic RAG loop via SSE.
 *
 * Unlike streamQuery (one retrieval → tokens), this endpoint surfaces the
 * agent's full ReAct reasoning loop in real time:
 *
 *   1. agent decides to search        → event: tool_call
 *   2. result comes back              → event: tool_result
 *   3. agent decides to search again (or answer)
 *   4. when done, answer streams token-by-token (default events)
 *   5. event: done signals completion with the iteration count
 *
 * Callbacks:
 *   onToolCall(tool, input)    – agent is calling a tool
 *   onToolResult(tool, output) – tool returned a result
 *   onToken(text)              – token of the final answer
 *   onDone(iterations)         – agent finished
 *   onError(msg)               – connection or server error
 *
 * Returns a function that aborts the stream.
 */
export function streamAgentQuery({ question, repo, onToolCall, onToolResult, onToken, onDone, onError }) {
  const qs = new URLSearchParams({ question });
  if (repo) qs.set("repo", repo);

  const source = new EventSource(`${BASE}/agent/stream?${qs}`);

  // Named SSE events carry JSON payloads describing the agent's steps.
  const namedHandlers = {
    tool_call: ({ tool, input }) => onToolCall?.(tool, input),
    tool_result: ({ tool, output }) => onToolResult?.(tool, output),
    done: ({ iterations }) => onDone?.(iterations),
  };
  for (const [eventName, handle] of Object.entries(namedHandlers)) {
    source.addEventListener(eventName, (e) => handle(JSON.parse(e.data)));
  }

  // Unnamed (default) events are answer tokens; "[DONE]" is the end sentinel.
  // Newlines arrive escaped (SSE frames are line-delimited) and are restored here.
  source.onmessage = (e) => {
    if (e.data === "[DONE]") {
      source.close();
    } else {
      onToken?.(e.data.replace(/\\n/g, "\n"));
    }
  };

  source.onerror = () => {
    source.close();
    onError?.("Agent connection lost");
  };

  return () => source.close();
}
|
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import ReactMarkdown from "react-markdown";
|
| 2 |
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
|
| 3 |
import { oneDark } from "react-syntax-highlighter/dist/esm/styles/prism";
|
|
@@ -30,6 +31,69 @@ const mdComponents = {
|
|
| 30 |
},
|
| 31 |
};
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
export default function Message({ msg }) {
|
| 34 |
const isUser = msg.role === "user";
|
| 35 |
|
|
@@ -39,20 +103,36 @@ export default function Message({ msg }) {
|
|
| 39 |
<div className="bubble">{msg.content}</div>
|
| 40 |
) : (
|
| 41 |
<>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
{/* Answer bubble */}
|
| 43 |
<div className="bubble">
|
| 44 |
<ReactMarkdown components={mdComponents}>
|
| 45 |
{msg.content || " "}
|
| 46 |
</ReactMarkdown>
|
| 47 |
-
{msg.streaming && <span className="cursor" />}
|
| 48 |
</div>
|
| 49 |
|
| 50 |
-
{/* Query type badge */}
|
| 51 |
-
{msg.
|
|
|
|
|
|
|
|
|
|
| 52 |
<span className="query-type-badge">{msg.queryType}</span>
|
| 53 |
)}
|
| 54 |
|
| 55 |
-
{/* Sources */}
|
| 56 |
{msg.sources && msg.sources.length > 0 && !msg.streaming && (
|
| 57 |
<div className="sources">
|
| 58 |
<div className="sources-header">
|
|
|
|
| 1 |
+
import { useState } from "react";
|
| 2 |
import ReactMarkdown from "react-markdown";
|
| 3 |
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
|
| 4 |
import { oneDark } from "react-syntax-highlighter/dist/esm/styles/prism";
|
|
|
|
| 31 |
},
|
| 32 |
};
|
| 33 |
|
| 34 |
+
// ToolCallTrace shows the agent's reasoning steps.
//
// DURING streaming: shows steps live, expanded, as they accumulate.
// AFTER completion: collapses to a toggle button to keep the UI clean.
//
// This is the "glass box" view — users can watch the LLM reason in real time,
// see what it searched for, and what it found, step by step.
//
// Props:
//   steps     – array of { tool, input, output } accumulated by the caller;
//               a step whose `output` is still empty is considered in flight.
//   streaming – true while the agent is running; selects live vs collapsed view.
function ToolCallTrace({ steps, streaming }) {
  // Expanded by default so the full trace is visible the moment streaming ends.
  const [expanded, setExpanded] = useState(true);
  // Nothing to render until the agent has issued at least one tool call.
  if (!steps || steps.length === 0) return null;

  // Tool name → icon for quick visual scanning (fallback icon for unknown tools).
  // NOTE(review): these glyphs appear mojibake-garbled in this view — verify the
  // intended emoji against the original file encoding.
  const toolIcon = { search_code: "π", get_file_chunk: "π", find_callers: "π" };

  // Step-list markup, shared by both the live view and the collapsed view.
  const stepsEl = (
    <div className="agent-trace-steps">
      {steps.map((step, i) => (
        // A step with no output yet is "pending" (awaiting its tool_result event).
        <div key={i} className={`agent-step ${step.output ? "done" : "pending"}`}>
          <div className="agent-step-header">
            <span className="agent-step-icon">{toolIcon[step.tool] || "βοΈ"}</span>
            <span className="agent-step-tool">{step.tool}</span>
            {/* Best-effort human-readable summary of the tool input. */}
            <span className="agent-step-query">
              {step.input?.query || step.input?.function_name || JSON.stringify(step.input)}
            </span>
            {/* Spinner on the last step while waiting for its result */}
            {!step.output && i === steps.length - 1 && (
              <span className="spinner" style={{ marginLeft: "auto", flexShrink: 0 }} />
            )}
          </div>
          {step.output && (
            <div className="agent-step-output">{step.output}</div>
          )}
        </div>
      ))}
    </div>
  );

  if (streaming) {
    // Live view: always expanded while the agent is running.
    return (
      <div className="agent-trace live">
        <div className="agent-trace-label">
          β¦ Agent reasoning Β· {steps.length} step{steps.length !== 1 ? "s" : ""}
        </div>
        {stepsEl}
      </div>
    );
  }

  // Collapsed view after completion: a toggle button reveals/hides the trace.
  return (
    <div className="agent-trace">
      <button
        className="agent-trace-toggle"
        onClick={() => setExpanded((v) => !v)}
      >
        {expanded ? "βΌ" : "βΆ"} Reasoning trace Β· {steps.length} step{steps.length !== 1 ? "s" : ""}
      </button>
      {expanded && stepsEl}
    </div>
  );
}
|
| 96 |
+
|
| 97 |
export default function Message({ msg }) {
|
| 98 |
const isUser = msg.role === "user";
|
| 99 |
|
|
|
|
| 103 |
<div className="bubble">{msg.content}</div>
|
| 104 |
) : (
|
| 105 |
<>
|
| 106 |
+
{/* Agent reasoning trace β live during streaming, collapsible after */}
|
| 107 |
+
{msg.toolCalls && msg.toolCalls.length > 0 && (
|
| 108 |
+
<ToolCallTrace steps={msg.toolCalls} streaming={msg.streaming} />
|
| 109 |
+
)}
|
| 110 |
+
|
| 111 |
+
{/* "Thinkingβ¦" shown before the first tool call fires */}
|
| 112 |
+
{msg.streaming && msg.currentTool === null && !msg.content && (!msg.toolCalls || msg.toolCalls.length === 0) && (
|
| 113 |
+
<div className="agent-thinking">
|
| 114 |
+
<span className="spinner" />
|
| 115 |
+
Thinkingβ¦
|
| 116 |
+
</div>
|
| 117 |
+
)}
|
| 118 |
+
|
| 119 |
{/* Answer bubble */}
|
| 120 |
<div className="bubble">
|
| 121 |
<ReactMarkdown components={mdComponents}>
|
| 122 |
{msg.content || " "}
|
| 123 |
</ReactMarkdown>
|
| 124 |
+
{msg.streaming && !msg.currentTool && <span className="cursor" />}
|
| 125 |
</div>
|
| 126 |
|
| 127 |
+
{/* Query type badge or agent iterations badge */}
|
| 128 |
+
{!msg.streaming && msg.iterations && (
|
| 129 |
+
<span className="query-type-badge">agent Β· {msg.iterations} step{msg.iterations !== 1 ? "s" : ""}</span>
|
| 130 |
+
)}
|
| 131 |
+
{!msg.streaming && msg.queryType && !msg.iterations && (
|
| 132 |
<span className="query-type-badge">{msg.queryType}</span>
|
| 133 |
)}
|
| 134 |
|
| 135 |
+
{/* Sources (only for non-agent RAG) */}
|
| 136 |
{msg.sources && msg.sources.length > 0 && !msg.streaming && (
|
| 137 |
<div className="sources">
|
| 138 |
<div className="sources-header">
|
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import { useState } from "react";
|
| 2 |
import { ingestRepo, deleteRepo } from "../api";
|
| 3 |
|
| 4 |
-
export default function Sidebar({ repos, activeRepo, onSelectRepo, onReposChange, mode, onModeChange }) {
|
| 5 |
const [url, setUrl] = useState("");
|
| 6 |
const [status, setStatus] = useState(null); // {type, text}
|
| 7 |
const [loading, setLoading] = useState(false);
|
|
@@ -61,22 +61,46 @@ export default function Sidebar({ repos, activeRepo, onSelectRepo, onReposChange
|
|
| 61 |
)}
|
| 62 |
</div>
|
| 63 |
|
| 64 |
-
{/* ββ
|
| 65 |
<div>
|
| 66 |
-
<div className="section-label">
|
|
|
|
| 67 |
<div className="mode-pills">
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
</div>
|
| 78 |
</div>
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
{/* ββ Repos ββ */}
|
| 81 |
<div style={{ flex: 1 }}>
|
| 82 |
<div className="section-label">Indexed Repos ({repos.length})</div>
|
|
|
|
| 1 |
import { useState } from "react";
|
| 2 |
import { ingestRepo, deleteRepo } from "../api";
|
| 3 |
|
| 4 |
+
export default function Sidebar({ repos, activeRepo, onSelectRepo, onReposChange, mode, onModeChange, agentMode, onAgentModeChange }) {
|
| 5 |
const [url, setUrl] = useState("");
|
| 6 |
const [status, setStatus] = useState(null); // {type, text}
|
| 7 |
const [loading, setLoading] = useState(false);
|
|
|
|
| 61 |
)}
|
| 62 |
</div>
|
| 63 |
|
| 64 |
+
{/* ββ Query mode (RAG vs Agent) ββ */}
|
| 65 |
<div>
|
| 66 |
+
<div className="section-label">Query Mode</div>
|
| 67 |
+
{/* Agent mode toggle β switches between plain RAG and agentic ReAct loop */}
|
| 68 |
<div className="mode-pills">
|
| 69 |
+
<button
|
| 70 |
+
className={`pill ${!agentMode ? "active" : ""}`}
|
| 71 |
+
onClick={() => onAgentModeChange(false)}
|
| 72 |
+
title="Single retrieval, fast answer"
|
| 73 |
+
>
|
| 74 |
+
RAG
|
| 75 |
+
</button>
|
| 76 |
+
<button
|
| 77 |
+
className={`pill ${agentMode ? "active" : ""}`}
|
| 78 |
+
onClick={() => onAgentModeChange(true)}
|
| 79 |
+
title="Multi-step reasoning, more thorough"
|
| 80 |
+
>
|
| 81 |
+
Agent β¦
|
| 82 |
+
</button>
|
| 83 |
</div>
|
| 84 |
</div>
|
| 85 |
|
| 86 |
+
{/* ββ Search mode (only visible in RAG mode) ββ */}
|
| 87 |
+
{!agentMode && (
|
| 88 |
+
<div>
|
| 89 |
+
<div className="section-label">Search Mode</div>
|
| 90 |
+
<div className="mode-pills">
|
| 91 |
+
{["hybrid", "semantic", "keyword"].map((m) => (
|
| 92 |
+
<button
|
| 93 |
+
key={m}
|
| 94 |
+
className={`pill ${mode === m ? "active" : ""}`}
|
| 95 |
+
onClick={() => onModeChange(m)}
|
| 96 |
+
>
|
| 97 |
+
{m}
|
| 98 |
+
</button>
|
| 99 |
+
))}
|
| 100 |
+
</div>
|
| 101 |
+
</div>
|
| 102 |
+
)}
|
| 103 |
+
|
| 104 |
{/* ββ Repos ββ */}
|
| 105 |
<div style={{ flex: 1 }}>
|
| 106 |
<div className="section-label">Indexed Repos ({repos.length})</div>
|
|
@@ -428,6 +428,112 @@ body {
|
|
| 428 |
}
|
| 429 |
@keyframes blink { 0%, 100% { opacity: 1; } 50% { opacity: 0; } }
|
| 430 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
/* ββ Scrollbar βββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 432 |
::-webkit-scrollbar { width: 6px; }
|
| 433 |
::-webkit-scrollbar-track { background: transparent; }
|
|
|
|
| 428 |
}
|
| 429 |
@keyframes blink { 0%, 100% { opacity: 1; } 50% { opacity: 0; } }
|
| 430 |
|
| 431 |
+
/* ── Agent trace ─────────────────────────────────────────────── */
/* The collapsible "Reasoning trace" block shown above the answer
   in agent mode — lets users see exactly what the LLM searched. */
.agent-trace {
  width: 100%;
  max-width: 760px;
  margin-bottom: 8px;
}

/* Live variant: accent border + tinted background while reasoning is in progress. */
.agent-trace.live {
  border: 1px solid var(--accent);
  border-radius: 8px;
  padding: 8px 12px;
  background: var(--accent-dim);
}

/* Small uppercase heading ("Agent reasoning · N steps") above the live trace. */
.agent-trace-label {
  font-size: 11px;
  font-weight: 600;
  color: var(--accent);
  text-transform: uppercase;
  letter-spacing: 0.06em;
  margin-bottom: 8px;
}

/* Toggle button shown after completion to expand/collapse the trace. */
.agent-trace-toggle {
  background: none;
  border: 1px solid var(--border);
  border-radius: 6px;
  color: var(--muted);
  cursor: pointer;
  font-size: 11px;
  font-family: inherit;
  padding: 4px 10px;
  transition: border-color 0.15s, color 0.15s;
}
.agent-trace-toggle:hover { border-color: var(--accent); color: var(--accent); }

/* Vertical list of individual tool-call steps. */
.agent-trace-steps {
  margin-top: 6px;
  display: flex;
  flex-direction: column;
  gap: 4px;
}

/* One tool-call step card. */
.agent-step {
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: 6px;
  padding: 7px 12px;
  font-size: 12px;
  transition: border-color 0.2s;
}
/* pending = tool call in flight (accent border); done = result received (dimmed). */
.agent-step.pending { border-color: var(--accent); }
.agent-step.done { opacity: 0.85; }

/* Icon + tool name + query summary laid out on one row. */
.agent-step-header {
  display: flex;
  align-items: center;
  gap: 6px;
}

.agent-step-icon { font-size: 13px; }

/* Monospace tool name, never wrapped. */
.agent-step-tool {
  font-family: "JetBrains Mono", monospace;
  font-size: 11px;
  font-weight: 600;
  color: var(--accent);
  white-space: nowrap;
}

/* The query/argument text, ellipsized to fit the remaining row width. */
.agent-step-query {
  color: var(--text);
  font-size: 12px;
  white-space: nowrap;
  overflow: hidden;
  text-overflow: ellipsis;
  flex: 1;
}

/* Truncated tool output, faded toward the bottom via a mask gradient. */
.agent-step-output {
  margin-top: 5px;
  color: var(--muted);
  font-size: 11px;
  font-family: "JetBrains Mono", monospace;
  white-space: pre-wrap;
  word-break: break-word;
  max-height: 80px;
  overflow: hidden;
  mask-image: linear-gradient(to bottom, black 60%, transparent 100%);
}

/* Live "agent is thinking" indicator shown while tool calls are in flight */
.agent-thinking {
  display: flex;
  align-items: center;
  gap: 8px;
  font-size: 12px;
  color: var(--muted);
  margin-bottom: 8px;
  width: 100%;
  max-width: 760px;
}
|
| 536 |
+
|
| 537 |
/* ββ Scrollbar βββββββββββββββββββββββββββββββββββββββββββββββββ */
|
| 538 |
::-webkit-scrollbar { width: 6px; }
|
| 539 |
::-webkit-scrollbar-track { background: transparent; }
|