| """REPOMIND β HuggingFace Space entry point. |
| |
| Public demo. Auto-detects backend from environment variables (Steve Kimoi's |
| canonical lablab/AMD tutorial pattern): |
| |
| VLLM_BASE_URL β set in Space β Settings β Variables and secrets |
| to point at a live MI300X vLLM endpoint, e.g. |
| http://<your-droplet-ip>:8000/v1 |
| MODEL_NAME β model id served by vLLM, defaults to |
| Qwen/Qwen3-Coder-Next-FP8 |
| |
| When VLLM_BASE_URL is unset (default), the Space runs the offline mock |
| backend on CPU-basic so it stays free 24/7. When set, the Space wires |
| through to the live AMD MI300X for real inference. |
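
Example (placeholder values, not a live endpoint):

    VLLM_BASE_URL=http://203.0.113.7:8000/v1
    MODEL_NAME=Qwen/Qwen3-Coder-Next-FP8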

Source repo: https://github.com/SRKRZ23/repomind
Hackathon:   https://lablab.ai/ai-hackathons/amd-developer
"""

from __future__ import annotations

import json
import os
import sys
import tempfile
from pathlib import Path

# Make the Space's local packages (ingestion, serving, agent, tools)
# importable regardless of the working directory app.py is launched from.
sys.path.insert(0, str(Path(__file__).resolve().parent))

import gradio as gr

from ingestion.chunker import ingest_to_json
from ingestion.cloner import clone

# Backend auto-detection: a live vLLM endpoint is used only when
# VLLM_BASE_URL is set; otherwise the offline mock backend runs.
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "").strip()
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen3-Coder-Next-FP8").strip()
LIVE_BACKEND = bool(VLLM_BASE_URL)
BACKEND_LABEL = "🟢 Live AMD MI300X" if LIVE_BACKEND else "🟡 Mock backend (CPU-basic, demo mode)"
BACKEND_HINT = (
    f"Connected to vLLM endpoint: `{VLLM_BASE_URL}` · model `{MODEL_NAME}`"
    if LIVE_BACKEND else
    "Set the Space secrets `VLLM_BASE_URL` + `MODEL_NAME` to wire a real MI300X backend."
)
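

# Optional one-shot sanity check for a configured live backend. A minimal
# sketch, assuming the standard OpenAI-compatible GET <base>/models route
# that vLLM serves; failures are reported rather than raised so the Space
# still boots on the mock backend when the endpoint is unreachable.
def _probe_backend() -> str | None:
    if not LIVE_BACKEND:
        return None
    import urllib.request
    try:
        with urllib.request.urlopen(f"{VLLM_BASE_URL}/models", timeout=5) as resp:
            return resp.read().decode("utf-8", errors="replace")
    except OSError as e:
        return f"endpoint unreachable: {e}"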


HEADER_MD = f"""
# REPOMIND
**Open-source repo-scale coding agent on AMD MI300X.**

Ingest a git repository (up to 256K tokens, FP8) on a single GPU and
reason across the whole codebase with multi-step tool use.

> 📦 GitHub: <a href="https://github.com/SRKRZ23/repomind" target="_blank" rel="noopener noreferrer">SRKRZ23/repomind</a> · MIT
> 🏆 Built for the <a href="https://lablab.ai/ai-hackathons/amd-developer" target="_blank" rel="noopener noreferrer">AMD Developer Hackathon 2026</a>
> 🤗 HF Special Prize candidate · 💡 Conservative claim discipline applied

### Why AMD MI300X (verified 2026-05-05 on real hardware)

- Qwen3-Coder-Next-FP8 weights = **77.29 GiB** in VRAM (verified)
- 256K KV cache @ FP8 = **94.58 GiB** available (2,065,744 tokens, verified)
- Activations + framework overhead → peak 176/191.7 GiB → **92% utilization**
- NVIDIA H100 80 GB cannot accommodate this on a single card by VRAM
  accounting (~143 GB > 80 GB); MI300X 192 GB has the headroom

### Status

**Backend right now**: {BACKEND_LABEL}

{BACKEND_HINT}
"""


MAX_INGEST_SIZE_MB = 50
SCRATCH_DIR = Path(tempfile.gettempdir()) / "repomind_hf"
SCRATCH_DIR.mkdir(exist_ok=True)


def ingest(url_or_path: str, chunk_tokens: int) -> str:
    """Clone (or reuse) the target repo and write its chunk summary to active.json."""
    target = (url_or_path or "").strip()
    if not target:
        return "Provide a GitHub URL or `owner/repo` shorthand."
    out = SCRATCH_DIR / "active.json"
    try:
        if Path(target).is_dir():
            repo_root = Path(target)
            label = repo_root.name
        else:
            res = clone(target, cache_dir=SCRATCH_DIR / "repos")
            repo_root = res.local_path
            label = res.url.rsplit("/", 1)[-1].removesuffix(".git")
        summary = ingest_to_json(
            repo_root,
            out,
            repo_label=label,
            max_tokens_per_chunk=int(chunk_tokens),  # Gradio sliders deliver floats
        )
        return json.dumps(summary, indent=2)
    except Exception as e:
        return f"❌ {type(e).__name__}: {e}"


def _build_llm():
    """Return an LLM client based on env-var configuration."""
    if LIVE_BACKEND:
        from serving.vllm_client import VLLMClient
        return VLLMClient(base_url=VLLM_BASE_URL, model=MODEL_NAME)
    from serving.mock_client import MockClient
    return MockClient(max_tool_turns=2)


def ask(question: str):
    """Answer a question about the ingested repo via the tool-using agent."""
    summary_path = SCRATCH_DIR / "active.json"
    if not summary_path.exists():
        return "Ingest a repo first.", ""
    if not question or not question.strip():
        return "Type a question.", ""

    summary = json.loads(summary_path.read_text())
    repo_root = Path(summary.get("root", "."))

    try:
        llm = _build_llm()
    except Exception as e:
        return f"❌ failed to init LLM client: {type(e).__name__}: {e}", ""

    # Agent stack imports are deferred until a question is actually asked.
    from agent.loop import Agent
    from tools.registry import default_registry

    try:
        agent = Agent(
            llm=llm,
            tools=default_registry(repo_root, scratch_dir=SCRATCH_DIR / "scratch"),
            max_steps=4,
        )
        result = agent.run(question, summary)
    except Exception as e:
        return f"❌ agent failed: {type(e).__name__}: {e}", ""

    trace_lines = [
        f"- {tc['name']} {json.dumps(tc['arguments'], ensure_ascii=False)}"
        for tc in result.tool_calls
    ]
    trace = "\n".join(trace_lines) or "(no tool calls)"
    return result.answer, trace
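

# Shape of the SC-TIR loop that the "2. Ask" tab describes (PLAN → CALL TOOL →
# OBSERVE → THINK → ANSWER). A hedged sketch of the control flow only, using
# plain dicts and caller-supplied callables; the production loop, including
# prompting and budgets, is agent.loop.Agent.
def _sc_tir_sketch(plan_fn, dispatch_fn, question: str, max_steps: int = 4) -> str:
    transcript: list = [question]
    for _ in range(max_steps):
        step = plan_fn(transcript)                          # PLAN / THINK
        if step.get("tool_call") is None:
            return step.get("answer", "")                   # ANSWER
        transcript.append(dispatch_fn(step["tool_call"]))   # CALL TOOL → OBSERVE
    return "step budget exhausted"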


with gr.Blocks(
    title="REPOMIND – repo-scale coding agent on AMD MI300X",
    theme=gr.themes.Soft(primary_hue="red", secondary_hue="gray"),
) as demo:
    gr.Markdown(HEADER_MD)

    with gr.Tab("1. Ingest"):
        gr.Markdown(
            "Paste any **GitHub URL** or `owner/repo` shorthand. "
            "REPOMIND clones it, parses the source files, and chunks them "
            "into priority-ranked sections (README first, then top-level "
            "symbols, then nested code, then tests)."
        )
        with gr.Row():
            url = gr.Textbox(
                label="GitHub URL or owner/repo",
                placeholder="https://github.com/pallets/flask OR pallets/flask",
                scale=4,
            )
            chunk_tokens = gr.Slider(
                256, 4096, value=1024, step=128, label="Tokens / chunk", scale=1
            )
        ingest_btn = gr.Button("Ingest", variant="primary")
        ingest_out = gr.Code(label="Ingestion summary", language="json")
        ingest_btn.click(ingest, [url, chunk_tokens], ingest_out)

        gr.Markdown(
            "**Examples that work on a single MI300X**: "
            "`pallets/flask` (~408K tokens, fits in the 256K window with priority chunking) · "
            "`pytorch/vision` (~1.3M tokens, trimmed to 180K of highest-priority "
            "content via the chunker) · this repo `SRKRZ23/repomind` (~68K tokens, fits whole)."
        )

    with gr.Tab("2. Ask"):
        gr.Markdown(
            f"Ask any question about the ingested repo. The agent runs an "
            f"SC-TIR loop (PLAN → CALL TOOL → OBSERVE → THINK → ANSWER) with "
            f"five tools: `read_file`, `grep_codebase`, `execute_code` "
            f"(sandboxed), `run_tests`, `git_log`.\n\n"
            f"**Backend**: {BACKEND_LABEL}"
        )
        question = gr.Textbox(
            label="Question",
            lines=3,
            placeholder=(
                "Where is the WSGI entry point? · "
                "What does the chunker prioritize? · "
                "Trace one slab allocation through the call graph."
            ),
        )
        ask_btn = gr.Button("Ask", variant="primary")
        answer = gr.Markdown(label="Answer")
        tool_trace = gr.Code(label="Tool trace (agent steps)", language="markdown")

        ask_btn.click(ask, [question], [answer, tool_trace])

    with gr.Tab("3. Verified evidence"):
        gr.Markdown(
            "REPOMIND was stress-tested on a real AMD MI300X x1 droplet across "
            "two sessions (**2026-05-05 / 2026-05-06**, 124 min total, $4.12). "
            "Highlights:\n\n"
            "| Test | Result |\n"
            "|---|---|\n"
            "| Memory peak | 176/191.7 GiB (92%) |\n"
            "| `--max-model-len 262144` | started clean |\n"
            "| Concurrency 8K / 16K / 32K / 64K @ N=31 | **31/31 success at every context** ✅ |\n"
            "| Concurrency 128K @ N=31 | 25/31 (6 timeouts past 15 min) |\n"
            "| Long-context needle at 200K | **3/3** pass (early/middle/late) |\n"
            "| End-to-end repo Q&A | **9/9** correct across 3 repos |\n"
            "| Largest repo tested | **pytorch/vision (1.3M tokens)** |\n"
            "| Tuning attempt: AITER backend | regression → 137/144 cells broken under FP8 KV cache; default Triton stays production-safe |\n"
            "| Cost | $1.99/hr cloud, $45.75/1M completion tokens |\n\n"
            "Full evidence pack (JSON results, plots, raw model outputs) is at "
            '<a href="https://github.com/SRKRZ23/repomind/tree/main/benchmarks/2026-05-05-mi300x-stress-test" target="_blank" rel="noopener noreferrer">github.com/SRKRZ23/repomind/tree/main/benchmarks/2026-05-05-mi300x-stress-test</a>. '
            "Extended PHASE 1+2 narrative and the AITER A/B are in "
            '<a href="https://github.com/SRKRZ23/repomind/tree/main/benchmarks/2026-05-05-mi300x-stress-test/extended" target="_blank" rel="noopener noreferrer">extended/SUMMARY.md</a>.'
        )

    gr.HTML(
        """
        <hr/>
        <p><strong>Author:</strong> Sardor Razikov – Tashkent 🇺🇿</p>
        <p>
          <a href="https://github.com/SRKRZ23/repomind" target="_blank" rel="noopener noreferrer">GitHub</a> ·
          <a href="https://www.linkedin.com/in/sardor-razikov-569a5327b" target="_blank" rel="noopener noreferrer">LinkedIn</a> ·
          <a href="https://x.com/SardorRazi99093" target="_blank" rel="noopener noreferrer">X / Twitter</a> ·
          <a href="https://doi.org/10.5281/zenodo.19791329" target="_blank" rel="noopener noreferrer">Zenodo (ECB)</a>
        </p>
        <p>📧
          <a href="mailto:razikovsardor1@gmail.com">razikovsardor1@gmail.com</a> ·
          <a href="mailto:razikovs777@gmail.com">razikovs777@gmail.com</a>
        </p>
        <p><em>If the MI300X memory-architecture story resonates,
        <strong>a like on this Space helps with the Hugging Face Special Prize judging.</strong> 🤗</em></p>
        """
    )


if __name__ == "__main__":
    # Theme is configured on gr.Blocks above; demo.launch() takes no theme kwarg.
    demo.launch()