"""REPOMIND – HuggingFace Space entry point.

Public demo. Auto-detects backend from environment variables (Steve Kimoi's
canonical lablab/AMD tutorial pattern):

    VLLM_BASE_URL – set in Space → Settings → Variables and secrets
                    to point at a live MI300X vLLM endpoint, e.g.
                    http://<your-droplet-ip>:8000/v1
    MODEL_NAME    – model id served by vLLM, defaults to
                    Qwen/Qwen3-Coder-Next-FP8

When VLLM_BASE_URL is unset (default), the Space runs the offline mock
backend on CPU-basic so it stays free 24/7. When set, the Space wires
through to the live AMD MI300X for real inference.

Local repo: https://github.com/SRKRZ23/repomind
Hackathon:  https://lablab.ai/ai-hackathons/amd-developer
"""
from __future__ import annotations
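# Illustrative local run (assumed workflow; mirrors the Space variables documented above):
#   export VLLM_BASE_URL=http://<your-droplet-ip>:8000/v1
#   export MODEL_NAME=Qwen/Qwen3-Coder-Next-FP8
#   python app.py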
import json
import os
import sys
import tempfile
from pathlib import Path
# make submodules importable
sys.path.insert(0, str(Path(__file__).resolve().parent))
import gradio as gr
from ingestion.chunker import ingest_to_json
from ingestion.cloner import clone
# ─── Configuration via env vars (Steve Kimoi tutorial pattern) ────────────
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "").strip()
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen3-Coder-Next-FP8").strip()
LIVE_BACKEND = bool(VLLM_BASE_URL)
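# A non-empty VLLM_BASE_URL flips the Space from the offline mock to live inference.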
BACKEND_LABEL = "🟢 Live AMD MI300X" if LIVE_BACKEND else "🟡 Mock backend (CPU-basic, demo mode)"
BACKEND_HINT = (
    f"Connected to vLLM endpoint: `{VLLM_BASE_URL}` · model `{MODEL_NAME}`"
    if LIVE_BACKEND else
    "Set the Space secrets `VLLM_BASE_URL` + `MODEL_NAME` to wire a real MI300X backend."
)
HEADER_MD = f"""
# REPOMIND
**Open-source repo-scale coding agent on AMD MI300X.**
Ingest a git repository (up to 256K tokens, FP8) on a single GPU and
reason across the whole codebase with multi-step tool use.
> 📦 GitHub: <a href="https://github.com/SRKRZ23/repomind" target="_blank" rel="noopener noreferrer">SRKRZ23/repomind</a> · MIT
> 🏆 Built for the <a href="https://lablab.ai/ai-hackathons/amd-developer" target="_blank" rel="noopener noreferrer">AMD Developer Hackathon 2026</a>
> 🤗 HF Special Prize candidate · 🛡 Conservative claim discipline applied
### Why AMD MI300X (verified 2026-05-05 on real hardware)
- Qwen3-Coder-Next-FP8 weights = **77.29 GiB** in VRAM (verified)
- 256K KV cache @ FP8 = **94.58 GiB** available (2,065,744 tokens, verified)
- Activations + framework overhead → peak 176/191.7 GiB ≈ **92% utilization**
- An NVIDIA H100 80 GB cannot fit this on a single card by VRAM accounting
  (~143 GB needed > 80 GB available); the MI300X's 192 GB has the headroom
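- Accounting check (from the two verified numbers above): 77.29 GiB weights +
  94.58 GiB KV budget = 171.87 GiB reserved; activations/overhead take the
  measured peak to 176 GiB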
### Status
**Backend right now**: {BACKEND_LABEL}
{BACKEND_HINT}
"""
# Minimal cap – HF Space CPU-basic gets 16 GB RAM. Don't blow it on giant repos.
MAX_INGEST_SIZE_MB = 50
SCRATCH_DIR = Path(tempfile.gettempdir()) / "repomind_hf"
SCRATCH_DIR.mkdir(exist_ok=True)
def ingest(url_or_path: str, chunk_tokens: int) -> str:
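    """Resolve a local path or clone a remote repo, chunk it, and return the ingestion summary as pretty-printed JSON."""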
if not url_or_path or not url_or_path.strip():
return "Provide a GitHub URL or `owner/repo` shorthand."
out = SCRATCH_DIR / "active.json"
try:
        # Local path mode (rare on HF – usually a URL)
if Path(url_or_path).is_dir():
repo_root = Path(url_or_path)
label = repo_root.name
else:
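            # Remote mode: clone into the scratch cache and label the repo by the URL tail (".git" stripped).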
res = clone(url_or_path, cache_dir=SCRATCH_DIR / "repos")
repo_root = res.local_path
label = res.url.rsplit("/", 1)[-1].removesuffix(".git")
summary = ingest_to_json(
repo_root,
out,
repo_label=label,
max_tokens_per_chunk=chunk_tokens,
)
return json.dumps(summary, indent=2)
except Exception as e:
        return f"❌ {type(e).__name__}: {e}"
def _build_llm():
"""Return an LLM client based on env-var configuration."""
if LIVE_BACKEND:
from serving.vllm_client import VLLMClient
return VLLMClient(base_url=VLLM_BASE_URL, model=MODEL_NAME)
from serving.mock_client import MockClient
return MockClient(max_tool_turns=2)
def ask(question: str):
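    """Answer a question about the last-ingested repo; returns (answer, tool-call trace)."""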
summary_path = SCRATCH_DIR / "active.json"
if not summary_path.exists():
return "Ingest a repo first.", ""
if not question or not question.strip():
return "Type a question.", ""
summary = json.loads(summary_path.read_text())
repo_root = Path(summary.get("root", "."))
try:
llm = _build_llm()
except Exception as e:
        return f"❌ failed to init LLM client: {type(e).__name__}: {e}", ""
from agent.loop import Agent
from tools.registry import default_registry
try:
agent = Agent(
llm=llm,
tools=default_registry(repo_root, scratch_dir=SCRATCH_DIR / "scratch"),
max_steps=4,
)
result = agent.run(question, summary)
except Exception as e:
        return f"❌ agent failed: {type(e).__name__}: {e}", ""
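    # Render each recorded tool call as a one-line bullet for the trace panel.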
trace_lines = [
f"- {tc['name']} {json.dumps(tc['arguments'], ensure_ascii=False)}"
for tc in result.tool_calls
]
trace = "\n".join(trace_lines) or "(no tool calls)"
return result.answer, trace
with gr.Blocks(
    title="REPOMIND – repo-scale coding agent on AMD MI300X",
    theme=gr.themes.Soft(primary_hue="red", secondary_hue="gray"),
) as demo:
gr.Markdown(HEADER_MD)
with gr.Tab("1. Ingest"):
gr.Markdown(
"Paste any **GitHub URL** or `owner/repo` shorthand. "
"REPOMIND clones it, parses the source files, and chunks them "
"into priority-ranked sections (README first, then top-level "
"symbols, then nested code, then tests)."
)
with gr.Row():
url = gr.Textbox(
label="GitHub URL or owner/repo",
placeholder="https://github.com/pallets/flask OR pallets/flask",
scale=4,
)
chunk_tokens = gr.Slider(
256, 4096, value=1024, step=128, label="Tokens / chunk", scale=1
)
ingest_btn = gr.Button("Ingest", variant="primary")
ingest_out = gr.Code(label="Ingestion summary", language="json")
ingest_btn.click(ingest, [url, chunk_tokens], ingest_out)
        gr.Markdown(
            "**Examples that work on a single MI300X**: "
            "`pallets/flask` (~408K tokens, fits in 256K window with priority chunking) · "
            "`pytorch/vision` (~1.3M tokens, trimmed to 180K of highest-priority "
            "content via the chunker) · this repo `SRKRZ23/repomind` (~68K tokens, fits whole)."
        )
with gr.Tab("2. Ask"):
        gr.Markdown(
            f"Ask any question about the ingested repo. The agent runs an "
            f"SC-TIR loop (PLAN → CALL TOOL → OBSERVE → THINK → ANSWER) with "
            f"five tools: `read_file`, `grep_codebase`, `execute_code` "
            f"(sandboxed), `run_tests`, `git_log`.\n\n"
            f"**Backend**: {BACKEND_LABEL}"
        )
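        # The tool-trace panel below prints one bullet per call, e.g. (hypothetical arguments):
        #   - grep_codebase {"pattern": "wsgi_app"}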
        question = gr.Textbox(
            label="Question",
            lines=3,
            placeholder=(
                "Where is the WSGI entry point? · "
                "What does the chunker prioritize? · "
                "Trace one slab allocation through the call graph."
            ),
        )
ask_btn = gr.Button("Ask", variant="primary")
answer = gr.Markdown(label="Answer")
tool_trace = gr.Code(label="Tool trace (agent steps)", language="markdown")
ask_btn.click(ask, [question], [answer, tool_trace])
with gr.Tab("3. Verified evidence"):
        gr.Markdown(
            "REPOMIND was stress-tested on a real AMD MI300X x1 droplet across "
            "two sessions (**2026-05-05 / 2026-05-06**, 124 min total, $4.12). "
            "Highlights:\n\n"
            "| Test | Result |\n"
            "|---|---|\n"
            "| Memory peak | 176/191.7 GiB (92%) |\n"
            "| `--max-model-len 262144` | started clean |\n"
            "| Concurrency 8K / 16K / 32K / 64K @ N=31 | **31/31 success at every context** ✅ |\n"
            "| Concurrency 128K @ N=31 | 25/31 (6 timeouts past 15 min) |\n"
            "| Long-context needle at 200K | **3/3** pass (early/middle/late) |\n"
            "| End-to-end repo Q&A | **9/9** correct across 3 repos |\n"
            "| Largest repo tested | **pytorch/vision (1.3M tokens)** |\n"
            "| Tuning attempt: AITER backend | regression – 137/144 cells broken under FP8 KV cache; default Triton stays production-safe |\n"
            "| Cost | $1.99/hr cloud, $45.75/1M completion tokens |\n\n"
            "Full evidence pack – JSON results, plots, raw model outputs – is at "
            '<a href="https://github.com/SRKRZ23/repomind/tree/main/benchmarks/2026-05-05-mi300x-stress-test" target="_blank" rel="noopener noreferrer">github.com/SRKRZ23/repomind/tree/main/benchmarks/2026-05-05-mi300x-stress-test</a>. '
            "Extended PHASE 1+2 narrative + AITER A/B in the "
            '<a href="https://github.com/SRKRZ23/repomind/tree/main/benchmarks/2026-05-05-mi300x-stress-test/extended" target="_blank" rel="noopener noreferrer">extended/SUMMARY.md</a>.'
        )
    gr.HTML(
        """
        <hr/>
        <p><strong>Author:</strong> Sardor Razikov – Tashkent 🇺🇿</p>
        <p>
          <a href="https://github.com/SRKRZ23/repomind" target="_blank" rel="noopener noreferrer">GitHub</a> ·
          <a href="https://www.linkedin.com/in/sardor-razikov-569a5327b" target="_blank" rel="noopener noreferrer">LinkedIn</a> ·
          <a href="https://x.com/SardorRazi99093" target="_blank" rel="noopener noreferrer">X / Twitter</a> ·
          <a href="https://doi.org/10.5281/zenodo.19791329" target="_blank" rel="noopener noreferrer">Zenodo (ECB)</a>
        </p>
        <p>📧
          <a href="mailto:razikovsardor1@gmail.com">razikovsardor1@gmail.com</a> ·
          <a href="mailto:razikovs777@gmail.com">razikovs777@gmail.com</a>
        </p>
        <p><em>If the MI300X memory-architecture story resonates,
        <strong>a like on this Space helps with the Hugging Face Special Prize judging.</strong> 🤗</em></p>
        """
    )
if __name__ == "__main__":
    demo.launch()