"""REPOMIND – HuggingFace Space entry point.

Public demo. Auto-detects backend from environment variables (Steve Kimoi's
canonical lablab/AMD tutorial pattern):

    VLLM_BASE_URL – set in Space → Settings → Variables and secrets
                    to point at a live MI300X vLLM endpoint, e.g.
                    http://<your-droplet-ip>:8000/v1
    MODEL_NAME    – model id served by vLLM, defaults to
                    Qwen/Qwen3-Coder-Next-FP8

When VLLM_BASE_URL is unset (default), the Space runs the offline mock
backend on CPU-basic so it stays free 24/7. When set, the Space wires
through to the live AMD MI300X for real inference.

Local repo: https://github.com/SRKRZ23/repomind
Hackathon:  https://lablab.ai/ai-hackathons/amd-developer
"""
from __future__ import annotations
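# Illustrative local run (assumed workflow; mirrors the Space variables documented above):
#   export VLLM_BASE_URL=http://<your-droplet-ip>:8000/v1
#   export MODEL_NAME=Qwen/Qwen3-Coder-Next-FP8
#   python app.py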
import json
import os
import sys
import tempfile
from pathlib import Path
# make submodules importable
sys.path.insert(0, str(Path(__file__).resolve().parent))
import gradio as gr
from ingestion.chunker import ingest_to_json
from ingestion.cloner import clone
# ─── Configuration via env vars (Steve Kimoi tutorial pattern) ────────────
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "").strip()
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen3-Coder-Next-FP8").strip()
LIVE_BACKEND = bool(VLLM_BASE_URL)
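# A non-empty VLLM_BASE_URL flips the Space from the offline mock to live inference.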
BACKEND_LABEL = "🟢 Live AMD MI300X" if LIVE_BACKEND else "🟡 Mock backend (CPU-basic, demo mode)"
BACKEND_HINT = (
    f"Connected to vLLM endpoint: `{VLLM_BASE_URL}` · model `{MODEL_NAME}`"
    if LIVE_BACKEND else
    "Set the Space secrets `VLLM_BASE_URL` + `MODEL_NAME` to wire a real MI300X backend."
)
HEADER_MD = f"""
# REPOMIND
**Open-source repo-scale coding agent on AMD MI300X.**
Ingest a git repository (up to 256K tokens, FP8) on a single GPU and
reason across the whole codebase with multi-step tool use.
> 📦 GitHub: <a href="https://github.com/SRKRZ23/repomind" target="_blank" rel="noopener noreferrer">SRKRZ23/repomind</a> · MIT
> 🏆 Built for the <a href="https://lablab.ai/ai-hackathons/amd-developer" target="_blank" rel="noopener noreferrer">AMD Developer Hackathon 2026</a>
> 🤗 HF Special Prize candidate · 🛡 Conservative claim discipline applied
### Why AMD MI300X (verified 2026-05-05 on real hardware)
- Qwen3-Coder-Next-FP8 weights = **77.29 GiB** in VRAM (verified)
- 256K KV cache @ FP8 = **94.58 GiB** available (2,065,744 tokens, verified)
- Activations + framework overhead → peak 176/191.7 GiB ≈ **92% utilization**
- An NVIDIA H100 80 GB cannot fit this on a single card by VRAM accounting
  (~143 GB needed > 80 GB available); the MI300X's 192 GB has the headroom
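- Accounting check (from the two verified numbers above): 77.29 GiB weights +
  94.58 GiB KV budget = 171.87 GiB reserved; activations/overhead take the
  measured peak to 176 GiB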
### Status
**Backend right now**: {BACKEND_LABEL}
{BACKEND_HINT}
"""
# Minimal cap – HF Space CPU-basic gets 16 GB RAM. Don't blow it on giant repos.
MAX_INGEST_SIZE_MB = 50
SCRATCH_DIR = Path(tempfile.gettempdir()) / "repomind_hf"
SCRATCH_DIR.mkdir(exist_ok=True)
def ingest(url_or_path: str, chunk_tokens: int) -> str:
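    """Resolve a local path or clone a remote repo, chunk it, and return the ingestion summary as pretty-printed JSON."""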
if not url_or_path or not url_or_path.strip():
return "Provide a GitHub URL or `owner/repo` shorthand."
out = SCRATCH_DIR / "active.json"
try:
        # Local path mode (rare on HF – usually a URL)
if Path(url_or_path).is_dir():
repo_root = Path(url_or_path)
label = repo_root.name
else:
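            # Remote mode: clone into the scratch cache and label the repo by the URL tail (".git" stripped).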
res = clone(url_or_path, cache_dir=SCRATCH_DIR / "repos")
repo_root = res.local_path
label = res.url.rsplit("/", 1)[-1].removesuffix(".git")
summary = ingest_to_json(
repo_root,
out,
repo_label=label,
max_tokens_per_chunk=chunk_tokens,
)
return json.dumps(summary, indent=2)
except Exception as e:
        return f"❌ {type(e).__name__}: {e}"
def _build_llm():
"""Return an LLM client based on env-var configuration."""
if LIVE_BACKEND:
from serving.vllm_client import VLLMClient
return VLLMClient(base_url=VLLM_BASE_URL, model=MODEL_NAME)
from serving.mock_client import MockClient
return MockClient(max_tool_turns=2)
def ask(question: str):
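    """Answer a question about the last-ingested repo; returns (answer, tool-call trace)."""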
summary_path = SCRATCH_DIR / "active.json"
if not summary_path.exists():
return "Ingest a repo first.", ""
if not question or not question.strip():
return "Type a question.", ""
summary = json.loads(summary_path.read_text())
repo_root = Path(summary.get("root", "."))
try:
llm = _build_llm()
except Exception as e:
        return f"❌ failed to init LLM client: {type(e).__name__}: {e}", ""
from agent.loop import Agent
from tools.registry import default_registry
try:
agent = Agent(
llm=llm,
tools=default_registry(repo_root, scratch_dir=SCRATCH_DIR / "scratch"),
max_steps=4,
)
result = agent.run(question, summary)
except Exception as e:
        return f"❌ agent failed: {type(e).__name__}: {e}", ""
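    # Render each recorded tool call as a one-line bullet for the trace panel.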
trace_lines = [
f"- {tc['name']} {json.dumps(tc['arguments'], ensure_ascii=False)}"
for tc in result.tool_calls
]
trace = "\n".join(trace_lines) or "(no tool calls)"
return result.answer, trace
with gr.Blocks(
    title="REPOMIND – repo-scale coding agent on AMD MI300X",
    theme=gr.themes.Soft(primary_hue="red", secondary_hue="gray"),
) as demo:
gr.Markdown(HEADER_MD)
with gr.Tab("1. Ingest"):
gr.Markdown(
"Paste any **GitHub URL** or `owner/repo` shorthand. "
"REPOMIND clones it, parses the source files, and chunks them "
"into priority-ranked sections (README first, then top-level "
"symbols, then nested code, then tests)."
)
with gr.Row():
url = gr.Textbox(
label="GitHub URL or owner/repo",
placeholder="https://github.com/pallets/flask OR pallets/flask",
scale=4,
)
chunk_tokens = gr.Slider(
256, 4096, value=1024, step=128, label="Tokens / chunk", scale=1
)
ingest_btn = gr.Button("Ingest", variant="primary")
ingest_out = gr.Code(label="Ingestion summary", language="json")
ingest_btn.click(ingest, [url, chunk_tokens], ingest_out)
        gr.Markdown(
            "**Examples that work on a single MI300X**: "
            "`pallets/flask` (~408K tokens, fits in 256K window with priority chunking) · "
            "`pytorch/vision` (~1.3M tokens, trimmed to 180K of highest-priority "
            "content via the chunker) · this repo `SRKRZ23/repomind` (~68K tokens, fits whole)."
        )
with gr.Tab("2. Ask"):
        gr.Markdown(
            f"Ask any question about the ingested repo. The agent runs an "
            f"SC-TIR loop (PLAN → CALL TOOL → OBSERVE → THINK → ANSWER) with "
            f"five tools: `read_file`, `grep_codebase`, `execute_code` "
            f"(sandboxed), `run_tests`, `git_log`.\n\n"
            f"**Backend**: {BACKEND_LABEL}"
        )
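        # The tool-trace panel below prints one bullet per call, e.g. (hypothetical arguments):
        #   - grep_codebase {"pattern": "wsgi_app"}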
        question = gr.Textbox(
            label="Question",
            lines=3,
            placeholder=(
                "Where is the WSGI entry point? · "
                "What does the chunker prioritize? · "
                "Trace one slab allocation through the call graph."
            ),
        )
ask_btn = gr.Button("Ask", variant="primary")
answer = gr.Markdown(label="Answer")
tool_trace = gr.Code(label="Tool trace (agent steps)", language="markdown")
ask_btn.click(ask, [question], [answer, tool_trace])
with gr.Tab("3. Verified evidence"):
        gr.Markdown(
            "REPOMIND was stress-tested on a real AMD MI300X x1 droplet across "
            "two sessions (**2026-05-05 / 2026-05-06**, 124 min total, $4.12). "
            "Highlights:\n\n"
            "| Test | Result |\n"
            "|---|---|\n"
            "| Memory peak | 176/191.7 GiB (92%) |\n"
            "| `--max-model-len 262144` | started clean |\n"
            "| Concurrency 8K / 16K / 32K / 64K @ N=31 | **31/31 success at every context** ✅ |\n"
            "| Concurrency 128K @ N=31 | 25/31 (6 timeouts past 15 min) |\n"
            "| Long-context needle at 200K | **3/3** pass (early/middle/late) |\n"
            "| End-to-end repo Q&A | **9/9** correct across 3 repos |\n"
            "| Largest repo tested | **pytorch/vision (1.3M tokens)** |\n"
            "| Tuning attempt: AITER backend | regression – 137/144 cells broken under FP8 KV cache; default Triton stays production-safe |\n"
            "| Cost | $1.99/hr cloud, $45.75/1M completion tokens |\n\n"
            "Full evidence pack – JSON results, plots, raw model outputs – is at "
            '<a href="https://github.com/SRKRZ23/repomind/tree/main/benchmarks/2026-05-05-mi300x-stress-test" target="_blank" rel="noopener noreferrer">github.com/SRKRZ23/repomind/tree/main/benchmarks/2026-05-05-mi300x-stress-test</a>. '
            "Extended PHASE 1+2 narrative + AITER A/B in the "
            '<a href="https://github.com/SRKRZ23/repomind/tree/main/benchmarks/2026-05-05-mi300x-stress-test/extended" target="_blank" rel="noopener noreferrer">extended/SUMMARY.md</a>.'
        )
    gr.HTML(
        """
        <hr/>
        <p><strong>Author:</strong> Sardor Razikov – Tashkent 🇺🇿</p>
        <p>
          <a href="https://github.com/SRKRZ23/repomind" target="_blank" rel="noopener noreferrer">GitHub</a> ·
          <a href="https://www.linkedin.com/in/sardor-razikov-569a5327b" target="_blank" rel="noopener noreferrer">LinkedIn</a> ·
          <a href="https://x.com/SardorRazi99093" target="_blank" rel="noopener noreferrer">X / Twitter</a> ·
          <a href="https://doi.org/10.5281/zenodo.19791329" target="_blank" rel="noopener noreferrer">Zenodo (ECB)</a>
        </p>
        <p>📧
          <a href="mailto:razikovsardor1@gmail.com">razikovsardor1@gmail.com</a> ·
          <a href="mailto:razikovs777@gmail.com">razikovs777@gmail.com</a>
        </p>
        <p><em>If the MI300X memory-architecture story resonates,
        <strong>a like on this Space helps with the Hugging Face Special Prize judging.</strong> 🤗</em></p>
        """
    )
if __name__ == "__main__":
    demo.launch()