Pranoy Mukherjee committed on
Commit
a3ecd30
·
1 Parent(s): 3c067f0

Add crawler, security agent, API, and Gradio MVP

Browse files
.env.example ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ APP_NAME=SwarmAudit
2
+ LLM_PROVIDER=mock
3
+ LLM_BASE_URL=http://localhost:8000/v1
4
+ LLM_API_KEY=not-needed-for-mock
5
+ LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
6
+ MAX_FILES=200
7
+ MAX_FILE_SIZE_KB=250
8
+ MAX_CHARS_PER_CHUNK=12000
9
+ CLONE_TIMEOUT_SECONDS=60
.gitignore CHANGED
@@ -206,6 +206,10 @@ tempCodeRunnerFile.py
206
  # Ruff stuff:
207
  .ruff_cache/
208
 
 
 
 
 
209
  # PyPI configuration file
210
  .pypirc
211
 
 
206
  # Ruff stuff:
207
  .ruff_cache/
208
 
209
+ # SwarmAudit local test artifacts
210
+ .tmp_pytest/
211
+ pytest-cache-files-*
212
+
213
  # PyPI configuration file
214
  .pypirc
215
 
README.md CHANGED
@@ -1,2 +1,65 @@
1
  # SwarmAudit
2
- AI-powered multi agent code auditing for GitHub repositories. Detect vulnerabilities, performance bottlenecks, and architecture issues in minutes.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # SwarmAudit
2
+
3
+ AI-powered multi-agent code auditing for GitHub repositories. Paste a public GitHub URL and get a structured audit report with severity, file references, and suggested fixes.
4
+
5
+ ## MVP
6
+
7
+ SwarmAudit currently runs with a mock-first LLM interface so the demo is not blocked by ROCm, vLLM, or AMD MI300X setup. The first graph is:
8
+
9
+ ```text
10
+ GitHub URL -> Crawler -> Chunker -> Security Agent -> Synthesizer -> Report
11
+ ```
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ python -m venv .venv
17
+ .venv\Scripts\activate
18
+ pip install -r requirements.txt
19
+ ```
20
+
21
+ Run the FastAPI backend:
22
+
23
+ ```bash
24
+ uvicorn app.main:app --reload
25
+ ```
26
+
27
+ Health check:
28
+
29
+ ```bash
30
+ curl http://127.0.0.1:8000/health
31
+ ```
32
+
33
+ Run the Gradio demo:
34
+
35
+ ```bash
36
+ python -m app.ui.gradio_app
37
+ ```
38
+
39
+ ## Configuration
40
+
41
+ Copy `.env.example` to `.env` for local overrides. Default inference mode is:
42
+
43
+ ```text
44
+ LLM_PROVIDER=mock
45
+ ```
46
+
47
+ Later, set `LLM_PROVIDER=vllm` and point `LLM_BASE_URL` at an OpenAI-compatible vLLM endpoint running Qwen2.5-Coder.
48
+
49
+ ## Report Schema
50
+
51
+ Each finding includes:
52
+
53
+ - title
54
+ - severity: CRITICAL, HIGH, MEDIUM, LOW
55
+ - file path and line range
56
+ - description
57
+ - why it matters
58
+ - suggested fix
59
+ - agent source
60
+
61
+ ## Tests
62
+
63
+ ```bash
64
+ pytest
65
+ ```
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """SwarmAudit application package."""
app/agents/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Agent implementations for SwarmAudit."""
app/agents/docs_agent.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from app.schemas import AgentOutput, CodeChunk
2
+
3
+
4
class DocsAgent:
    """Stub documentation-review agent; produces no findings yet."""

    name = "Docs Agent"

    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
        """Return an empty output while recording how many chunks were offered."""
        chunk_count = len(chunks)
        return AgentOutput(
            agent_name=self.name,
            findings=[],
            metadata={"chunks_scanned": chunk_count},
        )
app/agents/graph.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections.abc import AsyncIterator
2
+ from typing import TypedDict
3
+
4
+ from langgraph.graph import END, StateGraph
5
+
6
+ from app.agents.security_agent import SecurityAgent
7
+ from app.agents.synthesizer_agent import SynthesizerAgent
8
+ from app.config import Settings, get_settings
9
+ from app.schemas import AgentOutput, AuditReport, CodeChunk, RepoScanResult
10
+ from app.services.chunker import Chunker
11
+ from app.services.llm_client import LLMClient
12
+ from app.services.repo_crawler import RepoCrawler
13
+
14
+
15
+ class AuditState(TypedDict, total=False):
16
+ repo_url: str
17
+ repo: RepoScanResult
18
+ chunks: list[CodeChunk]
19
+ security_output: AgentOutput
20
+ report: AuditReport
21
+ progress: list[str]
22
+
23
+
24
class AuditGraph:
    """End-to-end audit pipeline: crawl -> chunk -> security scan -> synthesize."""

    def __init__(self, settings: Settings | None = None):
        # Falls back to the cached global settings when none are injected.
        self.settings = settings or get_settings()
        self.crawler = RepoCrawler(self.settings)
        self.chunker = Chunker(self.settings)
        self.llm_client = LLMClient(self.settings)
        self.security_agent = SecurityAgent(self.llm_client)
        self.synthesizer = SynthesizerAgent()
        self.graph = self._build_graph()

    def _build_graph(self):
        """Build and compile the linear LangGraph over AuditState."""
        graph = StateGraph(AuditState)
        graph.add_node("crawl", self._crawl)
        graph.add_node("chunk", self._chunk)
        graph.add_node("security", self._security)
        graph.add_node("synthesize", self._synthesize)
        graph.set_entry_point("crawl")
        graph.add_edge("crawl", "chunk")
        graph.add_edge("chunk", "security")
        graph.add_edge("security", "synthesize")
        graph.add_edge("synthesize", END)
        return graph.compile()

    async def run(self, repo_url: str) -> AuditReport:
        """Run the compiled graph once and return the final report.

        NOTE(review): cleanup of the cloned temp dir happens inside
        _synthesize, so if an earlier node raises, the clone is not removed
        on this path — consider a try/finally here.
        """
        result = await self.graph.ainvoke({"repo_url": repo_url, "progress": []})
        return result["report"]

    async def run_with_progress(self, repo_url: str) -> AsyncIterator[str | AuditReport]:
        """Streaming variant for the UI: yields progress strings, then the report.

        Duplicates the stage logic of the compiled graph so each step can be
        reported as it happens; the try/finally guarantees temp-dir cleanup
        once the clone has succeeded.
        """
        repo: RepoScanResult | None = None
        yield "Crawler Agent: cloning and mapping repository..."
        repo = self.crawler.clone_and_scan(repo_url)
        yield f"Crawler Agent: mapped {len(repo.files)} files and skipped {repo.skipped_files}."

        try:
            yield "Chunker: filtering source files and creating chunks..."
            chunks = self.chunker.chunk_files(repo.files)
            yield f"Chunker: created {len(chunks)} code chunks."

            yield "Security Agent: scanning for risky patterns..."
            security_output = await self.security_agent.analyze(chunks)
            yield f"Security Agent: found {len(security_output.findings)} findings."

            yield "Synthesizer Agent: ranking findings and formatting report..."
            report = await self.synthesizer.synthesize(repo, [security_output])
            yield "Synthesizer Agent: final report generated."
            yield report
        finally:
            self.crawler.cleanup(repo)

    async def _crawl(self, state: AuditState) -> AuditState:
        # Graph node: clone the repo and record the file map.
        repo = self.crawler.clone_and_scan(state["repo_url"])
        progress = state.get("progress", []) + [f"Crawler Agent: mapped {len(repo.files)} files."]
        return {"repo": repo, "progress": progress}

    async def _chunk(self, state: AuditState) -> AuditState:
        # Graph node: split mapped files into size-bounded chunks.
        chunks = self.chunker.chunk_files(state["repo"].files)
        progress = state.get("progress", []) + [f"Chunker: created {len(chunks)} code chunks."]
        return {"chunks": chunks, "progress": progress}

    async def _security(self, state: AuditState) -> AuditState:
        # Graph node: run the security agent over all chunks.
        output = await self.security_agent.analyze(state["chunks"])
        progress = state.get("progress", []) + [f"Security Agent: found {len(output.findings)} findings."]
        return {"security_output": output, "progress": progress}

    async def _synthesize(self, state: AuditState) -> AuditState:
        # Graph node: build the final report, then remove the cloned temp dir.
        report = await self.synthesizer.synthesize(state["repo"], [state["security_output"]])
        progress = state.get("progress", []) + ["Synthesizer Agent: final report generated."]
        self.crawler.cleanup(state["repo"])
        return {"report": report, "progress": progress}
app/agents/performance_agent.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from app.schemas import AgentOutput, CodeChunk
2
+
3
+
4
class PerformanceAgent:
    """Stub performance-review agent; produces no findings yet."""

    name = "Performance Agent"

    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
        """Return an empty output while recording how many chunks were offered."""
        chunk_count = len(chunks)
        return AgentOutput(
            agent_name=self.name,
            findings=[],
            metadata={"chunks_scanned": chunk_count},
        )
app/agents/quality_agent.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from app.schemas import AgentOutput, CodeChunk
2
+
3
+
4
class QualityAgent:
    """Stub code-quality agent; produces no findings yet."""

    name = "Quality Agent"

    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
        """Return an empty output while recording how many chunks were offered."""
        chunk_count = len(chunks)
        return AgentOutput(
            agent_name=self.name,
            findings=[],
            metadata={"chunks_scanned": chunk_count},
        )
app/agents/security_agent.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from app.schemas import AgentOutput, CodeChunk, Finding, Severity
4
+ from app.services.llm_client import LLMClient
5
+
6
+
7
# Static security rules; each entry is
# (compiled pattern, title, severity, description, suggested fix).
SECURITY_PATTERNS = [
    (
        re.compile(r"(?i)(api[_-]?key|secret|token|password)\s*=\s*['\"][^'\"]{8,}['\"]"),
        "Potential hardcoded secret",
        Severity.high,
        "A credential-like value appears to be hardcoded.",
        "Move secrets into environment variables or a managed secret store.",
    ),
    (
        re.compile(r"(?i)verify\s*=\s*False"),
        "TLS certificate verification disabled",
        Severity.high,
        "Disabling TLS verification can allow man-in-the-middle attacks.",
        "Remove verify=False and use a trusted CA bundle if needed.",
    ),
    (
        # (?<![\w.]) restricts matches to bare eval/exec calls: without it the
        # rule false-positives on identifiers that merely end in eval/exec
        # (e.g. "retrieval(") and on method calls such as "model.eval()".
        re.compile(r"(?i)(?<![\w.])(eval|exec)\s*\("),
        "Dynamic code execution",
        Severity.medium,
        "Dynamic execution can turn untrusted input into arbitrary code execution.",
        "Replace eval/exec with explicit parsing or a constrained command map.",
    ),
]
30
+
31
+
32
class SecurityAgent:
    """Scans code chunks for risky patterns using the static regex rules."""

    name = "Security Agent"

    def __init__(self, llm_client: LLMClient):
        # The LLM client is held for the model-backed review path.
        self.llm_client = llm_client

    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
        """Run the static rules over every chunk and return collected findings.

        The complete_json call below exercises the LLM interface, but its
        result is currently discarded — only the static rules produce
        findings (reflected in the "mode" metadata value).
        """
        findings: list[Finding] = []

        for chunk in chunks:
            findings.extend(self._scan_chunk(chunk))

        await self.llm_client.complete_json(
            "You are a security code review agent. Return JSON findings only.",
            f"Review {len(chunks)} chunks for security issues.",
        )

        return AgentOutput(
            agent_name=self.name,
            findings=findings,
            metadata={"chunks_scanned": len(chunks), "mode": "static-rules-plus-llm-interface"},
        )

    def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
        """Match every SECURITY_PATTERNS rule against each line of one chunk."""
        findings: list[Finding] = []
        lines = chunk.content.splitlines()

        for offset, line in enumerate(lines):
            # Map the 0-based offset back to the file's absolute line number.
            actual_line = chunk.line_start + offset
            for pattern, title, severity, description, fix in SECURITY_PATTERNS:
                if pattern.search(line):
                    findings.append(
                        Finding(
                            title=title,
                            severity=severity,
                            file_path=chunk.file_path,
                            line_start=actual_line,
                            line_end=actual_line,
                            description=description,
                            why_it_matters="Attackers often search repos for exposed credentials and unsafe execution paths.",
                            suggested_fix=fix,
                            agent_source=self.name,
                        )
                    )

        return findings
app/agents/synthesizer_agent.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.schemas import AgentOutput, AuditReport, Finding, RepoScanResult, Severity
2
+
3
+
4
# Rank used to sort findings from most to least severe.
SEVERITY_ORDER = {
    level: rank
    for rank, level in enumerate(
        (Severity.critical, Severity.high, Severity.medium, Severity.low)
    )
}
10
+
11
+
12
class SynthesizerAgent:
    """Merges per-agent outputs into one deduplicated, severity-ranked report."""

    name = "Synthesizer Agent"

    async def synthesize(self, repo: RepoScanResult, outputs: list[AgentOutput]) -> AuditReport:
        """Combine all agent findings, drop duplicates, sort, and build the report."""
        combined = [finding for output in outputs for finding in output.findings]
        findings = self._dedupe(combined)
        findings.sort(
            key=lambda item: (SEVERITY_ORDER[item.severity], item.file_path, item.line_start)
        )

        # Every severity level appears in the summary, even with a zero count.
        summary = dict.fromkeys(Severity, 0)
        for item in findings:
            summary[item.severity] += 1

        agents = [output.agent_name for output in outputs]
        agents.append(self.name)

        return AuditReport(
            repo_url=repo.repo_url,
            scanned_file_count=len(repo.files),
            skipped_file_count=repo.skipped_files,
            findings=findings,
            severity_summary=summary,
            agents_run=agents,
            warnings=repo.warnings,
        )

    def _dedupe(self, findings: list[Finding]) -> list[Finding]:
        """Keep the first occurrence of each (file, line, title, agent) key."""
        by_key: dict[tuple[str, int, str, str], Finding] = {}
        for finding in findings:
            key = (finding.file_path, finding.line_start, finding.title, finding.agent_source)
            if key not in by_key:
                by_key[key] = finding
        return list(by_key.values())
app/config.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from functools import lru_cache
2
+
3
+ from pydantic_settings import BaseSettings, SettingsConfigDict
4
+
5
+
6
class Settings(BaseSettings):
    """Application settings, overridable via environment variables or a .env file."""

    app_name: str = "SwarmAudit"
    # "mock" keeps the demo runnable without a model server; "vllm" targets an
    # OpenAI-compatible endpoint (see LLMClient.complete_json).
    llm_provider: str = "mock"
    llm_base_url: str = "http://localhost:8000/v1"
    llm_api_key: str = "not-needed-for-mock"
    llm_model: str = "Qwen/Qwen2.5-Coder-32B-Instruct"
    # Crawl/chunk limits; keys mirror .env.example.
    max_files: int = 200
    max_file_size_kb: int = 250
    max_chars_per_chunk: int = 12000
    clone_timeout_seconds: int = 60

    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
18
+
19
+
20
@lru_cache
def get_settings() -> Settings:
    """Return a process-wide cached Settings instance (environment read only once)."""
    return Settings()
app/main.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+
3
+ from app.agents.graph import AuditGraph
4
+ from app.config import get_settings
5
+ from app.schemas import AuditReport, AuditRequest
6
+
7
# FastAPI application exposing the audit pipeline over HTTP.
app = FastAPI(title="SwarmAudit", version="0.1.0")


@app.get("/health")
async def health() -> dict[str, str]:
    """Liveness probe; also surfaces the configured application name."""
    return {"status": "ok", "app": get_settings().app_name}


@app.post("/audit", response_model=AuditReport)
async def audit(request: AuditRequest) -> AuditReport:
    """Run a full audit for the given repository URL.

    ValueError (e.g. an unsupported URL) maps to 400; any other failure
    maps to 500. NOTE(review): the 500 detail echoes the raw exception
    text, which may expose internals — consider logging it server-side
    and returning a generic message.
    """
    try:
        graph = AuditGraph()
        return await graph.run(str(request.repo_url))
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Audit failed: {exc}") from exc
app/schemas.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timezone
2
+ from enum import Enum
3
+ from typing import Any
4
+
5
+ from pydantic import BaseModel, Field, HttpUrl
6
+
7
+
8
class Severity(str, Enum):
    """Finding severity levels; str-valued so they serialize as plain strings."""

    critical = "CRITICAL"
    high = "HIGH"
    medium = "MEDIUM"
    low = "LOW"


class AuditRequest(BaseModel):
    """POST /audit request body; HttpUrl rejects non-URL input up front."""

    repo_url: HttpUrl


class SourceFile(BaseModel):
    """A source file discovered by the crawler."""

    path: str  # path relative to the repo root, forward-slashed
    absolute_path: str  # filesystem path used for reading content
    size_bytes: int
    language: str | None = None


class CodeChunk(BaseModel):
    """A contiguous slice of one file, bounded by the chunker's size limit."""

    file_path: str
    language: str | None = None
    line_start: int  # 1-based, inclusive
    line_end: int  # 1-based, inclusive
    content: str


class Finding(BaseModel):
    """A single issue reported by an agent."""

    title: str
    severity: Severity
    file_path: str
    line_start: int = Field(ge=1)
    line_end: int = Field(ge=1)
    description: str
    why_it_matters: str
    suggested_fix: str
    agent_source: str  # name of the agent that produced this finding


class AgentOutput(BaseModel):
    """Findings plus free-form metadata from one agent run."""

    agent_name: str
    findings: list[Finding] = Field(default_factory=list)
    metadata: dict[str, Any] = Field(default_factory=dict)


class RepoScanResult(BaseModel):
    """Output of the crawler: the local clone location plus its mapped files."""

    repo_url: str
    local_path: str
    files: list[SourceFile]
    skipped_files: int = 0
    warnings: list[str] = Field(default_factory=list)


class AuditReport(BaseModel):
    """Final synthesized report returned by the API and rendered by the UI."""

    repo_url: str
    scanned_file_count: int
    skipped_file_count: int
    findings: list[Finding]
    severity_summary: dict[Severity, int]
    # Timezone-aware UTC timestamp captured when the model is created.
    generated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    agents_run: list[str]
    warnings: list[str] = Field(default_factory=list)


class AuditProgress(BaseModel):
    """A single progress event.

    NOTE(review): not referenced by the other modules in this commit — the
    streaming UI path yields plain strings instead; confirm before relying
    on it.
    """

    message: str
    stage: str
app/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Service layer for crawling, chunking, LLM access, and formatting."""
app/services/chunker.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from app.config import Settings
4
+ from app.schemas import CodeChunk, SourceFile
5
+
6
+
7
class Chunker:
    """Splits source files into character-bounded chunks while tracking line ranges."""

    def __init__(self, settings: Settings):
        self.settings = settings

    def chunk_files(self, files: list[SourceFile]) -> list[CodeChunk]:
        """Chunk every file and concatenate the results in file order."""
        chunks: list[CodeChunk] = []
        for source_file in files:
            chunks.extend(self.chunk_file(source_file))
        return chunks

    def chunk_file(self, source_file: SourceFile) -> list[CodeChunk]:
        """Split one file into chunks of at most max_chars_per_chunk characters.

        Lines are never split across chunks; a single line longer than the
        limit still becomes its own (oversized) chunk. Undecodable bytes are
        dropped via errors="ignore". Returns [] for an empty file.
        """
        text = Path(source_file.absolute_path).read_text(encoding="utf-8", errors="ignore")
        lines = text.splitlines()
        if not lines:
            return []

        chunks: list[CodeChunk] = []
        current_lines: list[str] = []
        current_start = 1  # 1-based line number where the current chunk begins
        current_chars = 0

        for index, line in enumerate(lines, start=1):
            line_chars = len(line) + 1  # +1 accounts for the joining newline
            # Flush the current chunk before this line would push it over budget.
            if current_lines and current_chars + line_chars > self.settings.max_chars_per_chunk:
                chunks.append(
                    CodeChunk(
                        file_path=source_file.path,
                        language=source_file.language,
                        line_start=current_start,
                        line_end=index - 1,
                        content="\n".join(current_lines),
                    )
                )
                current_lines = []
                current_start = index
                current_chars = 0

            current_lines.append(line)
            current_chars += line_chars

        # Flush the trailing partial chunk.
        if current_lines:
            chunks.append(
                CodeChunk(
                    file_path=source_file.path,
                    language=source_file.language,
                    line_start=current_start,
                    line_end=len(lines),
                    content="\n".join(current_lines),
                )
            )

        return chunks
app/services/llm_client.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Any
3
+
4
+ import httpx
5
+
6
+ from app.config import Settings
7
+
8
+
9
class LLMClient:
    """Async completion client: mock responses by default, vLLM OpenAI-compatible otherwise."""

    def __init__(self, settings: Settings):
        self.settings = settings

    async def complete_json(self, system_prompt: str, user_prompt: str) -> dict[str, Any]:
        """Request a JSON-object completion and return it parsed as a dict.

        Raises:
            ValueError: if llm_provider is neither "mock" nor "vllm".
            httpx.HTTPStatusError: on a non-2xx response from the endpoint.
            json.JSONDecodeError: if the model output is not valid JSON.
        """
        # Mock mode keeps the demo independent of any running model server.
        if self.settings.llm_provider == "mock":
            return {
                "findings": [],
                "note": "Mock LLM is active; static rules produced the demo findings.",
            }

        if self.settings.llm_provider != "vllm":
            raise ValueError(f"Unsupported LLM_PROVIDER={self.settings.llm_provider}")

        payload = {
            "model": self.settings.llm_model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            "temperature": 0.1,  # near-deterministic output for structured review
            "response_format": {"type": "json_object"},
        }
        headers = {"Authorization": f"Bearer {self.settings.llm_api_key}"}
        # A fresh client per call is fine at current volume; pool if calls get hot.
        async with httpx.AsyncClient(timeout=120) as client:
            response = await client.post(
                f"{self.settings.llm_base_url.rstrip('/')}/chat/completions",
                json=payload,
                headers=headers,
            )
            response.raise_for_status()
            content = response.json()["choices"][0]["message"]["content"]
            return json.loads(content)
app/services/repo_crawler.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import shutil
2
+ import tempfile
3
+ from pathlib import Path
4
+ from urllib.parse import urlparse
5
+
6
+ from git import Repo
7
+
8
+ from app.config import Settings
9
+ from app.schemas import RepoScanResult, SourceFile
10
+
11
+
12
# Directory names (matched at any depth) whose contents are never scanned.
IGNORED_DIRS = {
    ".git",
    "node_modules",
    "dist",
    "build",
    ".venv",
    "__pycache__",
    "vendor",
    "target",
    ".next",
}

# File extensions the crawler accepts, mapped to a display language name.
SUPPORTED_EXTENSIONS = {
    ".py": "Python",
    ".js": "JavaScript",
    ".ts": "TypeScript",
    ".tsx": "TypeScript React",
    ".jsx": "JavaScript React",
    ".java": "Java",
    ".go": "Go",
    ".rs": "Rust",
    ".cpp": "C++",
    ".c": "C",
    ".cs": "C#",
    ".php": "PHP",
    ".rb": "Ruby",
}
39
+
40
+
41
def validate_github_url(repo_url: str) -> str:
    """Validate a public github.com URL and normalize it to a .git clone URL.

    Raises:
        ValueError: for non-HTTP(S) schemes, hosts other than github.com, or
            paths missing either the owner or the repository name.
    """
    parsed = urlparse(repo_url)

    if parsed.scheme not in ("http", "https"):
        raise ValueError("Only HTTP(S) GitHub URLs are supported.")
    if parsed.netloc.lower() != "github.com":
        raise ValueError("Only public github.com repository URLs are supported.")

    segments = [segment for segment in parsed.path.split("/") if segment]
    if len(segments) < 2:
        raise ValueError("GitHub URL must include owner and repo name.")

    owner = segments[0]
    repo_name = segments[1].removesuffix(".git")
    return f"https://github.com/{owner}/{repo_name}.git"
54
+
55
+
56
class RepoCrawler:
    """Clones a GitHub repo into a temp dir and maps its auditable files."""

    def __init__(self, settings: Settings):
        self.settings = settings

    def clone_and_scan(self, repo_url: str) -> RepoScanResult:
        """Shallow-clone the repo, then scan it; the temp dir is removed on failure.

        On success the clone is kept on disk; callers must invoke cleanup()
        with the returned RepoScanResult when finished.
        """
        clone_url = validate_github_url(repo_url)
        temp_root = Path(tempfile.mkdtemp(prefix="swarm_audit_"))
        repo_path = temp_root / "repo"

        try:
            Repo.clone_from(
                clone_url,
                repo_path,
                depth=1,  # shallow: latest commit only
                single_branch=True,
                kill_after_timeout=self.settings.clone_timeout_seconds,
                env={"GIT_TERMINAL_PROMPT": "0"},  # never block on credential prompts
                multi_options=["--filter=blob:none"],  # blobless partial clone
            )
            return self.scan_local_repo(repo_url=repo_url, repo_path=repo_path)
        except Exception:
            shutil.rmtree(temp_root, ignore_errors=True)
            raise

    def scan_local_repo(self, repo_url: str, repo_path: Path) -> RepoScanResult:
        """Walk the clone and collect supported source files within the configured limits."""
        files: list[SourceFile] = []
        skipped = 0
        warnings: list[str] = []
        max_bytes = self.settings.max_file_size_kb * 1024

        for path in repo_path.rglob("*"):
            if not path.is_file():
                continue
            rel_path = path.relative_to(repo_path)
            if any(part in IGNORED_DIRS for part in rel_path.parts):
                skipped += 1
                continue
            if path.suffix.lower() not in SUPPORTED_EXTENSIONS:
                skipped += 1
                continue
            size = path.stat().st_size
            if size > max_bytes:
                skipped += 1
                warnings.append(f"Skipped large file: {rel_path}")
                continue
            # Past the cap we keep walking so remaining files count as skipped.
            if len(files) >= self.settings.max_files:
                skipped += 1
                continue

            files.append(
                SourceFile(
                    path=str(rel_path).replace("\\", "/"),  # normalize Windows separators
                    absolute_path=str(path),
                    size_bytes=size,
                    language=SUPPORTED_EXTENSIONS[path.suffix.lower()],
                )
            )

        # NOTE(review): this warning also fires when the repo has exactly
        # max_files accepted files and nothing was actually dropped.
        if len(files) >= self.settings.max_files:
            warnings.append(f"Repo hit MAX_FILES={self.settings.max_files}; remaining files were skipped.")

        return RepoScanResult(
            repo_url=repo_url,
            local_path=str(repo_path),
            files=files,
            skipped_files=skipped,
            warnings=warnings,
        )

    def cleanup(self, scan_result: RepoScanResult | None) -> None:
        """Delete the temp clone, but only if it looks like one of ours.

        Safety guards: the directory must resolve under the system temp dir
        and carry the swarm_audit_ prefix, so a crafted local_path cannot
        make this rmtree an arbitrary directory.
        """
        if scan_result is None:
            return

        repo_path = Path(scan_result.local_path)
        temp_root = repo_path.parent
        temp_dir = Path(tempfile.gettempdir()).resolve()

        try:
            resolved_temp_root = temp_root.resolve()
        except FileNotFoundError:
            # Already gone (e.g. cleaned up twice) — nothing to do.
            return

        if temp_dir in resolved_temp_root.parents and temp_root.name.startswith("swarm_audit_"):
            shutil.rmtree(temp_root, ignore_errors=True)
app/services/report_formatter.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.schemas import AuditReport, Severity
2
+
3
+
4
def format_report_markdown(report: AuditReport) -> str:
    """Render an AuditReport as a human-readable Markdown document."""
    out: list[str] = [
        "# SwarmAudit Report",
        "",
        f"Repository: `{report.repo_url}`",
        f"Files scanned: `{report.scanned_file_count}`",
        f"Files skipped: `{report.skipped_file_count}`",
        "",
        "## Severity Summary",
        "",
    ]

    # Fixed order from most to least severe; absent levels show as 0.
    ordered = (Severity.critical, Severity.high, Severity.medium, Severity.low)
    out.extend(
        f"- **{level.value}**: {report.severity_summary.get(level, 0)}" for level in ordered
    )

    if report.warnings:
        out += ["", "## Warnings", ""]
        out += [f"- {warning}" for warning in report.warnings]

    out += ["", "## Findings", ""]

    if not report.findings:
        out.append("No findings detected by the current MVP agents.")
        return "\n".join(out)

    for item in report.findings:
        out += [
            f"### [{item.severity.value}] {item.title}",
            "",
            f"- File: `{item.file_path}:{item.line_start}-{item.line_end}`",
            f"- Agent: `{item.agent_source}`",
            "",
            item.description,
            "",
            f"**Why it matters:** {item.why_it_matters}",
            "",
            "**Suggested fix:**",
            "",
            "```text",
            item.suggested_fix,
            "```",
            "",
        ]

    return "\n".join(out)
app/ui/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Gradio UI for SwarmAudit."""
app/ui/gradio_app.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from app.agents.graph import AuditGraph
4
+ from app.schemas import AuditReport
5
+ from app.services.report_formatter import format_report_markdown
6
+
7
+
8
async def analyze_repo(repo_url: str):
    """Gradio streaming handler: yields the growing transcript as markdown.

    Each yielded string replaces the output component's content, so the full
    transcript is re-joined and re-yielded after every pipeline event.
    """
    if not repo_url.strip():
        yield "Paste a public GitHub repository URL to start."
        return

    transcript: list[str] = []
    try:
        async for event in AuditGraph().run_with_progress(repo_url.strip()):
            if isinstance(event, AuditReport):
                # Final event: append the rendered report below the progress log.
                transcript.append("")
                transcript.append(format_report_markdown(event))
            else:
                transcript.append(event)
            yield "\n".join(transcript)
    except Exception as exc:
        # Broad catch is intentional at this UI boundary: surface the error in
        # the transcript instead of crashing the Gradio worker.
        transcript.append(f"Audit failed: {exc}")
        yield "\n".join(transcript)
25
+
26
+
27
def build_app() -> gr.Blocks:
    """Assemble the single-page Gradio UI."""
    with gr.Blocks(title="SwarmAudit") as demo:
        gr.Markdown("# SwarmAudit")
        gr.Markdown("Paste any public GitHub URL. Get a structured AI code review in minutes.")
        repo_url = gr.Textbox(
            label="GitHub Repository URL",
            placeholder="https://github.com/owner/repo",
        )
        analyze = gr.Button("Analyze")
        output = gr.Markdown(label="Audit Report")
        # analyze_repo is an async generator, so clicks stream progress updates.
        analyze.click(analyze_repo, inputs=repo_url, outputs=output)
    return demo


if __name__ == "__main__":
    # queue() enables streaming of the generator handler's incremental outputs.
    build_app().queue().launch()
pytest.ini ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [pytest]
2
+ testpaths = tests
3
+ norecursedirs = .git .venv __pycache__ .pytest_cache .tmp_pytest pytest-cache-files-*
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ gradio
4
+ gitpython
5
+ pydantic
6
+ pydantic-settings
7
+ langgraph
8
+ langchain-core
9
+ httpx
10
+ python-dotenv
11
+ pytest
tests/test_api.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi.testclient import TestClient
2
+
3
+ from app.main import app
4
+
5
+
6
def test_health_endpoint():
    """The /health endpoint reports service status and the configured app name."""
    client = TestClient(app)
    resp = client.get("/health")

    assert resp.status_code == 200
    assert resp.json() == {"status": "ok", "app": "SwarmAudit"}
tests/test_chunker.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ from app.config import Settings
4
+ from app.schemas import SourceFile
5
+ from app.services.chunker import Chunker
6
+
7
+
8
+ def test_chunker_preserves_line_ranges(tmp_path: Path):
9
+ source = tmp_path / "demo.py"
10
+ source.write_text("a = 1\nb = 2\nc = 3\n", encoding="utf-8")
11
+ source_file = SourceFile(
12
+ path="demo.py",
13
+ absolute_path=str(source),
14
+ size_bytes=source.stat().st_size,
15
+ language="Python",
16
+ )
17
+
18
+ chunks = Chunker(Settings(max_chars_per_chunk=8)).chunk_file(source_file)
19
+
20
+ assert len(chunks) > 1
21
+ assert chunks[0].line_start == 1
22
+ assert chunks[-1].line_end == 3
tests/test_graph_progress.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ from app.agents.graph import AuditGraph
6
+ from app.config import Settings
7
+ from app.schemas import AuditReport
8
+
9
+
10
+ @pytest.mark.anyio
11
+ async def test_run_with_progress_yields_real_stages_and_report(tmp_path: Path):
12
+ source = tmp_path / "app.py"
13
+ source.write_text("API_KEY = '1234567890abcdef'\n", encoding="utf-8")
14
+ graph = AuditGraph(Settings(max_files=10, max_file_size_kb=10, max_chars_per_chunk=1000))
15
+
16
+ graph.crawler.clone_and_scan = lambda repo_url: graph.crawler.scan_local_repo(repo_url, tmp_path)
17
+ graph.crawler.cleanup = lambda scan_result: None
18
+
19
+ events = []
20
+ async for event in graph.run_with_progress("https://github.com/example/project"):
21
+ events.append(event)
22
+
23
+ assert any("Crawler Agent" in event for event in events if isinstance(event, str))
24
+ assert any("Security Agent" in event for event in events if isinstance(event, str))
25
+ assert isinstance(events[-1], AuditReport)
26
+ assert len(events[-1].findings) == 1
tests/test_repo_crawler.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ from app.config import Settings
6
+ from app.services.repo_crawler import RepoCrawler, validate_github_url
7
+
8
+
9
+ def test_validate_github_url_normalizes_clone_url():
10
+ assert validate_github_url("https://github.com/example/project") == "https://github.com/example/project.git"
11
+
12
+
13
+ def test_validate_github_url_rejects_non_github():
14
+ with pytest.raises(ValueError):
15
+ validate_github_url("https://gitlab.com/example/project")
16
+
17
+
18
+ def test_scan_local_repo_filters_supported_files(tmp_path: Path):
19
+ (tmp_path / "node_modules").mkdir()
20
+ (tmp_path / "node_modules" / "ignored.js").write_text("x", encoding="utf-8")
21
+ (tmp_path / "app.py").write_text("API_KEY = '1234567890'\n", encoding="utf-8")
22
+ (tmp_path / "notes.txt").write_text("hello", encoding="utf-8")
23
+
24
+ crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=1))
25
+ result = crawler.scan_local_repo("https://github.com/example/project", tmp_path)
26
+
27
+ assert [file.path for file in result.files] == ["app.py"]
28
+ assert result.skipped_files == 2
tests/test_security_report.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from app.agents.security_agent import SecurityAgent
4
+ from app.agents.synthesizer_agent import SynthesizerAgent
5
+ from app.config import Settings
6
+ from app.schemas import CodeChunk, RepoScanResult, Severity
7
+ from app.services.llm_client import LLMClient
8
+
9
+
10
+ @pytest.mark.anyio
11
+ async def test_security_agent_and_synthesizer_return_structured_report():
12
+ chunk = CodeChunk(
13
+ file_path="app.py",
14
+ language="Python",
15
+ line_start=10,
16
+ line_end=10,
17
+ content="API_KEY = '1234567890abcdef'",
18
+ )
19
+ output = await SecurityAgent(LLMClient(Settings())).analyze([chunk])
20
+ repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
21
+
22
+ report = await SynthesizerAgent().synthesize(repo, [output])
23
+
24
+ assert len(report.findings) == 1
25
+ assert report.findings[0].severity == Severity.high
26
+ assert report.findings[0].file_path == "app.py"
27
+ assert report.findings[0].line_start == 10
28
+ assert report.severity_summary[Severity.high] == 1