Pranoy Mukherjee committed on
Commit
3e631d0
·
1 Parent(s): a3ecd30

Add multi-agent audit MVP

Browse files
README.md CHANGED
@@ -4,10 +4,10 @@ AI-powered multi-agent code auditing for GitHub repositories. Paste a public Git
4
 
5
  ## MVP
6
 
7
- SwarmAudit currently runs with a mock-first LLM interface so the demo is not blocked by ROCm, vLLM, or AMD MI300X setup. The first graph is:
8
 
9
  ```text
10
- GitHub URL -> Crawler -> Chunker -> Security Agent -> Synthesizer -> Report
11
  ```
12
 
13
  ## Quick Start
@@ -58,6 +58,12 @@ Each finding includes:
58
  - suggested fix
59
  - agent source
60
 
 
 
 
 
 
 
61
  ## Tests
62
 
63
  ```bash
 
4
 
5
  ## MVP
6
 
7
+ SwarmAudit currently runs with a mock-first LLM interface so the demo is not blocked by ROCm, vLLM, or AMD MI300X setup. The current graph is:
8
 
9
  ```text
10
+ GitHub URL -> Crawler -> Chunker -> [Security Agent + Performance Agent] -> Synthesizer -> Report
11
  ```
12
 
13
  ## Quick Start
 
58
  - suggested fix
59
  - agent source
60
 
61
+ ## Current Agents
62
+
63
+ - Security Agent: flags hardcoded secrets, disabled TLS verification, and dynamic code execution.
64
+ - Performance Agent: flags HTTP calls without timeouts, blocking sleep inside async functions, nested loops, file reads in loops, and synchronous Node.js filesystem calls.
65
+ - Synthesizer Agent: deduplicates findings, sorts by severity, and builds the final report.
66
+
67
  ## Tests
68
 
69
  ```bash
app/agents/graph.py CHANGED
@@ -1,8 +1,10 @@
1
  from collections.abc import AsyncIterator
2
- from typing import TypedDict
 
3
 
4
  from langgraph.graph import END, StateGraph
5
 
 
6
  from app.agents.security_agent import SecurityAgent
7
  from app.agents.synthesizer_agent import SynthesizerAgent
8
  from app.config import Settings, get_settings
@@ -17,8 +19,9 @@ class AuditState(TypedDict, total=False):
17
  repo: RepoScanResult
18
  chunks: list[CodeChunk]
19
  security_output: AgentOutput
 
20
  report: AuditReport
21
- progress: list[str]
22
 
23
 
24
  class AuditGraph:
@@ -28,6 +31,7 @@ class AuditGraph:
28
  self.chunker = Chunker(self.settings)
29
  self.llm_client = LLMClient(self.settings)
30
  self.security_agent = SecurityAgent(self.llm_client)
 
31
  self.synthesizer = SynthesizerAgent()
32
  self.graph = self._build_graph()
33
 
@@ -36,11 +40,13 @@ class AuditGraph:
36
  graph.add_node("crawl", self._crawl)
37
  graph.add_node("chunk", self._chunk)
38
  graph.add_node("security", self._security)
 
39
  graph.add_node("synthesize", self._synthesize)
40
  graph.set_entry_point("crawl")
41
  graph.add_edge("crawl", "chunk")
42
  graph.add_edge("chunk", "security")
43
- graph.add_edge("security", "synthesize")
 
44
  graph.add_edge("synthesize", END)
45
  return graph.compile()
46
 
@@ -63,8 +69,12 @@ class AuditGraph:
63
  security_output = await self.security_agent.analyze(chunks)
64
  yield f"Security Agent: found {len(security_output.findings)} findings."
65
 
 
 
 
 
66
  yield "Synthesizer Agent: ranking findings and formatting report..."
67
- report = await self.synthesizer.synthesize(repo, [security_output])
68
  yield "Synthesizer Agent: final report generated."
69
  yield report
70
  finally:
@@ -72,21 +82,24 @@ class AuditGraph:
72
 
73
  async def _crawl(self, state: AuditState) -> AuditState:
74
  repo = self.crawler.clone_and_scan(state["repo_url"])
75
- progress = state.get("progress", []) + [f"Crawler Agent: mapped {len(repo.files)} files."]
76
- return {"repo": repo, "progress": progress}
77
 
78
  async def _chunk(self, state: AuditState) -> AuditState:
79
  chunks = self.chunker.chunk_files(state["repo"].files)
80
- progress = state.get("progress", []) + [f"Chunker: created {len(chunks)} code chunks."]
81
- return {"chunks": chunks, "progress": progress}
82
 
83
  async def _security(self, state: AuditState) -> AuditState:
84
  output = await self.security_agent.analyze(state["chunks"])
85
- progress = state.get("progress", []) + [f"Security Agent: found {len(output.findings)} findings."]
86
- return {"security_output": output, "progress": progress}
 
 
 
87
 
88
  async def _synthesize(self, state: AuditState) -> AuditState:
89
- report = await self.synthesizer.synthesize(state["repo"], [state["security_output"]])
90
- progress = state.get("progress", []) + ["Synthesizer Agent: final report generated."]
 
 
91
  self.crawler.cleanup(state["repo"])
92
- return {"report": report, "progress": progress}
 
1
  from collections.abc import AsyncIterator
2
+ from operator import add
3
+ from typing import Annotated, TypedDict
4
 
5
  from langgraph.graph import END, StateGraph
6
 
7
+ from app.agents.performance_agent import PerformanceAgent
8
  from app.agents.security_agent import SecurityAgent
9
  from app.agents.synthesizer_agent import SynthesizerAgent
10
  from app.config import Settings, get_settings
 
19
  repo: RepoScanResult
20
  chunks: list[CodeChunk]
21
  security_output: AgentOutput
22
+ performance_output: AgentOutput
23
  report: AuditReport
24
+ progress: Annotated[list[str], add]
25
 
26
 
27
  class AuditGraph:
 
31
  self.chunker = Chunker(self.settings)
32
  self.llm_client = LLMClient(self.settings)
33
  self.security_agent = SecurityAgent(self.llm_client)
34
+ self.performance_agent = PerformanceAgent()
35
  self.synthesizer = SynthesizerAgent()
36
  self.graph = self._build_graph()
37
 
 
40
  graph.add_node("crawl", self._crawl)
41
  graph.add_node("chunk", self._chunk)
42
  graph.add_node("security", self._security)
43
+ graph.add_node("performance", self._performance)
44
  graph.add_node("synthesize", self._synthesize)
45
  graph.set_entry_point("crawl")
46
  graph.add_edge("crawl", "chunk")
47
  graph.add_edge("chunk", "security")
48
+ graph.add_edge("chunk", "performance")
49
+ graph.add_edge(["security", "performance"], "synthesize")
50
  graph.add_edge("synthesize", END)
51
  return graph.compile()
52
 
 
69
  security_output = await self.security_agent.analyze(chunks)
70
  yield f"Security Agent: found {len(security_output.findings)} findings."
71
 
72
+ yield "Performance Agent: scanning for slow-path patterns..."
73
+ performance_output = await self.performance_agent.analyze(chunks)
74
+ yield f"Performance Agent: found {len(performance_output.findings)} findings."
75
+
76
  yield "Synthesizer Agent: ranking findings and formatting report..."
77
+ report = await self.synthesizer.synthesize(repo, [security_output, performance_output])
78
  yield "Synthesizer Agent: final report generated."
79
  yield report
80
  finally:
 
82
 
83
  async def _crawl(self, state: AuditState) -> AuditState:
84
  repo = self.crawler.clone_and_scan(state["repo_url"])
85
+ return {"repo": repo, "progress": [f"Crawler Agent: mapped {len(repo.files)} files."]}
 
86
 
87
  async def _chunk(self, state: AuditState) -> AuditState:
88
  chunks = self.chunker.chunk_files(state["repo"].files)
89
+ return {"chunks": chunks, "progress": [f"Chunker: created {len(chunks)} code chunks."]}
 
90
 
91
  async def _security(self, state: AuditState) -> AuditState:
92
  output = await self.security_agent.analyze(state["chunks"])
93
+ return {"security_output": output, "progress": [f"Security Agent: found {len(output.findings)} findings."]}
94
+
95
+ async def _performance(self, state: AuditState) -> AuditState:
96
+ output = await self.performance_agent.analyze(state["chunks"])
97
+ return {"performance_output": output, "progress": [f"Performance Agent: found {len(output.findings)} findings."]}
98
 
99
  async def _synthesize(self, state: AuditState) -> AuditState:
100
+ report = await self.synthesizer.synthesize(
101
+ state["repo"],
102
+ [state["security_output"], state["performance_output"]],
103
+ )
104
  self.crawler.cleanup(state["repo"])
105
+ return {"report": report, "progress": ["Synthesizer Agent: final report generated."]}
app/agents/performance_agent.py CHANGED
@@ -1,8 +1,127 @@
1
- from app.schemas import AgentOutput, CodeChunk
 
 
 
 
 
 
 
 
2
 
3
 
4
  class PerformanceAgent:
5
  name = "Performance Agent"
6
 
7
  async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
8
- return AgentOutput(agent_name=self.name, findings=[], metadata={"chunks_scanned": len(chunks)})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+ from app.schemas import AgentOutput, CodeChunk, Finding, Severity
4
+
5
+
6
+ REQUEST_WITHOUT_TIMEOUT = re.compile(r"\brequests\.(get|post|put|patch|delete)\s*\((?!.*\btimeout\s*=)")
7
+ SYNC_FS_JS = re.compile(r"\b(readFileSync|writeFileSync|readdirSync|statSync)\s*\(")
8
+ PYTHON_LOOP = re.compile(r"^(\s*)(for|while)\b")
9
+ PYTHON_FILE_READ = re.compile(r"\b(open\s*\(|Path\s*\([^)]*\)\.read_(text|bytes)\s*\()")
10
 
11
 
12
  class PerformanceAgent:
13
  name = "Performance Agent"
14
 
15
  async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
16
+ findings: list[Finding] = []
17
+ for chunk in chunks:
18
+ findings.extend(self._scan_chunk(chunk))
19
+
20
+ return AgentOutput(
21
+ agent_name=self.name,
22
+ findings=findings,
23
+ metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
24
+ )
25
+
26
+ def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
27
+ findings: list[Finding] = []
28
+ lines = chunk.content.splitlines()
29
+ loop_stack: list[int] = []
30
+ async_indent_stack: list[int] = []
31
+
32
+ for offset, line in enumerate(lines):
33
+ actual_line = chunk.line_start + offset
34
+ stripped = line.strip()
35
+ indent = len(line) - len(line.lstrip(" "))
36
+
37
+ loop_stack = [loop_indent for loop_indent in loop_stack if indent > loop_indent]
38
+ async_indent_stack = [async_indent for async_indent in async_indent_stack if indent > async_indent]
39
+
40
+ if stripped.startswith("async def "):
41
+ async_indent_stack.append(indent)
42
+
43
+ loop_match = PYTHON_LOOP.match(line)
44
+ if loop_match:
45
+ if loop_stack:
46
+ findings.append(
47
+ self._finding(
48
+ "Nested loop may become expensive",
49
+ Severity.low,
50
+ chunk,
51
+ actual_line,
52
+ "A loop nested inside another loop can turn small inputs into slow O(n^2) work.",
53
+ "Consider indexing data with a dictionary/set, batching work, or documenting why nested iteration is bounded.",
54
+ )
55
+ )
56
+ loop_stack.append(len(loop_match.group(1)))
57
+
58
+ if REQUEST_WITHOUT_TIMEOUT.search(line):
59
+ findings.append(
60
+ self._finding(
61
+ "HTTP request without timeout",
62
+ Severity.medium,
63
+ chunk,
64
+ actual_line,
65
+ "Network calls without timeouts can hang workers and make the app appear frozen under bad network conditions.",
66
+ "Pass an explicit timeout, for example requests.get(url, timeout=10).",
67
+ )
68
+ )
69
+
70
+ if async_indent_stack and "time.sleep(" in line:
71
+ findings.append(
72
+ self._finding(
73
+ "Blocking sleep inside async function",
74
+ Severity.medium,
75
+ chunk,
76
+ actual_line,
77
+ "time.sleep blocks the event loop, delaying unrelated async work.",
78
+ "Use await asyncio.sleep(...) inside async functions.",
79
+ )
80
+ )
81
+
82
+ if loop_stack and PYTHON_FILE_READ.search(line):
83
+ findings.append(
84
+ self._finding(
85
+ "File read inside loop",
86
+ Severity.low,
87
+ chunk,
88
+ actual_line,
89
+ "Repeated disk reads inside loops can dominate runtime and slow audits on larger inputs.",
90
+ "Read once before the loop, cache results, or stream data deliberately.",
91
+ )
92
+ )
93
+
94
+ if SYNC_FS_JS.search(line):
95
+ findings.append(
96
+ self._finding(
97
+ "Synchronous filesystem call",
98
+ Severity.low,
99
+ chunk,
100
+ actual_line,
101
+ "Synchronous filesystem APIs block the Node.js event loop and can hurt request latency.",
102
+ "Use async fs.promises APIs or move blocking work outside latency-sensitive paths.",
103
+ )
104
+ )
105
+
106
+ return findings
107
+
108
+ def _finding(
109
+ self,
110
+ title: str,
111
+ severity: Severity,
112
+ chunk: CodeChunk,
113
+ line_number: int,
114
+ description: str,
115
+ suggested_fix: str,
116
+ ) -> Finding:
117
+ return Finding(
118
+ title=title,
119
+ severity=severity,
120
+ file_path=chunk.file_path,
121
+ line_start=line_number,
122
+ line_end=line_number,
123
+ description=description,
124
+ why_it_matters="Performance issues in hot paths can increase latency, resource usage, and demo analysis time.",
125
+ suggested_fix=suggested_fix,
126
+ agent_source=self.name,
127
+ )
tests/test_graph_progress.py CHANGED
@@ -10,7 +10,7 @@ from app.schemas import AuditReport
10
  @pytest.mark.anyio
11
  async def test_run_with_progress_yields_real_stages_and_report(tmp_path: Path):
12
  source = tmp_path / "app.py"
13
- source.write_text("API_KEY = '1234567890abcdef'\n", encoding="utf-8")
14
  graph = AuditGraph(Settings(max_files=10, max_file_size_kb=10, max_chars_per_chunk=1000))
15
 
16
  graph.crawler.clone_and_scan = lambda repo_url: graph.crawler.scan_local_repo(repo_url, tmp_path)
@@ -22,5 +22,8 @@ async def test_run_with_progress_yields_real_stages_and_report(tmp_path: Path):
22
 
23
  assert any("Crawler Agent" in event for event in events if isinstance(event, str))
24
  assert any("Security Agent" in event for event in events if isinstance(event, str))
 
25
  assert isinstance(events[-1], AuditReport)
26
- assert len(events[-1].findings) == 1
 
 
 
10
  @pytest.mark.anyio
11
  async def test_run_with_progress_yields_real_stages_and_report(tmp_path: Path):
12
  source = tmp_path / "app.py"
13
+ source.write_text("API_KEY = '1234567890abcdef'\nresponse = requests.get(url)\n", encoding="utf-8")
14
  graph = AuditGraph(Settings(max_files=10, max_file_size_kb=10, max_chars_per_chunk=1000))
15
 
16
  graph.crawler.clone_and_scan = lambda repo_url: graph.crawler.scan_local_repo(repo_url, tmp_path)
 
22
 
23
  assert any("Crawler Agent" in event for event in events if isinstance(event, str))
24
  assert any("Security Agent" in event for event in events if isinstance(event, str))
25
+ assert any("Performance Agent" in event for event in events if isinstance(event, str))
26
  assert isinstance(events[-1], AuditReport)
27
+ assert len(events[-1].findings) == 2
28
+ assert "Security Agent" in events[-1].agents_run
29
+ assert "Performance Agent" in events[-1].agents_run
tests/test_performance_agent.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from app.agents.performance_agent import PerformanceAgent
4
+ from app.schemas import CodeChunk, Severity
5
+
6
+
7
+ @pytest.mark.anyio
8
+ async def test_performance_agent_flags_requests_without_timeout():
9
+ chunk = CodeChunk(
10
+ file_path="client.py",
11
+ language="Python",
12
+ line_start=1,
13
+ line_end=1,
14
+ content="response = requests.get(url)",
15
+ )
16
+
17
+ output = await PerformanceAgent().analyze([chunk])
18
+
19
+ assert output.findings[0].title == "HTTP request without timeout"
20
+ assert output.findings[0].severity == Severity.medium
21
+
22
+
23
+ @pytest.mark.anyio
24
+ async def test_performance_agent_flags_blocking_sleep_in_async_function():
25
+ chunk = CodeChunk(
26
+ file_path="worker.py",
27
+ language="Python",
28
+ line_start=20,
29
+ line_end=22,
30
+ content="async def run():\n time.sleep(1)\n return True",
31
+ )
32
+
33
+ output = await PerformanceAgent().analyze([chunk])
34
+
35
+ assert output.findings[0].title == "Blocking sleep inside async function"
36
+ assert output.findings[0].line_start == 21
37
+
38
+
39
+ @pytest.mark.anyio
40
+ async def test_performance_agent_flags_nested_loop():
41
+ chunk = CodeChunk(
42
+ file_path="search.py",
43
+ language="Python",
44
+ line_start=5,
45
+ line_end=7,
46
+ content="for user in users:\n for order in orders:\n match(user, order)",
47
+ )
48
+
49
+ output = await PerformanceAgent().analyze([chunk])
50
+
51
+ assert output.findings[0].title == "Nested loop may become expensive"
52
+ assert output.findings[0].line_start == 6