Spaces:

lablab-ai-amd-developer-hackathon
/

SwarmAudit

Running

App Files Files Community

Pranoy Mukherjee committed 3 days ago

Commit

1c77a4c

1 Parent(s): 7a376ec

Add prioritized report summaries

Browse files

Files changed (8) hide show

README.md +2 -0
app/agents/docs_agent.py +19 -13
app/agents/synthesizer_agent.py +61 -5
app/schemas.py +4 -0
app/services/report_formatter.py +6 -0
tests/test_docs_agent.py +20 -1
tests/test_security_report.py +3 -0
tests/test_synthesizer_agent.py +48 -0

README.md CHANGED Viewed

@@ -58,6 +58,8 @@ Each finding includes:
 - suggested fix
 - agent source
 ## Current Agents
 - Security Agent: flags hardcoded secrets, disabled TLS verification, and dynamic code execution.

 - suggested fix
 - agent source
+Reports preserve full finding totals while displaying a prioritized subset for readability. High-severity findings are shown first, repeated low-severity findings are summarized, and warnings explain when lower-priority findings are hidden from the demo report.
 ## Current Agents
 - Security Agent: flags hardcoded secrets, disabled TLS verification, and dynamic code execution.

app/agents/docs_agent.py CHANGED Viewed

@@ -86,7 +86,7 @@ class DocsAgent:
         return findings
     def _scan_python_docstrings(self, chunk: CodeChunk) -> list[Finding]:
-        findings: list[Finding] = []
         lines = chunk.content.splitlines()
         for index, line in enumerate(lines):
@@ -101,19 +101,25 @@ class DocsAgent:
                 continue
             line_number = chunk.line_start + index
-            findings.append(
-                self._finding(
-                    "Public Python symbol missing docstring",
-                    Severity.low,
-                    chunk,
-                    line_number,
-                    line_number,
-                    f"`{symbol_name}` is public but does not start with a docstring.",
-                    "Add a short docstring describing purpose, parameters, return value, or side effects.",
-                )
             )
-        return findings
     def _has_docstring(self, lines: list[str], definition_index: int) -> bool:
         for line in lines[definition_index + 1 : definition_index + 5]:

         return findings
     def _scan_python_docstrings(self, chunk: CodeChunk) -> list[Finding]:
+        missing_symbols: list[tuple[str, int]] = []
         lines = chunk.content.splitlines()
         for index, line in enumerate(lines):
                 continue
             line_number = chunk.line_start + index
+            missing_symbols.append((symbol_name, line_number))
+        if not missing_symbols:
+            return []
+        examples = ", ".join(f"`{name}` line {line}" for name, line in missing_symbols[:5])
+        extra_count = len(missing_symbols) - 5
+        extra_note = f" plus {extra_count} more" if extra_count > 0 else ""
+        return [
+            self._finding(
+                "Public Python symbols missing docstrings",
+                Severity.low,
+                chunk,
+                missing_symbols[0][1],
+                missing_symbols[-1][1],
+                f"{len(missing_symbols)} public symbols in this file section are missing docstrings: {examples}{extra_note}.",
+                "Add short docstrings to public functions/classes, starting with exported APIs and complex behavior.",
             )
+        ]
     def _has_docstring(self, lines: list[str], definition_index: int) -> bool:
         for line in lines[definition_index + 1 : definition_index + 5]:

app/agents/synthesizer_agent.py CHANGED Viewed

@@ -8,26 +8,41 @@ SEVERITY_ORDER = {
     Severity.low: 3,
 }
 class SynthesizerAgent:
     name = "Synthesizer Agent"
     async def synthesize(self, repo: RepoScanResult, outputs: list[AgentOutput]) -> AuditReport:
-        findings = self._dedupe([finding for output in outputs for finding in output.findings])
-        findings.sort(key=lambda finding: (SEVERITY_ORDER[finding.severity], finding.file_path, finding.line_start))
         summary = {severity: 0 for severity in Severity}
-        for finding in findings:
             summary[finding.severity] += 1
         return AuditReport(
             repo_url=repo.repo_url,
             scanned_file_count=len(repo.files),
             skipped_file_count=repo.skipped_files,
-            findings=findings,
             severity_summary=summary,
             agents_run=[output.agent_name for output in outputs] + [self.name],
-            warnings=repo.warnings,
         )
     def _dedupe(self, findings: list[Finding]) -> list[Finding]:
@@ -40,3 +55,44 @@ class SynthesizerAgent:
             seen.add(key)
             unique.append(finding)
         return unique

     Severity.low: 3,
 }
+# Overall cap on the number of findings shown in a report; also used as the
+# per-agent cap for agents that have no entry in MAX_DISPLAY_FINDINGS_BY_AGENT.
+MAX_DISPLAY_FINDINGS = 40
+# Per-agent display caps, looked up by a finding's agent_source.
+MAX_DISPLAY_FINDINGS_BY_AGENT = {
+    "Security Agent": 20,
+    "Performance Agent": 12,
+    "Quality Agent": 10,
+    "Docs Agent": 8,
+}
 class SynthesizerAgent:
     name = "Synthesizer Agent"
     async def synthesize(self, repo: RepoScanResult, outputs: list[AgentOutput]) -> AuditReport:
+        """Combine agent outputs into a single AuditReport.
+
+        Findings from all outputs are de-duplicated and sorted. The severity summary
+        and total count cover every remaining finding, while ``findings`` carries
+        only the subset picked by ``_select_display_findings``.
+        """
+        all_findings = self._dedupe([finding for output in outputs for finding in output.findings])
+        all_findings.sort(key=self._sort_key)
         summary = {severity: 0 for severity in Severity}
+        for finding in all_findings:
             summary[finding.severity] += 1
+        # NOTE(review): these per-agent counts are taken before de-duplication, so
+        # their sum can exceed total_findings_count — confirm this is intended.
+        agent_counts = {output.agent_name: len(output.findings) for output in outputs}
+        display_findings, hidden_count, warnings = self._select_display_findings(all_findings, agent_counts)
         return AuditReport(
             repo_url=repo.repo_url,
             scanned_file_count=len(repo.files),
             skipped_file_count=repo.skipped_files,
+            findings=display_findings,
             severity_summary=summary,
+            total_findings_count=len(all_findings),
+            displayed_findings_count=len(display_findings),
+            hidden_findings_count=hidden_count,
+            agent_finding_counts=agent_counts,
             agents_run=[output.agent_name for output in outputs] + [self.name],
+            # Scan warnings come first, followed by the display-truncation notes.
+            warnings=repo.warnings + warnings,
         )
     def _dedupe(self, findings: list[Finding]) -> list[Finding]:
             seen.add(key)
             unique.append(finding)
         return unique
+    def _select_display_findings(
+        self,
+        findings: list[Finding],
+        agent_counts: dict[str, int],
+    ) -> tuple[list[Finding], int, list[str]]:
+        """Pick the findings to display and describe what was left out.
+
+        Walks ``findings`` in the order given and keeps each one until either its
+        agent's cap (``MAX_DISPLAY_FINDINGS_BY_AGENT``) or the overall
+        ``MAX_DISPLAY_FINDINGS`` cap is reached.
+
+        Args:
+            findings: Candidate findings, highest priority first.
+            agent_counts: Mapping of agent name to the number of findings that
+                agent reported. Only its keys are used here, to fix which agents
+                are considered and in what order.
+
+        Returns:
+            ``(selected, hidden_count, warnings)``: the findings to show, how many
+            of ``findings`` were left out, and human-readable notes about the
+            truncation (empty when nothing was hidden).
+        """
+        selected: list[Finding] = []
+        selected_by_agent = {agent_name: 0 for agent_name in agent_counts}
+        # Per-agent totals are counted from ``findings`` itself rather than read
+        # from ``agent_counts``: the caller's counts may include duplicates that
+        # were dropped before selection, which would make the warnings report
+        # findings as hidden that were never candidates for display.
+        available_by_agent = {agent_name: 0 for agent_name in agent_counts}
+        for finding in findings:
+            available_by_agent[finding.agent_source] = available_by_agent.get(finding.agent_source, 0) + 1
+        for finding in findings:
+            agent_limit = MAX_DISPLAY_FINDINGS_BY_AGENT.get(finding.agent_source, MAX_DISPLAY_FINDINGS)
+            # An agent at its cap is skipped, not a reason to stop: later findings
+            # from other agents may still fit.
+            if selected_by_agent.get(finding.agent_source, 0) >= agent_limit:
+                continue
+            if len(selected) >= MAX_DISPLAY_FINDINGS:
+                break
+            selected.append(finding)
+            selected_by_agent[finding.agent_source] = selected_by_agent.get(finding.agent_source, 0) + 1
+        hidden_count = max(0, len(findings) - len(selected))
+        warnings: list[str] = []
+        if hidden_count:
+            warnings.append(
+                f"Report display prioritized {len(selected)} of {len(findings)} findings; "
+                f"{hidden_count} lower-priority findings are hidden from the demo report."
+            )
+        for agent_name, total_count in available_by_agent.items():
+            displayed_count = selected_by_agent.get(agent_name, 0)
+            if total_count > displayed_count:
+                warnings.append(f"{agent_name}: displaying {displayed_count} of {total_count} findings.")
+        return selected, hidden_count, warnings
+    def _sort_key(self, finding: Finding) -> tuple[int, int, str, int]:
+        """Return the tuple used to order findings for display.
+
+        Orders by severity rank first. Within a severity, non-critical findings
+        located in test files sort after the others; remaining ties are broken by
+        file path and then starting line.
+        """
+        in_test_file = self._is_test_file(finding.file_path)
+        # Critical findings are never pushed back, even when they sit in a test file.
+        if in_test_file and finding.severity != Severity.critical:
+            test_file_penalty = 1
+        else:
+            test_file_penalty = 0
+        severity_rank = SEVERITY_ORDER[finding.severity]
+        return (severity_rank, test_file_penalty, finding.file_path, finding.line_start)
+    def _is_test_file(self, file_path: str) -> bool:
+        """Heuristically report whether ``file_path`` looks like a test file.
+
+        The path is lower-cased and backslashes are treated as forward slashes. It
+        counts as a test path when it starts with ``test``, when any component
+        after a slash starts with ``test``, or when it contains ``_test.``.
+        """
+        path = file_path.lower().replace("\\", "/")
+        if path.startswith("test"):
+            return True
+        return any(marker in path for marker in ("/test", "_test."))

app/schemas.py CHANGED Viewed

@@ -63,6 +63,10 @@ class AuditReport(BaseModel):
     skipped_file_count: int
     findings: list[Finding]
     severity_summary: dict[Severity, int]
     generated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
     agents_run: list[str]
     warnings: list[str] = Field(default_factory=list)

     skipped_file_count: int
     findings: list[Finding]
     severity_summary: dict[Severity, int]
+    total_findings_count: int = 0
+    displayed_findings_count: int = 0
+    hidden_findings_count: int = 0
+    agent_finding_counts: dict[str, int] = Field(default_factory=dict)
     generated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
     agents_run: list[str]
     warnings: list[str] = Field(default_factory=list)

app/services/report_formatter.py CHANGED Viewed

@@ -8,6 +8,7 @@ def format_report_markdown(report: AuditReport) -> str:
         f"Repository: `{report.repo_url}`",
         f"Files scanned: `{report.scanned_file_count}`",
         f"Files skipped: `{report.skipped_file_count}`",
         "",
         "## Severity Summary",
         "",
@@ -16,6 +17,11 @@ def format_report_markdown(report: AuditReport) -> str:
     for severity in [Severity.critical, Severity.high, Severity.medium, Severity.low]:
         lines.append(f"- **{severity.value}**: {report.severity_summary.get(severity, 0)}")
     if report.warnings:
         lines.extend(["", "## Warnings", ""])
         lines.extend(f"- {warning}" for warning in report.warnings)

         f"Repository: `{report.repo_url}`",
         f"Files scanned: `{report.scanned_file_count}`",
         f"Files skipped: `{report.skipped_file_count}`",
+        f"Findings shown: `{report.displayed_findings_count}` of `{report.total_findings_count}`",
         "",
         "## Severity Summary",
         "",
     for severity in [Severity.critical, Severity.high, Severity.medium, Severity.low]:
         lines.append(f"- **{severity.value}**: {report.severity_summary.get(severity, 0)}")
+    if report.agent_finding_counts:
+        lines.extend(["", "## Agent Summary", ""])
+        for agent_name, count in report.agent_finding_counts.items():
+            lines.append(f"- **{agent_name}**: {count}")
     if report.warnings:
         lines.extend(["", "## Warnings", ""])
         lines.extend(f"- {warning}" for warning in report.warnings)

tests/test_docs_agent.py CHANGED Viewed

@@ -49,6 +49,25 @@ async def test_docs_agent_flags_public_python_symbol_without_docstring():
     output = await DocsAgent().analyze([chunk])
-    assert output.findings[0].title == "Public Python symbol missing docstring"
     assert output.findings[0].severity == Severity.low
     assert output.findings[0].line_start == 10

     output = await DocsAgent().analyze([chunk])
+    assert output.findings[0].title == "Public Python symbols missing docstrings"
     assert output.findings[0].severity == Severity.low
     assert output.findings[0].line_start == 10
+@pytest.mark.anyio
+async def test_docs_agent_summarizes_missing_docstrings_per_chunk():
+    """Two undocumented public functions in one chunk yield a single summary finding."""
+    source = "def first():\n    pass\n\ndef second():\n    pass"
+    chunk = CodeChunk(
+        file_path="service.py",
+        language="Python",
+        line_start=1,
+        # Derived from the content so the chunk's line range always matches it
+        # (the content spans five lines, starting at line 1).
+        line_end=len(source.splitlines()),
+        content=source,
+    )
+    output = await DocsAgent().analyze([chunk])
+    docstring_findings = [
+        finding for finding in output.findings if finding.title == "Public Python symbols missing docstrings"
+    ]
+    assert len(docstring_findings) == 1
+    assert "2 public symbols" in docstring_findings[0].description

tests/test_security_report.py CHANGED Viewed

@@ -26,3 +26,6 @@ async def test_security_agent_and_synthesizer_return_structured_report():
     assert report.findings[0].file_path == "app.py"
     assert report.findings[0].line_start == 10
     assert report.severity_summary[Severity.high] == 1

     assert report.findings[0].file_path == "app.py"
     assert report.findings[0].line_start == 10
     assert report.severity_summary[Severity.high] == 1
+    assert report.total_findings_count == 1
+    assert report.displayed_findings_count == 1
+    assert report.hidden_findings_count == 0

tests/test_synthesizer_agent.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import pytest
+from app.agents.synthesizer_agent import SynthesizerAgent
+from app.schemas import AgentOutput, Finding, RepoScanResult, Severity
+def make_finding(index: int, agent: str = "Docs Agent", severity: Severity = Severity.low) -> Finding:
+    """Build a placeholder Finding whose title and file path are derived from ``index``."""
+    fields = {
+        "title": f"Finding {index}",
+        "severity": severity,
+        "file_path": f"file_{index}.py",
+        "line_start": 1,
+        "line_end": 1,
+        "description": "Description",
+        "why_it_matters": "Why",
+        "suggested_fix": "Fix",
+        "agent_source": agent,
+    }
+    return Finding(**fields)
+@pytest.mark.anyio
+async def test_synthesizer_preserves_totals_when_display_is_truncated():
+    """Totals cover all 20 findings even though only 8 Docs Agent findings are displayed."""
+    docs_findings = [make_finding(number) for number in range(20)]
+    docs_output = AgentOutput(agent_name="Docs Agent", findings=docs_findings)
+    scan = RepoScanResult(
+        repo_url="https://github.com/example/project",
+        local_path=".",
+        files=[],
+        skipped_files=0,
+    )
+    report = await SynthesizerAgent().synthesize(scan, [docs_output])
+    assert report.total_findings_count == 20
+    assert report.displayed_findings_count == 8
+    assert report.hidden_findings_count == 12
+    assert report.agent_finding_counts["Docs Agent"] == 20
+    truncation_notes = [warning for warning in report.warnings if "displaying 8 of 20" in warning]
+    assert truncation_notes
+@pytest.mark.anyio
+async def test_synthesizer_keeps_high_severity_before_low_findings():
+    """A high-severity finding is listed first even when its agent's output comes second."""
+    outputs = [
+        AgentOutput(agent_name="Docs Agent", findings=[make_finding(1, severity=Severity.low)]),
+        AgentOutput(agent_name="Security Agent", findings=[make_finding(2, "Security Agent", Severity.high)]),
+    ]
+    repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
+    report = await SynthesizerAgent().synthesize(repo, outputs)
+    assert report.findings[0].severity == Severity.high