SwarmAudit / app /agents /docs_agent.py
Pranoy Mukherjee
Add prioritized report summaries
1c77a4c
import re
from app.schemas import AgentOutput, CodeChunk, Finding, Severity
PYTHON_PUBLIC_DEF = re.compile(r"^(\s*)(async\s+def|def|class)\s+([A-Za-z][A-Za-z0-9_]*)")
README_SETUP_TERMS = ("install", "setup", "quick start", "usage", "run")
README_TEST_TERMS = ("test", "pytest", "unittest")
README_CONFIG_TERMS = ("config", "environment", ".env", "settings")
class DocsAgent:
name = "Docs Agent"
async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
findings: list[Finding] = []
readme_seen = False
for chunk in chunks:
if self._is_readme(chunk.file_path):
readme_seen = True
findings.extend(self._scan_readme(chunk))
elif chunk.language == "Python":
findings.extend(self._scan_python_docstrings(chunk))
if not readme_seen and chunks:
first_chunk = chunks[0]
findings.append(
self._finding(
"README not found in scanned files",
Severity.medium,
first_chunk,
first_chunk.line_start,
first_chunk.line_start,
"The crawler did not find a top-level README file in the scanned repository inputs.",
"Add a README with setup, usage, configuration, and test instructions.",
)
)
return AgentOutput(
agent_name=self.name,
findings=findings,
metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
)
def _scan_readme(self, chunk: CodeChunk) -> list[Finding]:
content = chunk.content.lower()
findings: list[Finding] = []
checks = [
(
any(term in content for term in README_SETUP_TERMS),
"README missing usage/setup guidance",
"A README should quickly tell visitors how to install and run the project.",
"Add a Quick Start or Usage section with the commands needed to run the app.",
),
(
any(term in content for term in README_TEST_TERMS),
"README missing test instructions",
"Developers need a reliable way to verify the project after cloning it.",
"Add a Tests section with the command used to run the test suite.",
),
(
any(term in content for term in README_CONFIG_TERMS),
"README missing configuration notes",
"Environment variables and model/provider settings are easy to misconfigure without documentation.",
"Document required environment variables and include an `.env.example` reference.",
),
]
for passed, title, description, suggested_fix in checks:
if passed:
continue
findings.append(
self._finding(
title,
Severity.low,
chunk,
chunk.line_start,
chunk.line_end,
description,
suggested_fix,
)
)
return findings
def _scan_python_docstrings(self, chunk: CodeChunk) -> list[Finding]:
missing_symbols: list[tuple[str, int]] = []
lines = chunk.content.splitlines()
for index, line in enumerate(lines):
match = PYTHON_PUBLIC_DEF.match(line)
if not match:
continue
symbol_name = match.group(3)
if symbol_name.startswith("_"):
continue
if self._has_docstring(lines, index):
continue
line_number = chunk.line_start + index
missing_symbols.append((symbol_name, line_number))
if not missing_symbols:
return []
examples = ", ".join(f"`{name}` line {line}" for name, line in missing_symbols[:5])
extra_count = len(missing_symbols) - 5
extra_note = f" plus {extra_count} more" if extra_count > 0 else ""
return [
self._finding(
"Public Python symbols missing docstrings",
Severity.low,
chunk,
missing_symbols[0][1],
missing_symbols[-1][1],
f"{len(missing_symbols)} public symbols in this file section are missing docstrings: {examples}{extra_note}.",
"Add short docstrings to public functions/classes, starting with exported APIs and complex behavior.",
)
]
def _has_docstring(self, lines: list[str], definition_index: int) -> bool:
for line in lines[definition_index + 1 : definition_index + 5]:
stripped = line.strip()
if not stripped:
continue
return stripped.startswith(('"""', "'''"))
return False
def _is_readme(self, file_path: str) -> bool:
return file_path.rsplit("/", 1)[-1].lower() in {"readme", "readme.md", "readme.rst", "readme.txt"}
def _finding(
self,
title: str,
severity: Severity,
chunk: CodeChunk,
line_start: int,
line_end: int,
description: str,
suggested_fix: str,
) -> Finding:
return Finding(
title=title,
severity=severity,
file_path=chunk.file_path,
line_start=line_start,
line_end=line_end,
description=description,
why_it_matters="Good documentation helps reviewers, users, and judges understand the project quickly.",
suggested_fix=suggested_fix,
agent_source=self.name,
)