Spaces:

lablab-ai-amd-developer-hackathon
/

SwarmAudit

Running

Pranoy Mukherjee

Add prioritized report summaries

1c77a4c 2 days ago

5.78 kB

	import re

	from app.schemas import AgentOutput, CodeChunk, Finding, Severity


	PYTHON_PUBLIC_DEF = re.compile(r"^(\s)(async\s+def\|def\|class)\s+([A-Za-z][A-Za-z0-9_])")
	README_SETUP_TERMS = ("install", "setup", "quick start", "usage", "run")
	README_TEST_TERMS = ("test", "pytest", "unittest")
	README_CONFIG_TERMS = ("config", "environment", ".env", "settings")


	class DocsAgent:
	name = "Docs Agent"

	async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
	findings: list[Finding] = []
	readme_seen = False

	for chunk in chunks:
	if self._is_readme(chunk.file_path):
	readme_seen = True
	findings.extend(self._scan_readme(chunk))
	elif chunk.language == "Python":
	findings.extend(self._scan_python_docstrings(chunk))

	if not readme_seen and chunks:
	first_chunk = chunks[0]
	findings.append(
	self._finding(
	"README not found in scanned files",
	Severity.medium,
	first_chunk,
	first_chunk.line_start,
	first_chunk.line_start,
	"The crawler did not find a top-level README file in the scanned repository inputs.",
	"Add a README with setup, usage, configuration, and test instructions.",
	)
	)

	return AgentOutput(
	agent_name=self.name,
	findings=findings,
	metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
	)

	def _scan_readme(self, chunk: CodeChunk) -> list[Finding]:
	content = chunk.content.lower()
	findings: list[Finding] = []

	checks = [
	(
	any(term in content for term in README_SETUP_TERMS),
	"README missing usage/setup guidance",
	"A README should quickly tell visitors how to install and run the project.",
	"Add a Quick Start or Usage section with the commands needed to run the app.",
	),
	(
	any(term in content for term in README_TEST_TERMS),
	"README missing test instructions",
	"Developers need a reliable way to verify the project after cloning it.",
	"Add a Tests section with the command used to run the test suite.",
	),
	(
	any(term in content for term in README_CONFIG_TERMS),
	"README missing configuration notes",
	"Environment variables and model/provider settings are easy to misconfigure without documentation.",
	"Document required environment variables and include an `.env.example` reference.",
	),
	]

	for passed, title, description, suggested_fix in checks:
	if passed:
	continue
	findings.append(
	self._finding(
	title,
	Severity.low,
	chunk,
	chunk.line_start,
	chunk.line_end,
	description,
	suggested_fix,
	)
	)

	return findings

	def _scan_python_docstrings(self, chunk: CodeChunk) -> list[Finding]:
	missing_symbols: list[tuple[str, int]] = []
	lines = chunk.content.splitlines()

	for index, line in enumerate(lines):
	match = PYTHON_PUBLIC_DEF.match(line)
	if not match:
	continue

	symbol_name = match.group(3)
	if symbol_name.startswith("_"):
	continue
	if self._has_docstring(lines, index):
	continue

	line_number = chunk.line_start + index
	missing_symbols.append((symbol_name, line_number))

	if not missing_symbols:
	return []

	examples = ", ".join(f"`{name}` line {line}" for name, line in missing_symbols[:5])
	extra_count = len(missing_symbols) - 5
	extra_note = f" plus {extra_count} more" if extra_count > 0 else ""
	return [
	self._finding(
	"Public Python symbols missing docstrings",
	Severity.low,
	chunk,
	missing_symbols[0][1],
	missing_symbols[-1][1],
	f"{len(missing_symbols)} public symbols in this file section are missing docstrings: {examples}{extra_note}.",
	"Add short docstrings to public functions/classes, starting with exported APIs and complex behavior.",
	)
	]

	def _has_docstring(self, lines: list[str], definition_index: int) -> bool:
	for line in lines[definition_index + 1 : definition_index + 5]:
	stripped = line.strip()
	if not stripped:
	continue
	return stripped.startswith(('"""', "'''"))
	return False

	def _is_readme(self, file_path: str) -> bool:
	return file_path.rsplit("/", 1)[-1].lower() in {"readme", "readme.md", "readme.rst", "readme.txt"}

	def _finding(
	self,
	title: str,
	severity: Severity,
	chunk: CodeChunk,
	line_start: int,
	line_end: int,
	description: str,
	suggested_fix: str,
	) -> Finding:
	return Finding(
	title=title,
	severity=severity,
	file_path=chunk.file_path,
	line_start=line_start,
	line_end=line_end,
	description=description,
	why_it_matters="Good documentation helps reviewers, users, and judges understand the project quickly.",
	suggested_fix=suggested_fix,
	agent_source=self.name,
	)