| import re |
|
|
| from app.schemas import AgentOutput, CodeChunk, Finding, Severity |
|
|
|
|
# Matches a `def`, `async def`, or `class` header at any indentation.
# Group 1 is the leading whitespace, group 2 the keyword, group 3 the name.
# The name must start with an ASCII letter, so underscore-prefixed (private
# and dunder) definitions never match.
PYTHON_PUBLIC_DEF = re.compile(r"^(\s*)(async\s+def|def|class)\s+([A-Za-z][A-Za-z0-9_]*)")

# Keyword groups searched (as substrings of the lower-cased README text) to
# decide whether a README covers setup/usage, testing, and configuration.
README_SETUP_TERMS = ("install", "setup", "quick start", "usage", "run")
README_TEST_TERMS = ("test", "pytest", "unittest")
README_CONFIG_TERMS = ("config", "environment", ".env", "settings")
|
|
|
|
class DocsAgent:
    """Rule-based documentation reviewer for README files and Python docstrings.

    README chunks are checked for setup, test, and configuration keywords;
    Python chunks are checked for public definitions that lack a docstring.
    Only static rules are applied (the output metadata reports
    ``mode: static-rules``).
    """

    name = "Docs Agent"

    # Lower-cased basenames that are treated as README files.
    _README_BASENAMES = frozenset({"readme", "readme.md", "readme.rst", "readme.txt"})
    # A docstring opens with a quote, optionally preceded by an r/u prefix.
    # f- and b-prefixed literals are deliberately excluded: they are not docstrings.
    _DOCSTRING_START = re.compile(r"""[rRuU]?["']""")
    # Simple quoted literals, removed before counting brackets in a signature so
    # that a default such as ``sep=")"`` does not unbalance the count.
    _STRING_LITERAL = re.compile(
        r'"(?:\\.|[^"\\])*"'
        r"|'(?:\\.|[^'\\])*'"
    )
    # How many lines after the definition header are searched for a docstring.
    _DOCSTRING_LOOKAHEAD = 4
    # Upper bound on how many lines a single signature is assumed to span.
    _MAX_SIGNATURE_LINES = 50
    # How many undocumented symbols are listed by name in a finding.
    _MAX_EXAMPLES = 5

    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
        """Run all documentation checks over ``chunks``.

        Args:
            chunks: Code chunks to inspect. README chunks are recognized by file
                name; chunks whose ``language`` is ``"Python"`` get the
                docstring check; everything else is ignored.

        Returns:
            An ``AgentOutput`` carrying the findings plus metadata with the
            number of chunks scanned.
        """
        findings: list[Finding] = []

        # A README may arrive split into several chunks. Judging each chunk
        # alone would report a "missing" section that simply lives in another
        # chunk, so the text of every README file is combined first.
        readme_parts: dict[str, list[str]] = {}
        for chunk in chunks:
            if self._is_readme(chunk.file_path):
                readme_parts.setdefault(chunk.file_path, []).append(chunk.content)
        readme_text = {path: "\n".join(parts) for path, parts in readme_parts.items()}

        reported_readmes: set[str] = set()
        for chunk in chunks:
            if self._is_readme(chunk.file_path):
                # Report each README file once, at the position of its first chunk.
                if chunk.file_path in reported_readmes:
                    continue
                reported_readmes.add(chunk.file_path)
                findings.extend(self._readme_findings(chunk, readme_text[chunk.file_path]))
            elif chunk.language == "Python":
                findings.extend(self._scan_python_docstrings(chunk))

        if chunks and not readme_text:
            # No README at all: anchor the finding on the first scanned chunk.
            first_chunk = chunks[0]
            findings.append(
                self._finding(
                    "README not found in scanned files",
                    Severity.medium,
                    first_chunk,
                    first_chunk.line_start,
                    first_chunk.line_start,
                    "The crawler did not find a top-level README file in the scanned repository inputs.",
                    "Add a README with setup, usage, configuration, and test instructions.",
                )
            )

        return AgentOutput(
            agent_name=self.name,
            findings=findings,
            metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
        )

    def _scan_readme(self, chunk: CodeChunk) -> list[Finding]:
        """Check one README chunk using only that chunk's own text."""
        return self._readme_findings(chunk, chunk.content)

    def _readme_findings(self, chunk: CodeChunk, text: str) -> list[Finding]:
        """Report which README topics are absent from ``text``.

        Args:
            chunk: Chunk the findings are attached to (file path and line range).
            text: README text to search; matching is case-insensitive and
                substring-based.

        Returns:
            One low-severity finding per topic whose keywords do not occur.
        """
        content = text.lower()
        checks = [
            (
                README_SETUP_TERMS,
                "README missing usage/setup guidance",
                "A README should quickly tell visitors how to install and run the project.",
                "Add a Quick Start or Usage section with the commands needed to run the app.",
            ),
            (
                README_TEST_TERMS,
                "README missing test instructions",
                "Developers need a reliable way to verify the project after cloning it.",
                "Add a Tests section with the command used to run the test suite.",
            ),
            (
                README_CONFIG_TERMS,
                "README missing configuration notes",
                "Environment variables and model/provider settings are easy to misconfigure without documentation.",
                "Document required environment variables and include an `.env.example` reference.",
            ),
        ]
        return [
            self._finding(
                title,
                Severity.low,
                chunk,
                chunk.line_start,
                chunk.line_end,
                description,
                suggested_fix,
            )
            for terms, title, description, suggested_fix in checks
            if not any(term in content for term in terms)
        ]

    def _scan_python_docstrings(self, chunk: CodeChunk) -> list[Finding]:
        """Find public functions/classes in ``chunk`` that have no docstring.

        Returns:
            An empty list when nothing is missing, otherwise a single
            low-severity finding that spans from the first to the last
            undocumented symbol and names up to ``_MAX_EXAMPLES`` of them.
        """
        missing_symbols: list[tuple[str, int]] = []
        lines = chunk.content.splitlines()

        for index, line in enumerate(lines):
            match = PYTHON_PUBLIC_DEF.match(line)
            if match is None:
                continue
            symbol_name = match.group(3)
            if symbol_name.startswith("_") or self._has_docstring(lines, index):
                continue
            # Assumes chunk.line_start is the file line number of the chunk's
            # first line, so adding the offset gives the symbol's file line.
            missing_symbols.append((symbol_name, chunk.line_start + index))

        if not missing_symbols:
            return []

        examples = ", ".join(
            f"`{name}` line {line}" for name, line in missing_symbols[: self._MAX_EXAMPLES]
        )
        extra_count = len(missing_symbols) - self._MAX_EXAMPLES
        extra_note = f" plus {extra_count} more" if extra_count > 0 else ""
        return [
            self._finding(
                "Public Python symbols missing docstrings",
                Severity.low,
                chunk,
                missing_symbols[0][1],
                missing_symbols[-1][1],
                f"{len(missing_symbols)} public symbols in this file section are missing docstrings: {examples}{extra_note}.",
                "Add short docstrings to public functions/classes, starting with exported APIs and complex behavior.",
            )
        ]

    def _header_end(self, lines: list[str], definition_index: int) -> int:
        """Return the index of the line on which a definition's signature closes.

        Brackets are counted from the ``def``/``class`` line onward (ignoring
        quoted strings and comments) until they balance, so a signature wrapped
        over several lines is followed to its last line. If no balanced line is
        found within ``_MAX_SIGNATURE_LINES`` lines, ``definition_index`` is
        returned unchanged.
        """
        depth = 0
        last = min(len(lines), definition_index + self._MAX_SIGNATURE_LINES)
        for index in range(definition_index, last):
            code = self._STRING_LITERAL.sub("", lines[index]).split("#", 1)[0]
            depth += sum(code.count(opener) for opener in "([{")
            depth -= sum(code.count(closer) for closer in ")]}")
            if depth <= 0:
                return index
        return definition_index

    def _has_docstring(self, lines: list[str], definition_index: int) -> bool:
        """Tell whether the definition at ``definition_index`` has a docstring.

        The first line after the signature that is neither blank nor a comment
        decides: it must open a string literal. Only ``_DOCSTRING_LOOKAHEAD``
        lines after the signature are examined.

        Args:
            lines: Lines of the chunk being scanned.
            definition_index: Index in ``lines`` of the ``def``/``class`` line.

        Returns:
            ``True`` if a docstring is found, ``False`` otherwise (including
            when the chunk ends before the body starts).
        """
        body_start = self._header_end(lines, definition_index) + 1
        for line in lines[body_start : body_start + self._DOCSTRING_LOOKAHEAD]:
            stripped = line.strip()
            # Comments are not statements, so a docstring may follow them.
            if not stripped or stripped.startswith("#"):
                continue
            return self._DOCSTRING_START.match(stripped) is not None
        return False

    def _is_readme(self, file_path: str) -> bool:
        """Return ``True`` if the basename of ``file_path`` is a README name.

        Both ``/`` and ``\\`` are accepted as path separators and the
        comparison is case-insensitive.
        """
        basename = file_path.replace("\\", "/").rsplit("/", 1)[-1].lower()
        return basename in self._README_BASENAMES

    def _finding(
        self,
        title: str,
        severity: Severity,
        chunk: CodeChunk,
        line_start: int,
        line_end: int,
        description: str,
        suggested_fix: str,
    ) -> Finding:
        """Build a ``Finding`` for ``chunk`` with this agent's shared fields.

        The file path comes from ``chunk``; ``why_it_matters`` and
        ``agent_source`` are the same for every finding this agent produces.
        """
        return Finding(
            title=title,
            severity=severity,
            file_path=chunk.file_path,
            line_start=line_start,
            line_end=line_end,
            description=description,
            why_it_matters="Good documentation helps reviewers, users, and judges understand the project quickly.",
            suggested_fix=suggested_fix,
            agent_source=self.name,
        )
|
|