File size: 5,779 Bytes
0e9cb33 a3ecd30 0e9cb33 1c77a4c 0e9cb33 1c77a4c 0e9cb33 1c77a4c 0e9cb33 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 | import re
from app.schemas import AgentOutput, CodeChunk, Finding, Severity
# Matches the header line of a Python ``def`` / ``async def`` / ``class``
# whose name starts with a letter (names starting with ``_`` never match).
#   group(1) = leading indentation
#   group(2) = the definition keyword
#   group(3) = the symbol name
# The trailing lookahead requires ``(``, ``:`` or ``[`` after the name, so that
# prose which merely begins with "def" or "class" (for example a docstring line
# such as "class of objects that ...") is not mistaken for a definition.
PYTHON_PUBLIC_DEF = re.compile(
    r"^(\s*)(async\s+def|def|class)\s+([A-Za-z][A-Za-z0-9_]*)(?=\s*[(:\[])"
)

# Lower-case substrings whose presence in README text counts as evidence that
# the corresponding topic (setup/usage, testing, configuration) is covered.
README_SETUP_TERMS = ("install", "setup", "quick start", "usage", "run")
README_TEST_TERMS = ("test", "pytest", "unittest")
README_CONFIG_TERMS = ("config", "environment", ".env", "settings")
class DocsAgent:
    """Static-rule agent that reports documentation gaps in scanned chunks.

    README chunks are checked for setup, test, and configuration keywords;
    Python chunks are checked for public definitions that lack a docstring.
    Every finding comes from plain text heuristics on ``chunk.content``.
    """

    name = "Docs Agent"

    # Upper bound on how many lines a ``def``/``class`` header may span before
    # the header scan gives up and treats the header as a single line.
    _MAX_HEADER_LINES = 40

    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
        """Scan all chunks and collect documentation findings.

        Args:
            chunks: Chunks to inspect. Each chunk is read for ``file_path``,
                ``language``, ``content``, ``line_start`` and ``line_end``.

        Returns:
            An ``AgentOutput`` carrying this agent's name, the findings, and
            metadata with the number of chunks scanned and the scan mode.
        """
        findings: list[Finding] = []

        # Group README chunks per file first so each README is judged on all
        # of its scanned text. Checking chunk by chunk would flag every
        # section of a long README that happens not to mention a keyword.
        readme_groups: dict[str, list[CodeChunk]] = {}
        for chunk in chunks:
            if self._is_readme(chunk.file_path):
                readme_groups.setdefault(chunk.file_path, []).append(chunk)

        reported_readmes: set[str] = set()
        for chunk in chunks:
            if chunk.file_path in readme_groups:
                # Emit the README's findings once, at the position of its
                # first chunk, so the overall finding order stays stable.
                if chunk.file_path not in reported_readmes:
                    reported_readmes.add(chunk.file_path)
                    findings.extend(
                        self._scan_readme_group(readme_groups[chunk.file_path])
                    )
            elif chunk.language == "Python":
                findings.extend(self._scan_python_docstrings(chunk))

        if chunks and not readme_groups:
            # No README at all: anchor the finding on the first scanned chunk
            # because there is no better location to point at.
            first_chunk = chunks[0]
            findings.append(
                self._finding(
                    "README not found in scanned files",
                    Severity.medium,
                    first_chunk,
                    first_chunk.line_start,
                    first_chunk.line_start,
                    "The crawler did not find a top-level README file in the scanned repository inputs.",
                    "Add a README with setup, usage, configuration, and test instructions.",
                )
            )

        return AgentOutput(
            agent_name=self.name,
            findings=findings,
            metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
        )

    def _scan_readme(self, chunk: CodeChunk) -> list[Finding]:
        """Check a single README chunk for setup, test, and config keywords.

        Args:
            chunk: The README chunk to inspect.

        Returns:
            One low-severity finding per topic with no keyword in the chunk.
        """
        return self._scan_readme_group([chunk])

    def _scan_readme_group(self, chunks: list[CodeChunk]) -> list[Finding]:
        """Check the combined text of one README's chunks for key topics.

        Args:
            chunks: Non-empty list of chunks that share one README file path.

        Returns:
            One low-severity finding per topic (setup/usage, tests,
            configuration) for which none of the topic's keywords occurs
            anywhere in the combined, lower-cased content. Findings are
            attached to the first chunk's file path and span from the lowest
            ``line_start`` to the highest ``line_end`` of the group.
        """
        anchor = chunks[0]
        content = "\n".join(chunk.content for chunk in chunks).lower()
        line_start = min(chunk.line_start for chunk in chunks)
        line_end = max(chunk.line_end for chunk in chunks)

        # (keywords, title, description, suggested fix) for each topic.
        checks = [
            (
                README_SETUP_TERMS,
                "README missing usage/setup guidance",
                "A README should quickly tell visitors how to install and run the project.",
                "Add a Quick Start or Usage section with the commands needed to run the app.",
            ),
            (
                README_TEST_TERMS,
                "README missing test instructions",
                "Developers need a reliable way to verify the project after cloning it.",
                "Add a Tests section with the command used to run the test suite.",
            ),
            (
                README_CONFIG_TERMS,
                "README missing configuration notes",
                "Environment variables and model/provider settings are easy to misconfigure without documentation.",
                "Document required environment variables and include an `.env.example` reference.",
            ),
        ]
        return [
            self._finding(
                title,
                Severity.low,
                anchor,
                line_start,
                line_end,
                description,
                suggested_fix,
            )
            for terms, title, description, suggested_fix in checks
            if not any(term in content for term in terms)
        ]

    def _scan_python_docstrings(self, chunk: CodeChunk) -> list[Finding]:
        """Report public Python definitions in a chunk that lack a docstring.

        Args:
            chunk: A Python chunk. Line numbers are computed as
                ``chunk.line_start + index``, which assumes ``line_start`` is
                the file line number of the chunk's first line.

        Returns:
            An empty list when nothing is missing, otherwise a single
            low-severity finding that names up to five symbols and spans from
            the first to the last offending definition.
        """
        missing_symbols: list[tuple[str, int]] = []
        lines = chunk.content.splitlines()
        for index, line in enumerate(lines):
            match = PYTHON_PUBLIC_DEF.match(line)
            if not match:
                continue
            symbol_name = match.group(3)
            # Defensive: private names are skipped even if the pattern is
            # ever loosened to match a leading underscore.
            if symbol_name.startswith("_"):
                continue
            if self._has_docstring(lines, index):
                continue
            missing_symbols.append((symbol_name, chunk.line_start + index))

        if not missing_symbols:
            return []

        # Keep the message short: list the first five, summarise the rest.
        examples = ", ".join(
            f"`{name}` line {line}" for name, line in missing_symbols[:5]
        )
        extra_count = len(missing_symbols) - 5
        extra_note = f" plus {extra_count} more" if extra_count > 0 else ""
        return [
            self._finding(
                "Public Python symbols missing docstrings",
                Severity.low,
                chunk,
                missing_symbols[0][1],
                missing_symbols[-1][1],
                f"{len(missing_symbols)} public symbols in this file section are missing docstrings: {examples}{extra_note}.",
                "Add short docstrings to public functions/classes, starting with exported APIs and complex behavior.",
            )
        ]

    def _has_docstring(self, lines: list[str], definition_index: int) -> bool:
        """Return whether the definition at ``definition_index`` has a docstring.

        This is a text heuristic, not a parser. The header is considered
        finished on the first line where round, square and curly brackets
        opened since the definition line are balanced again, so signatures
        spread over several lines are skipped rather than mistaken for the
        body. The first following line that is neither blank nor a comment
        must then start with a string literal (optionally prefixed with
        ``r``/``u``) to count as a docstring.

        Args:
            lines: All lines of the chunk.
            definition_index: Index in ``lines`` of the ``def``/``class`` line.

        Returns:
            ``True`` if a docstring appears to open the body, ``False``
            otherwise, including when no body line is visible in ``lines``.
        """
        # Fallback when brackets never balance inside the window (for example
        # a bracket inside a string default): assume a one-line header.
        body_index = definition_index + 1
        depth = 0
        header_window = lines[
            definition_index : definition_index + self._MAX_HEADER_LINES
        ]
        for offset, line in enumerate(header_window):
            # Ignore trailing comments so brackets in them are not counted.
            code = line.split("#", 1)[0]
            depth += sum(code.count(ch) for ch in "([{")
            depth -= sum(code.count(ch) for ch in ")]}")
            if depth <= 0:
                body_index = definition_index + offset + 1
                break

        for line in lines[body_index:]:
            stripped = line.strip()
            # Blank lines and comments may precede the docstring.
            if not stripped or stripped.startswith("#"):
                continue
            return stripped.lstrip("rRuU").startswith(('"', "'"))
        return False

    def _is_readme(self, file_path: str) -> bool:
        """Return whether ``file_path`` names a README file.

        The final path component is compared case-insensitively against
        ``readme``, ``readme.md``, ``readme.rst`` and ``readme.txt``. Both
        ``/`` and ``\\`` are treated as path separators.
        """
        file_name = file_path.replace("\\", "/").rsplit("/", 1)[-1].lower()
        return file_name in {"readme", "readme.md", "readme.rst", "readme.txt"}

    def _finding(
        self,
        title: str,
        severity: Severity,
        chunk: CodeChunk,
        line_start: int,
        line_end: int,
        description: str,
        suggested_fix: str,
    ) -> Finding:
        """Build a ``Finding`` attributed to this agent.

        Args:
            title: Short headline of the finding.
            severity: Severity level to report.
            chunk: Chunk whose ``file_path`` the finding is attached to.
            line_start: First line the finding refers to.
            line_end: Last line the finding refers to.
            description: Explanation of what was detected.
            suggested_fix: Recommended remediation.

        Returns:
            A ``Finding`` with a fixed ``why_it_matters`` text and
            ``agent_source`` set to this agent's name.
        """
        return Finding(
            title=title,
            severity=severity,
            file_path=chunk.file_path,
            line_start=line_start,
            line_end=line_end,
            description=description,
            why_it_matters="Good documentation helps reviewers, users, and judges understand the project quickly.",
            suggested_fix=suggested_fix,
            agent_source=self.name,
        )
|