File size: 5,779 Bytes
0e9cb33
 
 
 
 
 
 
 
 
a3ecd30
 
 
 
 
 
0e9cb33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c77a4c
0e9cb33
 
 
 
 
 
 
 
 
 
 
 
 
 
1c77a4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e9cb33
1c77a4c
0e9cb33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import re

from app.schemas import AgentOutput, CodeChunk, Finding, Severity


# Matches the opening line of a Python definition at any indentation level.
PYTHON_PUBLIC_DEF = re.compile(
    r"^(\s*)"  # group 1: leading indentation
    r"(async\s+def|def|class)"  # group 2: definition keyword
    r"\s+"
    r"([A-Za-z][A-Za-z0-9_]*)"  # group 3: symbol name; must begin with a letter
)

# Substrings looked for in lower-cased README text; one hit per group is enough.
README_SETUP_TERMS = (
    "install",
    "setup",
    "quick start",
    "usage",
    "run",
)
README_TEST_TERMS = (
    "test",
    "pytest",
    "unittest",
)
README_CONFIG_TERMS = (
    "config",
    "environment",
    ".env",
    "settings",
)


class DocsAgent:
    """Rule-based agent that reports documentation gaps in scanned chunks.

    README chunks are checked for setup, test, and configuration keywords;
    chunks whose language is ``"Python"`` are checked for public definitions
    that lack a docstring. All findings come from the static rules below.
    """

    name = "Docs Agent"

    # Lower-cased base names that count as a README file.
    _README_NAMES = frozenset({"readme", "readme.md", "readme.rst", "readme.txt"})
    # Start of a string literal usable as a docstring: optional r/u prefix, then a quote.
    _DOCSTRING_START = re.compile(r"[rRuU]?[\"']")
    # Upper bound on lines inspected while looking for the end of a signature.
    _MAX_SIGNATURE_LINES = 50
    # Number of lines after the signature in which the docstring must start.
    _DOCSTRING_WINDOW = 4

    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
        """Scan all chunks and return the documentation findings.

        Args:
            chunks: Chunks to inspect. README chunks are grouped by
                ``file_path`` so each README is evaluated once as a whole.

        Returns:
            An ``AgentOutput`` carrying the findings plus scan metadata.
        """
        # Group README chunks per file first so the keyword checks see the
        # whole file instead of flagging every section that lacks a keyword.
        readme_groups: dict[str, list[CodeChunk]] = {}
        for chunk in chunks:
            if self._is_readme(chunk.file_path):
                readme_groups.setdefault(chunk.file_path, []).append(chunk)
        readme_seen = bool(readme_groups)

        findings: list[Finding] = []
        for chunk in chunks:
            if self._is_readme(chunk.file_path):
                # Emit each README's findings once, at the position of its
                # first chunk, so the overall ordering follows the input.
                group = readme_groups.pop(chunk.file_path, None)
                if group:
                    findings.extend(self._scan_readme_chunks(group))
            elif chunk.language == "Python":
                findings.extend(self._scan_python_docstrings(chunk))

        if not readme_seen and chunks:
            # There is no README to anchor on, so attach the finding to the
            # first scanned chunk.
            first_chunk = chunks[0]
            findings.append(
                self._finding(
                    "README not found in scanned files",
                    Severity.medium,
                    first_chunk,
                    first_chunk.line_start,
                    first_chunk.line_start,
                    "The crawler did not find a top-level README file in the scanned repository inputs.",
                    "Add a README with setup, usage, configuration, and test instructions.",
                )
            )

        return AgentOutput(
            agent_name=self.name,
            findings=findings,
            metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
        )

    def _scan_readme(self, chunk: CodeChunk) -> list[Finding]:
        """Check a single README chunk for the expected guidance keywords."""
        return self._scan_readme_chunks([chunk])

    def _scan_readme_chunks(self, chunks: list[CodeChunk]) -> list[Finding]:
        """Check the combined text of one README's chunks for guidance keywords.

        Args:
            chunks: All chunks belonging to the same README file.

        Returns:
            One low-severity finding per keyword group (setup, tests,
            configuration) with no match anywhere in the combined text. The
            finding spans from the smallest ``line_start`` to the largest
            ``line_end`` of the given chunks.
        """
        if not chunks:
            return []

        content = "\n".join(chunk.content for chunk in chunks).lower()
        first_line = min(chunk.line_start for chunk in chunks)
        last_line = max(chunk.line_end for chunk in chunks)
        anchor = chunks[0]  # only used for its file_path

        checks = (
            (
                README_SETUP_TERMS,
                "README missing usage/setup guidance",
                "A README should quickly tell visitors how to install and run the project.",
                "Add a Quick Start or Usage section with the commands needed to run the app.",
            ),
            (
                README_TEST_TERMS,
                "README missing test instructions",
                "Developers need a reliable way to verify the project after cloning it.",
                "Add a Tests section with the command used to run the test suite.",
            ),
            (
                README_CONFIG_TERMS,
                "README missing configuration notes",
                "Environment variables and model/provider settings are easy to misconfigure without documentation.",
                "Document required environment variables and include an `.env.example` reference.",
            ),
        )

        return [
            self._finding(
                title,
                Severity.low,
                anchor,
                first_line,
                last_line,
                description,
                suggested_fix,
            )
            for terms, title, description, suggested_fix in checks
            if not any(term in content for term in terms)
        ]

    def _scan_python_docstrings(self, chunk: CodeChunk) -> list[Finding]:
        """Report public ``def``/``async def``/``class`` lines without a docstring.

        Args:
            chunk: A Python chunk; ``chunk.line_start`` is used to turn
                chunk-relative indexes into file line numbers.

        Returns:
            An empty list when nothing is missing, otherwise a single finding
            that summarises every undocumented symbol in the chunk (the first
            five are named explicitly).
        """
        missing_symbols: list[tuple[str, int]] = []
        lines = chunk.content.splitlines()

        for index, line in enumerate(lines):
            match = PYTHON_PUBLIC_DEF.match(line)
            if not match:
                continue

            symbol_name = match.group(3)
            # Defensive guard: private names are never reported, even if the
            # definition pattern is later widened to match them.
            if symbol_name.startswith("_"):
                continue
            if self._has_docstring(lines, index):
                continue

            missing_symbols.append((symbol_name, chunk.line_start + index))

        if not missing_symbols:
            return []

        examples = ", ".join(f"`{name}` line {line}" for name, line in missing_symbols[:5])
        extra_count = len(missing_symbols) - 5
        extra_note = f" plus {extra_count} more" if extra_count > 0 else ""
        return [
            self._finding(
                "Public Python symbols missing docstrings",
                Severity.low,
                chunk,
                missing_symbols[0][1],
                missing_symbols[-1][1],
                f"{len(missing_symbols)} public symbols in this file section are missing docstrings: {examples}{extra_note}.",
                "Add short docstrings to public functions/classes, starting with exported APIs and complex behavior.",
            )
        ]

    def _has_docstring(self, lines: list[str], definition_index: int) -> bool:
        """Return whether the definition starting at ``definition_index`` has a docstring.

        The signature may span several lines. After its last line, blank lines
        and comment-only lines are skipped, and the first remaining line must
        start a string literal (optionally ``r``/``u`` prefixed). Only the
        ``_DOCSTRING_WINDOW`` lines after the signature are inspected.
        """
        body_start = self._signature_end(lines, definition_index) + 1
        for line in lines[body_start : body_start + self._DOCSTRING_WINDOW]:
            stripped = line.strip()
            # Comments and blank lines may legally precede the docstring.
            if not stripped or stripped.startswith("#"):
                continue
            return self._DOCSTRING_START.match(stripped) is not None
        return False

    def _signature_end(self, lines: list[str], definition_index: int) -> int:
        """Return the index of the line on which the signature's brackets balance."""
        depth = 0
        stop = min(len(lines), definition_index + self._MAX_SIGNATURE_LINES)
        for index in range(definition_index, stop):
            depth += self._bracket_delta(lines[index])
            if depth <= 0:
                return index
        # Brackets never balanced in the visible lines (for example a
        # truncated chunk): treat the definition line as the whole signature.
        return definition_index

    @staticmethod
    def _bracket_delta(line: str) -> int:
        """Return the net bracket depth change of ``line``, ignoring strings and comments."""
        delta = 0
        quote = ""
        escaped = False
        for char in line:
            if quote:
                if escaped:
                    escaped = False
                elif char == "\\":
                    escaped = True
                elif char == quote:
                    quote = ""
            elif char in "\"'":
                quote = char
            elif char == "#":
                break  # rest of the line is a comment
            elif char in "([{":
                delta += 1
            elif char in ")]}":
                delta -= 1
        return delta

    def _is_readme(self, file_path: str) -> bool:
        """Return whether the base name of ``file_path`` is a recognised README name.

        Both ``/`` and ``\\`` are accepted as path separators, and the
        comparison is case-insensitive.
        """
        base_name = file_path.replace("\\", "/").rsplit("/", 1)[-1]
        return base_name.lower() in self._README_NAMES

    def _finding(
        self,
        title: str,
        severity: Severity,
        chunk: CodeChunk,
        line_start: int,
        line_end: int,
        description: str,
        suggested_fix: str,
    ) -> Finding:
        """Build a ``Finding`` for ``chunk.file_path`` attributed to this agent.

        Only ``chunk.file_path`` is read from ``chunk``; the line range is
        taken from ``line_start``/``line_end``. ``why_it_matters`` is the same
        fixed sentence for every finding this agent produces.
        """
        return Finding(
            title=title,
            severity=severity,
            file_path=chunk.file_path,
            line_start=line_start,
            line_end=line_end,
            description=description,
            why_it_matters="Good documentation helps reviewers, users, and judges understand the project quickly.",
            suggested_fix=suggested_fix,
            agent_source=self.name,
        )