File size: 4,587 Bytes
ef93755
7257069
 
 
ef93755
7257069
 
ef93755
 
7257069
ef93755
7257069
 
ef93755
7257069
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef93755
7257069
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef93755
7257069
 
 
ef93755
7257069
 
 
ef93755
 
7257069
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
SecureCodeEnv - Metadata Extractor
Uses Python's built-in AST module to extract component metadata for CodeGraph.
No external dependencies required.
"""
import ast
from codegraph.graph import ComponentMetadata


def extract_metadata(code: str, filename: str, step: int) -> ComponentMetadata:
    """
    Parse Python source code and extract structured metadata.

    Args:
        code: Python source text to analyse.
        filename: Stored verbatim in the ``file`` field of the result.
        step: Stored verbatim in the ``created_at_step`` field of the result.

    Returns:
        A ComponentMetadata describing the source. When ``code`` does not
        parse, the result has ``component_type="error"``, empty lists, and a
        ``conventions`` dict carrying ``syntax_error``, ``error_line`` and
        ``error_msg``. Otherwise ``component_type`` is ``"module"`` when more
        than one function definition was found and ``"function"`` otherwise.
    """
    try:
        tree = ast.parse(code)
    except SyntaxError as e:
        # V2: Return structured error instead of empty object
        return ComponentMetadata(
            file=filename,
            component_type="error",
            imports=[],
            exports=[],
            functions=[],
            api_calls=[],
            conventions={
                "syntax_error": True,
                "error_line": e.lineno,
                "error_msg": str(e.msg),
            },
            created_at_step=step,
        )

    # Substrings that mark a call expression as an HTTP/API call.
    api_markers = (
        "requests.get", "requests.post", "requests.put",
        "httpx.", "aiohttp.", "fetch(", "axios.",
    )

    imports: list[str] = []
    exports: list[str] = []
    functions: list[dict] = []
    api_calls: list[str] = []

    for node in ast.walk(tree):
        # --- Imports ---
        if isinstance(node, ast.Import):
            imports.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module:
            # One "module.name" entry per imported name (previously the whole
            # list was interpolated, yielding strings like "pkg.['a', 'b']").
            imports.extend(f"{node.module}.{alias.name}" for alias in node.names)

        # --- Functions (def and async def) ---
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            returns_annotation = None
            if node.returns is not None:
                try:
                    returns_annotation = ast.unparse(node.returns)
                except Exception:
                    # Best effort: fall back to the node's repr-like string.
                    returns_annotation = str(node.returns)

            # Consider every kind of parameter, not only plain positional
            # ones, so e.g. ``def f(*, x: int)`` counts as type-hinted.
            params = [
                *node.args.posonlyargs,
                *node.args.args,
                *node.args.kwonlyargs,
            ]
            if node.args.vararg is not None:
                params.append(node.args.vararg)
            if node.args.kwarg is not None:
                params.append(node.args.kwarg)
            has_type_hints = node.returns is not None or any(
                p.annotation is not None for p in params
            )

            functions.append({
                "name": node.name,
                "args": [a.arg for a in node.args.args],
                "returns": returns_annotation,
                "has_docstring": bool(ast.get_docstring(node)),
                "has_type_hints": has_type_hints,
                "is_async": isinstance(node, ast.AsyncFunctionDef),
            })

        # --- API calls (requests, fetch, httpx, aiohttp) ---
        elif isinstance(node, ast.Call):
            try:
                call_str = ast.unparse(node)
            except Exception:
                # Deliberately best effort: a call that cannot be rendered
                # back to source is skipped rather than failing extraction.
                continue
            if any(marker in call_str for marker in api_markers):
                # Truncate so very long call expressions stay compact.
                api_calls.append(call_str[:120])

        # --- __all__ exports ---
        elif isinstance(node, ast.Assign):
            assigns_all = any(
                isinstance(target, ast.Name) and target.id == "__all__"
                for target in node.targets
            )
            # Only literal list/tuple/set values can be read statically.
            if assigns_all and isinstance(node.value, (ast.List, ast.Tuple, ast.Set)):
                exports = [
                    elt.value
                    for elt in node.value.elts
                    if isinstance(elt, ast.Constant) and isinstance(elt.value, str)
                ]

    # Style convention detection (plain substring heuristics on the raw text)
    conventions = {
        "uses_try_catch": "try:" in code or "except" in code,
        "uses_type_hints": any(f["has_type_hints"] for f in functions),
        "uses_docstrings": any(f["has_docstring"] for f in functions),
        "no_print_stmts": "print(" not in code,
        "no_hardcoded_secrets": not _has_hardcoded_secrets(code),
        "uses_logging": "logging." in code or "logger." in code,
        "has_main_guard": 'if __name__ == "__main__"' in code or "if __name__ == '__main__'" in code,
    }

    return ComponentMetadata(
        file=filename,
        component_type="module" if len(functions) > 1 else "function",
        imports=imports,
        exports=exports,
        functions=functions,
        api_calls=api_calls,
        conventions=conventions,
        created_at_step=step,
    )


def _has_hardcoded_secrets(code: str) -> bool:
    """Heuristic: detect probable hardcoded credentials."""
    import re
    secret_patterns = [
        r'(?i)(password|passwd|pwd|secret|api_key|apikey|token)\s*=\s*["\'][^"\']{4,}["\']',
        r'(?i)(aws_secret|private_key)\s*=\s*["\'][^"\']{8,}["\']',
    ]
    for pattern in secret_patterns:
        if re.search(pattern, code):
            return True
    return False