"""
SecureCodeEnv - Metadata Extractor

Uses Python's built-in AST module to extract component metadata for CodeGraph.
No external dependencies required.
"""
import ast
import re

from codegraph.graph import ComponentMetadata

# Substrings that mark an unparsed call expression as an outbound API call.
_API_CALL_MARKERS = (
    "requests.get",
    "requests.post",
    "requests.put",
    "httpx.",
    "aiohttp.",
    "fetch(",
    "axios.",
)

# Maximum number of characters of call text stored per detected API call.
_API_CALL_MAX_LEN = 120

# Compiled once at import time; each matches `name = "<literal>"` assignments
# whose name looks like a credential.
_SECRET_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'(?i)(password|passwd|pwd|secret|api_key|apikey|token)\s*=\s*["\'][^"\']{4,}["\']',
        r'(?i)(aws_secret|private_key)\s*=\s*["\'][^"\']{8,}["\']',
    )
)


def extract_metadata(code: str, filename: str, step: int) -> ComponentMetadata:
    """
    Parse Python source code and extract structured metadata.

    Args:
        code: Python source text to analyse.
        filename: Stored verbatim in the ``file`` field of the result.
        step: Stored verbatim in the ``created_at_step`` field of the result.

    Returns:
        A ComponentMetadata describing imports, ``__all__`` exports, function
        definitions, API-looking calls and style conventions. On SyntaxError a
        ComponentMetadata with ``component_type="error"`` is returned instead,
        carrying the error line and message in ``conventions``.
    """
    try:
        tree = ast.parse(code)
    except SyntaxError as e:
        # V2: Return structured error instead of empty object
        return ComponentMetadata(
            file=filename,
            component_type="error",
            imports=[],
            exports=[],
            functions=[],
            api_calls=[],
            conventions={
                "syntax_error": True,
                "error_line": e.lineno,
                "error_msg": str(e.msg),
            },
            created_at_step=step,
        )

    imports: list[str] = []
    exports: list[str] = []
    functions: list[dict] = []
    api_calls: list[str] = []

    # Single pass over every node (nested scopes included).
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            imports.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # Relative imports without a module (`from . import x`) are skipped.
            if node.module:
                # One "module.name" entry per imported name.
                imports.extend(
                    f"{node.module}.{alias.name}" for alias in node.names
                )
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            functions.append(_describe_function(node))
        elif isinstance(node, ast.Call):
            call_text = _api_call_text(node)
            if call_text:
                api_calls.append(call_text)
        elif isinstance(node, ast.Assign):
            declared = _all_exports(node)
            if declared is not None:
                # A later `__all__ = [...]` replaces an earlier one.
                exports = declared

    # Style convention detection (plain substring heuristics on the raw text).
    conventions = {
        "uses_try_catch": "try:" in code or "except" in code,
        "uses_type_hints": any(f["has_type_hints"] for f in functions),
        "uses_docstrings": any(f["has_docstring"] for f in functions),
        "no_print_stmts": "print(" not in code,
        "no_hardcoded_secrets": not _has_hardcoded_secrets(code),
        "uses_logging": "logging." in code or "logger." in code,
        "has_main_guard": 'if __name__ == "__main__"' in code
        or "if __name__ == '__main__'" in code,
    }

    return ComponentMetadata(
        file=filename,
        component_type="module" if len(functions) > 1 else "function",
        imports=imports,
        exports=exports,
        functions=functions,
        api_calls=api_calls,
        conventions=conventions,
        created_at_step=step,
    )


def _describe_function(node) -> dict:
    """Build the metadata dict for one ``def`` / ``async def`` node."""
    returns_annotation = None
    if node.returns is not None:
        try:
            returns_annotation = ast.unparse(node.returns)
        except Exception:
            # Best effort: fall back to the node's repr rather than failing.
            returns_annotation = str(node.returns)

    signature = node.args
    # Look at every kind of parameter so annotations on keyword-only,
    # positional-only, *args and **kwargs parameters are not missed.
    params = [*signature.posonlyargs, *signature.args, *signature.kwonlyargs]
    if signature.vararg is not None:
        params.append(signature.vararg)
    if signature.kwarg is not None:
        params.append(signature.kwarg)
    has_type_hints = node.returns is not None or any(
        p.annotation is not None for p in params
    )

    return {
        "name": node.name,
        # Kept as the regular positional parameters only, as before.
        "args": [a.arg for a in signature.args],
        "returns": returns_annotation,
        "has_docstring": bool(ast.get_docstring(node)),
        "has_type_hints": has_type_hints,
        "is_async": isinstance(node, ast.AsyncFunctionDef),
    }


def _api_call_text(node) -> str:
    """Return the truncated source of *node* if it looks like an API call, else ""."""
    try:
        call_str = ast.unparse(node)
    except Exception:
        # Deliberately best-effort: an expression that cannot be unparsed is
        # simply not reported as an API call.
        return ""
    if any(marker in call_str for marker in _API_CALL_MARKERS):
        return call_str[:_API_CALL_MAX_LEN]
    return ""


def _all_exports(node):
    """Return the string entries of an ``__all__ = [...]`` assignment.

    Returns None when *node* does not assign a list or tuple literal to
    ``__all__``, so the caller can leave previously found exports untouched.
    """
    targets_all = any(
        isinstance(target, ast.Name) and target.id == "__all__"
        for target in node.targets
    )
    if not targets_all or not isinstance(node.value, (ast.List, ast.Tuple)):
        return None
    # Use `.value`: the `.s` alias on ast.Constant is deprecated and removed
    # in Python 3.14. Non-string constants are ignored.
    return [
        elt.value
        for elt in node.value.elts
        if isinstance(elt, ast.Constant) and isinstance(elt.value, str)
    ]


def _has_hardcoded_secrets(code: str) -> bool:
    """Heuristic: detect probable hardcoded credentials."""
    return any(pattern.search(code) for pattern in _SECRET_PATTERNS)