SecureCodeEnv / codegraph /extractor.py
vishaldhakad's picture
frontend adding
7257069
"""
SecureCodeEnv - Metadata Extractor
Uses Python's built-in AST module to extract component metadata for CodeGraph.
No external dependencies required.
"""
import ast
from codegraph.graph import ComponentMetadata
# Substrings that flag an unparsed call expression as an outbound HTTP/API call.
_API_CALL_MARKERS = (
    "requests.get", "requests.post", "requests.put",
    "httpx.", "aiohttp.", "fetch(", "axios.",
)

# Maximum number of characters of a call expression kept per detected API call.
_API_CALL_MAX_LEN = 120


def _describe_function(node) -> dict:
    """Summarise a ``def`` / ``async def`` AST node as a plain dict."""
    returns_annotation = None
    if node.returns is not None:
        try:
            returns_annotation = ast.unparse(node.returns)
        except Exception:
            # Best effort: keep going with the node's str() rather than fail
            # the whole extraction over one annotation.
            returns_annotation = str(node.returns)

    # Look at every parameter kind, not only plain positional ones, so that
    # e.g. ``def f(*, x: int)`` is recognised as type-hinted.
    spec = node.args
    params = [*spec.posonlyargs, *spec.args, *spec.kwonlyargs]
    if spec.vararg is not None:
        params.append(spec.vararg)
    if spec.kwarg is not None:
        params.append(spec.kwarg)
    has_type_hints = node.returns is not None or any(
        p.annotation is not None for p in params
    )

    return {
        "name": node.name,
        "args": [a.arg for a in spec.args],
        "returns": returns_annotation,
        "has_docstring": bool(ast.get_docstring(node)),
        "has_type_hints": has_type_hints,
        "is_async": isinstance(node, ast.AsyncFunctionDef),
    }


def _literal_all_names(value):
    """Return the string entries of a literal ``__all__`` value.

    Returns ``None`` when *value* is not a list/tuple/set literal, so the
    caller can leave any previously collected exports untouched.
    """
    if not isinstance(value, (ast.List, ast.Tuple, ast.Set)):
        return None
    # ``Constant.value`` replaces the deprecated ``.s`` alias; only string
    # constants are meaningful export names.
    return [
        elt.value
        for elt in value.elts
        if isinstance(elt, ast.Constant) and isinstance(elt.value, str)
    ]


def extract_metadata(code: str, filename: str, step: int) -> ComponentMetadata:
    """
    Parse Python source code and extract structured metadata.

    Args:
        code: Python source text to analyse.
        filename: Stored as ``file`` on the returned metadata.
        step: Stored as ``created_at_step`` on the returned metadata.

    Returns:
        A ComponentMetadata describing imports, ``__all__`` exports, function
        signatures, detected API calls and style conventions. If the source
        cannot be parsed, a ComponentMetadata with ``component_type="error"``
        and the error details in ``conventions`` is returned instead.
    """
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError) as e:
        # ValueError covers sources ast.parse rejects before parsing (e.g. NUL
        # bytes on older interpreters); it carries no lineno/msg attributes.
        return ComponentMetadata(
            file=filename,
            component_type="error",
            imports=[],
            exports=[],
            functions=[],
            api_calls=[],
            conventions={
                "syntax_error": True,
                "error_line": getattr(e, "lineno", None),
                "error_msg": str(getattr(e, "msg", e)),
            },
            created_at_step=step,
        )

    imports: list[str] = []
    exports: list[str] = []
    functions: list[dict] = []
    api_calls: list[str] = []

    for node in ast.walk(tree):
        # --- Imports ---
        if isinstance(node, ast.Import):
            imports.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # ``from . import x`` has no module name and is not recorded.
            if node.module:
                # One "module.name" entry per imported name.
                imports.extend(
                    f"{node.module}.{alias.name}" for alias in node.names
                )

        # --- Functions (def and async def) ---
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            functions.append(_describe_function(node))

        # --- API calls (requests, fetch, httpx, aiohttp) ---
        elif isinstance(node, ast.Call):
            # NOTE: ast.walk also visits nested calls, so a chained expression
            # such as ``requests.get(u).json()`` matches for both the outer
            # and the inner call.
            try:
                call_str = ast.unparse(node)
            except Exception:
                # Best effort: a call that cannot be unparsed is skipped.
                continue
            if any(marker in call_str for marker in _API_CALL_MARKERS):
                api_calls.append(call_str[:_API_CALL_MAX_LEN])

        # --- __all__ exports (the last literal assignment visited wins) ---
        elif isinstance(node, ast.Assign):
            assigns_all = any(
                isinstance(target, ast.Name) and target.id == "__all__"
                for target in node.targets
            )
            if assigns_all:
                names = _literal_all_names(node.value)
                if names is not None:
                    exports = names

    # Style convention detection (plain substring heuristics on the source).
    conventions = {
        "uses_try_catch": "try:" in code or "except" in code,
        "uses_type_hints": any(f["has_type_hints"] for f in functions),
        "uses_docstrings": any(f["has_docstring"] for f in functions),
        "no_print_stmts": "print(" not in code,
        "no_hardcoded_secrets": not _has_hardcoded_secrets(code),
        "uses_logging": "logging." in code or "logger." in code,
        "has_main_guard": 'if __name__ == "__main__"' in code or "if __name__ == '__main__'" in code,
    }

    return ComponentMetadata(
        file=filename,
        component_type="module" if len(functions) > 1 else "function",
        imports=imports,
        exports=exports,
        functions=functions,
        api_calls=api_calls,
        conventions=conventions,
        created_at_step=step,
    )
def _has_hardcoded_secrets(code: str) -> bool:
"""Heuristic: detect probable hardcoded credentials."""
import re
secret_patterns = [
r'(?i)(password|passwd|pwd|secret|api_key|apikey|token)\s*=\s*["\'][^"\']{4,}["\']',
r'(?i)(aws_secret|private_key)\s*=\s*["\'][^"\']{8,}["\']',
]
for pattern in secret_patterns:
if re.search(pattern, code):
return True
return False