Spaces:
Sleeping
Sleeping
File size: 4,587 Bytes
"""
SecureCodeEnv - Metadata Extractor
Uses Python's built-in AST module to extract component metadata for CodeGraph.
No external dependencies required.
"""
import ast
from codegraph.graph import ComponentMetadata
def extract_metadata(code: str, filename: str, step: int) -> ComponentMetadata:
    """
    Parse Python source code and extract structured metadata.

    Args:
        code: Python source text to analyse.
        filename: Stored unchanged in the result's ``file`` field.
        step: Stored unchanged in the result's ``created_at_step`` field.

    Returns:
        A ComponentMetadata. If ``code`` cannot be parsed, the result has
        ``component_type="error"``, empty lists, and a ``conventions`` dict
        with the keys ``syntax_error``, ``error_line`` and ``error_msg``.
        Otherwise ``component_type`` is ``"module"`` when more than one
        function definition was found and ``"function"`` otherwise.

    Collected fields on success:
        imports: one entry per ``import x`` name and one ``"module.name"``
            entry per name of a ``from module import name`` statement;
            relative imports keep their leading dots (``".pkg.name"``).
        exports: string elements of a literal ``__all__`` list/tuple/set.
        functions: one dict per ``def`` / ``async def`` found anywhere in
            the tree (name, args, returns, has_docstring, has_type_hints,
            is_async).
        api_calls: unparsed call expressions (truncated to 120 characters)
            whose text contains one of the HTTP-client markers.
        conventions: text-based style flags computed from ``code``.
    """
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError) as exc:
        # Return a structured error instead of an empty object.
        # ValueError covers sources with null bytes on interpreters that do
        # not report that case as a SyntaxError; it has no lineno/msg.
        return ComponentMetadata(
            file=filename,
            component_type="error",
            imports=[],
            exports=[],
            functions=[],
            api_calls=[],
            conventions={
                "syntax_error": True,
                "error_line": getattr(exc, "lineno", None),
                "error_msg": str(getattr(exc, "msg", exc)),
            },
            created_at_step=step,
        )

    # Substrings that mark a call expression as an outbound API call.
    api_markers = (
        "requests.get", "requests.post", "requests.put",
        "httpx.", "aiohttp.", "fetch(", "axios.",
    )

    imports: list[str] = []
    exports: list[str] = []
    functions: list[dict] = []
    api_calls: list[str] = []

    # Single pass over the tree; each node type is handled by one branch.
    for node in ast.walk(tree):
        # --- Imports ---
        if isinstance(node, ast.Import):
            imports.extend(alias.name for alias in node.names)

        elif isinstance(node, ast.ImportFrom):
            # One "module.name" entry per imported name. Leading dots keep
            # the relative-import level; "from . import x" becomes ".x".
            prefix = "." * node.level + (node.module or "")
            separator = "." if node.module else ""
            imports.extend(
                f"{prefix}{separator}{alias.name}" for alias in node.names
            )

        # --- Functions (def and async def) ---
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            returns_annotation = None
            if node.returns is not None:
                try:
                    returns_annotation = ast.unparse(node.returns)
                except Exception:
                    # Best effort: fall back to the node's repr.
                    returns_annotation = str(node.returns)

            # Consider every parameter kind, not only plain positional ones.
            spec = node.args
            params = [*spec.posonlyargs, *spec.args, *spec.kwonlyargs]
            params += [p for p in (spec.vararg, spec.kwarg) if p is not None]
            has_type_hints = node.returns is not None or any(
                p.annotation is not None for p in params
            )

            functions.append({
                "name": node.name,
                "args": [a.arg for a in node.args.args],
                "returns": returns_annotation,
                "has_docstring": bool(ast.get_docstring(node)),
                "has_type_hints": has_type_hints,
                "is_async": isinstance(node, ast.AsyncFunctionDef),
            })

        # --- API calls (requests, fetch, httpx, aiohttp) ---
        elif isinstance(node, ast.Call):
            try:
                call_str = ast.unparse(node)
            except Exception:
                # Best effort: a call that cannot be rendered is skipped.
                continue
            # Every Call node is tested, so a matching call nested inside
            # another call is recorded once per enclosing Call as well.
            if any(marker in call_str for marker in api_markers):
                api_calls.append(call_str[:120])

        # --- __all__ exports ---
        elif isinstance(node, (ast.Assign, ast.AnnAssign)):
            targets = (
                node.targets if isinstance(node, ast.Assign) else [node.target]
            )
            assigns_all = any(
                isinstance(t, ast.Name) and t.id == "__all__" for t in targets
            )
            # Only literal containers can be read statically; anything else
            # (e.g. a function call) leaves ``exports`` unchanged.
            if assigns_all and isinstance(
                node.value, (ast.List, ast.Tuple, ast.Set)
            ):
                exports = [
                    elt.value
                    for elt in node.value.elts
                    if isinstance(elt, ast.Constant)
                    and isinstance(elt.value, str)
                ]

    # Style convention detection (plain substring heuristics on the source).
    conventions = {
        "uses_try_catch": "try:" in code or "except" in code,
        "uses_type_hints": any(f["has_type_hints"] for f in functions),
        "uses_docstrings": any(f["has_docstring"] for f in functions),
        "no_print_stmts": "print(" not in code,
        "no_hardcoded_secrets": not _has_hardcoded_secrets(code),
        "uses_logging": "logging." in code or "logger." in code,
        "has_main_guard": 'if __name__ == "__main__"' in code or "if __name__ == '__main__'" in code,
    }

    return ComponentMetadata(
        file=filename,
        component_type="module" if len(functions) > 1 else "function",
        imports=imports,
        exports=exports,
        functions=functions,
        api_calls=api_calls,
        conventions=conventions,
        created_at_step=step,
    )
def _has_hardcoded_secrets(code: str) -> bool:
"""Heuristic: detect probable hardcoded credentials."""
import re
secret_patterns = [
r'(?i)(password|passwd|pwd|secret|api_key|apikey|token)\s*=\s*["\'][^"\']{4,}["\']',
r'(?i)(aws_secret|private_key)\s*=\s*["\'][^"\']{8,}["\']',
]
for pattern in secret_patterns:
if re.search(pattern, code):
return True
return False
|