Spaces:
Sleeping
Sleeping
| """ | |
| SecureCodeEnv - CodeGraph V2 | |
| A structured in-memory database of everything the agent has written in the current episode. | |
| This is the innovation that makes SecureCodeEnv unique among ALL RL environments. | |
| Without CodeGraph: Agent writes UserAuth.py in camelCase, Dashboard.py in snake_case. | |
| No existing RL environment penalizes this inconsistency. | |
| With CodeGraph: Every convention violation costs reward. Agent learns to be consistent. | |
| """ | |
| from dataclasses import dataclass, field | |
| from typing import Dict, List, Optional, Any | |
@dataclass
class FunctionSignature:
    """Summary of one function's signature and documentation status.

    Declared as a dataclass so instances can be built from field values and
    compared by value; without the decorator the annotations below would not
    generate an ``__init__``.
    """

    name: str  # Function name as written in the source
    args: List[str]  # Argument names
    returns: Optional[str]  # Return annotation text, or None when absent
    has_docstring: bool
    has_type_hints: bool
    is_async: bool = False
@dataclass
class ComponentMetadata:
    """Metadata recorded for one component (file) written by the agent.

    Declared as a dataclass so the annotated fields below become constructor
    parameters; ``language`` is the only field with a default.
    """

    file: str
    component_type: str  # 'function' | 'class' | 'module'
    imports: List[str]
    exports: List[str]
    functions: List[dict]  # FunctionSignature as dicts for JSON serialization
    api_calls: List[str]
    conventions: dict  # Detected style conventions
    created_at_step: int
    language: str = "python"  # 'python' | 'javascript' | 'typescript'

    def to_dict(self) -> dict:
        """Return the fields as a plain dict keyed by field name.

        The values are the same objects held by the instance (no copying),
        so mutating a returned list or dict also mutates this component.
        """
        return {
            "file": self.file,
            "component_type": self.component_type,
            "imports": self.imports,
            "exports": self.exports,
            "functions": self.functions,
            "api_calls": self.api_calls,
            "conventions": self.conventions,
            "created_at_step": self.created_at_step,
            "language": self.language,
        }
@dataclass
class CodeGraph:
    """In-memory record of the components written so far, plus derived style.

    Declared as a dataclass so that each instance gets its own ``components``,
    ``conventions`` and ``dependencies`` dicts via ``default_factory``.

    Attributes:
        components: Component metadata keyed by the file's base name with its
            source extension removed.
        conventions: Dominant style derived by ``_infer_conventions``.
        dependencies: Top-level package names seen in imports, each mapped to True.
        episode_seed: Integer stored with the graph; not read by any method here.
    """

    # Forward-reference strings keep this class definition independent of
    # where ComponentMetadata is defined.
    components: Dict[str, "ComponentMetadata"] = field(default_factory=dict)
    conventions: dict = field(default_factory=dict)  # Inferred dominant codebase style
    dependencies: dict = field(default_factory=dict)  # Imported package names
    episode_seed: int = 0

    def update(self, filename: str, metadata: "ComponentMetadata"):
        """Add or replace a component and re-derive dominant conventions.

        Args:
            filename: Path of the component; only the part after the last "/"
                is used, with one trailing source extension removed.
            metadata: Metadata stored under the derived component name.
        """
        name = filename.split("/")[-1]
        # Strip only a trailing extension. A blanket str.replace would turn
        # "app.tsx" into "appx" (".ts" matched first) and would also delete
        # extension-like text from the middle of a name.
        for ext in (".tsx", ".jsx", ".py", ".js", ".ts"):
            if name.endswith(ext):
                name = name[: -len(ext)]
                break
        self.components[name] = metadata
        self._infer_conventions()
        self._track_dependencies(metadata)

    def _infer_conventions(self):
        """
        Derive dominant code style from ALL existing components.
        Threshold: >60% majority (not >50%) to avoid false positives on small samples.
        Adds 'mixed' state when split is too close.

        Returns early, leaving ``self.conventions`` untouched, when no
        component has any recorded function.
        """
        all_fns = [f for c in self.components.values() for f in c.functions]
        if not all_fns:
            return
        total = len(all_fns)
        threshold = 0.60  # V2: raised from 50% to 60%

        # Naming convention: a name counts as snake_case when it contains an
        # underscore or is all lowercase, and as camelCase when it starts
        # lowercase and contains an uppercase letter.
        names = [f["name"] for f in all_fns]
        snake = sum(1 for n in names if "_" in n or n.islower())
        camel = sum(
            1 for n in names
            if n and n[0].islower() and any(ch.isupper() for ch in n)
        )
        if snake / total > threshold:
            self.conventions["naming"] = "snake_case"
        elif camel / total > threshold:
            self.conventions["naming"] = "camelCase"
        else:
            self.conventions["naming"] = "mixed"

        comps = list(self.components.values())
        comp_count = max(len(comps), 1)

        # Error handling: a single component using try/catch is enough.
        uses_try = any(c.conventions.get("uses_try_catch") for c in comps)
        self.conventions["error_handling"] = "try_catch" if uses_try else "none"

        # Type hints: share of components must exceed the threshold.
        typed = sum(1 for c in comps if c.conventions.get("uses_type_hints"))
        self.conventions["uses_type_hints"] = typed / comp_count > threshold

        # Docstrings: share of components must exceed the threshold.
        documented = sum(1 for c in comps if c.conventions.get("uses_docstrings"))
        self.conventions["uses_docstrings"] = documented / comp_count > threshold

    def _track_dependencies(self, metadata: "ComponentMetadata"):
        """Track all imported packages for supply chain security checks.

        Only the top-level package (text before the first ".") is recorded;
        imports whose first segment is empty are skipped.
        """
        for imp in metadata.imports:
            pkg = imp.split(".")[0]
            if pkg:
                self.dependencies[pkg] = True

    def to_context_prompt(self) -> str:
        """Serialize to natural language for the agent's observation.

        Lists at most the five most recently added components (dict insertion
        order; a replaced component keeps its original position), each with
        up to five function names and four imports.
        """
        if not self.components:
            return "=== CODEBASE CONTEXT: Empty (this is the first component) ==="
        lines = ["=== EXISTING CODEBASE CONTEXT ==="]
        lines.append(f"Conventions: {self.conventions}")
        lines.append("")
        # Take the tail of the insertion-ordered dict: the newest entries.
        # Slicing from the front would show the oldest five instead.
        for name, comp in list(self.components.items())[-5:]:
            lines.append(f"Component: {name} ({comp.file})")
            fn_names = [f["name"] for f in comp.functions[:5]]
            lines.append(f" Functions: {fn_names}")
            lines.append(f" Imports: {comp.imports[:4]}")
            lines.append(f" Conventions: {comp.conventions}")
        return "\n".join(lines)