Spaces:
Build error
Build error
| """Typed models for Python code review and repair environment.""" | |
| from __future__ import annotations | |
| from typing import Any, Dict, List, Literal, Optional | |
| from pydantic import BaseModel, Field | |
| from compat import Action, Observation, State | |
# Closed vocabularies shared by the models below. Kept as Literal aliases so
# pydantic validates incoming strings against the exact allowed values.
Difficulty = Literal["easy", "medium", "hard"]  # task difficulty level
TaskKind = Literal["syntax_fix", "bug_fix", "optimization"]  # what kind of repair a task asks for
ActionType = Literal["analyze_code", "edit_code", "run_tests", "submit_solution"]  # agent action space
Category = Literal["bug", "security", "performance", "maintainability", "style", "testing"]  # review-finding category
Severity = Literal["critical", "warning", "info"]  # review-finding severity
class HistoryEntry(BaseModel):
    """Record of one action taken during an episode.

    Appended to the observation/state ``history`` lists so the agent can see
    what it has done so far and what each step earned.
    """

    step: int = Field(..., ge=0)  # 0-based index of the step within the episode
    action_type: ActionType  # which of the four actions was performed
    status: str = Field(..., description="Outcome message")
    reward: float = Field(...)  # net scalar reward for this step; no range constraint enforced here
class RewardDetails(BaseModel):
    """Detailed reward breakdown for transparent agent feedback.

    The reward system is dynamic and multi-component. The six primary sources:

    1. Progress Reward (max +0.25)
       - Awarded for score improvement from previous step
       - Formula: min(PROGRESS_SCALE * score_delta, 0.25)
       - Encourages continuous improvement
    2. Syntax Reward (max +0.35)
       - One-time bonus for fixing syntax errors (first compile)
       - Applied when code transitions from uncompilable to compilable
       - Acknowledges the critical first step of valid code
    3. Test Reward (max +0.20)
       - Based on improvement in test pass rate
       - Formula: min(TEST_PASS_REWARD_SCALE * test_improvement, 0.20)
       - Rewards incremental test progress
    4. Quality Reward (max +0.15)
       - Based on AST-detected code quality metrics
       - Rewards improvements in structure, readability, best practices
       - Uses deterministic grader feedback
    5. Stagnation Penalty (−0.10)
       - Applied when agent acts but code doesn't change
       - Encourages editing rather than repeated analysis
       - Configurable via STAGNATION_PENALTY constant
    6. Regression Penalty (scale −0.20)
       - Applied when score decreases from previous step
       - Formula: REGRESSION_PENALTY_SCALE * abs(score_delta)
       - Discourages actions that make code worse

    Final Reward: clamp(progress + syntax + test + quality - stagnation - regression, -1.0, +1.0)

    The result is always bounded in [-1.0, +1.0], providing interpretable feedback for learning.

    NOTE(review): the fields below also carry ``correctness_bonus``,
    ``invalid_action_penalty`` and ``timeout_penalty``, which the six-source
    summary and the final formula above do not mention — presumably they are
    folded into ``value`` as well; confirm against the reward implementation.
    """

    # Net result of combining all components; the field descriptions below
    # document each component's sign convention and cap.
    value: float = Field(..., description="Net scalar reward for this step (bounded in [-1.0, +1.0])")
    syntax_reward: float = Field(default=0.0, description="Bonus for fixing syntax errors (max +0.35)")
    test_reward: float = Field(default=0.0, description="Reward from test improvements (max +0.20)")
    quality_bonus: float = Field(default=0.0, description="Bonus for code quality improvements (max +0.15)")
    correctness_bonus: float = Field(default=0.0, description="Bonus for full correctness (max +0.50)")
    progress_delta: float = Field(default=0.0, description="Reward from score improvement (max +0.25)")
    stagnation_penalty: float = Field(default=0.0, description="Penalty for unchanged code (−0.10)")
    regression_penalty: float = Field(default=0.0, description="Penalty for score decline (scale −0.20)")
    invalid_action_penalty: float = Field(default=0.0, description="Penalty for invalid actions (−0.15)")
    timeout_penalty: float = Field(default=0.0, description="Penalty for execution timeout (−0.15)")
    reason: str = Field(..., description="Human-readable explanation of the reward")
    # Debug information for transparency
    prev_score: float = Field(default=0.0, description="Score before this step")
    curr_score: float = Field(default=0.0, description="Score after this step")
    code_changed: bool = Field(default=False, description="Whether the action modified the code")
class PythonCodeReviewAction(Action):
    """Action space for code review environment.

    ``code`` is only meaningful for ``edit_code`` actions; the other action
    types carry no payload beyond ``action_type`` itself.
    """

    action_type: ActionType = Field(..., description="Type of action to perform")
    code: Optional[str] = Field(default=None, description="New code for edit_code actions")
class PythonCodeReviewObservation(Observation):
    """Observation returned by reset() and step().

    Bundles the task statement, the current code under repair, execution
    feedback, and the reward breakdown for the most recent action.
    """

    task_id: str = Field(..., description="Current task identifier")
    title: str = Field(default="", description="Human-readable task title")
    difficulty: Difficulty = Field(..., description="Task difficulty level")
    task_kind: Optional[TaskKind] = Field(default=None, description="Task type")
    task_description: str = Field(..., description="Detailed task description")
    current_code: str = Field(..., description="Current code state")
    errors: str = Field(..., description="Syntax/compilation errors, if any")
    test_results: str = Field(..., description="Results from test execution")
    visible_tests: List[str] = Field(default_factory=list, description="Public test cases")
    history: List[HistoryEntry] = Field(default_factory=list, description="Action history")
    attempts_remaining: int = Field(..., ge=0, description="Actions left in episode")
    last_action_status: str = Field(default="", description="Outcome message from the last action")
    score: float = Field(..., ge=0.0, le=1.0, description="Current episode score")
    # default_factory builds a fresh neutral breakdown per observation so
    # reset() observations carry a valid RewardDetails without sharing state.
    reward_details: RewardDetails = Field(
        default_factory=lambda: RewardDetails(value=0.0, reason="Reset"),
        description="Detailed reward breakdown for the last action",
    )
class PythonCodeReviewState(State):
    """Exposed environment state.

    Mirrors the observation fields but is optional-heavy: before a task is
    loaded, task-specific fields remain at their defaults.
    """

    episode_id: str = Field(..., description="Unique episode identifier")
    step_count: int = Field(default=0, ge=0)  # number of steps taken so far
    task_id: Optional[str] = Field(default=None)  # None until a task is assigned
    difficulty: Optional[Difficulty] = Field(default=None)
    task_kind: Optional[TaskKind] = Field(default=None)
    attempts_remaining: int = Field(default=0, ge=0)
    current_code: str = Field(default="")  # latest code state
    errors: str = Field(default="")  # latest syntax/compilation errors
    test_results: str = Field(default="")  # latest test execution output
    history: List[HistoryEntry] = Field(default_factory=list)
    score: float = Field(default=0.0, ge=0.0, le=1.0)  # current episode score
    done: bool = Field(default=False)  # True once the episode has terminated
class TaskDescriptor(BaseModel):
    """Public task metadata.

    Full description of a single task, covering both repair-style tasks
    (starter code + tests) and review-style tasks (goal, repo context, files).
    """

    task_id: str = Field(..., description="Stable task identifier")
    title: str = Field(..., description="Human-readable title")
    difficulty: Difficulty = Field(..., description="Difficulty level")
    task_kind: Optional[TaskKind] = Field(default=None, description="Type of task")
    task_description: str = Field(default="", description="Full task description")
    starter_code: str = Field(default="", description="Initial broken code")
    visible_tests: List[str] = Field(default_factory=list, description="Public test cases")
    goal: str = Field(default="", description="Optional goal summary for review-style tasks")
    repo_summary: str = Field(default="", description="Optional repository context")
    changed_files: List[str] = Field(default_factory=list, description="Changed files for review-style tasks")
    available_files: List[str] = Field(default_factory=list, description="Browsable files for review-style tasks")
    max_steps: int = Field(..., ge=1, description="Maximum steps allowed")
class TaskSummary(BaseModel):
    """Lightweight task metadata for list endpoints.

    A cut-down view of TaskDescriptor, suitable for listing many tasks
    without shipping full descriptions and starter code.
    """

    task_id: str = Field(..., description="Stable task identifier")
    difficulty: Difficulty = Field(..., description="Difficulty level")
    title: str = Field(..., description="Human-readable title")
    goal: str = Field(default="", description="Optional task goal")
class ReviewFinding(BaseModel):
    """Structured code review finding used by auxiliary review utilities."""

    title: str = Field(..., description="Short human-readable finding title")
    file_path: str = Field(default="", description="Optional file path")
    line: Optional[int] = Field(default=None, ge=1, description="Optional 1-based line number")
    category: Category = Field(default="bug", description="Finding category")
    severity: Severity = Field(default="warning", description="Finding severity")
    rationale: str = Field(default="", description="Why this matters")
    recommendation: str = Field(default="", description="Suggested remediation")
    rule_id: str = Field(default="", description="Stable detector or rubric identifier")

    # The two methods below exist only for backward compatibility: older
    # grading helpers used the names ``explanation``/``suggested_fix`` for
    # what are now the ``rationale``/``recommendation`` fields.
    def explanation(self) -> str:
        """Backward-compatible alias used by older grading helpers."""
        return self.rationale

    def suggested_fix(self) -> str:
        """Backward-compatible alias used by older grading helpers."""
        return self.recommendation
class DirectReviewResponse(BaseModel):
    """Response payload for deterministic direct-review utilities."""

    issues: List[ReviewFinding] = Field(default_factory=list)  # structured findings, possibly empty
    summary: str = Field(default="")  # free-text review summary
    score: float = Field(default=0.0, ge=0.0, le=1.0)  # overall review score
    improved_code: Optional[str] = Field(default=None)  # optional rewritten code, if the utility produced one
class TaskGrade(BaseModel):
    """Grading result for task submission.

    Combines repair-style metrics (syntax/tests/quality/runtime) with
    review-style matching metrics (matched issues, false positives).
    """

    score: float = Field(..., ge=0.0, le=1.0, description="Overall score")
    syntax_score: float = Field(default=0.0, ge=0.0, le=1.0)  # 1.0 presumably means the code compiles — confirm in grader
    tests_passed: int = Field(default=0, ge=0)  # count of passing tests
    tests_total: int = Field(default=0, ge=0)  # total tests executed
    quality_score: float = Field(default=0.0, ge=0.0, le=1.0)  # AST/quality-based component
    runtime_score: float = Field(default=0.0, ge=0.0, le=1.0)  # runtime/performance component
    timed_out: bool = Field(default=False)  # True if execution hit the time limit
    # Review-style matching results; names suggest comparison against a
    # rubric of known issues — verify semantics against the grader.
    matched_issue_ids: List[str] = Field(default_factory=list)
    false_positives: int = Field(default=0, ge=0)
    duplicate_findings: int = Field(default=0, ge=0)
    matched_weight: float = Field(default=0.0, ge=0.0, le=1.0)
    details: Dict[str, Any] = Field(default_factory=dict)  # free-form extra diagnostics
class HealthResponse(BaseModel):
    """Health check response."""

    status: Literal["ok"] = "ok"  # fixed value; any other string fails validation
    environment: str = "python_code_review_env"  # environment identifier
    task_count: int = Field(default=0, ge=0)  # number of tasks currently loaded