"""Typed models for Python code review and repair environment."""
from __future__ import annotations
from typing import Any, Dict, List, Literal, Optional
from pydantic import BaseModel, Field
from compat import Action, Observation, State
# Closed vocabularies shared by the models below. Using Literal aliases keeps
# the pydantic schemas self-documenting and makes validation reject any value
# outside these sets.
Difficulty = Literal["easy", "medium", "hard"]
TaskKind = Literal["syntax_fix", "bug_fix", "optimization"]
# The four actions an agent may take in one step; see PythonCodeReviewAction.
ActionType = Literal["analyze_code", "edit_code", "run_tests", "submit_solution"]
# Classification axes for review findings; see ReviewFinding.
Category = Literal["bug", "security", "performance", "maintainability", "style", "testing"]
Severity = Literal["critical", "warning", "info"]
class HistoryEntry(BaseModel):
    """Record of one action taken during an episode.

    One entry per step: which action ran, the outcome message it produced,
    and the scalar reward it earned.
    """

    # Zero-based position of this entry within the episode.
    step: int = Field(
        ...,
        ge=0,
    )
    # One of the four ActionType literals.
    action_type: ActionType
    status: str = Field(
        ...,
        description="Outcome message",
    )
    reward: float = Field(...)
class RewardDetails(BaseModel):
    """Detailed reward breakdown for transparent agent feedback.

    The reward system is dynamic and multi-component. Positive components:

    1. Progress Reward (``progress_delta``, max +0.25)
       - Awarded for score improvement from the previous step
       - Formula: min(PROGRESS_SCALE * score_delta, 0.25)
       - Encourages continuous improvement
    2. Syntax Reward (``syntax_reward``, max +0.35)
       - One-time bonus for fixing syntax errors (first compile)
       - Applied when code transitions from uncompilable to compilable
       - Acknowledges the critical first step of valid code
    3. Test Reward (``test_reward``, max +0.20)
       - Based on improvement in test pass rate
       - Formula: min(TEST_PASS_REWARD_SCALE * test_improvement, 0.20)
       - Rewards incremental test progress
    4. Quality Reward (``quality_bonus``, max +0.15)
       - Based on AST-detected code quality metrics
       - Rewards improvements in structure, readability, best practices
       - Uses deterministic grader feedback
    5. Correctness Bonus (``correctness_bonus``, max +0.50)
       - Awarded for full correctness

    Penalty components:

    6. Stagnation Penalty (``stagnation_penalty``, -0.10)
       - Applied when agent acts but code doesn't change
       - Encourages editing rather than repeated analysis
       - Configurable via STAGNATION_PENALTY constant
    7. Regression Penalty (``regression_penalty``, scale -0.20)
       - Applied when score decreases from previous step
       - Formula: REGRESSION_PENALTY_SCALE * abs(score_delta)
       - Discourages actions that make code worse
    8. Invalid Action Penalty (``invalid_action_penalty``, -0.15)
       - Applied when the agent submits a malformed or disallowed action
    9. Timeout Penalty (``timeout_penalty``, -0.15)
       - Applied when code execution times out

    Final Reward: the clamped sum of all bonuses minus all penalties,
    always bounded in [-1.0, +1.0], providing interpretable feedback for
    learning. The net result is exposed as ``value``.
    """
    # Net scalar result after clamping; the individual components below sum to it.
    value: float = Field(..., description="Net scalar reward for this step (bounded in [-1.0, +1.0])")
    syntax_reward: float = Field(default=0.0, description="Bonus for fixing syntax errors (max +0.35)")
    test_reward: float = Field(default=0.0, description="Reward from test improvements (max +0.20)")
    quality_bonus: float = Field(default=0.0, description="Bonus for code quality improvements (max +0.15)")
    correctness_bonus: float = Field(default=0.0, description="Bonus for full correctness (max +0.50)")
    progress_delta: float = Field(default=0.0, description="Reward from score improvement (max +0.25)")
    # Penalties are stored as magnitudes; see the class docstring for when each applies.
    stagnation_penalty: float = Field(default=0.0, description="Penalty for unchanged code (−0.10)")
    regression_penalty: float = Field(default=0.0, description="Penalty for score decline (scale −0.20)")
    invalid_action_penalty: float = Field(default=0.0, description="Penalty for invalid actions (−0.15)")
    timeout_penalty: float = Field(default=0.0, description="Penalty for execution timeout (−0.15)")
    reason: str = Field(..., description="Human-readable explanation of the reward")
    # Debug information for transparency
    prev_score: float = Field(default=0.0, description="Score before this step")
    curr_score: float = Field(default=0.0, description="Score after this step")
    code_changed: bool = Field(default=False, description="Whether the action modified the code")
class PythonCodeReviewAction(Action):
    """Action space for code review environment.

    Supply ``code`` when performing an ``edit_code`` action; it holds the
    replacement source text.
    """

    # Which of the four ActionType literals to perform this step.
    action_type: ActionType = Field(
        ...,
        description="Type of action to perform",
    )
    code: Optional[str] = Field(
        default=None,
        description="New code for edit_code actions",
    )
class PythonCodeReviewObservation(Observation):
    """Observation returned by reset() and step().

    Bundles everything the agent can see in one step: the task identity and
    description, the current code with its diagnostics, the running action
    history, and a detailed breakdown of the last reward.
    """
    # --- Task identity and description ---
    task_id: str = Field(..., description="Current task identifier")
    title: str = Field(default="", description="Human-readable task title")
    difficulty: Difficulty = Field(..., description="Task difficulty level")
    task_kind: Optional[TaskKind] = Field(default=None, description="Task type")
    task_description: str = Field(..., description="Detailed task description")
    # --- Current code and diagnostics ---
    current_code: str = Field(..., description="Current code state")
    errors: str = Field(..., description="Syntax/compilation errors, if any")
    test_results: str = Field(..., description="Results from test execution")
    visible_tests: List[str] = Field(default_factory=list, description="Public test cases")
    # --- Episode progress ---
    history: List[HistoryEntry] = Field(default_factory=list, description="Action history")
    attempts_remaining: int = Field(..., ge=0, description="Actions left in episode")
    last_action_status: str = Field(default="", description="Outcome message from the last action")
    score: float = Field(..., ge=0.0, le=1.0, description="Current episode score")
    # Defaults to a zero-valued "Reset" breakdown so reset() observations carry
    # a well-formed RewardDetails without a real reward having been computed.
    reward_details: RewardDetails = Field(
        default_factory=lambda: RewardDetails(value=0.0, reason="Reset"),
        description="Detailed reward breakdown for the last action",
    )
class PythonCodeReviewState(State):
    """Exposed environment state.

    Server-side snapshot of an episode; mirrors most observation fields but
    is keyed by ``episode_id`` and tracks ``step_count``/``done`` directly.
    """
    episode_id: str = Field(..., description="Unique episode identifier")
    step_count: int = Field(default=0, ge=0)
    # Task fields are Optional/defaulted so a state can exist before a task is assigned.
    task_id: Optional[str] = Field(default=None)
    difficulty: Optional[Difficulty] = Field(default=None)
    task_kind: Optional[TaskKind] = Field(default=None)
    attempts_remaining: int = Field(default=0, ge=0)
    current_code: str = Field(default="")
    errors: str = Field(default="")
    test_results: str = Field(default="")
    history: List[HistoryEntry] = Field(default_factory=list)
    # Bounded like the observation's score: [0.0, 1.0].
    score: float = Field(default=0.0, ge=0.0, le=1.0)
    done: bool = Field(default=False)
class TaskDescriptor(BaseModel):
    """Public task metadata.

    Full description of a task as served to clients: identity, the broken
    starter code, public tests, and optional review-style context fields.
    """
    task_id: str = Field(..., description="Stable task identifier")
    title: str = Field(..., description="Human-readable title")
    difficulty: Difficulty = Field(..., description="Difficulty level")
    task_kind: Optional[TaskKind] = Field(default=None, description="Type of task")
    task_description: str = Field(default="", description="Full task description")
    starter_code: str = Field(default="", description="Initial broken code")
    visible_tests: List[str] = Field(default_factory=list, description="Public test cases")
    # The remaining optional fields only apply to review-style tasks.
    goal: str = Field(default="", description="Optional goal summary for review-style tasks")
    repo_summary: str = Field(default="", description="Optional repository context")
    changed_files: List[str] = Field(default_factory=list, description="Changed files for review-style tasks")
    available_files: List[str] = Field(default_factory=list, description="Browsable files for review-style tasks")
    max_steps: int = Field(..., ge=1, description="Maximum steps allowed")
class TaskSummary(BaseModel):
    """Lightweight task metadata for list endpoints.

    Carries just enough to render a task list — identity, difficulty,
    title, and an optional goal — without starter code or tests.
    """

    task_id: str = Field(
        ...,
        description="Stable task identifier",
    )
    difficulty: Difficulty = Field(
        ...,
        description="Difficulty level",
    )
    title: str = Field(
        ...,
        description="Human-readable title",
    )
    goal: str = Field(
        default="",
        description="Optional task goal",
    )
class ReviewFinding(BaseModel):
    """Structured code review finding used by auxiliary review utilities.

    One discrete issue discovered during review, with optional location
    information and free-text rationale/remediation.
    """
    title: str = Field(..., description="Short human-readable finding title")
    # Location is optional; line numbers, when present, are 1-based.
    file_path: str = Field(default="", description="Optional file path")
    line: Optional[int] = Field(default=None, ge=1, description="Optional 1-based line number")
    category: Category = Field(default="bug", description="Finding category")
    severity: Severity = Field(default="warning", description="Finding severity")
    rationale: str = Field(default="", description="Why this matters")
    recommendation: str = Field(default="", description="Suggested remediation")
    # Stable identifier for deduplication/matching against grading rubrics.
    rule_id: str = Field(default="", description="Stable detector or rubric identifier")

    @property
    def explanation(self) -> str:
        """Backward-compatible alias used by older grading helpers."""
        return self.rationale

    @property
    def suggested_fix(self) -> str:
        """Backward-compatible alias used by older grading helpers."""
        return self.recommendation
class DirectReviewResponse(BaseModel):
    """Response payload for deterministic direct-review utilities.

    Collects the findings from one review pass, a free-text summary, a
    bounded overall score, and optionally an improved version of the code.
    """

    # Individual findings; empty when the review found nothing.
    issues: List[ReviewFinding] = Field(
        default_factory=list,
    )
    summary: str = Field(
        default="",
    )
    # Overall score, bounded to [0.0, 1.0] like the episode score.
    score: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
    )
    improved_code: Optional[str] = Field(
        default=None,
    )
class TaskGrade(BaseModel):
    """Grading result for task submission.

    Aggregates the overall score with its component scores (syntax, tests,
    quality, runtime) plus review-matching statistics and free-form details.
    """
    score: float = Field(..., ge=0.0, le=1.0, description="Overall score")
    # --- Component scores, each bounded to [0.0, 1.0] ---
    syntax_score: float = Field(default=0.0, ge=0.0, le=1.0)
    tests_passed: int = Field(default=0, ge=0)
    tests_total: int = Field(default=0, ge=0)
    quality_score: float = Field(default=0.0, ge=0.0, le=1.0)
    runtime_score: float = Field(default=0.0, ge=0.0, le=1.0)
    timed_out: bool = Field(default=False)
    # --- Review-finding match statistics (for review-style grading) ---
    matched_issue_ids: List[str] = Field(default_factory=list)
    false_positives: int = Field(default=0, ge=0)
    duplicate_findings: int = Field(default=0, ge=0)
    matched_weight: float = Field(default=0.0, ge=0.0, le=1.0)
    # Free-form extra diagnostics from the grader.
    details: Dict[str, Any] = Field(default_factory=dict)
class HealthResponse(BaseModel):
    """Health check response.

    ``status`` is the constant literal "ok", so a successfully parsed
    response is self-describing; ``task_count`` reports how many tasks
    the environment has loaded.
    """

    status: Literal["ok"] = "ok"
    environment: str = "python_code_review_env"
    task_count: int = Field(
        default=0,
        ge=0,
    )