"""Typed models for Python code review and repair environment."""

from __future__ import annotations

from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field

from compat import Action, Observation, State


Difficulty = Literal["easy", "medium", "hard"]
TaskKind = Literal["syntax_fix", "bug_fix", "optimization"]
ActionType = Literal["analyze_code", "edit_code", "run_tests", "submit_solution"]
Category = Literal["bug", "security", "performance", "maintainability", "style", "testing"]
Severity = Literal["critical", "warning", "info"]


class HistoryEntry(BaseModel):
    """Record of one action taken during an episode."""

    step: int = Field(..., ge=0)
    action_type: ActionType
    status: str = Field(..., description="Outcome message")
    reward: float = Field(...)


class RewardDetails(BaseModel):
    """Detailed reward breakdown for transparent agent feedback.

    

    The reward system is dynamic and multi-component, with 6 independent sources:

    

    1. Progress Reward (max +0.25)

       - Awarded for score improvement from previous step

       - Formula: min(PROGRESS_SCALE * score_delta, 0.25)

       - Encourages continuous improvement

    

    2. Syntax Reward (max +0.35)

       - One-time bonus for fixing syntax errors (first compile)

       - Applied when code transitions from uncompilable to compilable

       - Acknowledges the critical first step of valid code

    

    3. Test Reward (max +0.20)

       - Based on improvement in test pass rate

       - Formula: min(TEST_PASS_REWARD_SCALE * test_improvement, 0.20)

       - Rewards incremental test progress

    

    4. Quality Reward (max +0.15)

       - Based on AST-detected code quality metrics

       - Rewards improvements in structure, readability, best practices

       - Uses deterministic grader feedback

    

    5. Stagnation Penalty (−0.10)

       - Applied when agent acts but code doesn't change

       - Encourages editing rather than repeated analysis

       - Configurable via STAGNATION_PENALTY constant

    

    6. Regression Penalty (scale −0.20)

       - Applied when score decreases from previous step

       - Formula: REGRESSION_PENALTY_SCALE * abs(score_delta)

       - Discourages actions that make code worse

    

    Final Reward: clamp(progress + syntax + test + quality - stagnation - regression, -1.0, +1.0)

    

    The result is always bounded in [-1.0, +1.0], providing interpretable feedback for learning.

    """

    value: float = Field(..., description="Net scalar reward for this step (bounded in [-1.0, +1.0])")
    syntax_reward: float = Field(default=0.0, description="Bonus for fixing syntax errors (max +0.35)")
    test_reward: float = Field(default=0.0, description="Reward from test improvements (max +0.20)")
    quality_bonus: float = Field(default=0.0, description="Bonus for code quality improvements (max +0.15)")
    correctness_bonus: float = Field(default=0.0, description="Bonus for full correctness (max +0.50)")
    progress_delta: float = Field(default=0.0, description="Reward from score improvement (max +0.25)")
    stagnation_penalty: float = Field(default=0.0, description="Penalty for unchanged code (−0.10)")
    regression_penalty: float = Field(default=0.0, description="Penalty for score decline (scale −0.20)")
    invalid_action_penalty: float = Field(default=0.0, description="Penalty for invalid actions (−0.15)")
    timeout_penalty: float = Field(default=0.0, description="Penalty for execution timeout (−0.15)")
    reason: str = Field(..., description="Human-readable explanation of the reward")
    
    # Debug information for transparency
    prev_score: float = Field(default=0.0, description="Score before this step")
    curr_score: float = Field(default=0.0, description="Score after this step")
    code_changed: bool = Field(default=False, description="Whether the action modified the code")
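

# A minimal, illustrative sketch (not part of this module's API) showing how
# the net reward could be reassembled from the six core components described
# in the RewardDetails docstring. The actual combination lives in the
# environment's reward logic; this helper only mirrors the documented formula.
def _example_combine_reward(details: RewardDetails) -> float:
    """Recompute the documented six-component formula (illustrative only)."""
    raw = (
        details.progress_delta
        + details.syntax_reward
        + details.test_reward
        + details.quality_bonus
        - details.stagnation_penalty
        - details.regression_penalty
    )
    # Clamp to the documented [-1.0, +1.0] bound.
    return max(-1.0, min(1.0, raw))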


class PythonCodeReviewAction(Action):
    """Action space for code review environment."""

    action_type: ActionType = Field(..., description="Type of action to perform")
    code: Optional[str] = Field(default=None, description="New code for edit_code actions")
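
    # Example (illustrative; the snippet below is hypothetical usage, not a
    # documented default):
    #
    #     PythonCodeReviewAction(
    #         action_type="edit_code",
    #         code="def add(a, b):\n    return a + b\n",
    #     )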


class PythonCodeReviewObservation(Observation):
    """Observation returned by reset() and step()."""

    task_id: str = Field(..., description="Current task identifier")
    title: str = Field(default="", description="Human-readable task title")
    difficulty: Difficulty = Field(..., description="Task difficulty level")
    task_kind: Optional[TaskKind] = Field(default=None, description="Task type")
    task_description: str = Field(..., description="Detailed task description")
    current_code: str = Field(..., description="Current code state")
    errors: str = Field(..., description="Syntax/compilation errors, if any")
    test_results: str = Field(..., description="Results from test execution")
    visible_tests: List[str] = Field(default_factory=list, description="Public test cases")
    history: List[HistoryEntry] = Field(default_factory=list, description="Action history")
    attempts_remaining: int = Field(..., ge=0, description="Actions left in episode")
    last_action_status: str = Field(default="", description="Outcome message from the last action")
    score: float = Field(..., ge=0.0, le=1.0, description="Current episode score")
    reward_details: RewardDetails = Field(
        default_factory=lambda: RewardDetails(value=0.0, reason="Reset"),
        description="Detailed reward breakdown for the last action",
    )
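

# Example (illustrative): a consumer could surface the last step's reward
# breakdown from an observation via the fields defined above:
#
#     print(obs.reward_details.reason, obs.reward_details.value)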


class PythonCodeReviewState(State):
    """Exposed environment state."""

    episode_id: str = Field(..., description="Unique episode identifier")
    step_count: int = Field(default=0, ge=0)
    task_id: Optional[str] = Field(default=None)
    difficulty: Optional[Difficulty] = Field(default=None)
    task_kind: Optional[TaskKind] = Field(default=None)
    attempts_remaining: int = Field(default=0, ge=0)
    current_code: str = Field(default="")
    errors: str = Field(default="")
    test_results: str = Field(default="")
    history: List[HistoryEntry] = Field(default_factory=list)
    score: float = Field(default=0.0, ge=0.0, le=1.0)
    done: bool = Field(default=False)


class TaskDescriptor(BaseModel):
    """Public task metadata."""

    task_id: str = Field(..., description="Stable task identifier")
    title: str = Field(..., description="Human-readable title")
    difficulty: Difficulty = Field(..., description="Difficulty level")
    task_kind: Optional[TaskKind] = Field(default=None, description="Type of task")
    task_description: str = Field(default="", description="Full task description")
    starter_code: str = Field(default="", description="Initial broken code")
    visible_tests: List[str] = Field(default_factory=list, description="Public test cases")
    goal: str = Field(default="", description="Optional goal summary for review-style tasks")
    repo_summary: str = Field(default="", description="Optional repository context")
    changed_files: List[str] = Field(default_factory=list, description="Changed files for review-style tasks")
    available_files: List[str] = Field(default_factory=list, description="Browsable files for review-style tasks")
    max_steps: int = Field(..., ge=1, description="Maximum steps allowed")


class TaskSummary(BaseModel):
    """Lightweight task metadata for list endpoints."""

    task_id: str = Field(..., description="Stable task identifier")
    difficulty: Difficulty = Field(..., description="Difficulty level")
    title: str = Field(..., description="Human-readable title")
    goal: str = Field(default="", description="Optional task goal")


class ReviewFinding(BaseModel):
    """Structured code review finding used by auxiliary review utilities."""

    title: str = Field(..., description="Short human-readable finding title")
    file_path: str = Field(default="", description="Optional file path")
    line: Optional[int] = Field(default=None, ge=1, description="Optional 1-based line number")
    category: Category = Field(default="bug", description="Finding category")
    severity: Severity = Field(default="warning", description="Finding severity")
    rationale: str = Field(default="", description="Why this matters")
    recommendation: str = Field(default="", description="Suggested remediation")
    rule_id: str = Field(default="", description="Stable detector or rubric identifier")

    @property
    def explanation(self) -> str:
        """Backward-compatible alias used by older grading helpers."""
        return self.rationale

    @property
    def suggested_fix(self) -> str:
        """Backward-compatible alias used by older grading helpers."""
        return self.recommendation
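

# Example (illustrative; all field values below are hypothetical):
#
#     finding = ReviewFinding(
#         title="Mutable default argument",
#         file_path="app/utils.py",
#         line=42,
#         category="bug",
#         severity="warning",
#         rationale="A shared default list leaks state across calls.",
#         recommendation="Default to None and build the list inside the function.",
#     )
#     finding.explanation == finding.rationale  # alias kept for older helpers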


class DirectReviewResponse(BaseModel):
    """Response payload for deterministic direct-review utilities."""

    issues: List[ReviewFinding] = Field(default_factory=list)
    summary: str = Field(default="")
    score: float = Field(default=0.0, ge=0.0, le=1.0)
    improved_code: Optional[str] = Field(default=None)


class TaskGrade(BaseModel):
    """Grading result for task submission."""

    score: float = Field(..., ge=0.0, le=1.0, description="Overall score")
    syntax_score: float = Field(default=0.0, ge=0.0, le=1.0)
    tests_passed: int = Field(default=0, ge=0)
    tests_total: int = Field(default=0, ge=0)
    quality_score: float = Field(default=0.0, ge=0.0, le=1.0)
    runtime_score: float = Field(default=0.0, ge=0.0, le=1.0)
    timed_out: bool = Field(default=False)
    matched_issue_ids: List[str] = Field(default_factory=list)
    false_positives: int = Field(default=0, ge=0)
    duplicate_findings: int = Field(default=0, ge=0)
    matched_weight: float = Field(default=0.0, ge=0.0, le=1.0)
    details: Dict[str, Any] = Field(default_factory=dict)


class HealthResponse(BaseModel):
    """Health check response."""

    status: Literal["ok"] = "ok"
    environment: str = "python_code_review_env"
    task_count: int = Field(default=0, ge=0)