| from pydantic import BaseModel | |
| from typing import Optional, List | |
| from enum import Enum | |
| class ReviewDecision(str, Enum): | |
| APPROVE = "approve" | |
| REQUEST_CHANGES = "request_changes" | |
| ESCALATE = "escalate" | |
| class PRAction(BaseModel): | |
| decision: ReviewDecision | |
| comment: str # Full reviewer comment — must identify root cause, not just symptom | |
| issue_category: str # One of: "logic", "security", "correctness", "performance", "none" | |
| class PRObservation(BaseModel): | |
| turn: int | |
| diff: str | |
| pr_title: str | |
| pr_description: str | |
| review_history: List[dict] # {"role": "reviewer"|"author", "content": str} | |
| author_response: Optional[str] | |
| done: bool | |
| message: str | |
| class PRState(BaseModel): | |
| episode_id: str | |
| task_name: str | |
| turn: int | |
| max_turns: int | |
| review_history: List[dict] | |
| done: bool | |
| success: bool | |
| cumulative_reward: float | |