"""Typed models for the python_code_review_env environment.""" from __future__ import annotations from typing import Any, Dict, List, Literal, Optional from pydantic import BaseModel, Field from openenv.core.env_server.types import Action, Observation, State Difficulty = Literal["easy", "medium", "hard"] TaskKind = Literal["syntax_fix", "bug_fix", "optimization"] ActionType = Literal["analyze_code", "edit_code", "run_tests", "submit_solution"] class HistoryEntry(BaseModel): """One environment transition recorded for the agent.""" step: int = Field(..., ge=0) action_type: ActionType status: str = Field(..., description="Short outcome summary.") reward: float = Field(..., gt=0.0, lt=1.0, description="Reward returned for the step.") class RewardDetails(BaseModel): """Transparent reward decomposition for debugging and training.""" value: float = Field(..., gt=0.0, lt=1.0, description="Clamped net reward in (0.0, 1.0).") syntax_reward: float = Field(default=0.0) test_reward: float = Field(default=0.0) correctness_bonus: float = Field(default=0.0) quality_bonus: float = Field(default=0.0) progress_delta: float = Field(default=0.0) invalid_action_penalty: float = Field(default=0.0) timeout_penalty: float = Field(default=0.0) regression_penalty: float = Field(default=0.0) stagnation_penalty: float = Field(default=0.0) reason: str = Field(..., description="Human-readable reward explanation.") prev_score: float = Field(default=0.01, gt=0.0, lt=1.0) curr_score: float = Field(default=0.01, gt=0.0, lt=1.0) code_changed: bool = Field(default=False) class PythonCodeReviewAction(Action): """Action schema exposed to the agent.""" action_type: ActionType = Field(..., description="Environment action to take.") code: Optional[str] = Field( default=None, description="Updated Python source for edit_code or submit_solution actions.", ) class PythonCodeReviewObservation(Observation): """Observation returned by reset and step.""" task_id: str = Field(..., description="Stable task identifier.") title: str = Field(..., description="Human-readable task title.") difficulty: Difficulty task_kind: TaskKind task_description: str = Field(..., description="Task instructions shown to the agent.") current_code: str = Field(..., description="Latest code under review.") errors: str = Field(default="", description="Syntax or execution errors.") test_results: str = Field(default="", description="Public test and benchmark feedback.") visible_tests: List[str] = Field(default_factory=list) history: List[HistoryEntry] = Field(default_factory=list) attempts_remaining: int = Field(..., ge=0) last_action_status: str = Field(default="") score: float = Field(..., gt=0.0, lt=1.0) reward_details: RewardDetails = Field( default_factory=lambda: RewardDetails(value=0.1, reason="Environment reset.") ) class PythonCodeReviewState(State): """Internal environment state exposed through /state.""" task_id: Optional[str] = Field(default=None) difficulty: Optional[Difficulty] = Field(default=None) task_kind: Optional[TaskKind] = Field(default=None) attempts_remaining: int = Field(default=0, ge=0) current_code: str = Field(default="") errors: str = Field(default="") test_results: str = Field(default="") history: List[HistoryEntry] = Field(default_factory=list) score: float = Field(default=0.01, gt=0.0, lt=1.0) done: bool = Field(default=False) class TaskDescriptor(BaseModel): """Static task metadata.""" task_id: str title: str difficulty: Difficulty task_kind: TaskKind task_description: str starter_code: str visible_tests: List[str] = Field(default_factory=list) repo_summary: str = Field(default="") changed_files: List[str] = Field(default_factory=list) available_files: List[str] = Field(default_factory=list) goal: str = Field(default="") max_steps: int = Field(..., ge=1) class TaskSummary(BaseModel): """Compact task listing entry.""" task_id: str difficulty: Difficulty title: str goal: str = Field(default="") class TaskGrade(BaseModel): """Deterministic grader output.""" score: float = Field(..., gt=0.0, lt=1.0) syntax_score: float = Field(default=0.01, gt=0.0, lt=1.0) tests_passed: int = Field(default=0, ge=0) tests_total: int = Field(default=0, ge=0) quality_score: float = Field(default=0.01, gt=0.0, lt=1.0) runtime_score: float = Field(default=0.01, gt=0.0, lt=1.0) timed_out: bool = Field(default=False) details: Dict[str, Any] = Field(default_factory=dict) class HealthResponse(BaseModel): """Health payload for smoke tests.""" status: Literal["ok"] = "ok" environment: str = "python_code_review_env" task_count: int = Field(default=0, ge=0) PythonAction = PythonCodeReviewAction PythonObservation = PythonCodeReviewObservation PythonState = PythonCodeReviewState