from typing import Dict, List, Literal, Optional

from pydantic import BaseModel, Field

# NOTE: the models below are documented with `#` comments rather than class
# docstrings on purpose: pydantic copies a model's docstring into the
# generated JSON schema `description`, which would change the emitted schema.


# One tool invocation: the tool name plus the optional file path and content
# that go with it.
class Action(BaseModel):
    # Required; must be exactly one of the five listed tool names.
    tool: Literal["READ_FILE", "WRITE_FILE", "RUN_LINTER", "RUN_TESTS", "SUBMIT"] = Field(
        ..., description="The tool to use."
    )
    # Optional; defaults to None when no file is involved.
    filepath: Optional[str] = Field(None, description="The file to read or write.")
    # Optional; defaults to None. No cross-field validation ties this to
    # tool == "WRITE_FILE" -- the association is stated in the description only.
    content: Optional[str] = Field(
        None, description="The full resolved content to save (if WRITE_FILE)."
    )


# Status report made of a status string, a tool's output, and two file lists.
# NOTE(review): presumably returned to the agent after each Action -- confirm
# against the code that constructs it.
class Observation(BaseModel):
    current_status: str
    tool_output: str
    # NOTE(review): "markers" presumably means unresolved conflict markers --
    # confirm against the producer.
    files_with_markers: List[str]
    available_files: List[str]


# A numeric score together with free-text feedback.
class Reward(BaseModel):
    # Required; validated to lie in the closed interval [0.0, 1.0].
    score: float = Field(..., ge=0.0, le=1.0)
    feedback: str


# Snapshot of a task: identifier, file mapping, step counter, and done flag.
class State(BaseModel):
    task_id: str
    # String-to-string mapping.
    # NOTE(review): presumably file path -> file content -- confirm with the
    # code that populates it.
    files: Dict[str, str]
    step_count: int
    is_done: bool