# environment/models.py
# Typed Pydantic models for OpenEnv interface
# Implements Action, Observation, and Reward schemas

from typing import Optional

from pydantic import BaseModel, model_validator


def _has_text(value: Optional[str]) -> bool:
    """Return True when *value* is a string with at least one non-whitespace character."""
    return value is not None and value.strip() != ""


class Action(BaseModel):
    """
    Action model for the SQL Analyst environment.

    The agent must provide EXACTLY ONE of:
    - sql_query: Execute a SQL query against the database
    - submit_answer: Submit a final answer for grading

    Edge Case Shield: Pydantic model_validator enforces mutual exclusivity.
    A field that is empty or whitespace-only counts as "not provided" and is
    normalized to None, so after validation exactly one field is non-None.
    """

    sql_query: Optional[str] = None
    submit_answer: Optional[str] = None

    @model_validator(mode='after')
    def validate_exactly_one_action(self) -> 'Action':
        """
        Enforce that the agent provides exactly one of sql_query or submit_answer.

        This prevents ambiguous actions and ensures clean state transitions.

        Returns:
            The validated instance, with any blank (empty / whitespace-only)
            field reset to None.

        Raises:
            ValueError: If both fields carry text, or if neither does.
                Pydantic surfaces this to callers as a validation error.
        """
        has_sql = _has_text(self.sql_query)
        has_answer = _has_text(self.submit_answer)

        if has_sql and has_answer:
            raise ValueError(
                "Invalid action: Provide exactly ONE of 'sql_query' or 'submit_answer', not both."
            )

        if not has_sql and not has_answer:
            raise ValueError(
                "Invalid action: Must provide exactly ONE of 'sql_query' or 'submit_answer'."
            )

        # Exactly one field carries text at this point. The other may still
        # hold "" or "   "; leaving it in place would let a consumer that
        # checks `is not None` (or truthiness) pick the wrong branch, so the
        # stored state is made to agree with what was just validated.
        if not has_sql and self.sql_query is not None:
            self.sql_query = None
        if not has_answer and self.submit_answer is not None:
            self.submit_answer = None

        return self


class Observation(BaseModel):
    """
    Observation model representing the current state visible to the agent.

    Fields:
    - schema_info: Database schema information (tables, columns, types)
    - current_question: The task question the agent must answer
    - last_query_result: Result from the most recent SQL query execution
    - error_message: Any error from the last action (empty string if none)

    All four fields are required strings; none has a default.
    """

    schema_info: str
    current_question: str
    last_query_result: str
    error_message: str


class Reward(BaseModel):
    """
    Reward model containing a single float value.

    Reward shaping follows the PRD specification:
    - +0.1: Successful, error-free SQL query
    - -0.1: SQLite syntax error
    - -1.0: Destructive action detected (done=True)
    - -0.5: Step count >= 15 (infinite loop shield, done=True)

    This model only carries the value; it does not itself restrict `value`
    to the amounts listed above.
    """

    value: float