Spaces:
Sleeping
Sleeping
# environment/models.py
# Typed Pydantic models for OpenEnv interface
# Implements Action, Observation, and Reward schemas
| from typing import Optional | |
| from pydantic import BaseModel, model_validator | |
class Action(BaseModel):
    """
    Action model for the SQL Analyst environment.

    The agent must provide EXACTLY ONE of:
    - sql_query: Execute a SQL query against the database
    - submit_answer: Submit a final answer for grading

    A field counts as "provided" only when it is a non-empty string after
    stripping whitespace; ``None``, ``""`` and whitespace-only values are
    treated as absent.

    Edge Case Shield: a Pydantic ``model_validator`` (mode="after") enforces
    mutual exclusivity every time an ``Action`` is constructed or validated.
    """

    sql_query: Optional[str] = None
    submit_answer: Optional[str] = None

    # Without this decorator Pydantic never invokes the method, so the
    # exactly-one rule would silently not be enforced.
    @model_validator(mode="after")
    def validate_exactly_one_action(self) -> "Action":
        """
        Enforce that the agent provides exactly one of sql_query or submit_answer.

        This prevents ambiguous actions and ensures clean state transitions.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If both fields are provided, or if neither is.
                Pydantic surfaces this to the caller as a validation error.
        """
        # Truthiness covers both None and the empty string; strip() makes
        # whitespace-only input count as "not provided".
        has_sql = bool(self.sql_query and self.sql_query.strip())
        has_answer = bool(self.submit_answer and self.submit_answer.strip())

        if has_sql and has_answer:
            raise ValueError(
                "Invalid action: Provide exactly ONE of 'sql_query' or 'submit_answer', not both."
            )
        if not has_sql and not has_answer:
            raise ValueError(
                "Invalid action: Must provide exactly ONE of 'sql_query' or 'submit_answer'."
            )
        return self
class Observation(BaseModel):
    """
    Snapshot of the environment state that is visible to the agent.

    Every field is a required string; none of them has a default value.
    """

    # Database schema information (tables, columns, types).
    schema_info: str

    # The task question the agent must answer.
    current_question: str

    # Result from the most recent SQL query execution.
    last_query_result: str

    # Any error from the last action (empty string if none).
    error_message: str
class Reward(BaseModel):
    """
    Wrapper around a single scalar reward.

    Reward shaping, as laid out in the PRD specification:

    * ``+0.1`` - successful, error-free SQL query
    * ``-0.1`` - SQLite syntax error
    * ``-1.0`` - destructive action detected (done=True)
    * ``-0.5`` - step count >= 15 (infinite loop shield, done=True)

    The values above are documented here only; this model itself accepts
    any float and does not restrict ``value`` to that set.
    """

    # The reward amount for a step.
    value: float