from typing import Dict, List, Literal, Optional

from openenv.core.env_server import Action, Observation, State
from pydantic import Field


# NOTE: the models below are documented with `#` comments rather than class
# docstrings on purpose: pydantic copies a model's docstring into the
# "description" of its generated JSON schema, so adding one would change
# the schema these classes emit.


# Action payload sent by the agent.
#
# `action_type` is required and must be one of the five literal values.
# The remaining fields are all optional and default to None; per their
# descriptions each one carries the argument for a single action type.
# Nothing in this class enforces that the matching argument is present --
# any such check would have to live elsewhere (not visible in this file).
class ToolUseAction(Action):
    action_type: Literal[
        "review_ticket",
        "inspect_artifact",
        "search_policy",
        "draft_reply",
        "submit_resolution",
    ] = Field(..., description="The action the agent wants to execute.")

    # Argument for "inspect_artifact".
    artifact_id: Optional[str] = Field(
        default=None,
        description="Artifact identifier for inspect_artifact, such as order or risk_log.",
    )

    # Argument for "search_policy".
    query: Optional[str] = Field(
        default=None,
        description="Policy name or search query for search_policy.",
    )

    # Argument for "draft_reply".
    message: Optional[str] = Field(
        default=None,
        description="Customer-facing reply draft used with draft_reply.",
    )

    # Argument for "submit_resolution".
    resolution_code: Optional[str] = Field(
        default=None,
        description="Final resolution code used with submit_resolution.",
    )


# Observation returned to the agent.
#
# Required fields (no default): task_id, difficulty, objective,
# customer_message, workspace_summary, remaining_steps.
# List fields default to a fresh empty list; the two "last_*" fields
# default to None; current_score defaults to 0.0.
class ToolUseObservation(Observation):
    # --- task description (all required) ---
    task_id: str = Field(..., description="Deterministic task identifier.")
    difficulty: Literal["easy", "medium", "hard"] = Field(
        ...,
        description="Difficulty tier for the active task.",
    )
    objective: str = Field(..., description="Concrete task objective for the agent.")
    customer_message: str = Field(..., description="The raw customer support ticket.")
    workspace_summary: str = Field(
        ...,
        description="Short summary of known evidence and remaining work.",
    )

    # --- what the agent may do / has gathered ---
    # Typed as plain List[str], so membership in the Literal sets above is
    # not validated by this model.
    available_actions: List[str] = Field(
        default_factory=list,
        description="Available environment actions.",
    )
    available_resolution_codes: List[str] = Field(
        default_factory=list,
        description="Resolution codes accepted by submit_resolution.",
    )
    collected_evidence: List[str] = Field(
        default_factory=list,
        description="Evidence keys collected so far, such as ticket or payment.",
    )

    # --- feedback from the previous step ---
    last_tool_result: Optional[str] = Field(
        default=None,
        description="Most recent tool or grader output shown to the agent.",
    )
    last_action_error: Optional[str] = Field(
        default=None,
        description="Validation error for the last action, if any.",
    )

    # --- progress ---
    remaining_steps: int = Field(
        ...,
        description="How many steps are left before the episode ends.",
    )
    # The [0, 1] range is stated only in the description; no ge/le bound is
    # declared on the field itself.
    current_score: float = Field(
        default=0.0,
        description="Current deterministic grader score in the [0, 1] range.",
    )


# Episode state record.
#
# Every field declared here has a default, so none of them is required at
# construction time (any requirements inherited from `State` are not visible
# in this file). Presumably this is the environment's internal bookkeeping
# -- confirm against the code that populates it.
class ToolUseState(State):
    # --- task identity ---
    task_id: str = ""
    task_name: str = ""
    # Plain str here (unlike the Literal used on ToolUseObservation), so the
    # empty-string default is accepted.
    difficulty: str = ""
    objective: str = ""

    # --- scoring ---
    cumulative_reward: float = 0.0
    final_score: float = 0.0

    # --- agent outputs so far ---
    drafted_reply: Optional[str] = None
    resolution_code: Optional[str] = None

    # --- grading targets ---
    expected_resolution_code: str = ""
    required_evidence: List[str] = Field(default_factory=list)

    # --- progress tracking ---
    # default_factory gives each instance its own list/dict.
    collected_evidence: List[str] = Field(default_factory=list)
    action_history: List[str] = Field(default_factory=list)
    repeat_action_count: int = 0
    last_action_error: Optional[str] = None

    # --- lookup tables (str -> str) ---
    known_artifacts: Dict[str, str] = Field(default_factory=dict)
    known_policies: Dict[str, str] = Field(default_factory=dict)