# Pydantic schemas for an email-triage environment: the email payload, the
# per-step observation, the agent action, the reward record, and the combined
# environment state.
from typing import Any, Dict, Optional

from pydantic import BaseModel, Field


# A single email. The three boolean flags all default to False, so only the
# four string fields are required when constructing an instance.
class Email(BaseModel):
    id: str
    sender: str
    subject: str
    body: str
    is_urgent: bool = False
    is_spam: bool = False
    is_internal: bool = False


# What is exposed on each step: the count of remaining emails, the email
# currently under triage (None when the inbox is empty, per the field
# description), and the elapsed episode time.
class Observation(BaseModel):
    emails_remaining: int
    current_email: Optional[Email] = Field(
        default=None,
        description="The current email to triage, or None if inbox is empty",
    )
    time_elapsed: float = Field(
        default=0.0,
        description="Time elapsed in the episode",
    )


# The action chosen for the current email, encoded as an integer code.
# NOTE: the 0-4 meaning lives only in the description text; this model
# does not enforce any bounds on the value.
class Action(BaseModel):
    action_type: int = Field(
        ...,
        description="0: Ignore, 1: Reply, 2: Forward, 3: Archive, 4: Delete",
    )


# Reward record: the step reward and the running total are required, while
# the explanatory message defaults to an empty string.
class Reward(BaseModel):
    step_reward: float = Field(
        ...,
        description="Reward for the current step",
    )
    total_reward: float = Field(
        ...,
        description="Total accumulated reward",
    )
    message: str = Field(
        default="",
        description="Reasoning or description for the reward given",
    )


# Full state record: observation and reward plus the terminated / truncated
# flags and a free-form info mapping. Every field is required.
class EnvState(BaseModel):
    observation: Observation
    reward: Reward
    terminated: bool
    truncated: bool
    info: Dict[str, Any]