| """Typed models for the Supermail environment.""" |
|
|
| from __future__ import annotations |
|
|
| from typing import Any, Dict, List, Literal |
|
|
| from pydantic import BaseModel, Field |
|
|
| try: |
| from openenv.core.env_server.types import Action, Observation, State |
| except ImportError: |
| class Action(BaseModel): |
| """Fallback OpenEnv Action model.""" |
|
|
| class Observation(BaseModel): |
| """Fallback OpenEnv Observation model.""" |
|
|
| done: bool = False |
| reward: float | None = None |
| metadata: Dict[str, Any] = Field(default_factory=dict) |
|
|
| class State(BaseModel): |
| """Fallback OpenEnv State model.""" |
|
|
| episode_id: str |
| step_count: int = 0 |
|
|
|
|
| PriorityLabel = Literal["urgent", "normal", "spam"] |
| CategoryLabel = Literal["billing", "delivery", "technical", "general"] |
| ResolutionLabel = Literal["respond_immediately", "assign_to_team", "ignore"] |
|
|
|
|
| class SupportAction(Action): |
| """Action submitted by the agent on each step.""" |
|
|
| priority: PriorityLabel | None = Field( |
| default=None, |
| description="Priority decision for the email.", |
| ) |
| category: CategoryLabel | None = Field( |
| default=None, |
| description="Category decision for the email when required.", |
| ) |
| action: ResolutionLabel | None = Field( |
| default=None, |
| description="Recommended operational action when required.", |
| ) |
| notes: str = Field( |
| default="", |
| description="Optional short explanation for audit logging.", |
| ) |
|
|
|
|
| class SupportObservation(Observation): |
| """Observation returned by the environment.""" |
|
|
| task_id: str = Field(default="", description="Stable task identifier.") |
| task_type: str = Field(default="", description="Difficulty level.") |
| benchmark: str = Field(default="supermail", description="Benchmark name.") |
| objective: str = Field(default="", description="What the agent must decide.") |
| email: str = Field(default="", description="Incoming support email body.") |
| context: Dict[str, str] = Field( |
| default_factory=dict, |
| description="Structured metadata about the customer or ticket.", |
| ) |
| required_fields: List[str] = Field( |
| default_factory=list, |
| description="Decision fields required to finish the task.", |
| ) |
| allowed_values: Dict[str, List[str]] = Field( |
| default_factory=dict, |
| description="Allowed label values for each decision field.", |
| ) |
| history: List[str] = Field( |
| default_factory=list, |
| description="Compact summaries of prior attempts in the episode.", |
| ) |
| feedback: str = Field(default="", description="Step-level grader feedback.") |
| score: float = Field(default=0.01, description="Current cumulative score.") |
| attempts_remaining: int = Field( |
| default=0, |
| description="How many attempts remain before the episode ends.", |
| ) |
|
|
|
|
| class SupportState(State): |
| """Server-side state exposed by the environment.""" |
|
|
| task_id: str | None = None |
| difficulty: str | None = None |
| score: float = 0.01 |
| matched_fields: List[str] = Field(default_factory=list) |
| attempts_remaining: int = 0 |
|
|