"""ClaimsEnvPro — typed payloads for the 12-verb adjudication gym.

Action surface
    A single ``message`` string carrying up to three semicolon-separated
    commands; each command is one of the 12 verbs plus an optional argument.

Observation surface
    18 typed fields plus a ``revealed_info`` dict that grows as the agent
    invokes information-gathering verbs.

The reserved OpenEnv names (``reset``, ``step``, ``state``, ``close``) are
deliberately *not* used as verbs.  The vocabulary consists of the 11 insurance
verbs from the upstream source plus the new ``VERIFY_PURCHASE`` (Plaid bank
audit).
"""

from __future__ import annotations

from typing import Any, Dict, List

from openenv.core.env_server.types import Action, Observation, State
from pydantic import Field

# ---------------------------------------------------------------------------
# Verb vocabulary (12)
# ---------------------------------------------------------------------------

# Verbs listed as information-gathering in the module docstring's terms.
INFORMATION_VERBS: tuple[str, ...] = (
    "VIEW_QUEUE",
    "OPEN_CLAIM",
    "REVIEW_DOCUMENTS",
    "CHECK_POLICY",
    "INVESTIGATE_FRAUD",
    "REQUEST_INFO",
    "VERIFY_PURCHASE",  # Plaid bank audit (new in claims-env-pro)
    "HANDLE_APPEAL",
)

# The remaining four verbs, grouped as "terminal" by this module.
TERMINAL_VERBS: tuple[str, ...] = (
    "APPROVE",
    "DENY",
    "ESCALATE",
    "END_SHIFT",
)

# Full vocabulary: information verbs first, then terminal verbs.
ALL_VERBS: tuple[str, ...] = (*INFORMATION_VERBS, *TERMINAL_VERBS)

# Import-time guard so the two tuples above cannot drift away from the
# documented vocabulary size without someone noticing.
assert len(ALL_VERBS) == 12, "claims-env-pro defines exactly 12 verbs"


# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------


class ClaimsAction(Action):
    """One turn issued by the adjudicator agent.

    ``message`` holds at most three commands separated by semicolons.  A
    command is one of the 12 verbs, optionally followed by an argument,
    for example:

        OPEN_CLAIM 3
        REVIEW_DOCUMENTS; CHECK_POLICY
        VERIFY_PURCHASE
        APPROVE 8500
        DENY fraud_detected
        END_SHIFT
    """

    # Required field (``...`` means no default).  The description embeds the
    # live verb list so the advertised vocabulary always matches ALL_VERBS.
    message: str = Field(
        ...,
        description=(
            "Adjuster command(s), semicolon-separated (max 3 per step). "
            f"Verbs: {', '.join(ALL_VERBS)}. "
            "Examples: 'OPEN_CLAIM 3; REVIEW_DOCUMENTS; CHECK_POLICY', "
            "'VERIFY_PURCHASE', 'APPROVE 8500', 'DENY fraud_detected', "
            "'HANDLE_APPEAL 4', 'END_SHIFT'."
        ),
    )


# ---------------------------------------------------------------------------
# Observation (18 fields + revealed_info)
# ---------------------------------------------------------------------------


class ClaimsObservation(Observation):
    """Typed observation (18 fields) handed back after every step.

    * ``dashboard`` is the human-readable view, including the
      ``>>> NEXT STEP:`` hint.
    * ``revealed_info`` is the partial-observability bundle (policy lookup,
      fraud signals, plaid hit, etc.).
    * ``dense_step_reward`` is the signed cost/bonus signal that GRPO trains
      on, whereas ``reward`` is the clamped headline number (``reward`` is
      not declared in this class; presumably inherited from ``Observation``).
    """

    # 1. Rendered dashboard text (with the NEXT STEP hint).
    dashboard: str = Field(default="", description="Rendered dashboard")

    # 2-3. Step meters.
    step_number: int = 0
    total_steps: int = 50

    # 4-7. Queue meters.
    claims_in_queue: int = 0
    claims_processed: int = 0
    claims_appealed: int = 0
    active_claim_id: int = -1

    # 8-11. Decision meters.
    correct_decisions: int = 0
    wrong_decisions: int = 0
    fraud_caught: int = 0
    fraud_missed: int = 0

    # 12-14. Live scores.
    accuracy_score: float = 0.0
    fraud_score: float = 0.0
    payout_accuracy: float = 0.0

    # 15. Scenario name.
    task_name: str = ""

    # 16. Pressure label (normal / busy / critical).
    shift_pressure: str = "normal"

    # 17. Dense per-step reward (signed; what GRPO env_reward_fn sums).
    dense_step_reward: float = 0.0

    # 18. Final score (set on the terminal step; 0 otherwise).
    final_score: float = 0.0

    # Partial-observability bundle; not one of the 18 typed meters.
    # default_factory gives every instance its own fresh dict.
    revealed_info: Dict[str, Any] = Field(default_factory=dict)


# ---------------------------------------------------------------------------
# Compatibility aliases — keep older import paths happy
# ---------------------------------------------------------------------------

ClaimsEnvProAction = ClaimsAction
ClaimsEnvProObservation = ClaimsObservation
ClaimsEnvProState = State  # re-exports the base State type unchanged

__all__ = [
    "ClaimsAction",
    "ClaimsObservation",
    "ClaimsEnvProAction",
    "ClaimsEnvProObservation",
    "ClaimsEnvProState",
    "INFORMATION_VERBS",
    "TERMINAL_VERBS",
    "ALL_VERBS",
]