Spaces:
Sleeping
Sleeping
"""ClaimsEnvPro — typed payloads for the 12-verb adjudication gym.

The action surface is a single ``message`` string carrying up to three
semicolon-separated commands (one of the 12 verbs, optional argument).
The observation surface is 18 typed fields plus a ``revealed_info`` dict
that grows as the agent invokes information-gathering verbs.

Reserved OpenEnv names (``reset``, ``step``, ``state``, ``close``) are
deliberately *not* used as verbs; the vocabulary consists only of the 11
insurance verbs from the upstream source plus the new ``VERIFY_PURCHASE``
(Plaid bank audit).
"""
| from __future__ import annotations | |
| from typing import Any, Dict, List | |
| from openenv.core.env_server.types import Action, Observation, State | |
| from pydantic import Field | |
# ---------------------------------------------------------------------------
# Verb vocabulary (12)
# ---------------------------------------------------------------------------
# The vocabulary is split into two fixed tuples that are concatenated into
# ``ALL_VERBS``. Tuples (not lists) keep the vocabulary immutable at runtime.

# Verbs grouped as "information" verbs.
# NOTE(review): presumably these are the verbs that populate the
# observation's ``revealed_info`` — confirm against the environment's
# step logic.
INFORMATION_VERBS: tuple[str, ...] = (
    "VIEW_QUEUE",
    "OPEN_CLAIM",
    "REVIEW_DOCUMENTS",
    "CHECK_POLICY",
    "INVESTIGATE_FRAUD",
    "REQUEST_INFO",
    "VERIFY_PURCHASE",  # Plaid bank audit (new in claims-env-pro)
    "HANDLE_APPEAL",
)

# Verbs grouped as "terminal" verbs.
# NOTE(review): presumably these close out a claim or the shift — confirm
# against the environment's step logic.
TERMINAL_VERBS: tuple[str, ...] = (
    "APPROVE",
    "DENY",
    "ESCALATE",
    "END_SHIFT",
)

# Full vocabulary: information verbs first, then terminal verbs.
ALL_VERBS: tuple[str, ...] = INFORMATION_VERBS + TERMINAL_VERBS

# Import-time sanity checks on the vocabulary. The count check alone would
# still pass if a verb were duplicated within or across the two tuples
# (12 entries, fewer distinct verbs), so uniqueness is checked separately.
assert len(ALL_VERBS) == 12, "claims-env-pro defines exactly 12 verbs"
assert len(set(ALL_VERBS)) == len(ALL_VERBS), "claims-env-pro verbs must be unique"
# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------
class ClaimsAction(Action):
    """One turn submitted by the adjudicator agent.

    The turn is a single ``message`` string holding up to three commands
    separated by semicolons. Each command is one of the 12 verbs, optionally
    followed by an argument, for example::

        OPEN_CLAIM 3
        REVIEW_DOCUMENTS; CHECK_POLICY
        VERIFY_PURCHASE
        APPROVE 8500
        DENY fraud_detected
        END_SHIFT
    """

    # Required field (``...`` means no default). The three-command limit and
    # the verb list are only *described* here; this class declares no
    # validator that enforces them.
    message: str = Field(
        ...,
        description=(
            "Adjuster command(s), semicolon-separated (max 3 per step). "
            f"Verbs: {', '.join(ALL_VERBS)}. "
            "Examples: 'OPEN_CLAIM 3; REVIEW_DOCUMENTS; CHECK_POLICY', "
            "'VERIFY_PURCHASE', 'APPROVE 8500', 'DENY fraud_detected', "
            "'HANDLE_APPEAL 4', 'END_SHIFT'."
        ),
    )
# ---------------------------------------------------------------------------
# Observation (18 fields + revealed_info)
# ---------------------------------------------------------------------------
class ClaimsObservation(Observation):
    """Typed observation (18 meters) handed back after every step.

    ``dashboard`` holds the human-readable view, including the
    ``>>> NEXT STEP:`` hint. ``revealed_info`` holds the partial-
    observability bundle (policy lookup, fraud signals, plaid hit, etc.).
    ``dense_step_reward`` is the signed cost/bonus signal that GRPO trains
    on; ``reward`` is the clamped headline number (``reward`` is not
    declared in this class — presumably inherited from ``Observation``).

    Every field has a default, so an instance can be built with no
    arguments.
    """

    # 1. Dashboard (rendered text + NEXT STEP hint)
    dashboard: str = Field("", description="Rendered dashboard")

    # 2-3. Step meters
    step_number: int = 0
    total_steps: int = 50

    # 4-7. Queue meters
    claims_in_queue: int = 0
    claims_processed: int = 0
    claims_appealed: int = 0
    active_claim_id: int = -1  # NOTE(review): -1 presumably means "no claim open" — confirm

    # 8-11. Decision meters
    correct_decisions: int = 0
    wrong_decisions: int = 0
    fraud_caught: int = 0
    fraud_missed: int = 0

    # 12-14. Live scores
    accuracy_score: float = 0.0
    fraud_score: float = 0.0
    payout_accuracy: float = 0.0

    # 15. Scenario name
    task_name: str = ""

    # 16. Pressure label (normal / busy / critical)
    shift_pressure: str = "normal"

    # 17. Dense per-step reward (signed; what GRPO env_reward_fn sums)
    dense_step_reward: float = 0.0

    # 18. Final score (set on terminal step; 0 otherwise)
    final_score: float = 0.0

    # Partial-observability bundle (not counted in the 18 typed meters).
    # ``default_factory`` gives each instance its own fresh dict.
    revealed_info: Dict[str, Any] = Field(default_factory=dict)
# ---------------------------------------------------------------------------
# Compatibility aliases — keep older import paths happy
# ---------------------------------------------------------------------------
# Each alias is the same object as its target (no subclassing), so
# ``isinstance`` checks and identity comparisons work under either name.
ClaimsEnvProAction = ClaimsAction
ClaimsEnvProObservation = ClaimsObservation
ClaimsEnvProState = State  # the imported ``State`` type, re-exported as-is

# Public API of this module (what ``from <module> import *`` exposes).
__all__ = [
    # Payload models
    "ClaimsAction",
    "ClaimsObservation",
    # Compatibility aliases
    "ClaimsEnvProAction",
    "ClaimsEnvProObservation",
    "ClaimsEnvProState",
    # Verb vocabulary
    "INFORMATION_VERBS",
    "TERMINAL_VERBS",
    "ALL_VERBS",
]