# claims-env-pro / models.py
# Uploaded by akhiilll.
# Commit 027ea1a (verified): claims-env-pro v2.0.0 — initial port
# (12 verbs / 5 tasks / 18 obs / 6-comp reward / Plaid)
"""ClaimsEnvPro — typed payloads for the 12-verb adjudication gym.

The action surface is a single ``message`` string carrying up to three
semicolon-separated commands (each one of the 12 verbs, with an optional
argument).

The observation surface is 18 typed fields plus a ``revealed_info`` dict
that grows as the agent invokes information-gathering verbs.

Reserved OpenEnv names (``reset``, ``step``, ``state``, ``close``) are
deliberately *not* used as verbs — only the 11 insurance verbs from the
upstream source plus the new ``VERIFY_PURCHASE`` (Plaid bank audit).
"""
from __future__ import annotations
from typing import Any, Dict, List
from openenv.core.env_server.types import Action, Observation, State
from pydantic import Field
# ---------------------------------------------------------------------------
# Verb vocabulary (12)
# ---------------------------------------------------------------------------
# Information-gathering verbs (eight of the twelve).
INFORMATION_VERBS: tuple[str, ...] = (
    "VIEW_QUEUE", "OPEN_CLAIM",
    "REVIEW_DOCUMENTS", "CHECK_POLICY",
    "INVESTIGATE_FRAUD", "REQUEST_INFO",
    "VERIFY_PURCHASE",  # Plaid bank audit (new in claims-env-pro)
    "HANDLE_APPEAL",
)

# The four verbs classified as terminal.
TERMINAL_VERBS: tuple[str, ...] = ("APPROVE", "DENY", "ESCALATE", "END_SHIFT")

# Full vocabulary: information verbs first, terminal verbs after.
ALL_VERBS: tuple[str, ...] = (*INFORMATION_VERBS, *TERMINAL_VERBS)

# Import-time guard so an accidental edit to either tuple is noticed early.
assert len(ALL_VERBS) == 12, "claims-env-pro defines exactly 12 verbs"
# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------
class ClaimsAction(Action):
    """One turn issued by the adjudicator agent.

    The whole turn travels in ``message``: up to three commands separated
    by semicolons, where each command is one of the 12 verbs and may be
    followed by an argument. Examples::

        OPEN_CLAIM 3
        REVIEW_DOCUMENTS; CHECK_POLICY
        VERIFY_PURCHASE
        APPROVE 8500
        DENY fraud_detected
        END_SHIFT

    No validator is declared on this class, so neither the verb names nor
    the three-command limit are checked by the model itself.
    """

    # Required field (no default); the description embeds the verb list
    # from ALL_VERBS so the schema text follows the vocabulary.
    message: str = Field(
        ...,
        description=(
            "Adjuster command(s), semicolon-separated (max 3 per step). "
            f"Verbs: {', '.join(ALL_VERBS)}. "
            "Examples: 'OPEN_CLAIM 3; REVIEW_DOCUMENTS; CHECK_POLICY', "
            "'VERIFY_PURCHASE', 'APPROVE 8500', 'DENY fraud_detected', "
            "'HANDLE_APPEAL 4', 'END_SHIFT'."
        ),
    )
# ---------------------------------------------------------------------------
# Observation (18 fields + revealed_info)
# ---------------------------------------------------------------------------
class ClaimsObservation(Observation):
    """Typed snapshot handed back to the agent after each step.

    Declares 18 meter fields plus ``revealed_info``:

    * ``dashboard`` is the human-readable view and includes the
      ``>>> NEXT STEP:`` hint.
    * ``revealed_info`` is the partial-observability bundle (policy
      lookup, fraud signals, Plaid hit, etc.).
    * ``dense_step_reward`` is the signed cost/bonus signal that GRPO
      trains on; ``reward`` is the clamped headline number. ``reward`` is
      not declared in this class — presumably inherited from
      ``Observation``; confirm against openenv.
    """

    # 1. Dashboard (rendered text + NEXT STEP hint)
    dashboard: str = Field(default="", description="Rendered dashboard")

    # 2-3. Step meters
    step_number: int = 0
    total_steps: int = 50

    # 4-7. Queue meters
    claims_in_queue: int = 0
    claims_processed: int = 0
    claims_appealed: int = 0
    active_claim_id: int = -1  # -1 is the default when nothing else is set

    # 8-11. Decision meters
    correct_decisions: int = 0
    wrong_decisions: int = 0
    fraud_caught: int = 0
    fraud_missed: int = 0

    # 12-14. Live scores
    accuracy_score: float = 0.0
    fraud_score: float = 0.0
    payout_accuracy: float = 0.0

    # 15. Scenario name
    task_name: str = ""

    # 16. Pressure label (normal / busy / critical)
    shift_pressure: str = "normal"

    # 17. Dense per-step reward (signed; what GRPO env_reward_fn sums)
    dense_step_reward: float = 0.0

    # 18. Final score (set on terminal step; 0 otherwise)
    final_score: float = 0.0

    # Partial-observability bundle (not counted in 18 typed meters).
    # default_factory gives every instance its own fresh dict.
    revealed_info: Dict[str, Any] = Field(default_factory=dict)
# ---------------------------------------------------------------------------
# Compatibility aliases — keep older import paths happy
# ---------------------------------------------------------------------------
# Older ``ClaimsEnvPro*`` names are plain aliases: each one is the very same
# object as its target, not a subclass. The state alias points straight at
# the imported ``State`` type.
ClaimsEnvProState = State
ClaimsEnvProObservation = ClaimsObservation
ClaimsEnvProAction = ClaimsAction

# Names exported by ``from <module> import *``.
__all__ = [
    # Payload models
    "ClaimsAction",
    "ClaimsObservation",
    # Compatibility aliases
    "ClaimsEnvProAction",
    "ClaimsEnvProObservation",
    "ClaimsEnvProState",
    # Verb vocabulary
    "INFORMATION_VERBS",
    "TERMINAL_VERBS",
    "ALL_VERBS",
]