Spaces:

akhiilll
/

claims-env-pro

Sleeping

App Files Files Community

claims-env-pro / models.py

akhiilll

claims-env-pro v2.0.0 — initial port (12 verbs / 5 tasks / 18 obs / 6-comp reward / Plaid)

027ea1a verified 12 days ago

raw

history blame contribute delete

4.85 kB

	"""ClaimsEnvPro — typed payloads for the 12-verb adjudication gym.

	The action surface is a single ``message`` string carrying up to three
	semicolon-separated commands (one of the 12 verbs, optional argument).
	The observation surface is 18 typed fields plus a ``revealed_info`` dict
	that grows as the agent invokes information-gathering verbs.

	Reserved OpenEnv names (``reset``, ``step``, ``state``, ``close``) are
	deliberately not used as verbs — only the 11 insurance verbs from the
	upstream source plus the new ``VERIFY_PURCHASE`` (Plaid bank audit).
	"""

	from __future__ import annotations

	from typing import Any, Dict, List

	from openenv.core.env_server.types import Action, Observation, State
	from pydantic import Field


	# ---------------------------------------------------------------------------
	# Verb vocabulary (12)
	# ---------------------------------------------------------------------------

	# Verbs the agent uses to look things up before deciding (8 entries).
	INFORMATION_VERBS: tuple[str, ...] = (
	    "VIEW_QUEUE", "OPEN_CLAIM", "REVIEW_DOCUMENTS", "CHECK_POLICY",
	    "INVESTIGATE_FRAUD", "REQUEST_INFO",
	    "VERIFY_PURCHASE",  # Plaid bank audit (new in claims-env-pro)
	    "HANDLE_APPEAL",
	)

	# Decision / wrap-up verbs (4 entries).
	TERMINAL_VERBS: tuple[str, ...] = ("APPROVE", "DENY", "ESCALATE", "END_SHIFT")

	# Full vocabulary: information verbs first, then terminal verbs.
	ALL_VERBS: tuple[str, ...] = (*INFORMATION_VERBS, *TERMINAL_VERBS)

	# Import-time guard so an accidental add/remove above is noticed immediately.
	assert len(ALL_VERBS) == 12, "claims-env-pro defines exactly 12 verbs"


	# ---------------------------------------------------------------------------
	# Action
	# ---------------------------------------------------------------------------


	class ClaimsAction(Action):
	    """One turn issued by the adjudicator agent.

	    The single ``message`` field holds up to three commands separated by
	    semicolons. A command is one of the 12 verbs in ``ALL_VERBS``, with an
	    optional argument after it, for example::

	        OPEN_CLAIM 3
	        REVIEW_DOCUMENTS; CHECK_POLICY
	        VERIFY_PURCHASE
	        APPROVE 8500
	        DENY fraud_detected
	        END_SHIFT

	    NOTE(review): nothing in this class enforces the three-command limit or
	    checks the verb names; presumably the environment parses and validates
	    ``message`` elsewhere -- confirm.
	    """

	    # Required field (``...`` = no default). The description embeds the full
	    # verb list so the generated schema always matches ``ALL_VERBS``.
	    message: str = Field(
	        ...,
	        description=(
	            "Adjuster command(s), semicolon-separated (max 3 per step). "
	            f"Verbs: {', '.join(ALL_VERBS)}. "
	            "Examples: 'OPEN_CLAIM 3; REVIEW_DOCUMENTS; CHECK_POLICY', "
	            "'VERIFY_PURCHASE', 'APPROVE 8500', 'DENY fraud_detected', "
	            "'HANDLE_APPEAL 4', 'END_SHIFT'."
	        ),
	    )


	# ---------------------------------------------------------------------------
	# Observation (18 fields + revealed_info)
	# ---------------------------------------------------------------------------


	class ClaimsObservation(Observation):
	    """Typed observation handed back after every step (18 meters + extras).

	    ``dashboard`` is the human-readable rendering and includes the
	    ``>>> NEXT STEP:`` hint. ``revealed_info`` is the partial-observability
	    bundle (policy lookup, fraud signals, plaid hit, etc.).
	    ``dense_step_reward`` is the signed cost/bonus signal that GRPO trains
	    on, while ``reward`` is the clamped headline number.

	    NOTE(review): ``reward`` is not declared here; presumably it is
	    inherited from the ``Observation`` base class -- confirm.
	    """

	    # 1. Dashboard (rendered text + NEXT STEP hint)
	    dashboard: str = Field(default="", description="Rendered dashboard")

	    # 2-3. Step meters
	    step_number: int = 0
	    total_steps: int = 50

	    # 4-7. Queue meters
	    claims_in_queue: int = 0
	    claims_processed: int = 0
	    claims_appealed: int = 0
	    active_claim_id: int = -1  # default -1; presumably "no claim open" -- confirm

	    # 8-11. Decision meters
	    correct_decisions: int = 0
	    wrong_decisions: int = 0
	    fraud_caught: int = 0
	    fraud_missed: int = 0

	    # 12-14. Live scores
	    accuracy_score: float = 0.0
	    fraud_score: float = 0.0
	    payout_accuracy: float = 0.0

	    # 15. Scenario name
	    task_name: str = ""

	    # 16. Pressure label (normal / busy / critical)
	    shift_pressure: str = "normal"

	    # 17. Dense per-step reward (signed; what GRPO env_reward_fn sums)
	    dense_step_reward: float = 0.0

	    # 18. Final score (set on terminal step; 0 otherwise)
	    final_score: float = 0.0

	    # Partial-observability bundle (not counted in 18 typed meters).
	    # default_factory gives each instance its own fresh dict.
	    revealed_info: Dict[str, Any] = Field(default_factory=dict)


	# ---------------------------------------------------------------------------
	# Compatibility aliases — keep older import paths happy
	# ---------------------------------------------------------------------------

	# Older "ClaimsEnvPro*" names are plain aliases of the current classes;
	# the state alias points straight at the OpenEnv ``State`` base type.
	ClaimsEnvProAction = ClaimsAction
	ClaimsEnvProObservation = ClaimsObservation
	ClaimsEnvProState = State


	# Public API of this module (what ``from models import *`` exposes).
	__all__ = [
	    # current model names
	    "ClaimsAction", "ClaimsObservation",
	    # compatibility aliases
	    "ClaimsEnvProAction", "ClaimsEnvProObservation", "ClaimsEnvProState",
	    # verb vocabulary
	    "INFORMATION_VERBS", "TERMINAL_VERBS", "ALL_VERBS",
	]