"""ClaimsEnvPro — typed payloads for the 12-verb adjudication gym.

The action surface is a single ``message`` string carrying up to three
semicolon-separated commands (one of the 12 verbs, optional argument).
The observation surface is 18 typed fields plus a ``revealed_info`` dict
that grows as the agent invokes information-gathering verbs.

Reserved OpenEnv names (``reset``, ``step``, ``state``, ``close``) are
deliberately *not* used as verbs — only the 11 insurance verbs from the
upstream source plus the new ``VERIFY_PURCHASE`` (Plaid bank audit).
"""

from __future__ import annotations

from typing import Any, Dict, List

from openenv.core.env_server.types import Action, Observation, State
from pydantic import Field


# ---------------------------------------------------------------------------
# Verb vocabulary (12)
# ---------------------------------------------------------------------------

# Verbs in the information group (eight entries). Tuple order is preserved
# because it feeds directly into ``ALL_VERBS`` below.
INFORMATION_VERBS: tuple[str, ...] = (
    "VIEW_QUEUE", "OPEN_CLAIM", "REVIEW_DOCUMENTS", "CHECK_POLICY",
    "INVESTIGATE_FRAUD", "REQUEST_INFO",
    "VERIFY_PURCHASE",  # Plaid bank audit (new in claims-env-pro)
    "HANDLE_APPEAL",
)

# Verbs in the terminal group (four entries).
TERMINAL_VERBS: tuple[str, ...] = ("APPROVE", "DENY", "ESCALATE", "END_SHIFT")

# Full vocabulary: information verbs first, then terminal verbs.
ALL_VERBS: tuple[str, ...] = (*INFORMATION_VERBS, *TERMINAL_VERBS)

# Import-time sanity check so an accidental add/remove above is caught early
# (note: ``assert`` statements are skipped when Python runs with ``-O``).
assert len(ALL_VERBS) == 12, "claims-env-pro defines exactly 12 verbs"


# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------


class ClaimsAction(Action):
    """Action payload: one turn of adjuster commands packed into a string.

    The only field declared here is ``message``. By convention it holds up
    to three commands separated by semicolons, where each command is a verb
    from ``ALL_VERBS`` followed by an optional argument, for example:

        OPEN_CLAIM 3
        REVIEW_DOCUMENTS; CHECK_POLICY
        VERIFY_PURCHASE
        APPROVE 8500
        DENY fraud_detected
        END_SHIFT

    No parsing or validation of the command text is declared on this
    model; it only types the field and publishes its description.
    """

    message: str = Field(
        ...,  # Ellipsis marks the field as required (no default value).
        description=(
            "Adjuster command(s), semicolon-separated (max 3 per step). "
            # The verb list is rendered from ALL_VERBS so the published
            # description cannot drift from the vocabulary constant.
            f"Verbs: {', '.join(ALL_VERBS)}. "
            "Examples: 'OPEN_CLAIM 3; REVIEW_DOCUMENTS; CHECK_POLICY', "
            "'VERIFY_PURCHASE', 'APPROVE 8500', 'DENY fraud_detected', "
            "'HANDLE_APPEAL 4', 'END_SHIFT'."
        ),
    )


# ---------------------------------------------------------------------------
# Observation (18 fields + revealed_info)
# ---------------------------------------------------------------------------


class ClaimsObservation(Observation):
    """Observation payload produced after each step: 18 typed fields.

    * ``dashboard`` is the human-readable rendering, including the
      ``>>> NEXT STEP:`` hint.
    * ``revealed_info`` is the partial-observability bundle (policy
      lookup, fraud signals, plaid hit, etc.) and is kept separate from
      the 18 numbered fields.
    * ``dense_step_reward`` is the signed cost/bonus signal that GRPO
      trains on, whereas ``reward`` is the clamped headline number.
      ``reward`` is not declared in this class — presumably it is
      inherited from ``Observation``; confirm against the base type.

    Every field has a default, so an instance can be built without
    passing any of the fields declared here.
    """

    # [1] Rendered text view (carries the NEXT STEP hint).
    dashboard: str = Field(default="", description="Rendered dashboard")

    # [2-3] Step meters.
    step_number: int = 0
    total_steps: int = 50

    # [4-7] Queue meters.
    claims_in_queue: int = 0
    claims_processed: int = 0
    claims_appealed: int = 0
    active_claim_id: int = -1

    # [8-11] Decision meters.
    correct_decisions: int = 0
    wrong_decisions: int = 0
    fraud_caught: int = 0
    fraud_missed: int = 0

    # [12-14] Live scores.
    accuracy_score: float = 0.0
    fraud_score: float = 0.0
    payout_accuracy: float = 0.0

    # [15] Scenario name.
    task_name: str = ""

    # [16] Pressure label (normal / busy / critical).
    shift_pressure: str = "normal"

    # [17] Dense per-step reward (signed; what GRPO env_reward_fn sums).
    dense_step_reward: float = 0.0

    # [18] Final score (set on terminal step; 0 otherwise).
    final_score: float = 0.0

    # Partial-observability bundle; not one of the 18 numbered fields.
    # A factory is used so each instance gets its own fresh dict.
    revealed_info: Dict[str, Any] = Field(default_factory=dict)


# ---------------------------------------------------------------------------
# Compatibility aliases — keep older import paths happy
# ---------------------------------------------------------------------------

# Each ``ClaimsEnvPro*`` name is a plain alias: it is bound to the very same
# object as the name on the right-hand side, not to a subclass or a copy.
ClaimsEnvProState = State  # the ``State`` type imported at the top of this module
ClaimsEnvProObservation = ClaimsObservation
ClaimsEnvProAction = ClaimsAction


# Names exported by a star-import of this module.
__all__ = [
    # Payload models defined in this module.
    "ClaimsAction",
    "ClaimsObservation",
    # Compatibility aliases for the models and the imported ``State`` type.
    "ClaimsEnvProAction",
    "ClaimsEnvProObservation",
    "ClaimsEnvProState",
    # Verb vocabulary constants.
    "INFORMATION_VERBS",
    "TERMINAL_VERBS",
    "ALL_VERBS",
]