"""Pydantic models and enums for Parlay negotiation state, observations and actions."""

from enum import Enum
from typing import Optional

from pydantic import BaseModel, ConfigDict, Field


class PersonaType(str, Enum):
    """Closed set of persona identifiers; members are also plain ``str`` values."""

    SHARK = "shark"
    DIPLOMAT = "diplomat"
    VETERAN = "veteran"


class TacticalMove(str, Enum):
    """Closed set of tactical move identifiers; members are also plain ``str`` values."""

    ANCHOR_HIGH = "anchor_high"
    BATNA_REVEAL = "batna_reveal"
    SILENCE = "silence"


class HiddenState(BaseModel):
    """Opponent-side values described by the field descriptions below.

    ``validate_assignment=True`` makes pydantic re-run field validation when
    an attribute is assigned after construction, so e.g. the
    ``urgency_score`` bounds also apply to later mutations.
    """

    model_config = ConfigDict(validate_assignment=True)

    budget_ceiling: float = Field(
        description="True max the opponent will pay",
    )
    walk_away_price: float = Field(
        description="Opponent's true BATNA",
    )
    urgency_score: float = Field(
        description="How badly they need to close",
        ge=0.0,
        le=1.0,
    )
    has_alternative: bool = Field(
        description="Whether opponent has a competing offer",
    )

    # Optional bookkeeping; who writes these is not visible in this module.
    persona_drifted: bool = False
    last_stated_batna: Optional[float] = None
    event_impacts: Optional[dict[str, float]] = None


class BeliefState(BaseModel):
    """Agent's current belief about opponent hidden state (Theory of Mind)."""

    est_budget: float
    est_walk_away: float
    est_urgency: float = Field(ge=0.0, le=1.0)
    est_has_alternative: bool
    confidence: float = Field(
        description="Overall belief confidence",
        ge=0.0,
        le=1.0,
    )


class ParlayObservation(BaseModel):
    """Observation payload.

    Every field up to and including ``cumulative_reward`` is required; the
    remaining three have defaults.
    """

    # Progress markers.
    step_count: int
    episode_done: bool

    # Offers on the table.
    current_offer: float
    opponent_offer: float

    # ZOPA bounds (presumably "zone of possible agreement" -- TODO confirm).
    zopa_lower: float
    zopa_upper: float
    nash_point: float = Field(
        description="Nash Bargaining Solution price",
    )

    # Constrained to the closed range [0, 100].
    tension_score: float = Field(ge=0.0, le=100.0)

    belief_state: BeliefState
    last_utterance: str
    available_moves: list[TacticalMove]

    # Integer constrained to the closed range [0, 100].
    credibility_points: int = Field(ge=0, le=100)

    reward: float
    cumulative_reward: float

    # Defaulted fields.
    drift_event: Optional[str] = None
    zopa_erosion_ticks: int = 0
    zopa_width_pct_remaining: float = 1.0


class ParlayAction(BaseModel):
    """Action payload: a required utterance plus an optional offer and move."""

    utterance: str
    offer_amount: Optional[float] = None
    tactical_move: Optional[TacticalMove] = None


class ParlayState(BaseModel):
    """Session-level state record.

    Fields from ``session_id`` through ``episode_done`` are required; all
    later fields carry defaults. Unlike ``ParlayObservation``, this model
    declares no range constraints on ``credibility_points`` or
    ``tension_score``.
    """

    # Identity.
    session_id: str
    scenario_id: str
    persona: PersonaType

    # Progress.
    step_count: int
    cumulative_reward: float

    # Opponent state and accumulated histories.
    hidden_state: HiddenState
    belief_history: list[BeliefState]
    offer_history: list[float]
    drift_events_fired: int
    episode_done: bool

    # Defaulted fields.
    termination_reason: Optional[str] = None
    credibility_points: int = 100
    tension_score: float = 0.0
    deal_reached: bool = False
    walk_away: bool = False
    high_tension_streak: int = 0
    zopa_erosion_ticks: int = 0
    zopa_width_pct_remaining: float = 1.0
    original_zopa_width: float = 0.0
    bluffs_caught: int = 0