from enum import Enum
from typing import Optional

from pydantic import BaseModel, ConfigDict, Field


class PersonaType(str, Enum):
    """Closed set of persona identifiers; the type of ``ParlayState.persona``.

    The ``str`` mixin makes every member a real string that compares equal to
    its value (``PersonaType.SHARK == "shark"``), and a member can be looked
    up from its raw value with ``PersonaType("shark")``.
    """

    # NOTE(review): how each persona behaves is not defined in this file.
    SHARK = "shark"
    DIPLOMAT = "diplomat"
    VETERAN = "veteran"


class TacticalMove(str, Enum):
    """Closed set of tactical move identifiers.

    Used as the type of ``ParlayAction.tactical_move`` and as the element type
    of ``ParlayObservation.available_moves``. The ``str`` mixin makes every
    member a real string that compares equal to its value.
    """

    # NOTE(review): the effect of each move is implemented elsewhere; only the
    # identifiers are declared here.
    ANCHOR_HIGH = "anchor_high"
    BATNA_REVEAL = "batna_reveal"
    SILENCE = "silence"


class HiddenState(BaseModel):
    """Opponent's private negotiation parameters (see the field descriptions).

    ``validate_assignment=True`` makes pydantic re-run field validation when an
    attribute is assigned after construction, so constraints such as the
    ``urgency_score`` bounds apply to later mutation as well as to ``__init__``.
    """

    model_config = ConfigDict(validate_assignment=True)

    # Required fields: no default is declared, so callers must supply them.
    budget_ceiling: float = Field(description="True max the opponent will pay")
    walk_away_price: float = Field(description="Opponent's true BATNA")
    # Constrained to the closed interval [0.0, 1.0].
    urgency_score: float = Field(ge=0.0, le=1.0, description="How badly they need to close")
    has_alternative: bool = Field(description="Whether opponent has a competing offer")
    # Optional fields below fall back to False / None when omitted.
    # NOTE(review): presumably flipped once a persona drift has occurred — confirm.
    persona_drifted: bool = Field(default=False)
    # NOTE(review): presumably the BATNA figure the opponent last stated, which
    # may differ from walk_away_price — confirm against the code that sets it.
    last_stated_batna: Optional[float] = None
    # Mapping of string keys to float values; the meaning of the keys is not
    # visible in this file.
    event_impacts: Optional[dict[str, float]] = None


class BeliefState(BaseModel):
    """Agent's current belief about opponent hidden state (Theory of Mind).

    All five fields are required. The ``est_*`` names parallel fields of
    ``HiddenState``; ``est_urgency`` carries the same [0.0, 1.0] bounds as
    ``HiddenState.urgency_score``.
    """

    # NOTE(review): presumably estimates of HiddenState.budget_ceiling and
    # HiddenState.walk_away_price respectively — confirm.
    est_budget: float
    est_walk_away: float
    # Constrained to the closed interval [0.0, 1.0].
    est_urgency: float = Field(ge=0.0, le=1.0)
    est_has_alternative: bool
    # Constrained to the closed interval [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0, description="Overall belief confidence")


class ParlayObservation(BaseModel):
    """Per-step observation payload.

    Bundles progress flags, the current offers, ZOPA bounds, the Nash
    bargaining price, the current ``BeliefState``, the moves currently
    available, and reward figures. Fields from ``step_count`` through
    ``cumulative_reward`` are required; the last three have defaults.
    """

    step_count: int
    episode_done: bool
    # NOTE(review): presumably the agent's own latest offer versus the
    # opponent's latest offer — confirm which side "current" refers to.
    current_offer: float
    opponent_offer: float
    # NOTE(review): presumably the lower/upper bounds of the zone of possible
    # agreement — confirm; no ordering between them is enforced here.
    zopa_lower: float
    zopa_upper: float
    nash_point: float = Field(description="Nash Bargaining Solution price")
    # Constrained to the closed interval [0.0, 100.0].
    tension_score: float = Field(ge=0.0, le=100.0)
    belief_state: BeliefState
    last_utterance: str
    available_moves: list[TacticalMove]
    # Integer constrained to the closed interval [0, 100].
    credibility_points: int = Field(ge=0, le=100)
    reward: float
    cumulative_reward: float
    # None when omitted; NOTE(review): presumably a label for a drift event
    # that fired this step — confirm.
    drift_event: Optional[str] = None
    zopa_erosion_ticks: int = 0
    # Defaults to 1.0 and is not range-checked. NOTE(review): despite "pct" in
    # the name, the default suggests a 0-1 fraction rather than 0-100 — confirm.
    zopa_width_pct_remaining: float = 1.0


class ParlayAction(BaseModel):
    """A single action: a required utterance plus optional offer and move.

    ``offer_amount`` and ``tactical_move`` both default to ``None``, so an
    action consisting of text alone is valid.
    """

    utterance: str
    # No range constraint is declared on the offer amount.
    offer_amount: Optional[float] = None
    tactical_move: Optional[TacticalMove] = None


class ParlayState(BaseModel):
    """Full state record for one session.

    Holds identifiers, the persona, the opponent ``HiddenState``, belief and
    offer histories, and progress counters. Fields from ``session_id`` through
    ``episode_done`` are required; the remaining fields have defaults.

    NOTE(review): ``credibility_points`` and ``tension_score`` are declared
    without range constraints here, whereas ``ParlayObservation`` limits both
    to [0, 100]. Confirm whether the difference is intentional.
    """

    session_id: str
    scenario_id: str
    persona: PersonaType
    step_count: int
    cumulative_reward: float
    hidden_state: HiddenState
    # Both histories are required (no default), so an empty list must be
    # passed explicitly for a fresh session.
    belief_history: list[BeliefState]
    offer_history: list[float]
    drift_events_fired: int
    episode_done: bool
    # None when omitted; NOTE(review): the set of reason strings is not
    # defined in this file.
    termination_reason: Optional[str] = None
    credibility_points: int = 100
    tension_score: float = 0.0
    deal_reached: bool = False
    walk_away: bool = False
    # NOTE(review): presumably counts consecutive high-tension steps; the
    # threshold is not visible here — confirm.
    high_tension_streak: int = 0
    zopa_erosion_ticks: int = 0
    zopa_width_pct_remaining: float = 1.0
    # NOTE(review): the 0.0 default looks like a placeholder to be set when
    # the scenario is initialised — confirm.
    original_zopa_width: float = 0.0
    bluffs_caught: int = 0