Parlay / parlay_env /models.py
sh4shv4t's picture
feat: streamline parlay for demo mode and add spectator negotiation mechanics
2568517
from enum import Enum
from typing import Optional
from pydantic import BaseModel, ConfigDict, Field
class PersonaType(str, Enum):
    """Closed set of persona identifiers.

    The ``str`` mixin means every member is also a plain string: it compares
    equal to its value (``PersonaType.SHARK == "shark"``) and can be looked up
    from that value (``PersonaType("shark")``).
    """

    # Member values are the lowercase form of the member name.
    SHARK = "shark"
    DIPLOMAT = "diplomat"
    VETERAN = "veteran"
class TacticalMove(str, Enum):
    """Closed set of named tactical moves.

    Used in this file as the element type of
    ``ParlayObservation.available_moves`` and as the optional
    ``ParlayAction.tactical_move``. The ``str`` mixin makes each member
    compare equal to its string value.
    """

    # Member values are the lowercase form of the member name.
    ANCHOR_HIGH = "anchor_high"
    BATNA_REVEAL = "batna_reveal"
    SILENCE = "silence"
class HiddenState(BaseModel):
    """Opponent-side values of a negotiation.

    The per-field ``description`` strings below state what each required
    value represents. ``validate_assignment=True`` asks pydantic to re-run
    field validation whenever an attribute is assigned after construction,
    so the ``urgency_score`` bounds also apply to later updates.
    """

    model_config = ConfigDict(validate_assignment=True)

    # --- required on construction ---
    budget_ceiling: float = Field(
        description="True max the opponent will pay",
    )
    walk_away_price: float = Field(
        description="Opponent's true BATNA",
    )
    # Constrained to the closed interval [0.0, 1.0].
    urgency_score: float = Field(
        description="How badly they need to close",
        ge=0.0,
        le=1.0,
    )
    has_alternative: bool = Field(
        description="Whether opponent has a competing offer",
    )

    # --- optional, with defaults ---
    # NOTE(review): presumably flipped once a persona drift has happened;
    # confirm against the code that sets it.
    persona_drifted: bool = False
    # NOTE(review): name suggests the BATNA figure most recently stated by the
    # opponent; None until one exists — confirm with the writer of this field.
    last_stated_batna: Optional[float] = Field(default=None)
    # String-keyed float values; the key scheme is not visible in this file.
    event_impacts: Optional[dict[str, float]] = Field(default=None)
class BeliefState(BaseModel):
    """Agent's current belief about opponent hidden state (Theory of Mind)."""

    # The est_* fields parallel, by name, the required fields of HiddenState
    # (budget_ceiling, walk_away_price, urgency_score, has_alternative).
    # All five fields are required; none has a default.
    est_budget: float
    est_walk_away: float
    # Same [0.0, 1.0] bounds as HiddenState.urgency_score.
    est_urgency: float = Field(
        ge=0.0,
        le=1.0,
    )
    est_has_alternative: bool
    # Also bounded to [0.0, 1.0].
    confidence: float = Field(
        description="Overall belief confidence",
        ge=0.0,
        le=1.0,
    )
class ParlayObservation(BaseModel):
    """Observation payload for a negotiation episode.

    Carries step/episode status, the two current offers, ZOPA bounds and the
    Nash point, the agent's belief state, the last utterance, the moves
    currently available, and reward figures. Everything up to and including
    ``cumulative_reward`` is required; the last three fields have defaults.
    """

    # --- progress ---
    step_count: int
    episode_done: bool

    # --- prices ---
    current_offer: float
    opponent_offer: float
    zopa_lower: float
    zopa_upper: float
    nash_point: float = Field(
        description="Nash Bargaining Solution price",
    )

    # Bounded to [0.0, 100.0].
    tension_score: float = Field(
        ge=0.0,
        le=100.0,
    )

    # --- dialogue / decision context ---
    belief_state: BeliefState
    last_utterance: str
    available_moves: list[TacticalMove]

    # Integer bounded to [0, 100].
    credibility_points: int = Field(
        ge=0,
        le=100,
    )

    # --- reward ---
    reward: float
    cumulative_reward: float

    # --- optional extras ---
    # None when there is no drift event to report.
    drift_event: Optional[str] = Field(default=None)
    zopa_erosion_ticks: int = Field(default=0)
    # NOTE(review): the 1.0 default suggests a fraction (1.0 == full width)
    # rather than a 0-100 percentage — confirm against the producer.
    zopa_width_pct_remaining: float = Field(default=1.0)
class ParlayAction(BaseModel):
    """Action payload: a required utterance plus two optional parts.

    ``offer_amount`` and ``tactical_move`` both default to ``None``, so an
    action may consist of the utterance text alone.
    """

    utterance: str
    # Numeric offer, if this action includes one.
    offer_amount: Optional[float] = Field(default=None)
    # One of the TacticalMove members, if this action uses one.
    tactical_move: Optional[TacticalMove] = Field(default=None)
class ParlayState(BaseModel):
    """State record for one negotiation session.

    The fields from ``session_id`` through ``episode_done`` are required
    (including the two history lists, which have no default). The remaining
    fields are optional and start from the defaults shown.
    """

    # --- identity ---
    session_id: str
    scenario_id: str
    persona: PersonaType

    # --- required running state ---
    step_count: int
    cumulative_reward: float
    hidden_state: HiddenState
    belief_history: list[BeliefState]
    offer_history: list[float]
    drift_events_fired: int
    episode_done: bool

    # --- outcome ---
    termination_reason: Optional[str] = Field(default=None)
    deal_reached_default: bool = False  # placeholder removed below