"""Tests for parlay_env/grader.py."""
from dashboard.api import _apply_zopa_erosion
from parlay_env.grader import (
EpisodeGrade,
compute_step_reward,
compute_terminal_reward,
detect_bluff_challenge,
grade_episode,
)
from parlay_env.models import BeliefState, HiddenState, ParlayAction, ParlayState, PersonaType
from parlay_env.reward import OMEGA, PSI
def _make_hidden(
    budget: float = 165_000,
    walk: float = 125_000,
    last_stated_batna: float | None = None,
) -> HiddenState:
    """Build a ``HiddenState`` fixture for the grader tests.

    Args:
        budget: Stored as ``budget_ceiling``.
        walk: Stored as ``walk_away_price``.
        last_stated_batna: Stored unchanged as ``last_stated_batna``.

    Returns:
        A ``HiddenState`` with ``urgency_score`` fixed at 0.5 and both
        ``has_alternative`` and ``persona_drifted`` set to ``False``.
    """
    # Values the tests vary come first, fixed fixture values second.
    fields = {
        "budget_ceiling": budget,
        "walk_away_price": walk,
        "urgency_score": 0.5,
        "has_alternative": False,
        "persona_drifted": False,
        "last_stated_batna": last_stated_batna,
    }
    return HiddenState(**fields)
def _make_belief(budget: float = 140_000, walk: float = 130_000) -> BeliefState:
    """Build a ``BeliefState`` fixture for the grader tests.

    Args:
        budget: Stored as ``est_budget``.
        walk: Stored as ``est_walk_away``.

    Returns:
        A ``BeliefState`` with ``est_urgency`` and ``confidence`` fixed at
        0.5 and ``est_has_alternative`` set to ``False``.
    """
    belief = BeliefState(
        est_budget=budget,
        est_walk_away=walk,
        est_urgency=0.5,
        est_has_alternative=False,
        confidence=0.5,
    )
    return belief
def _make_state(
    step: int = 0,
    cumulative: float = 0.0,
    offers: list[float] | None = None,
    beliefs: list[BeliefState] | None = None,
    hidden: HiddenState | None = None,
) -> ParlayState:
    """Build a ``ParlayState`` fixture for the grader tests.

    Args:
        step: Stored as ``step_count``.
        cumulative: Stored as ``cumulative_reward``.
        offers: Stored as ``offer_history``; a falsy value (``None`` or an
            empty list) becomes a new empty list.
        beliefs: Stored as ``belief_history``; a falsy value becomes a
            one-element list holding the default ``_make_belief()``.
        hidden: Stored as ``hidden_state``; a falsy value becomes the
            default ``_make_hidden()``.

    Returns:
        A ``ParlayState`` for the ``saas_enterprise`` scenario with the
        SHARK persona, whose ``original_zopa_width`` is the hidden state's
        ``budget_ceiling`` minus its ``walk_away_price``.
    """
    actual_hidden = hidden if hidden else _make_hidden()
    belief_history = beliefs if beliefs else [_make_belief()]
    offer_history = offers if offers else []
    zopa_width = actual_hidden.budget_ceiling - actual_hidden.walk_away_price

    return ParlayState(
        session_id="test-session",
        scenario_id="saas_enterprise",
        persona=PersonaType.SHARK,
        step_count=step,
        cumulative_reward=cumulative,
        hidden_state=actual_hidden,
        belief_history=belief_history,
        offer_history=offer_history,
        drift_events_fired=0,
        episode_done=False,
        credibility_points=100,
        original_zopa_width=zopa_width,
    )
class TestComputeStepReward:
    """Tests for ``compute_step_reward`` and ``detect_bluff_challenge``."""

    def test_happy_path_returns_float(self):
        """A single 145000 offer from a fresh state yields a float reward."""
        before = _make_state()
        after = _make_state(step=1, offers=[145_000.0])
        move = ParlayAction(utterance="I propose 145000.", offer_amount=145_000.0)

        result = compute_step_reward(before, move, after)

        assert isinstance(result, float), f"Expected float, got {type(result)}"

    def test_noise_penalty_applied(self):
        """Repeating the previous 140000 offer still yields a float reward.

        NOTE(review): only the return type is asserted; the size or sign
        of any penalty is not checked here.
        """
        repeated_offer = 140_000.0
        before = _make_state(offers=[repeated_offer])
        after = _make_state(step=1, offers=[repeated_offer, repeated_offer])
        move = ParlayAction(utterance="xyz", offer_amount=repeated_offer)

        result = compute_step_reward(before, move, after)

        assert isinstance(result, float), f"Expected float, got {type(result)}"

    def test_bluff_detection_awards_psi(self):
        """Challenging an overstated BATNA is detected and rewarded >= PSI."""
        stated_batna = 198_000.0
        hidden = _make_hidden(last_stated_batna=stated_batna)
        before = _make_state(hidden=hidden)
        after = _make_state(step=1, hidden=hidden)
        challenge = ParlayAction(
            utterance="I don't believe that's your walk-away.",
            offer_amount=None,
            tactical_move=None,
        )

        # Detection runs before the reward call, as in the original ordering.
        caught = detect_bluff_challenge(
            utterance=challenge.utterance,
            opponent_stated_batna=stated_batna,
            opponent_true_batna=165_000.0,
        )
        reward = compute_step_reward(before, challenge, after)

        assert caught is True, f"Expected True, got {caught}"
        assert reward >= PSI, f"Expected at least PSI={PSI}, got {reward}"
class TestComputeTerminalReward:
    """Tests for ``compute_terminal_reward``."""

    def test_good_deal_positive_reward(self):
        """Closing at 145000 on step 10 of 20 gives a positive reward."""
        result = compute_terminal_reward(
            _make_state(),
            final_price=145_000.0,
            t_close=10,
            t_max=20,
        )
        assert result > 0, f"Expected positive reward, got {result}"

    def test_capitulation_returns_negative_omega(self):
        """Closing at 120000 returns exactly ``-OMEGA``.

        The price is below the fixture's default ``walk_away_price`` of
        125000, which is presumably what makes this a capitulation
        (confirm against the grader).
        """
        result = compute_terminal_reward(
            _make_state(),
            final_price=120_000.0,
            t_close=10,
        )
        assert result == -OMEGA, f"Expected -{OMEGA}, got {result}"

    def test_speed_bonus_for_early_close(self):
        """At the same price, closing on step 5 beats closing on step 18."""
        state = _make_state()
        shared = {"final_price": 145_000.0, "t_max": 20}

        fast = compute_terminal_reward(state, t_close=5, **shared)
        slow = compute_terminal_reward(state, t_close=18, **shared)

        assert fast > slow, f"Expected fast close > slow close: {fast} vs {slow}"
class TestGradeEpisode:
    """Tests for ``grade_episode`` plus a ZOPA-erosion termination check."""

    def test_grade_episode_returns_episodegrade(self):
        """Grading a closed deal returns an ``EpisodeGrade`` instance."""
        state = _make_state(step=10, offers=[145_000.0])
        grade = grade_episode(state, final_price=145_000.0, t_close=10)
        assert isinstance(grade, EpisodeGrade), f"Expected EpisodeGrade, got {type(grade)}"

    def test_deal_efficiency_in_range(self):
        """``deal_efficiency`` for a closed deal lies within [0, 1]."""
        state = _make_state(step=10, offers=[145_000.0])
        grade = grade_episode(state, final_price=145_000.0, t_close=10)
        assert 0.0 <= grade.deal_efficiency <= 1.0, f"Expected [0,1], got {grade.deal_efficiency}"

    def test_no_deal_zero_efficiency(self):
        """With ``final_price=None`` the ``deal_efficiency`` is exactly 0.0."""
        state = _make_state(step=20)
        grade = grade_episode(state, final_price=None)
        assert grade.deal_efficiency == 0.0, f"Expected 0.0, got {grade.deal_efficiency}"

    def test_bluffs_caught_passed_through(self):
        """The ``bluffs_caught`` argument is echoed on the returned grade."""
        state = _make_state(step=10, offers=[145_000.0])
        grade = grade_episode(state, final_price=145_000.0, bluffs_caught=3)
        assert grade.bluffs_caught == 3, f"Expected 3, got {grade.bluffs_caught}"

    def test_zopa_collapse_walk_away(self):
        """Repeated high-tension erosion ends with ``zopa_collapsed``.

        Starts from a narrow ZOPA (budget 103, walk-away 100) and calls
        ``_apply_zopa_erosion`` with ``tension_score`` forced to 80.0 and
        ``high_tension_streak`` forced to 2 before every call. These values
        presumably satisfy the erosion trigger; confirm in ``dashboard.api``.
        """
        hidden = _make_hidden(budget=103.0, walk=100.0)
        state = _make_state(hidden=hidden)

        def erode_under_tension() -> None:
            # Reset the tension inputs every time, exactly as the test
            # always has, in case the erosion call modifies them.
            state.tension_score = 80.0
            state.high_tension_streak = 2
            _apply_zopa_erosion(state)

        for _ in range(3):
            erode_under_tension()
        assert state.zopa_erosion_ticks >= 1, f"Expected >=1, got {state.zopa_erosion_ticks}"

        # The loop's exit depends entirely on the code under test. The call
        # budget turns a regression where neither walk_away nor the tick
        # counter advances into a failure rather than a hung test run. It is
        # far above anything a healthy run needs, so passing runs behave
        # exactly as before.
        calls_left = 10_000
        while not state.walk_away and state.zopa_erosion_ticks < 100:
            assert calls_left > 0, (
                "_apply_zopa_erosion made no progress: walk_away is still False "
                f"and zopa_erosion_ticks={state.zopa_erosion_ticks}"
            )
            calls_left -= 1
            erode_under_tension()

        assert state.termination_reason == "zopa_collapsed", f"Expected zopa_collapsed, got {state.termination_reason}"
|