| """Tests for parlay_env/grader.py.""" |
| from dashboard.api import _apply_zopa_erosion |
| from parlay_env.grader import ( |
| EpisodeGrade, |
| compute_step_reward, |
| compute_terminal_reward, |
| detect_bluff_challenge, |
| grade_episode, |
| ) |
| from parlay_env.models import BeliefState, HiddenState, ParlayAction, ParlayState, PersonaType |
| from parlay_env.reward import OMEGA, PSI |
|
|
|
|
def _make_hidden(
    budget: float = 165_000,
    walk: float = 125_000,
    last_stated_batna: float | None = None,
) -> HiddenState:
    """Build a ``HiddenState`` fixture for the grader tests.

    Args:
        budget: Stored as ``budget_ceiling``.
        walk: Stored as ``walk_away_price``.
        last_stated_batna: Passed through unchanged.

    Returns:
        A ``HiddenState`` whose other fields are pinned: urgency 0.5,
        no alternative, and no persona drift.
    """
    fields = {
        "budget_ceiling": budget,
        "walk_away_price": walk,
        "urgency_score": 0.5,
        "has_alternative": False,
        "persona_drifted": False,
        "last_stated_batna": last_stated_batna,
    }
    return HiddenState(**fields)
|
|
|
|
def _make_belief(budget: float = 140_000, walk: float = 130_000) -> BeliefState:
    """Build a ``BeliefState`` fixture for the grader tests.

    Args:
        budget: Stored as ``est_budget``.
        walk: Stored as ``est_walk_away``.

    Returns:
        A ``BeliefState`` with estimated urgency 0.5, no estimated
        alternative, and confidence 0.5.
    """
    estimates = {
        "est_budget": budget,
        "est_walk_away": walk,
        "est_urgency": 0.5,
        "est_has_alternative": False,
        "confidence": 0.5,
    }
    return BeliefState(**estimates)
|
|
|
|
def _make_state(
    step: int = 0,
    cumulative: float = 0.0,
    offers: list[float] | None = None,
    beliefs: list[BeliefState] | None = None,
    hidden: HiddenState | None = None,
) -> ParlayState:
    """Build a ``ParlayState`` fixture for the grader tests.

    Args:
        step: Stored as ``step_count``.
        cumulative: Stored as ``cumulative_reward``.
        offers: Offer history; any falsy value (``None`` or an empty list)
            is replaced by a new empty list.
        beliefs: Belief history; any falsy value is replaced by a single
            default belief from ``_make_belief()``.
        hidden: Hidden state; a falsy value is replaced by
            ``_make_hidden()``.

    Returns:
        A not-yet-finished SHARK-persona state for the ``saas_enterprise``
        scenario, with ``original_zopa_width`` set to the hidden state's
        ``budget_ceiling`` minus its ``walk_away_price``.
    """
    # Truthiness-based fallbacks are intentional here: they mirror ``x or default``.
    actual_hidden = hidden if hidden else _make_hidden()
    belief_history = beliefs if beliefs else [_make_belief()]
    offer_history = offers if offers else []
    zopa_width = actual_hidden.budget_ceiling - actual_hidden.walk_away_price

    return ParlayState(
        session_id="test-session",
        scenario_id="saas_enterprise",
        persona=PersonaType.SHARK,
        step_count=step,
        cumulative_reward=cumulative,
        hidden_state=actual_hidden,
        belief_history=belief_history,
        offer_history=offer_history,
        drift_events_fired=0,
        episode_done=False,
        credibility_points=100,
        original_zopa_width=zopa_width,
    )
|
|
|
|
class TestComputeStepReward:
    """Exercises ``compute_step_reward`` on single state transitions."""

    def test_happy_path_returns_float(self):
        """A plain 145000 offer on a fresh episode yields a float reward."""
        action = ParlayAction(utterance="I propose 145000.", offer_amount=145_000.0)
        before = _make_state()
        after = _make_state(step=1, offers=[145_000.0])

        result = compute_step_reward(before, action, after)

        assert isinstance(result, float), f"Expected float, got {type(result)}"

    def test_noise_penalty_applied(self):
        """Repeat the previous 140000 offer with a three-character utterance.

        NOTE(review): only the return type is asserted; the size of any
        penalty is not checked here — confirm whether that is intended.
        """
        repeated_offer = 140_000.0
        before = _make_state(offers=[repeated_offer])
        after = _make_state(step=1, offers=[repeated_offer, repeated_offer])
        action = ParlayAction(utterance="xyz", offer_amount=repeated_offer)

        result = compute_step_reward(before, action, after)

        assert isinstance(result, float), f"Expected float, got {type(result)}"

    def test_bluff_detection_awards_psi(self):
        """A challenge to a stated BATNA of 198000 is detected and rewarded.

        ``detect_bluff_challenge`` is checked directly against a true BATNA
        of 165000, and the step reward for the same utterance must be at
        least ``PSI``.
        """
        stated_batna = 198_000.0
        hidden = _make_hidden(last_stated_batna=stated_batna)
        before = _make_state(hidden=hidden)
        after = _make_state(step=1, hidden=hidden)
        # The action carries no offer and no tactical move, only the challenge text.
        action = ParlayAction(
            utterance="I don't believe that's your walk-away.",
            offer_amount=None,
            tactical_move=None,
        )

        caught = detect_bluff_challenge(
            utterance=action.utterance,
            opponent_stated_batna=stated_batna,
            opponent_true_batna=165_000.0,
        )
        reward = compute_step_reward(before, action, after)

        assert caught is True, f"Expected True, got {caught}"
        assert reward >= PSI, f"Expected at least PSI={PSI}, got {reward}"
|
|
|
|
class TestComputeTerminalReward:
    """Exercises ``compute_terminal_reward`` on the default fixture state."""

    def test_good_deal_positive_reward(self):
        """Closing at 145000 on step 10 of 20 gives a positive reward."""
        episode = _make_state()
        result = compute_terminal_reward(
            episode,
            final_price=145_000.0,
            t_close=10,
            t_max=20,
        )
        assert result > 0, f"Expected positive reward, got {result}"

    def test_capitulation_returns_negative_omega(self):
        """Closing at 120000 (below the fixture's 125000 walk-away) gives ``-OMEGA``."""
        episode = _make_state()
        result = compute_terminal_reward(episode, final_price=120_000.0, t_close=10)
        assert result == -OMEGA, f"Expected -{OMEGA}, got {result}"

    def test_speed_bonus_for_early_close(self):
        """The same 145000 deal scores higher closed at step 5 than at step 18."""
        episode = _make_state()
        # Evaluated in order: the early close first, then the late close.
        fast, slow = (
            compute_terminal_reward(episode, final_price=145_000.0, t_close=close_step, t_max=20)
            for close_step in (5, 18)
        )
        assert fast > slow, f"Expected fast close > slow close: {fast} vs {slow}"
|
|
|
|
class TestGradeEpisode:
    """Exercises ``grade_episode`` plus one ZOPA-erosion scenario."""

    def test_grade_episode_returns_episodegrade(self):
        """Grading a closed deal returns an ``EpisodeGrade`` instance."""
        episode = _make_state(step=10, offers=[145_000.0])
        grade = grade_episode(episode, final_price=145_000.0, t_close=10)
        assert isinstance(grade, EpisodeGrade), f"Expected EpisodeGrade, got {type(grade)}"

    def test_deal_efficiency_in_range(self):
        """``deal_efficiency`` for a closed deal lies within [0, 1]."""
        episode = _make_state(step=10, offers=[145_000.0])
        grade = grade_episode(episode, final_price=145_000.0, t_close=10)
        assert 0.0 <= grade.deal_efficiency <= 1.0, f"Expected [0,1], got {grade.deal_efficiency}"

    def test_no_deal_zero_efficiency(self):
        """With ``final_price=None`` the efficiency is exactly 0.0."""
        episode = _make_state(step=20)
        grade = grade_episode(episode, final_price=None)
        assert grade.deal_efficiency == 0.0, f"Expected 0.0, got {grade.deal_efficiency}"

    def test_bluffs_caught_passed_through(self):
        """The ``bluffs_caught`` argument shows up unchanged on the grade."""
        episode = _make_state(step=10, offers=[145_000.0])
        grade = grade_episode(episode, final_price=145_000.0, bluffs_caught=3)
        assert grade.bluffs_caught == 3, f"Expected 3, got {grade.bluffs_caught}"

    def test_zopa_collapse_walk_away(self):
        """Sustained high tension on a 3-wide ZOPA ends in ``zopa_collapsed``.

        NOTE(review): this drives ``_apply_zopa_erosion`` from the dashboard
        API rather than ``grade_episode``, and ``walk_away`` itself is only
        used as a loop condition, never asserted — confirm that is intended.
        """
        state = _make_state(hidden=_make_hidden(budget=103.0, walk=100.0))

        def erode_under_tension():
            # Re-pin the tension inputs before every erosion call.
            state.tension_score = 80.0
            state.high_tension_streak = 2
            _apply_zopa_erosion(state)

        for _ in range(3):
            erode_under_tension()

        assert state.zopa_erosion_ticks >= 1, f"Expected >=1, got {state.zopa_erosion_ticks}"

        # Keep eroding until walk-away fires; the tick cap guards against an endless loop.
        while not (state.walk_away or state.zopa_erosion_ticks >= 100):
            erode_under_tension()

        assert state.termination_reason == "zopa_collapsed", f"Expected zopa_collapsed, got {state.termination_reason}"
|
|