File size: 5,899 Bytes
698f4d8
2568517
698f4d8
2568517
 
 
 
 
698f4d8
2568517
 
698f4d8
 
2568517
 
 
 
 
698f4d8
 
 
 
 
 
2568517
698f4d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2568517
698f4d8
2568517
698f4d8
 
 
 
 
 
2568517
698f4d8
 
 
 
 
2568517
698f4d8
 
 
 
 
 
 
 
 
 
 
 
 
 
2568517
698f4d8
2568517
698f4d8
2568517
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
698f4d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2568517
698f4d8
 
 
 
 
 
 
 
 
 
2568517
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""Tests for parlay_env/grader.py."""
from dashboard.api import _apply_zopa_erosion
from parlay_env.grader import (
    EpisodeGrade,
    compute_step_reward,
    compute_terminal_reward,
    detect_bluff_challenge,
    grade_episode,
)
from parlay_env.models import BeliefState, HiddenState, ParlayAction, ParlayState, PersonaType
from parlay_env.reward import OMEGA, PSI


def _make_hidden(
    budget: float = 165_000,
    walk: float = 125_000,
    last_stated_batna: float | None = None,
) -> HiddenState:
    """Build a ``HiddenState`` fixture for the grader tests.

    Args:
        budget: Stored as ``budget_ceiling``.
        walk: Stored as ``walk_away_price``.
        last_stated_batna: Forwarded unchanged to the field of the same name.

    Returns:
        A ``HiddenState`` whose remaining fields are pinned to fixed values:
        urgency 0.5, no alternative, no persona drift.
    """
    fields = {
        "budget_ceiling": budget,
        "walk_away_price": walk,
        "urgency_score": 0.5,
        "has_alternative": False,
        "persona_drifted": False,
        "last_stated_batna": last_stated_batna,
    }
    return HiddenState(**fields)


def _make_belief(budget: float = 140_000, walk: float = 130_000) -> BeliefState:
    """Build a ``BeliefState`` fixture for the grader tests.

    Args:
        budget: Stored as ``est_budget``.
        walk: Stored as ``est_walk_away``.

    Returns:
        A ``BeliefState`` with urgency estimate 0.5, no estimated alternative
        and confidence 0.5.
    """
    belief = BeliefState(
        est_budget=budget,
        est_walk_away=walk,
        est_urgency=0.5,
        est_has_alternative=False,
        confidence=0.5,
    )
    return belief


def _make_state(
    step: int = 0,
    cumulative: float = 0.0,
    offers: list[float] | None = None,
    beliefs: list[BeliefState] | None = None,
    hidden: HiddenState | None = None,
) -> ParlayState:
    """Build a ``ParlayState`` fixture for the grader tests.

    Args:
        step: Stored as ``step_count``.
        cumulative: Stored as ``cumulative_reward``.
        offers: Offer history; a falsy value (``None`` or empty) becomes ``[]``.
        beliefs: Belief history; a falsy value becomes a single default
            belief from ``_make_belief()``.
        hidden: Hidden state; a falsy value becomes ``_make_hidden()``.

    Returns:
        A non-terminal ``ParlayState`` for the ``saas_enterprise`` scenario
        with the SHARK persona, 100 credibility points, no drift events, and
        ``original_zopa_width`` set to budget ceiling minus walk-away price.
    """
    # Truthiness checks (not ``is None``) so empty lists also get defaults.
    if not hidden:
        hidden = _make_hidden()
    belief_history = beliefs if beliefs else [_make_belief()]
    offer_history = offers if offers else []
    zopa_width = hidden.budget_ceiling - hidden.walk_away_price

    return ParlayState(
        session_id="test-session",
        scenario_id="saas_enterprise",
        persona=PersonaType.SHARK,
        step_count=step,
        cumulative_reward=cumulative,
        hidden_state=hidden,
        belief_history=belief_history,
        offer_history=offer_history,
        drift_events_fired=0,
        episode_done=False,
        credibility_points=100,
        original_zopa_width=zopa_width,
    )


class TestComputeStepReward:
    """Tests for the per-step reward returned by ``compute_step_reward``."""

    def test_happy_path_returns_float(self):
        """A single opening offer from a fresh state yields a float reward."""
        before = _make_state()
        move = ParlayAction(utterance="I propose 145000.", offer_amount=145_000.0)
        after = _make_state(step=1, offers=[145_000.0])
        result = compute_step_reward(before, move, after)
        assert isinstance(result, float), f"Expected float, got {type(result)}"

    def test_noise_penalty_applied(self):
        """Repeating the previous offer with a terse utterance yields a float.

        NOTE(review): only the return type is asserted here; the size of any
        penalty is not checked.
        """
        repeated_offer = 140_000.0
        before = _make_state(offers=[repeated_offer])
        move = ParlayAction(utterance="xyz", offer_amount=repeated_offer)
        after = _make_state(step=1, offers=[repeated_offer, repeated_offer])
        result = compute_step_reward(before, move, after)
        assert isinstance(result, float), f"Expected float, got {type(result)}"

    def test_bluff_detection_awards_psi(self):
        """Challenging an inflated BATNA is detected and rewarded with PSI.

        The hidden state records a stated BATNA of 198,000 while the default
        budget ceiling is 165,000, and the utterance disputes the walk-away.
        """
        stated_batna = 198_000.0
        opponent = _make_hidden(last_stated_batna=stated_batna)
        before = _make_state(hidden=opponent)
        after = _make_state(step=1, hidden=opponent)
        challenge = ParlayAction(
            utterance="I don't believe that's your walk-away.",
            offer_amount=None,
            tactical_move=None,
        )

        # Detection is checked directly first, then through the step reward.
        caught = detect_bluff_challenge(
            utterance=challenge.utterance,
            opponent_stated_batna=stated_batna,
            opponent_true_batna=165_000.0,
        )
        reward = compute_step_reward(before, challenge, after)

        assert caught is True, f"Expected True, got {caught}"
        assert reward >= PSI, f"Expected at least PSI={PSI}, got {reward}"


class TestComputeTerminalReward:
    """Tests for the end-of-episode reward from ``compute_terminal_reward``."""

    def test_good_deal_positive_reward(self):
        """Closing at 145,000 (inside the default 125k-165k range) is positive."""
        fixture = _make_state()
        result = compute_terminal_reward(
            fixture,
            final_price=145_000.0,
            t_close=10,
            t_max=20,
        )
        assert result > 0, f"Expected positive reward, got {result}"

    def test_capitulation_returns_negative_omega(self):
        """Closing at 120,000, below the default 125,000 walk-away, gives -OMEGA."""
        fixture = _make_state()
        result = compute_terminal_reward(fixture, final_price=120_000.0, t_close=10)
        assert result == -OMEGA, f"Expected -{OMEGA}, got {result}"

    def test_speed_bonus_for_early_close(self):
        """At the same price, closing at step 5 beats closing at step 18."""
        fixture = _make_state()
        price = 145_000.0
        fast = compute_terminal_reward(fixture, final_price=price, t_close=5, t_max=20)
        slow = compute_terminal_reward(fixture, final_price=price, t_close=18, t_max=20)
        assert fast > slow, f"Expected fast close > slow close: {fast} vs {slow}"


class TestGradeEpisode:
    """Tests for ``grade_episode`` and for ZOPA erosion leading to collapse."""

    def test_grade_episode_returns_episodegrade(self):
        """Grading a closed deal returns an ``EpisodeGrade`` instance."""
        state = _make_state(step=10, offers=[145_000.0])
        grade = grade_episode(state, final_price=145_000.0, t_close=10)
        assert isinstance(grade, EpisodeGrade), f"Expected EpisodeGrade, got {type(grade)}"

    def test_deal_efficiency_in_range(self):
        """``deal_efficiency`` for a closed deal lies within [0, 1]."""
        state = _make_state(step=10, offers=[145_000.0])
        grade = grade_episode(state, final_price=145_000.0, t_close=10)
        assert 0.0 <= grade.deal_efficiency <= 1.0, f"Expected [0,1], got {grade.deal_efficiency}"

    def test_no_deal_zero_efficiency(self):
        """With no final price, ``deal_efficiency`` is exactly 0.0."""
        state = _make_state(step=20)
        grade = grade_episode(state, final_price=None)
        assert grade.deal_efficiency == 0.0, f"Expected 0.0, got {grade.deal_efficiency}"

    def test_bluffs_caught_passed_through(self):
        """The ``bluffs_caught`` argument is reflected on the returned grade."""
        state = _make_state(step=10, offers=[145_000.0])
        grade = grade_episode(state, final_price=145_000.0, bluffs_caught=3)
        assert grade.bluffs_caught == 3, f"Expected 3, got {grade.bluffs_caught}"

    def test_zopa_collapse_walk_away(self):
        """Sustained high tension erodes a narrow ZOPA until it collapses.

        The fixture uses a ZOPA only 3 wide (budget 103, walk-away 100).
        Tension and streak are reset to the same high values before every
        ``_apply_zopa_erosion`` call, and the run must end with
        ``termination_reason == "zopa_collapsed"``.
        """
        hidden = _make_hidden(budget=103.0, walk=100.0)
        state = _make_state(hidden=hidden)

        for _ in range(3):
            state.tension_score = 80.0
            state.high_tension_streak = 2
            _apply_zopa_erosion(state)

        assert state.zopa_erosion_ticks >= 1, f"Expected >=1, got {state.zopa_erosion_ticks}"

        # Hard cap on the number of calls: if erosion ever stops advancing the
        # tick counter or setting walk_away, the test fails on the assertion
        # below instead of spinning forever.
        max_calls = 1_000
        for _ in range(max_calls):
            if state.walk_away or state.zopa_erosion_ticks >= 100:
                break
            state.tension_score = 80.0
            state.high_tension_streak = 2
            _apply_zopa_erosion(state)

        assert state.termination_reason == "zopa_collapsed", f"Expected zopa_collapsed, got {state.termination_reason}"