File size: 3,320 Bytes
df97e68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from app.models import ActionModel, ActionType
from app.simulator import LiveSimulationSession
from app.engine import _repair_action_for_observation


def test_reallocate_payload_is_repaired_to_valid_shape() -> None:
    """Check that a payload-less REALLOCATE_OFFICERS action gets repaired.

    An ``ActionModel`` carrying only ``action_type`` is passed through
    ``_repair_action_for_observation`` together with the session's current
    observation. The returned action must have one of the accepted types,
    must carry a ``reallocation_delta`` if it is still a reallocation, and
    must be accompanied by a non-None note.
    """
    # Either the original type is kept (with a valid payload) or the repair
    # step falls back to one of these alternative action types.
    accepted_types = {
        ActionType.REALLOCATE_OFFICERS,
        ActionType.ADVANCE_TIME,
        ActionType.REQUEST_MISSING_DOCUMENTS,
        ActionType.ASSIGN_CAPACITY,
        ActionType.ESCALATE_SERVICE,
    }

    session = LiveSimulationSession(
        task_id="district_backlog_easy",
        agent_mode="baseline_policy",
        max_steps=5,
        seed=42,
    )
    try:
        bare_action = ActionModel(action_type=ActionType.REALLOCATE_OFFICERS)
        repaired, repair_note = _repair_action_for_observation(
            bare_action, session.obs
        )

        assert repaired.action_type in accepted_types
        if repaired.action_type == ActionType.REALLOCATE_OFFICERS:
            # The v2 action shape expresses a reallocation as a
            # reallocation_delta dict, so it must be populated here.
            assert repaired.reallocation_delta is not None
        assert repair_note is not None
    finally:
        # Always release the session, even when an assertion fails.
        session.close()


def test_assign_capacity_switches_to_advance_time_if_no_reserve() -> None:
    """Check that ASSIGN_CAPACITY is replaced when no officers are spare.

    If the allocated total is below ``available_officers``, the difference
    is added to the first entry of ``officer_pool.allocated``. An
    ASSIGN_CAPACITY request for ``passport`` is then sent through
    ``_repair_action_for_observation``; the result must be one of the
    accepted replacement types and come with a non-None note.
    """
    # ASSIGN_CAPACITY itself is deliberately absent: the repair step is
    # expected to swap it for one of these.
    replacement_types = {
        ActionType.ADVANCE_TIME,
        ActionType.REQUEST_MISSING_DOCUMENTS,
        ActionType.REALLOCATE_OFFICERS,
        ActionType.ESCALATE_SERVICE,
    }

    session = LiveSimulationSession(
        task_id="district_backlog_easy",
        agent_mode="baseline_policy",
        max_steps=5,
        seed=42,
    )
    try:
        officer_pool = session.obs.officer_pool

        # Officers that are available but not yet assigned to any service.
        # NOTE(review): the intent is for the pool to report zero idle
        # officers afterwards -- confirm against the pool model.
        spare = officer_pool.available_officers - sum(
            officer_pool.allocated.values()
        )
        if spare > 0:
            # Park the whole surplus on the first allocated service.
            target_service = next(iter(officer_pool.allocated))
            officer_pool.allocated[target_service] += spare

        capacity_request = ActionModel(
            action_type=ActionType.ASSIGN_CAPACITY,
            capacity_assignment={"passport": 2},
        )
        repaired, repair_note = _repair_action_for_observation(
            capacity_request, session.obs
        )

        assert repaired.action_type in replacement_types
        assert repair_note is not None
    finally:
        # Always release the session, even when an assertion fails.
        session.close()


def test_llm_mode_enforces_recommended_min_steps_for_hard_task() -> None:
    """Check that a too-small ``max_steps`` is raised for the hard task.

    A session for ``cross_department_hard`` in ``llm_inference`` mode is
    created with a requested ``max_steps`` of 20; the session's effective
    ``max_steps`` must end up at 70 or more.
    """
    requested_steps = 20  # deliberately below the expected floor
    expected_floor = 70

    session = LiveSimulationSession(
        task_id="cross_department_hard",
        agent_mode="llm_inference",
        max_steps=requested_steps,
        seed=42,
    )
    try:
        assert session.max_steps >= expected_floor
    finally:
        # Always release the session, even when the assertion fails.
        session.close()


def test_llm_step_core_handles_none_action_without_crash() -> None:
    """Check that ``step_once`` copes with a policy that returns no action.

    The session's ``policy`` is replaced with a stub returning ``None`` as
    the action (plus a metadata dict). One step must still produce a dict
    row whose ``action_type`` is a valid ``ActionType`` value, and a boolean
    done flag.
    """

    def _malformed_policy(_obs):
        # Simulate a malformed llm policy output: no action, metadata only.
        metadata = {
            "decision_source": "llm",
            "provider": "test",
            "model_used": "bad",
        }
        return None, metadata

    session = LiveSimulationSession(
        task_id="district_backlog_easy",
        agent_mode="llm_inference",
        max_steps=10,
        seed=11,
    )
    try:
        session.policy = _malformed_policy
        step_row, _step_log, finished = session.step_once()

        assert isinstance(step_row, dict)
        known_action_values = {member.value for member in ActionType}
        assert step_row["action_type"] in known_action_values
        assert isinstance(finished, bool)
    finally:
        # Always release the session, even when an assertion fails.
        session.close()