Spaces:
Running
Running
| from app.models import ActionModel, ActionType | |
| from app.simulator import LiveSimulationSession | |
| from app.engine import _repair_action_for_observation | |
def test_reallocate_payload_is_repaired_to_valid_shape() -> None:
    """Check that a payload-less REALLOCATE_OFFICERS action gets repaired.

    A REALLOCATE_OFFICERS action built with no payload is passed through
    ``_repair_action_for_observation`` together with the session's current
    observation. The repaired action must be one of the accepted action
    types, must carry a ``reallocation_delta`` when it is still a
    reallocation, and must come with a non-None repair note.
    """
    sim = LiveSimulationSession(
        task_id="district_backlog_easy",
        agent_mode="baseline_policy",
        max_steps=5,
        seed=42,
    )
    try:
        bare_action = ActionModel(action_type=ActionType.REALLOCATE_OFFICERS)
        repaired, repair_note = _repair_action_for_observation(
            bare_action, sim.obs
        )

        # The repair may keep REALLOCATE_OFFICERS (with a valid payload) or
        # fall back to another high-impact action.
        acceptable_types = {
            ActionType.REALLOCATE_OFFICERS,
            ActionType.ADVANCE_TIME,
            ActionType.REQUEST_MISSING_DOCUMENTS,
            ActionType.ASSIGN_CAPACITY,
            ActionType.ESCALATE_SERVICE,
        }
        assert repaired.action_type in acceptable_types

        if repaired.action_type == ActionType.REALLOCATE_OFFICERS:
            # v2 expresses the reallocation as a reallocation_delta dict.
            assert repaired.reallocation_delta is not None

        # NOTE(review): the scraped source lost its indentation; this check is
        # assumed to apply to every outcome (as in the sibling
        # assign-capacity test) -- confirm against the original file.
        assert repair_note is not None
    finally:
        sim.close()
def test_assign_capacity_switches_to_advance_time_if_no_reserve() -> None:
    """Check that ASSIGN_CAPACITY is replaced when no officers are spare.

    The observation's officer pool is adjusted in place so that the summed
    allocations equal ``available_officers``. An ASSIGN_CAPACITY action is
    then sent through ``_repair_action_for_observation``; the result must be
    one of the accepted substitute action types (ASSIGN_CAPACITY itself is
    not among them) and must come with a non-None repair note.
    """
    sim = LiveSimulationSession(
        task_id="district_backlog_easy",
        agent_mode="baseline_policy",
        max_steps=5,
        seed=42,
    )
    try:
        # Use up any unallocated officers so the pool has no idle reserve.
        officer_pool = sim.obs.officer_pool
        allocated_total = sum(officer_pool.allocated.values())
        spare = officer_pool.available_officers - allocated_total
        if spare > 0:
            # Put the whole surplus on the first service in the allocation map.
            service = next(iter(officer_pool.allocated))
            officer_pool.allocated[service] = officer_pool.allocated[service] + spare

        requested = ActionModel(
            action_type=ActionType.ASSIGN_CAPACITY,
            capacity_assignment={"passport": 2},
        )
        repaired, repair_note = _repair_action_for_observation(requested, sim.obs)

        substitutes = {
            ActionType.ADVANCE_TIME,
            ActionType.REQUEST_MISSING_DOCUMENTS,
            ActionType.REALLOCATE_OFFICERS,
            ActionType.ESCALATE_SERVICE,
        }
        assert repaired.action_type in substitutes
        assert repair_note is not None
    finally:
        sim.close()
def test_llm_mode_enforces_recommended_min_steps_for_hard_task() -> None:
    """Check that the hard task's step budget is at least 70 in LLM mode.

    The session is created for ``cross_department_hard`` in
    ``llm_inference`` mode with ``max_steps=20``; the test asserts that the
    session nevertheless reports a ``max_steps`` of 70 or more.
    """
    sim = LiveSimulationSession(
        task_id="cross_department_hard",
        agent_mode="llm_inference",
        max_steps=20,
        seed=42,
    )
    try:
        effective_budget = sim.max_steps
        assert effective_budget >= 70
    finally:
        sim.close()
def test_llm_step_core_handles_none_action_without_crash() -> None:
    """Check that one step succeeds when the policy yields ``None`` as action.

    The session's policy is swapped for a stub returning ``None`` plus LLM
    metadata. ``step_once`` must still return a dict row whose
    ``"action_type"`` is a valid ``ActionType`` value, and a boolean done
    flag.
    """
    sim = LiveSimulationSession(
        task_id="district_backlog_easy",
        agent_mode="llm_inference",
        max_steps=10,
        seed=11,
    )

    def _malformed_policy(_obs):
        # Simulate a malformed llm policy output: no action, metadata only.
        metadata = {"decision_source": "llm", "provider": "test", "model_used": "bad"}
        return (None, metadata)

    try:
        sim.policy = _malformed_policy
        step_row, _step_log, finished = sim.step_once()

        assert isinstance(step_row, dict)
        known_action_values = {member.value for member in ActionType}
        assert step_row["action_type"] in known_action_values
        assert isinstance(finished, bool)
    finally:
        sim.close()