# Spaces: Running
# File size: 3,320 Bytes
# df97e68
from app.models import ActionModel, ActionType
from app.simulator import LiveSimulationSession
from app.engine import _repair_action_for_observation
def test_reallocate_payload_is_repaired_to_valid_shape() -> None:
    """Repairing a payload-less REALLOCATE_OFFICERS action yields a usable action.

    A bare ``REALLOCATE_OFFICERS`` action (no payload fields set) is passed to
    ``_repair_action_for_observation`` together with the observation of a
    fresh ``district_backlog_easy`` session. The test accepts two outcomes:
    the action type is kept, in which case ``reallocation_delta`` must be
    populated, or it is swapped for one of the listed fallback action types.
    In both cases a repair note must be returned.
    """
    session = LiveSimulationSession(
        task_id="district_backlog_easy",
        agent_mode="baseline_policy",
        max_steps=5,
        seed=42,
    )
    try:
        bare_action = ActionModel(action_type=ActionType.REALLOCATE_OFFICERS)
        repaired, repair_note = _repair_action_for_observation(bare_action, session.obs)

        # Either the original action type survives (with a valid payload)
        # or the repair falls back to one of these alternatives.
        acceptable_types = {
            ActionType.REALLOCATE_OFFICERS,
            ActionType.ADVANCE_TIME,
            ActionType.REQUEST_MISSING_DOCUMENTS,
            ActionType.ASSIGN_CAPACITY,
            ActionType.ESCALATE_SERVICE,
        }
        assert repaired.action_type in acceptable_types

        kept_reallocation = repaired.action_type == ActionType.REALLOCATE_OFFICERS
        if kept_reallocation:
            # The v2 payload is carried in the reallocation_delta dict.
            assert repaired.reallocation_delta is not None

        # A repair always has to explain itself.
        assert repair_note is not None
    finally:
        session.close()
def test_assign_capacity_switches_to_advance_time_if_no_reserve() -> None:
    """ASSIGN_CAPACITY is replaced when the officer pool has nothing spare.

    The observation's officer pool is mutated so that the sum of
    ``allocated`` equals ``available_officers``; any shortfall is added to
    the first service key in ``allocated``. An ``ASSIGN_CAPACITY`` request for
    two ``passport`` officers is then repaired.

    Despite the test name, the assertion accepts any of four replacement
    action types (not only ``ADVANCE_TIME``) and requires a repair note.
    """
    session = LiveSimulationSession(
        task_id="district_backlog_easy",
        agent_mode="baseline_policy",
        max_steps=5,
        seed=42,
    )
    try:
        officer_pool = session.obs.officer_pool

        # Use up every unassigned officer so the pool has no reserve left
        # (presumably this drives the pool's idle count to zero -- the idle
        # computation itself is not visible from this test).
        already_allocated = sum(officer_pool.allocated.values())
        unassigned = officer_pool.available_officers - already_allocated
        if unassigned > 0:
            # Park the surplus on whichever service is listed first.
            target_service = next(iter(officer_pool.allocated))
            officer_pool.allocated[target_service] += unassigned

        capacity_request = ActionModel(
            action_type=ActionType.ASSIGN_CAPACITY,
            capacity_assignment={"passport": 2},
        )
        repaired, repair_note = _repair_action_for_observation(capacity_request, session.obs)

        replacement_types = {
            ActionType.ADVANCE_TIME,
            ActionType.REQUEST_MISSING_DOCUMENTS,
            ActionType.REALLOCATE_OFFICERS,
            ActionType.ESCALATE_SERVICE,
        }
        assert repaired.action_type in replacement_types
        assert repair_note is not None
    finally:
        session.close()
def test_llm_mode_enforces_recommended_min_steps_for_hard_task() -> None:
    """An LLM session on the hard task ends up with at least 70 steps.

    The session is created with ``max_steps=20`` for ``cross_department_hard``
    in ``llm_inference`` mode; the resulting ``session.max_steps`` must be
    70 or more.
    """
    session = LiveSimulationSession(
        task_id="cross_department_hard",
        agent_mode="llm_inference",
        max_steps=20,
        seed=42,
    )
    try:
        # The requested budget (20) is below the floor this test expects.
        effective_max_steps = session.max_steps
        assert effective_max_steps >= 70
    finally:
        session.close()
def test_llm_step_core_handles_none_action_without_crash() -> None:
    """A policy that returns ``None`` as its action must not break a step.

    The session's ``policy`` is replaced with a stub that returns ``None``
    plus LLM-style metadata. One call to ``step_once`` must still return a
    dict row whose ``action_type`` is a valid ``ActionType`` value, along
    with a boolean ``done`` flag.
    """
    session = LiveSimulationSession(
        task_id="district_backlog_easy",
        agent_mode="llm_inference",
        max_steps=10,
        seed=11,
    )
    try:

        def malformed_policy(_obs):
            # Stand-in for an LLM policy whose output could not be parsed
            # into an action: no action, only provenance metadata.
            metadata = {"decision_source": "llm", "provider": "test", "model_used": "bad"}
            return None, metadata

        session.policy = malformed_policy

        row, _log, done = session.step_once()

        valid_action_values = {member.value for member in ActionType}
        assert isinstance(row, dict)
        assert row["action_type"] in valid_action_values
        assert isinstance(done, bool)
    finally:
        session.close()
|