Spaces:

Otter21
/

Gov_Workflow_RL

Running

Gov_Workflow_RL / tests / test_simulator_guardrails.py

Siddharaj Shirke

deploy: clean code-only snapshot for HF Space

df97e68 10 days ago

3.32 kB

	from app.models import ActionModel, ActionType
	from app.simulator import LiveSimulationSession
	from app.engine import _repair_action_for_observation


	def test_reallocate_payload_is_repaired_to_valid_shape() -> None:
	    """Check the repair of a REALLOCATE_OFFICERS action that has no payload.

	    A bare ``REALLOCATE_OFFICERS`` action is passed through
	    ``_repair_action_for_observation`` together with the session's current
	    observation. The repaired action must have one of the accepted action
	    types, must carry a non-None ``reallocation_delta`` if it is still a
	    reallocation, and must be accompanied by a non-None note.
	    """
	    sim = LiveSimulationSession(
	        task_id="district_backlog_easy",
	        agent_mode="baseline_policy",
	        max_steps=5,
	        seed=42,
	    )
	    try:
	        bare_action = ActionModel(action_type=ActionType.REALLOCATE_OFFICERS)
	        repaired, repair_note = _repair_action_for_observation(bare_action, sim.obs)

	        # Either the reallocation survives (with a usable payload) or the
	        # repair substitutes one of these alternative action types.
	        accepted_types = {
	            ActionType.REALLOCATE_OFFICERS,
	            ActionType.ADVANCE_TIME,
	            ActionType.REQUEST_MISSING_DOCUMENTS,
	            ActionType.ASSIGN_CAPACITY,
	            ActionType.ESCALATE_SERVICE,
	        }
	        assert repaired.action_type in accepted_types

	        still_reallocation = repaired.action_type == ActionType.REALLOCATE_OFFICERS
	        if still_reallocation:
	            # The v2 action shape carries the reallocation in reallocation_delta.
	            assert repaired.reallocation_delta is not None

	        assert repair_note is not None
	    finally:
	        # Always release the session, even when an assertion fails.
	        sim.close()


	def test_assign_capacity_switches_to_advance_time_if_no_reserve() -> None:
	    """Check that ASSIGN_CAPACITY is replaced when no officers are unallocated.

	    The officer pool on the session's observation is adjusted so that the
	    sum of ``allocated`` equals ``available_officers``. An ``ASSIGN_CAPACITY``
	    action asking for 2 officers on ``"passport"`` is then repaired. The
	    result must be one of ADVANCE_TIME, REQUEST_MISSING_DOCUMENTS,
	    REALLOCATE_OFFICERS or ESCALATE_SERVICE, and a non-None note must be
	    returned.
	    """
	    sim = LiveSimulationSession(
	        task_id="district_backlog_easy",
	        agent_mode="baseline_policy",
	        max_steps=5,
	        seed=42,
	    )
	    try:
	        officer_pool = sim.obs.officer_pool

	        # Use up any spare officers so nothing is left to assign.
	        # NOTE(review): presumably this makes the pool's idle-officer count
	        # report 0 -- confirm against the pool model.
	        already_allocated = sum(officer_pool.allocated.values())
	        spare = officer_pool.available_officers - already_allocated
	        if spare > 0:
	            # Put the whole surplus on the first service in the mapping.
	            service = next(iter(officer_pool.allocated))
	            officer_pool.allocated[service] += spare

	        request = ActionModel(
	            action_type=ActionType.ASSIGN_CAPACITY,
	            capacity_assignment={"passport": 2},
	        )
	        repaired, repair_note = _repair_action_for_observation(request, sim.obs)

	        # ASSIGN_CAPACITY itself is deliberately absent from this set.
	        fallback_types = {
	            ActionType.ADVANCE_TIME,
	            ActionType.REQUEST_MISSING_DOCUMENTS,
	            ActionType.REALLOCATE_OFFICERS,
	            ActionType.ESCALATE_SERVICE,
	        }
	        assert repaired.action_type in fallback_types
	        assert repair_note is not None
	    finally:
	        # Always release the session, even when an assertion fails.
	        sim.close()


	def test_llm_mode_enforces_recommended_min_steps_for_hard_task() -> None:
	    """Check that the hard task in LLM mode gets a raised step budget.

	    The session is requested with ``max_steps=20`` on
	    ``"cross_department_hard"`` in ``"llm_inference"`` mode. The resulting
	    ``session.max_steps`` must be at least 70.
	    """
	    requested_steps = 20
	    minimum_expected_steps = 70

	    sim = LiveSimulationSession(
	        task_id="cross_department_hard",
	        agent_mode="llm_inference",
	        max_steps=requested_steps,
	        seed=42,
	    )
	    try:
	        assert sim.max_steps >= minimum_expected_steps
	    finally:
	        # Always release the session, even when the assertion fails.
	        sim.close()


	def test_llm_step_core_handles_none_action_without_crash() -> None:
	    """Check that one step succeeds when the policy returns no action.

	    ``session.policy`` is replaced with a callable that returns ``None`` as
	    the action, plus a metadata dict. ``step_once`` must still return a dict
	    row whose ``"action_type"`` is a valid ``ActionType`` value, and a
	    boolean done flag.
	    """

	    def _malformed_policy(_obs):
	        # Stand-in for a malformed LLM policy output: no action, only metadata.
	        metadata = {"decision_source": "llm", "provider": "test", "model_used": "bad"}
	        return (None, metadata)

	    sim = LiveSimulationSession(
	        task_id="district_backlog_easy",
	        agent_mode="llm_inference",
	        max_steps=10,
	        seed=11,
	    )
	    try:
	        # NOTE(review): assumes step_once() consults session.policy -- confirm.
	        sim.policy = _malformed_policy
	        step_row, _step_log, finished = sim.step_once()

	        assert isinstance(step_row, dict)
	        known_action_values = {member.value for member in ActionType}
	        assert step_row["action_type"] in known_action_values
	        assert isinstance(finished, bool)
	    finally:
	        # Always release the session, even when an assertion fails.
	        sim.close()