"""Validate the dense reward function of the CivicAI environment.

The script runs seven sequential checks against ``compute_reward`` and
``get_named_scores`` and prints one ``[OK]`` line per passed check.  The
first failed check raises ``AssertionError`` and aborts the run.

Checks are performed with ``_check`` rather than bare ``assert`` statements
so that they are still executed when Python runs with ``-O`` (which strips
``assert``); otherwise the script could report success without validating
anything.
"""

from civicai.environment import CivicAIEnv
from civicai.models import Action, SocietyState
from civicai.reward import compute_reward, get_named_scores


def _check(condition: bool, message: str = "") -> None:
    """Raise ``AssertionError(message)`` unless ``condition`` is true."""
    if not condition:
        raise AssertionError(message)


print("=== Dense Reward Validation ===")

env = CivicAIEnv()
env.reset(task_id="stabilize_economy")

# Test 1: named scores present
for _ in range(3):
    env.step(Action())
state = env.state()
r = compute_reward(state, Action())
ns = get_named_scores(r)
_check(
    set(ns.keys())
    == {"economic_score", "health_score", "satisfaction_score", "crime_score"},
    f"Unexpected named score keys: {sorted(ns)}",
)
print("[OK] Named scores:", {k: round(v, 4) for k, v in ns.items()})
_check(all(0.0 <= v <= 1.0 for v in ns.values()), "Named scores out of [0,1]")
print("[OK] All named scores in [0, 1]")

# Test 2: budget overcommit penalty
# The three budget shares below sum to 1.2.
bad = Action(healthcare_budget=0.5, education_budget=0.4, police_budget=0.3)
r2 = compute_reward(state, bad)
_check(
    "budget_overcommit" in r2.penalties,
    f"Expected budget_overcommit, got {r2.penalties}",
)
print(f"[OK] budget_overcommit penalty: {r2.penalties['budget_overcommit']}")

# Test 3: extreme tax penalty
tax_action = Action(tax_rate=0.80)
r3 = compute_reward(state, tax_action)
_check("extreme_tax" in r3.penalties, f"Expected extreme_tax, got {r3.penalties}")
print(f"[OK] extreme_tax penalty: {r3.penalties['extreme_tax']}")

# Test 4: loop penalty for repeated identical actions.
# The same action is stepped 7 times and then scored once more.
# NOTE(review): the original comment said the penalty applies "after 6
# identical actions" -- confirm the threshold against civicai.reward.
env.reset()
loop_action = Action(
    tax_rate=0.30,
    healthcare_budget=0.25,
    education_budget=0.15,
    police_budget=0.10,
)
for _ in range(7):
    env.step(loop_action)
r4 = compute_reward(env.state(), loop_action)
_check("action_loop" in r4.penalties, f"Expected action_loop, got {r4.penalties}")
print(f"[OK] action_loop penalty: {r4.penalties['action_loop']}")

# Test 5: reward in [0,1] for all tasks
for task in ["stabilize_economy", "manage_pandemic", "control_crisis"]:
    env.reset(task_id=task)
    for _ in range(5):
        env.step(Action())
    r = compute_reward(env.state(), Action())
    _check(0.0 <= r.score <= 1.0, f"score={r.score} out of [0,1]")
    ns = get_named_scores(r)
    for k, v in ns.items():
        _check(0.0 <= v <= 1.0, f"{k}={v} out of [0,1]")
    print(f"[OK] {task}: score={r.score:.4f} all components valid")

# Test 6: rubric keys match required names
# ``r`` is deliberately the reward computed for the last task of Test 5.
rubric_keys = set(r.rubrics.keys())
missing_rubrics = {"economic", "health", "satisfaction", "crime"} - rubric_keys
_check(
    not missing_rubrics,
    f"Missing rubric keys {sorted(missing_rubrics)}, got {sorted(rubric_keys)}",
)
print(f"[OK] Rubric keys: {sorted(rubric_keys)}")

# Test 7: density check — varied states produce different reward scores
scores = set()
for i in range(10):
    varied_state = SocietyState(
        inflation=0.03 + i * 0.02,  # 3% → 21% across samples
        employment_rate=0.70 + i * 0.02,  # 70% → 88%
        gdp=300.0 + i * 30.0,
        public_satisfaction=0.40 + i * 0.04,
    )
    scores.add(compute_reward(varied_state, Action()).score)
_check(
    len(scores) > 5,
    f"Reward not dense enough — only {len(scores)} distinct values",
)
print(f"[OK] Dense reward: {len(scores)} distinct values from 10 varied states (not binary)")

print()
print("=" * 50)
print(" ALL REWARD CHECKS PASSED")
print("=" * 50)