# NOTE(review): the three lines here ("Spaces:" / "Sleeping" / "Sleeping") are
# HuggingFace-Spaces page-scrape residue, not part of the script — confirm and drop.
r"""Validate dense reward function.

Smoke-test script for the CivicAI reward pipeline: exercises named component
scores, penalty triggers (budget overcommit, extreme tax, action loops),
per-task score bounds, rubric keys, and reward density across varied states.
Each check prints an [OK] line; any failure raises AssertionError.
"""
from civicai.environment import CivicAIEnv
from civicai.models import Action, SocietyState
from civicai.reward import compute_reward, get_named_scores

print("=== Dense Reward Validation ===")
env = CivicAIEnv()
env.reset(task_id="stabilize_economy")

# Test 1: named scores present and bounded.
for _ in range(3):
    env.step(Action())
state = env.state()
r = compute_reward(state, Action())
ns = get_named_scores(r)
assert set(ns.keys()) == {"economic_score", "health_score", "satisfaction_score", "crime_score"}
print("[OK] Named scores:", {k: round(v, 4) for k, v in ns.items()})
assert all(0.0 <= v <= 1.0 for v in ns.values()), "Named scores out of [0,1]"
print("[OK] All named scores in [0, 1]")

# Test 2: budget overcommit penalty (0.5 + 0.4 + 0.3 = 1.2 exceeds the budget).
bad = Action(healthcare_budget=0.5, education_budget=0.4, police_budget=0.3)
r2 = compute_reward(state, bad)
assert "budget_overcommit" in r2.penalties, f"Expected budget_overcommit, got {r2.penalties}"
print(f"[OK] budget_overcommit penalty: {r2.penalties['budget_overcommit']}")

# Test 3: extreme tax penalty.
tax_action = Action(tax_rate=0.80)
r3 = compute_reward(state, tax_action)
assert "extreme_tax" in r3.penalties, f"Expected extreme_tax, got {r3.penalties}"
print(f"[OK] extreme_tax penalty: {r3.penalties['extreme_tax']}")

# Test 4: loop penalty after 6 identical actions (7 steps to be safely past it).
env.reset()
loop_action = Action(tax_rate=0.30, healthcare_budget=0.25, education_budget=0.15, police_budget=0.10)
for _ in range(7):
    env.step(loop_action)
r4 = compute_reward(env.state(), loop_action)
assert "action_loop" in r4.penalties, f"Expected action_loop, got {r4.penalties}"
print(f"[OK] action_loop penalty: {r4.penalties['action_loop']}")

# Test 5: reward and all named components stay in [0,1] for every task.
for task in ["stabilize_economy", "manage_pandemic", "control_crisis"]:
    env.reset(task_id=task)
    for _ in range(5):
        env.step(Action())
    r = compute_reward(env.state(), Action())
    assert 0.0 <= r.score <= 1.0, f"score={r.score} out of [0,1]"
    ns = get_named_scores(r)
    for k, v in ns.items():
        assert 0.0 <= v <= 1.0, f"{k}={v} out of [0,1]"
    print(f"[OK] {task}: score={r.score:.4f} all components valid")

# Test 6: rubric keys match required names.
# NOTE: inspects `r` from the last task iterated above ("control_crisis").
rubric_keys = set(r.rubrics.keys())
assert "economic" in rubric_keys and "health" in rubric_keys
assert "satisfaction" in rubric_keys and "crime" in rubric_keys
print(f"[OK] Rubric keys: {sorted(rubric_keys)}")

# Test 7: density check — varied states produce different reward scores.
scores = set()
for i in range(10):
    varied_state = SocietyState(
        inflation=0.03 + i * 0.02,        # 3% → 21% across samples
        employment_rate=0.70 + i * 0.02,  # 70% → 88%
        gdp=300.0 + i * 30.0,
        public_satisfaction=0.40 + i * 0.04,
    )
    scores.add(compute_reward(varied_state, Action()).score)
assert len(scores) > 5, f"Reward not dense enough — only {len(scores)} distinct values"
print(f"[OK] Dense reward: {len(scores)} distinct values from 10 varied states (not binary)")

print()
print("=" * 50)
print(" ALL REWARD CHECKS PASSED")
print("=" * 50)