# CivicAI / validate_reward.py
# mahammadaftab's picture
# Final updated
# 6298125
r"""Validate dense reward function."""
from civicai.environment import CivicAIEnv
from civicai.models import Action, SocietyState
from civicai.reward import compute_reward, get_named_scores

# Names that get_named_scores() is required to return (checked in Test 1).
EXPECTED_SCORE_NAMES = {
    "economic_score",
    "health_score",
    "satisfaction_score",
    "crime_score",
}


def _require(condition: object, message: str = "") -> None:
    """Raise ``AssertionError`` unless *condition* is truthy.

    Used instead of a bare ``assert`` statement: ``python -O`` strips
    ``assert``, which would let this script print its success banner without
    having checked anything.

    Args:
        condition: Value that must be truthy for the check to pass.
        message: Optional failure text attached to the raised error.

    Raises:
        AssertionError: If *condition* is falsy.
    """
    if condition:
        return
    if message:
        raise AssertionError(message)
    raise AssertionError()


print("=== Dense Reward Validation ===")
env = CivicAIEnv()
env.reset(task_id="stabilize_economy")

# Test 1: named scores present
for _ in range(3):
    env.step(Action())
state = env.state()
r = compute_reward(state, Action())
ns = get_named_scores(r)
_require(set(ns.keys()) == EXPECTED_SCORE_NAMES)
print("[OK] Named scores:", {k: round(v, 4) for k, v in ns.items()})
_require(all(0.0 <= v <= 1.0 for v in ns.values()), "Named scores out of [0,1]")
print("[OK] All named scores in [0, 1]")

# Test 2: budget overcommit penalty
bad = Action(healthcare_budget=0.5, education_budget=0.4, police_budget=0.3)
r2 = compute_reward(state, bad)
_require(
    "budget_overcommit" in r2.penalties,
    f"Expected budget_overcommit, got {r2.penalties}",
)
print(f"[OK] budget_overcommit penalty: {r2.penalties['budget_overcommit']}")

# Test 3: extreme tax penalty
tax_action = Action(tax_rate=0.80)
r3 = compute_reward(state, tax_action)
_require("extreme_tax" in r3.penalties, f"Expected extreme_tax, got {r3.penalties}")
print(f"[OK] extreme_tax penalty: {r3.penalties['extreme_tax']}")

# Test 4: loop penalty after 6 identical actions
env.reset()
loop_action = Action(
    tax_rate=0.30,
    healthcare_budget=0.25,
    education_budget=0.15,
    police_budget=0.10,
)
for _ in range(7):
    env.step(loop_action)
r4 = compute_reward(env.state(), loop_action)
_require("action_loop" in r4.penalties, f"Expected action_loop, got {r4.penalties}")
print(f"[OK] action_loop penalty: {r4.penalties['action_loop']}")

# Test 5: reward in [0,1] for all tasks
for task in ["stabilize_economy", "manage_pandemic", "control_crisis"]:
    env.reset(task_id=task)
    for _ in range(5):
        env.step(Action())
    r = compute_reward(env.state(), Action())
    _require(0.0 <= r.score <= 1.0, f"score={r.score} out of [0,1]")
    ns = get_named_scores(r)
    for k, v in ns.items():
        _require(0.0 <= v <= 1.0, f"{k}={v} out of [0,1]")
    print(f"[OK] {task}: score={r.score:.4f} all components valid")

# Test 6: rubric keys match required names
# NOTE: `r` is the reward left over from the last Test 5 iteration
# ("control_crisis"); this check deliberately reuses it.
rubric_keys = set(r.rubrics.keys())
_require("economic" in rubric_keys and "health" in rubric_keys)
_require("satisfaction" in rubric_keys and "crime" in rubric_keys)
print(f"[OK] Rubric keys: {sorted(rubric_keys)}")

# Test 7: density check — varied states produce different reward scores
scores = set()
for i in range(10):
    varied_state = SocietyState(
        inflation=0.03 + i * 0.02,  # 3% → 21% across samples
        employment_rate=0.70 + i * 0.02,  # 70% → 88%
        gdp=300.0 + i * 30.0,
        public_satisfaction=0.40 + i * 0.04,
    )
    scores.add(compute_reward(varied_state, Action()).score)
_require(
    len(scores) > 5,
    f"Reward not dense enough — only {len(scores)} distinct values",
)
print(f"[OK] Dense reward: {len(scores)} distinct values from 10 varied states (not binary)")

print()
print("=" * 50)
print(" ALL REWARD CHECKS PASSED")
print("=" * 50)