r"""Validate dense reward function."""
from civicai.environment import CivicAIEnv
from civicai.models import Action
from civicai.models import SocietyState
from civicai.reward import compute_reward, get_named_scores

print("=== Dense Reward Validation ===")

# One shared environment serves every check in this script; it starts on the
# "stabilize_economy" task.
env = CivicAIEnv()
env.reset(task_id="stabilize_economy")

# Test 1: named scores present
# Step three times with a default-constructed Action so the reward is computed
# on a state that has already been advanced.
for _ in range(3):
    env.step(Action())
# `state` is reused by the penalty checks in Tests 2 and 3 — keep it bound.
state = env.state()
r = compute_reward(state, Action())
ns = get_named_scores(r)
expected_names = {"economic_score", "health_score", "satisfaction_score", "crime_score"}
actual_names = set(ns.keys())
# Exact match: a missing *or* an unexpected score name fails, and the message
# says which one, in line with the other assertions in this script.
assert actual_names == expected_names, (
    f"Named score keys mismatch: missing={sorted(expected_names - actual_names)}, "
    f"unexpected={sorted(actual_names - expected_names)}"
)
# Printed before the range check so the offending values are visible if the
# next assertion fails.
print("[OK] Named scores:", {k: round(v, 4) for k, v in ns.items()})
assert all(0.0 <= v <= 1.0 for v in ns.values()), "Named scores out of [0,1]"
print("[OK] All named scores in [0, 1]")

# Test 2: budget overcommit penalty
# The three budget shares below add up to 1.2; scoring that action against the
# state captured in Test 1 is expected to report a "budget_overcommit" penalty.
overcommitted = Action(
    healthcare_budget=0.5,
    education_budget=0.4,
    police_budget=0.3,
)
overcommit_penalties = compute_reward(state, overcommitted).penalties
assert "budget_overcommit" in overcommit_penalties, (
    f"Expected budget_overcommit, got {overcommit_penalties}"
)
print(f"[OK] budget_overcommit penalty: {overcommit_penalties['budget_overcommit']}")

# Test 3: extreme tax penalty
# An action with tax_rate=0.80, scored on the same captured state, must be
# reported under the "extreme_tax" penalty key.
high_tax = Action(tax_rate=0.80)
high_tax_penalties = compute_reward(state, high_tax).penalties
assert "extreme_tax" in high_tax_penalties, (
    f"Expected extreme_tax, got {high_tax_penalties}"
)
print(f"[OK] extreme_tax penalty: {high_tax_penalties['extreme_tax']}")

# Test 4: action-loop penalty — the same action is stepped 7 times and then
# scored once more against the resulting state.
# NOTE(review): reset() is called without a task_id here, unlike the other
# checks — presumably it falls back to the environment's default task; confirm.
env.reset()
repeated = Action(
    tax_rate=0.30,
    healthcare_budget=0.25,
    education_budget=0.15,
    police_budget=0.10,
)
for _ in range(7):
    env.step(repeated)
loop_penalties = compute_reward(env.state(), repeated).penalties
assert "action_loop" in loop_penalties, f"Expected action_loop, got {loop_penalties}"
print(f"[OK] action_loop penalty: {loop_penalties['action_loop']}")

# Test 5: reward in [0,1] for all tasks
# Every task gets a fresh reset and five default-action steps before scoring.
# `r` stays bound to the last task's reward after the loop; the rubric-key
# check that follows (Test 6) reads it.
for task_id in ("stabilize_economy", "manage_pandemic", "control_crisis"):
    env.reset(task_id=task_id)
    for _ in range(5):
        env.step(Action())
    r = compute_reward(env.state(), Action())
    # Overall score first, then each named component individually so a
    # failure names the offending component.
    assert 0.0 <= r.score <= 1.0, f"score={r.score} out of [0,1]"
    for name, value in get_named_scores(r).items():
        assert 0.0 <= value <= 1.0, f"{name}={value} out of [0,1]"
    print(f"[OK] {task_id}: score={r.score:.4f} all components valid")

# Test 6: rubric keys match required names
# `r` is the reward left bound by the last iteration of the Test 5 loop.
rubric_keys = set(r.rubrics.keys())
# Extra rubric keys are allowed; only the four required ones must be present.
missing_rubrics = {"economic", "health", "satisfaction", "crime"} - rubric_keys
assert not missing_rubrics, (
    f"Missing rubric keys {sorted(missing_rubrics)}, got {sorted(rubric_keys)}"
)
print(f"[OK] Rubric keys: {sorted(rubric_keys)}")

# Test 7: density check — varied states produce different reward scores
# Ten hand-built states move four inputs together in equal increments; any
# other SocietyState fields are left at their defaults. SocietyState is
# imported with the other civicai names at the top of the file.
# A set collapses equal scores, so its size is the number of distinct rewards.
scores = set()
for i in range(10):
    varied_state = SocietyState(
        inflation=0.03 + i * 0.02,          # 3% → 21% across samples
        employment_rate=0.70 + i * 0.02,    # 70% → 88%
        gdp=300.0 + i * 30.0,               # 300 → 570
        public_satisfaction=0.40 + i * 0.04,  # 0.40 → 0.76
    )
    scores.add(compute_reward(varied_state, Action()).score)
# More than half of the samples must score differently from one another.
assert len(scores) > 5, f"Reward not dense enough — only {len(scores)} distinct values"
print(f"[OK] Dense reward: {len(scores)} distinct values from 10 varied states (not binary)")

# Closing banner: a blank line, then the success message framed by two
# 50-character rules. Only reached when every assertion above has passed.
banner_rule = "=" * 50
print("", banner_rule, "  ALL REWARD CHECKS PASSED", banner_rule, sep="\n")