"""
Comprehensive tests for SmartPayEnv v2 graders, data generation, and environment.
Run from the repo root:  python test_graders.py
"""
import sys, math
sys.path.insert(0, ".")
sys.path.insert(0, "./server")

import numpy as np
from server.graders import (
    RoutingEfficacyGrader,
    FraudDetectionGrader,
    UserRetentionGrader,
    process_combined_reward,
)
from server.SmartPayEnv_environment import SmartpayenvEnvironment, DIFFICULTY_CONFIG
from models import SmartpayenvAction

SEP = "=" * 60

# ── 1. RoutingEfficacyGrader (deterministic expected_outcome) ────────
# Scores four routing scenarios against the same gateway-rate table, checks
# that every score lies in [0, 1], that the scenarios rank in the expected
# order, and that repeating a call with identical inputs repeats the score.
print(f"\n{SEP}\n[1] RoutingEfficacyGrader — deterministic expected_outcome\n{SEP}")
rg = RoutingEfficacyGrader()

gw_rates = [0.70, 0.85, 0.95]   # highest rate sits at index 2, lowest at index 0

# Highest-rate gateway, high expected outcome.
s_opt = rg.evaluate(
    expected_outcome=0.90, cost=0.5, retries=0,
    chosen_gateway=2, gateway_rates=gw_rates,
)
# Lowest-rate gateway; expected outcome is held equal so that only the
# gateway choice differs from the case above.
s_sub = rg.evaluate(
    expected_outcome=0.90, cost=0.5, retries=0,
    chosen_gateway=0, gateway_rates=gw_rates,
)
# Highest-rate gateway, low expected outcome.
s_low = rg.evaluate(
    expected_outcome=0.20, cost=0.5, retries=0,
    chosen_gateway=2, gateway_rates=gw_rates,
)
# Everything bad at once: lowest-rate gateway, low outcome, retries, high cost.
s_bad = rg.evaluate(
    expected_outcome=0.10, cost=4.0, retries=2,
    chosen_gateway=0, gateway_rates=gw_rates,
)

for report_line in (
    f"  optimal gw + high outcome  → {s_opt:.4f}",
    f"  suboptimal gw + same cost  → {s_sub:.4f}  (lower: worse gateway choice)",
    f"  optimal gw + low outcome   → {s_low:.4f}  (mid)",
    f"  worst case                 → {s_bad:.4f}  (expect lowest)",
):
    print(report_line)

# Range check first, then the pairwise ordering.
for s in (s_opt, s_sub, s_low, s_bad):
    assert 0.0 <= s <= 1.0, f"Out of [0,1]: {s}"
assert s_opt > s_sub, "Optimal gateway should outscore suboptimal"
assert s_opt > s_low, "High expected outcome should outscore low"
assert s_low > s_bad, "Any reasonable choice beats the worst case"

# DETERMINISM check: two calls with the same arguments must agree exactly.
first_score = rg.evaluate(0.7, 1.5, 0, 1, gw_rates)
repeat_score = rg.evaluate(0.7, 1.5, 0, 1, gw_rates)
assert first_score == repeat_score, "Not deterministic!"
print("  ✅ RoutingEfficacyGrader deterministic OK")

# ── 2. FraudDetectionGrader ──────────────────────────────────
# Feeds three fixed step sequences into fresh graders and checks the final
# score of each: 1.0 when both flags always agree across a mixed sequence,
# 0.0 when they always disagree, and 0.5 when every step is (True, True).
print(f"\n{SEP}\n[2] FraudDetectionGrader\n{SEP}")

# Each case: (label used in the failure message,
#             [(repeat count, first add_step arg, second add_step arg), ...],
#             expected score)
fraud_cases = [
    ("Perfect", [(70, False, False), (30, True, True)], 1.0),
    ("Worst", [(70, True, False), (30, False, True)], 0.0),
    ("Neutral", [(100, True, True)], 0.5),
]

for case_label, batches, expected_score in fraud_cases:
    grader = FraudDetectionGrader()
    for repeat, first_flag, second_flag in batches:
        for _ in range(repeat):
            grader.add_step(first_flag, second_flag)
    assert abs(grader.evaluate() - expected_score) < 1e-9, f"{case_label}: {grader.evaluate()}"

print(f"  perfect=1.0 worst=0.0 neutral=0.5  ✅")

# ── 3. UserRetentionGrader ───────────────────────────────────
# Checks that the score is 1.0 before any step, stays 1.0 after a step that
# reports 0, and falls below 1.0 after a step that reports 3.
print(f"\n{SEP}\n[3] UserRetentionGrader\n{SEP}")
urg = UserRetentionGrader(churn_rate=0.1, initial_users=100)

# Fresh grader: nothing recorded yet.
assert abs(urg.evaluate() - 1.0) < 1e-9

# A step with a count of zero must not move the score.
urg.add_step(0)
assert abs(urg.evaluate() - 1.0) < 1e-9

# A step with a non-zero count must reduce the score.
urg.add_step(3)
assert urg.evaluate() < 1.0

print(f"  initial=1.0, no-failure=1.0, 3-failures={urg.evaluate():.4f}  ✅")

# ── 4. process_combined_reward ────────────────────────────────
# Compares two opposite argument sets: both rewards must lie in [0, 1] and
# the first must be strictly greater than the second.
print(f"\n{SEP}\n[4] process_combined_reward\n{SEP}")
r_best = process_combined_reward(1.0, True, False, 0)
r_worst = process_combined_reward(0.0, False, True, 5)

for bounded_reward in (r_best, r_worst):
    assert 0.0 <= bounded_reward <= 1.0
assert r_best > r_worst

print(f"  best={r_best:.4f}  worst={r_worst:.4f}  ✅")

# ── 5. Multi-factor fraud risk ────────────────────────────────
# Samples transactions directly from the environment's generator in easy
# (difficulty=0) and hard (difficulty=2) mode, validates the fields of each
# easy-mode observation, and checks that the mean fraud risk is higher in
# hard mode.
print(f"\n{SEP}\n[5] Multi-factor fraud risk via environment\n{SEP}")
# NOTE(review): no seed is passed to the environment here, so the mean
# comparison below is presumably statistical rather than exact — confirm
# whether SmartpayenvEnvironment exposes a seed if this check ever flakes.
env = SmartpayenvEnvironment()

# Number of transactions drawn per difficulty level.
samples_per_mode = 50

# Easy mode: collect fraud-risk scores and validate every observation field.
env.reset(difficulty=0)
risks_easy = []
for _ in range(samples_per_mode):
    obs = env._generate_transaction()
    risks_easy.append(obs.fraud_risk_score)
    assert 0.0 <= obs.fraud_risk_score <= 1.0
    assert obs.merchant_category in range(6)
    assert obs.device_type in (0, 1, 2)
    assert isinstance(obs.is_international, bool)
    assert isinstance(obs.card_present, bool)

# Hard mode: only the fraud-risk scores are needed for the comparison.
env.reset(difficulty=2)
risks_hard = []
for _ in range(samples_per_mode):
    obs = env._generate_transaction()
    risks_hard.append(obs.fraud_risk_score)

mean_easy = sum(risks_easy) / len(risks_easy)
mean_hard  = sum(risks_hard) / len(risks_hard)
print(f"  avg fraud_risk easy={mean_easy:.3f}  hard={mean_hard:.3f}")
assert mean_hard > mean_easy, "Hard mode should have higher avg fraud risk"
print("  ✅ Multi-factor fraud + difficulty scaling OK")

# ── 6. Gateway state machine ──────────────────────────────────
# Advances every gateway 80 times in hard mode, records each state that
# appears, and checks the current rate stays in [0, 1] after every step.
print(f"\n{SEP}\n[6] Gateway state machine\n{SEP}")
# Hard mode is used so that non-default states show up within few ticks
# (presumably via a higher degrade probability — see DIFFICULTY_CONFIG).
env.reset(difficulty=2)
states_seen = set()
for _tick in range(80):
    for gateway in env._gateways:
        gateway.step()
        states_seen.add(gateway.state)
        assert 0.0 <= gateway.current_rate <= 1.0

print(f"  States observed: {states_seen}")
# At least one of the two non-healthy states must have been visited.
assert not states_seen.isdisjoint(("degraded", "recovering")), \
    "Hard mode should see degraded/recovering states"
print("  ✅ Gateway state machine OK")

# ── 7. Transaction velocity tracking ─────────────────────────
# Generates 20 easy-mode transactions and checks that each reported
# transaction velocity lies in [0, 1]; prints the observed range.
print(f"\n{SEP}\n[7] Transaction velocity tracking\n{SEP}")
env.reset(difficulty=0)
velocities = []
for _ in range(20):
    velocity = env._generate_transaction().transaction_velocity
    velocities.append(velocity)
    assert 0.0 <= velocity <= 1.0

lowest_velocity = min(velocities)
highest_velocity = max(velocities)
print(f"  velocity range: [{lowest_velocity:.2f}, {highest_velocity:.2f}]  ✅")

# ── 8. Episode smoke test — all 3 difficulty tiers ───────────
# Runs up to 15 steps per difficulty with a simple fixed policy, checking
# that the reward and the three task scores stay in [0, 1] at every step and
# that at least one reward in each episode is positive.
print(f"\n{SEP}\n[8] Full episode smoke test (15 steps × 3 difficulties)\n{SEP}")
for diff in (0, 1, 2):
    obs = env.reset(difficulty=diff)
    assert obs.difficulty == diff
    rewards = []
    for _ in range(15):
        # Policy: route to the gateway with the highest reported success
        # rate, and set fraud_decision=1 only above a 0.65 risk score.
        best_gateway = int(np.argmax(obs.gateway_success_rates))
        fraud_decision = 1 if obs.fraud_risk_score > 0.65 else 0
        action = SmartpayenvAction(
            gateway=best_gateway,
            retry_strategy=1,
            fraud_decision=fraud_decision,
        )
        obs = env.step(action)

        assert 0.0 <= obs.reward <= 1.0, f"reward out of [0,1]: {obs.reward}"
        for task_score in (
            obs.task_routing_score,
            obs.task_fraud_mcc_score,
            obs.task_retention_score,
        ):
            assert 0.0 <= task_score <= 1.0

        rewards.append(obs.reward)
        if obs.done:
            break

    step_count = len(rewards)
    avg = sum(rewards) / step_count
    print(f"  difficulty={diff}: {step_count} steps, avg_reward={avg:.4f}")
    assert any(r > 0 for r in rewards), "All rewards are still 0!"

print(f"\n  ✅ All difficulty tiers produce non-zero rewards")

# ── 9. Block → done=True immediately ─────────────────────────
# A single step with fraud_decision=1 right after reset must return an
# observation whose `done` flag is exactly True.
print(f"\n{SEP}\n[9] fraud_decision=1 ends episode immediately\n{SEP}")
env.reset(difficulty=0)
block_action = SmartpayenvAction(gateway=0, retry_strategy=0, fraud_decision=1)
obs = env.step(block_action)
assert obs.done is True, f"Expected done=True after block, got {obs.done}"
print(f"  Block step done={obs.done}  ✅")

# Closing banner: reached only if every assertion above passed.
for banner_line in (f"\n{SEP}", "  ALL TESTS PASSED ✅", f"{SEP}\n"):
    print(banner_line)