# SmartPayEnv / tests / test_graders.py
# (Hugging Face page-header residue converted to comments; original upload
# by Pratap-K, commit 39c0d5b — the raw lines were not valid Python.)
"""
Comprehensive tests for SmartPayEnv v2 graders, data generation, and environment.
Run from the repo root: python test_graders.py
"""
import sys, math
sys.path.insert(0, ".")
sys.path.insert(0, "./server")
import numpy as np
from server.graders import (
RoutingEfficacyGrader,
FraudDetectionGrader,
UserRetentionGrader,
process_combined_reward,
)
from server.SmartPayEnv_environment import SmartpayenvEnvironment, DIFFICULTY_CONFIG
from models import SmartpayenvAction
SEP = "=" * 60
# ── 1. RoutingEfficacyGrader (deterministic expected_outcome) ────────
print(f"\n{SEP}\n[1] RoutingEfficacyGrader — deterministic expected_outcome\n{SEP}")
rg = RoutingEfficacyGrader()
gw_rates = [0.70, 0.85, 0.95] # GatewayC is best (index 2)
# Score four scenarios with the same grader, ordered best → worst.
score_optimal = rg.evaluate(expected_outcome=0.90, cost=0.5, retries=0, chosen_gateway=2, gateway_rates=gw_rates)
# Worst gateway chosen (index 0) but the same expected outcome, for fairness.
score_bad_gateway = rg.evaluate(expected_outcome=0.90, cost=0.5, retries=0, chosen_gateway=0, gateway_rates=gw_rates)
# Best gateway chosen but a poor expected outcome.
score_low_outcome = rg.evaluate(expected_outcome=0.20, cost=0.5, retries=0, chosen_gateway=2, gateway_rates=gw_rates)
# Everything wrong at once: bad gateway, poor outcome, retries, high cost.
score_worst = rg.evaluate(expected_outcome=0.10, cost=4.0, retries=2, chosen_gateway=0, gateway_rates=gw_rates)
print(f" optimal gw + high outcome → {score_optimal:.4f}")
print(f" suboptimal gw + same cost → {score_bad_gateway:.4f} (lower: worse gateway choice)")
print(f" optimal gw + low outcome → {score_low_outcome:.4f} (mid)")
print(f" worst case → {score_worst:.4f} (expect lowest)")
# Every score must land inside the unit interval.
for score in (score_optimal, score_bad_gateway, score_low_outcome, score_worst):
    assert 0.0 <= score <= 1.0, f"Out of [0,1]: {score}"
assert score_optimal > score_bad_gateway, "Optimal gateway should outscore suboptimal"
assert score_optimal > score_low_outcome, "High expected outcome should outscore low"
assert score_low_outcome > score_worst, "Any reasonable choice beats the worst case"
# DETERMINISM check: same inputs must always give same score
first_run = rg.evaluate(0.7, 1.5, 0, 1, gw_rates)
second_run = rg.evaluate(0.7, 1.5, 0, 1, gw_rates)
assert first_run == second_run, "Not deterministic!"
print(" ✅ RoutingEfficacyGrader deterministic OK")
# ── 2. FraudDetectionGrader ──────────────────────────────────
print(f"\n{SEP}\n[2] FraudDetectionGrader\n{SEP}")
# Perfect classifier: 70 true negatives followed by 30 true positives.
perfect = FraudDetectionGrader()
for _ in range(70):
    perfect.add_step(False, False)
for _ in range(30):
    perfect.add_step(True, True)
assert abs(perfect.evaluate() - 1.0) < 1e-9, f"Perfect: {perfect.evaluate()}"
# Worst classifier: every call is the opposite of the truth.
inverted = FraudDetectionGrader()
for _ in range(70):
    inverted.add_step(True, False)
for _ in range(30):
    inverted.add_step(False, True)
assert abs(inverted.evaluate() - 0.0) < 1e-9, f"Worst: {inverted.evaluate()}"
# 100 identical (True, True) steps should land on a neutral 0.5.
neutral = FraudDetectionGrader()
for _ in range(100):
    neutral.add_step(True, True)
assert abs(neutral.evaluate() - 0.5) < 1e-9, f"Neutral: {neutral.evaluate()}"
print(f" perfect=1.0 worst=0.0 neutral=0.5 ✅")
# ── 3. UserRetentionGrader ───────────────────────────────────
print(f"\n{SEP}\n[3] UserRetentionGrader\n{SEP}")
retention = UserRetentionGrader(churn_rate=0.1, initial_users=100)
# Before any steps the score starts at a perfect 1.0.
assert abs(retention.evaluate() - 1.0) < 1e-9
# A step with zero failures keeps the score at 1.0.
retention.add_step(0)
assert abs(retention.evaluate() - 1.0) < 1e-9
# A step with failures must drag the score below 1.0.
retention.add_step(3)
assert retention.evaluate() < 1.0
print(f" initial=1.0, no-failure=1.0, 3-failures={retention.evaluate():.4f} ✅")
# ── 4. process_combined_reward ────────────────────────────────
print(f"\n{SEP}\n[4] process_combined_reward\n{SEP}")
best_reward = process_combined_reward(1.0, True, False, 0)
worst_reward = process_combined_reward(0.0, False, True, 5)
# Both rewards stay inside [0, 1] and the best case must outrank the worst.
for reward in (best_reward, worst_reward):
    assert 0.0 <= reward <= 1.0
assert best_reward > worst_reward
print(f" best={best_reward:.4f} worst={worst_reward:.4f} ✅")
# ── 5. Multi-factor fraud risk ────────────────────────────────
print(f"\n{SEP}\n[5] Multi-factor fraud risk via environment\n{SEP}")
# NOTE(review): an unused `rng_seed = np.random.default_rng(42)` used to be
# created here. It was never passed to the environment, so nothing was
# actually seeded — removed as dead/misleading code. The environment draws
# its own randomness internally.
env = SmartpayenvEnvironment()
# Sample 50 transactions in easy mode and validate fraud_risk ranges and
# field types (the old comment said 200; the loop has always sampled 50).
env.reset(difficulty=0)
risks_easy = []
for _ in range(50):
    obs = env._generate_transaction()
    risks_easy.append(obs.fraud_risk_score)
    assert 0.0 <= obs.fraud_risk_score <= 1.0
    assert obs.merchant_category in range(6)
    assert obs.device_type in (0, 1, 2)
    assert isinstance(obs.is_international, bool)
    assert isinstance(obs.card_present, bool)
# Sample 50 more in hard mode; only the risk scores are compared here.
env.reset(difficulty=2)
risks_hard = []
for _ in range(50):
    obs = env._generate_transaction()
    risks_hard.append(obs.fraud_risk_score)
mean_easy = sum(risks_easy) / len(risks_easy)
mean_hard = sum(risks_hard) / len(risks_hard)
print(f" avg fraud_risk easy={mean_easy:.3f} hard={mean_hard:.3f}")
assert mean_hard > mean_easy, "Hard mode should have higher avg fraud risk"
print(" ✅ Multi-factor fraud + difficulty scaling OK")
# ── 6. Gateway state machine ──────────────────────────────────
print(f"\n{SEP}\n[6] Gateway state machine\n{SEP}")
env.reset(difficulty=2) # high degrade_p for quick test
observed_states = set()
# Advance every gateway 80 times and record each state it passes through.
for _ in range(80):
    for gateway in env._gateways:
        gateway.step()
        observed_states.add(gateway.state)
        assert 0.0 <= gateway.current_rate <= 1.0
print(f" States observed: {observed_states}")
assert "degraded" in observed_states or "recovering" in observed_states, \
    "Hard mode should see degraded/recovering states"
print(" ✅ Gateway state machine OK")
# ── 7. Transaction velocity tracking ─────────────────────────
print(f"\n{SEP}\n[7] Transaction velocity tracking\n{SEP}")
env.reset(difficulty=0)
velocities = []
# Every sampled transaction must carry a velocity inside [0, 1].
for _ in range(20):
    txn = env._generate_transaction()
    assert 0.0 <= txn.transaction_velocity <= 1.0
    velocities.append(txn.transaction_velocity)
print(f" velocity range: [{min(velocities):.2f}, {max(velocities):.2f}] ✅")
# ── 8. Episode smoke test — all 3 difficulty tiers ───────────
print(f"\n{SEP}\n[8] Full episode smoke test (15 steps × 3 difficulties)\n{SEP}")
for tier in (0, 1, 2):
    obs = env.reset(difficulty=tier)
    assert obs.difficulty == tier
    episode_rewards = []
    for _ in range(15):
        # Greedy policy: best gateway, single retry, block only high risk.
        action = SmartpayenvAction(
            gateway=int(np.argmax(obs.gateway_success_rates)),
            retry_strategy=1,
            fraud_decision=1 if obs.fraud_risk_score > 0.65 else 0,
        )
        obs = env.step(action)
        assert 0.0 <= obs.reward <= 1.0, f"reward out of [0,1]: {obs.reward}"
        assert 0.0 <= obs.task_routing_score <= 1.0
        assert 0.0 <= obs.task_fraud_mcc_score <= 1.0
        assert 0.0 <= obs.task_retention_score <= 1.0
        episode_rewards.append(obs.reward)
        if obs.done:
            break
    avg = sum(episode_rewards) / len(episode_rewards)
    print(f" difficulty={tier}: {len(episode_rewards)} steps, avg_reward={avg:.4f}")
    assert any(r > 0 for r in episode_rewards), "All rewards are still 0!"
print(f"\n ✅ All difficulty tiers produce non-zero rewards")
# ── 9. Block → done=True immediately ─────────────────────────
print(f"\n{SEP}\n[9] fraud_decision=1 ends episode immediately\n{SEP}")
env.reset(difficulty=0)
# A blocking fraud decision should terminate the episode on the same step.
block_action = SmartpayenvAction(gateway=0, retry_strategy=0, fraud_decision=1)
obs = env.step(block_action)
assert obs.done is True, f"Expected done=True after block, got {obs.done}"
print(f" Block step done={obs.done} ✅")
print(f"\n{SEP}")
print(" ALL TESTS PASSED ✅")
print(f"{SEP}\n")