import os
import sys

# Make the repository root importable when this test is run directly.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from server.reward_calculator import RewardCalculator
from llmserve_env.models import MetricsSnapshot


def test_reward_scenarios():
    calc = RewardCalculator()

    print("[INFO] Testing Goldilocks Memory Penalties...")

    # Scenario 1: optimal memory pressure (28 GB used, ~70% of the implied
    # 40 GB GPU), healthy latencies, and zero SLO violations.
    m1 = MetricsSnapshot(
        throughput_tps=200.0,
        gpu_memory_used_gb=28.0,
        slo_violations=0,
        requests_served=50,
        p50_ttft_ms=100.0,
        p99_ttft_ms=200.0,
        p50_itl_ms=50.0,
        estimated_cost_per_1k=0.001,
        spec_acceptance_rate=0.8,
        eviction_events=0,
        preemption_events=0,
        is_throttled=False,
    )
    r1 = calc.calculate("static_workload", m1, 1.0, "FP16", 0.0)
    print(f" Optimal (70%): Reward={r1:.4f}")
    assert r1 > 0, "Optimal memory should yield positive reward"

    # Scenario 2: under-utilized memory (8 GB, ~20%) with proportionally
    # lower throughput and request volume.
    m2 = m1.model_copy(update={
        "throughput_tps": 50.0,
        "gpu_memory_used_gb": 8.0,
        "requests_served": 10,
    })
    r2 = calc.calculate("static_workload", m2, 1.0, "FP16", 0.0)
    print(f" Under-utilized (20%): Reward={r2:.4f}")
    assert r2 < r1, "Under-utilized should reward less than optimal"

    # Scenario 3: danger zone (38 GB, ~95%) under a bursty workload, where
    # memory headroom matters most.
    m3 = m1.model_copy(update={
        "throughput_tps": 400.0,
        "gpu_memory_used_gb": 38.0,
        "requests_served": 80,
    })
    r3 = calc.calculate("bursty_workload", m3, 1.0, "FP16", 0.0)
    print(f" Danger Zone (95%, Bursty): Reward={r3:.4f}")
    assert r3 < 0, f"Danger zone should yield negative reward in Bursty mode, got {r3}"
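
    # Sanity-check the utilization percentages quoted in the labels above,
    # assuming the 40 GB total GPU capacity implied by the three scenarios
    # (28/40 = 70%, 8/40 = 20%, 38/40 = 95%). ASSUMED_GPU_CAPACITY_GB is a
    # local constant introduced here, not a value exported by the environment.
    ASSUMED_GPU_CAPACITY_GB = 40.0
    for snap, pct in ((m1, 70), (m2, 20), (m3, 95)):
        assert round(100 * snap.gpu_memory_used_gb / ASSUMED_GPU_CAPACITY_GB) == pct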

    print("\n[INFO] Testing SLO Breach Penalties...")

    # Scenario 4: healthy throughput and memory, but 10 SLO violations,
    # scored at 50% SLO attainment.
    m4 = m1.model_copy(update={
        "throughput_tps": 300.0,
        "gpu_memory_used_gb": 30.0,
        "slo_violations": 10,
        "requests_served": 50,
    })
    r4 = calc.calculate("static_workload", m4, 0.5, "FP16", 0.0)
    print(f" SLO Breach (50%): Reward={r4:.4f}")
    assert r4 < r1, "SLO breach should be heavily penalized"
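
    # Optional probe (no assertion): sweep the attainment argument to see how
    # the penalty scales as SLO attainment drops. This assumes calculate()
    # accepts any attainment value in [0, 1], as the calls above suggest.
    for attainment in (0.75, 0.5, 0.25):
        r_probe = calc.calculate("static_workload", m4, attainment, "FP16", 0.0)
        print(f" SLO attainment {attainment:.2f}: Reward={r_probe:.4f}")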

    print("\n[INFO] Testing Level 3 Priority Multiplier...")

    # Same metrics and 90% attainment; only the VIP traffic fraction differs
    # (0% vs. 20%), isolating the Level 3 priority multiplier.
    r5_std = calc.calculate("adversarial_multitenant", m1, 0.9, "FP16", 0.0)
    r5_pri = calc.calculate("adversarial_multitenant", m1, 0.9, "FP16", 0.2)
    print(f" L3 Standard Breach (90%): Reward={r5_std:.4f}")
    print(f" L3 Priority Breach (90%, 20% VIP): Reward={r5_pri:.4f}")
    assert r5_pri < r5_std, "Priority breach should penalize more in Level 3"
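
    # The gap between standard and priority scoring quantifies the Level 3
    # multiplier; printed for inspection only.
    print(f" L3 priority penalty delta: {r5_std - r5_pri:.4f}")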

    print("\n[PASS] All reward logic scenarios verified.")


if __name__ == "__main__":
    test_reward_scenarios()