File size: 3,052 Bytes
import math
import sys
import os
# Add root directory to sys.path to allow imports from 'server'
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from server.reward_calculator import RewardCalculator, MAX_TPS_REFERENCE
from llmserve_env.models import MetricsSnapshot, QuantizationTier
def test_reward_scenarios():
    """Run five hand-built scenarios through ``RewardCalculator.calculate``.

    Each scenario builds a ``MetricsSnapshot`` (the later ones are copies of
    the first with a few fields overridden), scores it, prints the reward and
    asserts an ordering or sign relationship:

    1. Baseline snapshot on ``static_workload`` must score above zero.
    2. A lower-throughput / lower-memory snapshot must score below scenario 1.
    3. A high-memory snapshot on ``bursty_workload`` must score below zero.
    4. A snapshot with SLO violations must score below scenario 1.
    5. On ``adversarial_multitenant``, a final argument of 0.2 must score
       below a final argument of 0.0 for otherwise identical inputs.

    Raises:
        AssertionError: if any of the expected relationships does not hold.
    """
    scorer = RewardCalculator()

    print("[INFO] Testing Goldilocks Memory Penalties...")

    # Scenario 1: Optimal Memory (70%)
    # The percentages in these comments assume a 40 GB card (28/40 = 0.7);
    # that capacity is not visible in this file -- TODO confirm.
    baseline_fields = dict(
        throughput_tps=200.0,
        gpu_memory_used_gb=28.0,
        slo_violations=0,
        requests_served=50,
        p50_ttft_ms=100.0,
        p99_ttft_ms=200.0,
        p50_itl_ms=50.0,
        estimated_cost_per_1k=0.001,
        spec_acceptance_rate=0.8,
        eviction_events=0,
        preemption_events=0,
        is_throttled=False,
    )
    optimal_snapshot = MetricsSnapshot(**baseline_fields)
    optimal_reward = scorer.calculate(
        "static_workload", optimal_snapshot, 1.0, "FP16", 0.0
    )
    print(f" Optimal (70%): Reward={optimal_reward:.4f}")
    assert optimal_reward > 0, "Optimal memory should yield positive reward"

    # Scenario 2: Under-utilization (20%) -- 8/40 = 0.2
    idle_snapshot = optimal_snapshot.model_copy(
        update=dict(
            throughput_tps=50.0,
            gpu_memory_used_gb=8.0,
            requests_served=10,
        )
    )
    idle_reward = scorer.calculate(
        "static_workload", idle_snapshot, 1.0, "FP16", 0.0
    )
    print(f" Under-utilized (20%): Reward={idle_reward:.4f}")
    assert idle_reward < optimal_reward, (
        "Under-utilized should reward less than optimal"
    )

    # Scenario 3: Danger Zone (95%) -- 38/40 = 0.95
    # Scored as 'bursty_workload'; the original author notes the memory
    # weight is higher there (0.4), which is what should push this negative.
    danger_snapshot = optimal_snapshot.model_copy(
        update=dict(
            throughput_tps=400.0,
            gpu_memory_used_gb=38.0,
            requests_served=80,
        )
    )
    danger_reward = scorer.calculate(
        "bursty_workload", danger_snapshot, 1.0, "FP16", 0.0
    )
    print(f" Danger Zone (95%, Bursty): Reward={danger_reward:.4f}")
    assert danger_reward < 0, (
        f"Danger zone should yield negative reward in Bursty mode, got {danger_reward}"
    )

    print("\n[INFO] Testing SLO Breach Penalties...")

    # Scenario 4: SLO Breach -- third argument drops from 1.0 to 0.5
    breach_snapshot = optimal_snapshot.model_copy(
        update=dict(
            throughput_tps=300.0,
            gpu_memory_used_gb=30.0,
            slo_violations=10,
            requests_served=50,
        )
    )
    breach_reward = scorer.calculate(
        "static_workload", breach_snapshot, 0.5, "FP16", 0.0
    )
    print(f" SLO Breach (50%): Reward={breach_reward:.4f}")
    assert breach_reward < optimal_reward, "SLO breach should be heavily penalized"

    print("\n[INFO] Testing Level 3 Priority Multiplier...")

    # Scenario 5: Priority Breach in Level 3
    # Same snapshot and 0.9 third argument for both calls; only the last
    # argument differs (0.0 = standard, 0.2 = "20% priority" per the author).
    standard_reward = scorer.calculate(
        "adversarial_multitenant", optimal_snapshot, 0.9, "FP16", 0.0
    )
    priority_reward = scorer.calculate(
        "adversarial_multitenant", optimal_snapshot, 0.9, "FP16", 0.2
    )
    print(f" L3 Standard Breach (90%): Reward={standard_reward:.4f}")
    print(f" L3 Priority Breach (90%, 20% VIP): Reward={priority_reward:.4f}")
    assert priority_reward < standard_reward, (
        "Priority breach should penalize more in Level 3"
    )

    print("\n[PASS] All reward logic scenarios verified.")
# Script entry point: lets the scenarios be run directly with the Python
# interpreter, in addition to being collected by a test runner.
if __name__ == "__main__":
    test_reward_scenarios()
|