import os
import sys

# Make the repository root importable when this test is run directly.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from server.reward_calculator import RewardCalculator
from llmserve_env.models import MetricsSnapshot


def test_reward_scenarios():
    calc = RewardCalculator()

    print("[INFO] Testing Goldilocks Memory Penalties...")

    # Scenario 1: optimal memory pressure (28 GB used, ~70% of the implied
    # 40 GB GPU), healthy latencies, and zero SLO violations.
    m1 = MetricsSnapshot(
        throughput_tps=200.0,
        gpu_memory_used_gb=28.0,
        slo_violations=0,
        requests_served=50,
        p50_ttft_ms=100.0,
        p99_ttft_ms=200.0,
        p50_itl_ms=50.0,
        estimated_cost_per_1k=0.001,
        spec_acceptance_rate=0.8,
        eviction_events=0,
        preemption_events=0,
        is_throttled=False,
    )
    r1 = calc.calculate("static_workload", m1, 1.0, "FP16", 0.0)
    print(f" Optimal (70%): Reward={r1:.4f}")
    assert r1 > 0, "Optimal memory should yield positive reward"

    # Scenario 2: under-utilized memory (8 GB, ~20%) with proportionally
    # lower throughput and request volume.
    m2 = m1.model_copy(update={
        "throughput_tps": 50.0,
        "gpu_memory_used_gb": 8.0,
        "requests_served": 10,
    })
    r2 = calc.calculate("static_workload", m2, 1.0, "FP16", 0.0)
    print(f" Under-utilized (20%): Reward={r2:.4f}")
    assert r2 < r1, "Under-utilized should reward less than optimal"

    # Scenario 3: danger zone (38 GB, ~95%) under a bursty workload, where
    # memory headroom matters most.
    m3 = m1.model_copy(update={
        "throughput_tps": 400.0,
        "gpu_memory_used_gb": 38.0,
        "requests_served": 80,
    })
    r3 = calc.calculate("bursty_workload", m3, 1.0, "FP16", 0.0)
    print(f" Danger Zone (95%, Bursty): Reward={r3:.4f}")
    assert r3 < 0, f"Danger zone should yield negative reward in Bursty mode, got {r3}"
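
    # Sanity-check the utilization percentages quoted in the labels above,
    # assuming the 40 GB total GPU capacity implied by the three scenarios
    # (28/40 = 70%, 8/40 = 20%, 38/40 = 95%). ASSUMED_GPU_CAPACITY_GB is a
    # local constant introduced here, not a value exported by the environment.
    ASSUMED_GPU_CAPACITY_GB = 40.0
    for snap, pct in ((m1, 70), (m2, 20), (m3, 95)):
        assert round(100 * snap.gpu_memory_used_gb / ASSUMED_GPU_CAPACITY_GB) == pct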

    print("\n[INFO] Testing SLO Breach Penalties...")

    # Scenario 4: healthy throughput and memory, but 10 SLO violations,
    # scored at 50% SLO attainment.
    m4 = m1.model_copy(update={
        "throughput_tps": 300.0,
        "gpu_memory_used_gb": 30.0,
        "slo_violations": 10,
        "requests_served": 50,
    })
    r4 = calc.calculate("static_workload", m4, 0.5, "FP16", 0.0)
    print(f" SLO Breach (50%): Reward={r4:.4f}")
    assert r4 < r1, "SLO breach should be heavily penalized"
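
    # Optional probe (no assertion): sweep the attainment argument to see how
    # the penalty scales as SLO attainment drops. This assumes calculate()
    # accepts any attainment value in [0, 1], as the calls above suggest.
    for attainment in (0.75, 0.5, 0.25):
        r_probe = calc.calculate("static_workload", m4, attainment, "FP16", 0.0)
        print(f" SLO attainment {attainment:.2f}: Reward={r_probe:.4f}")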

    print("\n[INFO] Testing Level 3 Priority Multiplier...")

    # Same metrics and 90% attainment; only the VIP traffic fraction differs
    # (0% vs. 20%), isolating the Level 3 priority multiplier.
    r5_std = calc.calculate("adversarial_multitenant", m1, 0.9, "FP16", 0.0)
    r5_pri = calc.calculate("adversarial_multitenant", m1, 0.9, "FP16", 0.2)
    print(f" L3 Standard Breach (90%): Reward={r5_std:.4f}")
    print(f" L3 Priority Breach (90%, 20% VIP): Reward={r5_pri:.4f}")
    assert r5_pri < r5_std, "Priority breach should penalize more in Level 3"
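
    # The gap between standard and priority scoring quantifies the Level 3
    # multiplier; printed for inspection only.
    print(f" L3 priority penalty delta: {r5_std - r5_pri:.4f}")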

    print("\n[PASS] All reward logic scenarios verified.")


if __name__ == "__main__":
    test_reward_scenarios()