import sys
import os

# Add the repo root to sys.path to allow imports from 'server'
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from server.reward_calculator import RewardCalculator
from llmserve_env.models import MetricsSnapshot


def test_reward_scenarios():
    calc = RewardCalculator()

    print("[INFO] Testing Goldilocks Memory Penalties...")

    # Scenario 1: Optimal Memory (70%)
    m1 = MetricsSnapshot(
        throughput_tps=200.0,
        gpu_memory_used_gb=28.0,  # 28/40 = 0.7 (optimal)
        slo_violations=0,
        requests_served=50,
        p50_ttft_ms=100.0,
        p99_ttft_ms=200.0,
        p50_itl_ms=50.0,
        estimated_cost_per_1k=0.001,
        spec_acceptance_rate=0.8,
        eviction_events=0,
        preemption_events=0,
        is_throttled=False,
    )
    # Positional args, as used throughout this file: workload profile,
    # metrics snapshot, SLO compliance, quantization tier, priority-traffic fraction.
    r1 = calc.calculate("static_workload", m1, 1.0, "FP16", 0.0)
    print(f"  Optimal (70%): Reward={r1:.4f}")
    assert r1 > 0, "Optimal memory should yield a positive reward"

    # Scenario 2: Under-utilization (20%)
    m2 = m1.model_copy(update={
        "throughput_tps": 50.0,
        "gpu_memory_used_gb": 8.0,  # 8/40 = 0.2 (under-utilized)
        "requests_served": 10,
    })
    r2 = calc.calculate("static_workload", m2, 1.0, "FP16", 0.0)
    print(f"  Under-utilized (20%): Reward={r2:.4f}")
    assert r2 < r1, "Under-utilization should reward less than optimal"

    # Scenario 3: Danger Zone (95%)
    # Use 'bursty_workload', where w_mem is higher (0.4), to check the stability focus.
    m3 = m1.model_copy(update={
        "throughput_tps": 400.0,
        "gpu_memory_used_gb": 38.0,  # 38/40 = 0.95 (danger)
        "requests_served": 80,
    })
    r3 = calc.calculate("bursty_workload", m3, 1.0, "FP16", 0.0)
    print(f"  Danger Zone (95%, Bursty): Reward={r3:.4f}")
    assert r3 < 0, f"Danger zone should yield a negative reward in bursty mode, got {r3}"

    print("\n[INFO] Testing SLO Breach Penalties...")

    # Scenario 4: SLO Breach
    m4 = m1.model_copy(update={
        "throughput_tps": 300.0,
        "gpu_memory_used_gb": 30.0,
        "slo_violations": 10,
        "requests_served": 50,
    })
    r4 = calc.calculate("static_workload", m4, 0.5, "FP16", 0.0)
    print(f"  SLO Breach (50%): Reward={r4:.4f}")
    assert r4 < r1, "An SLO breach should be heavily penalized"

    print("\n[INFO] Testing Level 3 Priority Multiplier...")

    # Scenario 5: Priority Breach in Level 3
    # Standard breach (0.9 compliance).
    r5_std = calc.calculate("adversarial_multitenant", m1, 0.9, "FP16", 0.0)
    # Priority breach (0.9 compliance, 20% priority traffic).
    r5_pri = calc.calculate("adversarial_multitenant", m1, 0.9, "FP16", 0.2)
    print(f"  L3 Standard Breach (90%): Reward={r5_std:.4f}")
    print(f"  L3 Priority Breach (90%, 20% VIP): Reward={r5_pri:.4f}")
    assert r5_pri < r5_std, "A priority breach should penalize more in Level 3"

    print("\n[PASS] All reward logic scenarios verified.")


if __name__ == "__main__":
    test_reward_scenarios()
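
# ---------------------------------------------------------------------------
# Reference sketches (illustration only). The real logic lives in
# server.reward_calculator.RewardCalculator.calculate; the band edges,
# scores, and multiplier form below are hypothetical, chosen only to match
# the behavior the assertions above verify.
# ---------------------------------------------------------------------------

def _goldilocks_memory_score_sketch(used_gb: float, total_gb: float = 40.0) -> float:
    """Hypothetical 'Goldilocks' memory shaping: full credit in a mid-range
    utilization band, partial credit when under-utilized, and a hard penalty
    in the near-OOM danger zone (cf. Scenarios 1-3)."""
    util = used_gb / total_gb
    if util >= 0.90:        # Scenario 3: 38/40 = 0.95 -> danger zone
        return -1.0
    if util < 0.50:         # Scenario 2: 8/40 = 0.20 -> partial credit (0.4)
        return util / 0.50
    return 1.0              # Scenario 1: 28/40 = 0.70 -> optimal band


def _slo_penalty_sketch(compliance: float, priority_frac: float, level3: bool = False) -> float:
    """Hypothetical SLO penalty: breaches cost (1 - compliance); in Level 3
    the cost is amplified by the share of priority/VIP traffic affected
    (cf. Scenarios 4-5)."""
    penalty = 1.0 - compliance
    if level3:
        penalty *= 1.0 + priority_frac  # assumed form of the priority multiplier
    return -penalty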