"""
tests/test_calibration.py
Tests for the calibration grader — run with: pytest tests/test_calibration.py -v
ALL tests must pass before pushing to GitHub.
"""

import pytest
from server.calibration_grader import (
    calibration_reward,
    detect_confidence_gaming,
    training_reward,
    eval_reward,
    CALIBRATION_MATRIX,
)


class TestCalibrationMatrix:
    """Pin the 3×2 (confidence × correctness) rewards returned by calibration_reward.

    Every call passes the arguments positionally as
    (decision, confidence, ground_truth).
    """

    def test_high_correct_returns_1_point_0(self):
        """HIGH confidence with a decision equal to the ground truth yields 1.0."""
        assert calibration_reward("approve_claim", "HIGH", "approve_claim") == 1.0

    def test_high_wrong_returns_minus_0_point_8(self):
        """HIGH confidence with a decision differing from the ground truth yields -0.8."""
        assert calibration_reward("approve_claim", "HIGH", "deny_claim") == -0.8

    def test_med_correct_returns_0_point_6(self):
        """MED confidence with a decision equal to the ground truth yields 0.6."""
        assert calibration_reward("deny_claim", "MED", "deny_claim") == 0.6

    @pytest.mark.parametrize(
        "confidence,correct,expected",
        [
            ("HIGH", True, 1.0),
            ("HIGH", False, -0.8),
            ("MED", True, 0.6),
            ("MED", False, -0.2),
            ("LOW", True, 0.1),
            ("LOW", False, 0.0),
        ],
    )
    def test_all_outputs_in_valid_range(self, confidence, correct, expected):
        """Check each confidence/correctness pair for its exact reward and bounds.

        The decision is always "approve_claim"; an incorrect case is produced
        by setting the ground truth to "deny_claim" instead.
        """
        decision = "approve_claim"
        if correct:
            ground_truth = decision
        else:
            ground_truth = "deny_claim"

        reward = calibration_reward(decision, confidence, ground_truth)

        # Exact value first, then the overall [-1.0, 1.0] envelope.
        assert reward == expected
        assert -1.0 <= reward <= 1.0


class TestAntiGaming:
    """Checks on detect_confidence_gaming for uniform confidence histories."""

    def test_systematic_low_triggers_gaming_penalty(self):
        """Fifteen LOW-confidence entries in a row produce a positive penalty."""
        entry = {"confidence": "LOW"}
        # The same dict object is repeated, exactly as list multiplication would do.
        history = [entry for _ in range(15)]
        assert detect_confidence_gaming(history) > 0

    def test_systematic_high_triggers_gaming_penalty(self):
        """Fifteen HIGH-confidence entries in a row produce a positive penalty."""
        entry = {"confidence": "HIGH"}
        history = [entry for _ in range(15)]
        assert detect_confidence_gaming(history) > 0

    def test_gaming_detector_needs_10_episodes_minimum(self):
        """A nine-entry history yields a penalty of exactly 0.0.

        Only the nine-entry case is exercised here; the ten-entry threshold
        in the test name is not itself asserted.
        """
        entry = {"confidence": "LOW"}
        history = [entry for _ in range(9)]
        assert detect_confidence_gaming(history) == 0.0


class TestTrainingReward:

    def test_training_reward_step_penalty_applied(self):
        """training_reward returns approximately -0.05 for the arguments below.

        The first three arguments mirror a HIGH-confidence "approve_claim"
        decision with a matching ground truth. The meaning of the trailing
        0, 1, False is defined by training_reward's signature (not shown
        here) -- TODO confirm against server.calibration_grader.
        """
        call_args = ("approve_claim", "HIGH", "approve_claim", 0, 1, False)
        result = training_reward(*call_args)
        # Approximate comparison guards against floating-point rounding.
        assert result == pytest.approx(-0.05)