"""Tests for JCRSafetyGate.

Covers:
- Risk score computation across the role / candidate / shuffle / reuse axes
- INV-15: Critic with risk > threshold ALWAYS uses dense prefill
- Non-judge roles never trigger dense fallback
- gate_decision logging + summary stats
- Edge case: invalid args
"""
from __future__ import annotations

import pytest

from apohara_context_forge.safety.jcr_gate import (
    JCRDecision,
    JCRSafetyGate,
)


class TestJCRSafetyGateDefaults:
    def test_default_threshold(self):
        gate = JCRSafetyGate()
        assert gate.jcr_threshold == 0.7

    def test_invalid_threshold_rejected(self):
        with pytest.raises(ValueError, match="must be in"):
            JCRSafetyGate(jcr_threshold=1.5)
        with pytest.raises(ValueError, match="must be in"):
            JCRSafetyGate(jcr_threshold=-0.1)


class TestJCRRiskComputation:
    def test_critic_base_risk(self):
        gate = JCRSafetyGate()
        risk = gate.compute_jcr_risk(
            agent_role="critic",
            candidate_count=2,
            reuse_rate=0.5,
            layout_shuffled=False,
        )
        assert risk == pytest.approx(0.6)

    def test_non_critic_base_risk(self):
        gate = JCRSafetyGate()
        risk = gate.compute_jcr_risk(
            agent_role="retriever",
            candidate_count=2,
            reuse_rate=0.5,
            layout_shuffled=False,
        )
        assert risk == pytest.approx(0.1)

    def test_extra_candidates_increase_risk(self):
        gate = JCRSafetyGate()
        baseline = gate.compute_jcr_risk("critic", 2, 0.0, False)
        five = gate.compute_jcr_risk("critic", 5, 0.0, False)
        assert five == pytest.approx(baseline + 0.3)

    def test_layout_shuffled_increases_risk(self):
        gate = JCRSafetyGate()
        plain = gate.compute_jcr_risk("critic", 2, 0.0, False)
        shuffled = gate.compute_jcr_risk("critic", 2, 0.0, True)
        assert shuffled == pytest.approx(plain + 0.2)

    def test_high_reuse_rate_increases_risk(self):
        gate = JCRSafetyGate()
        low = gate.compute_jcr_risk("critic", 2, 0.5, False)
        high = gate.compute_jcr_risk("critic", 2, 0.95, False)
        assert high == pytest.approx(low + 0.15)

    def test_risk_clamped_to_one(self):
        gate = JCRSafetyGate()
        risk = gate.compute_jcr_risk(
            agent_role="critic",
            candidate_count=20,
            reuse_rate=1.0,
            layout_shuffled=True,
        )
        assert 0.0 <= risk <= 1.0
        assert risk == pytest.approx(1.0)

    def test_invalid_candidate_count_rejected(self):
        gate = JCRSafetyGate()
        with pytest.raises(ValueError, match="non-negative"):
            gate.compute_jcr_risk("critic", -1, 0.5, False)

    def test_invalid_reuse_rate_rejected(self):
        gate = JCRSafetyGate()
        with pytest.raises(ValueError, match="reuse_rate must be"):
            gate.compute_jcr_risk("critic", 2, 1.5, False)


class TestINV15CriticAlwaysDense:
    """INV-15: Critic with risk > threshold ALWAYS returns use_dense=True."""

    def test_critic_5_candidates_shuffle_uses_dense(self):
        gate = JCRSafetyGate()
        # Risk = 0.6 + 0.3 + 0.2 = 1.1 → clamped to 1.0 → > 0.7
        assert gate.should_use_dense_prefill(
            agent_role="critic",
            candidate_count=5,
            reuse_rate=0.5,
            layout_shuffled=True,
        ) is True

    def test_retriever_2_candidates_no_dense(self):
        gate = JCRSafetyGate()
        assert gate.should_use_dense_prefill(
            agent_role="retriever",
            candidate_count=2,
            reuse_rate=0.5,
            layout_shuffled=False,
        ) is False

    def test_non_critic_never_uses_dense_even_with_high_risk(self):
        """Non-judge roles aren't protected by INV-15."""
        gate = JCRSafetyGate()
        # Even with all risk knobs cranked up, a retriever passes through.
        assert gate.should_use_dense_prefill(
            agent_role="retriever",
            candidate_count=10,
            reuse_rate=1.0,
            layout_shuffled=True,
        ) is False

    @pytest.mark.parametrize("candidates,shuffle,reuse", [
        (5, True, 0.9),
        (4, True, 0.85),
        (8, False, 0.85),
        (10, True, 0.5),
    ])
    def test_critic_above_threshold_always_dense(self, candidates, shuffle, reuse):
        """Comprehensive sweep: Critic above threshold always dense (INV-15)."""
        gate = JCRSafetyGate()
        decision = gate.gate_decision(
            agent_role="critic",
            candidate_count=candidates,
            reuse_rate=reuse,
            layout_shuffled=shuffle,
        )
        if decision.risk_score > gate.jcr_threshold:
            assert decision.use_dense is True, (
                f"INV-15 violated: critic with risk {decision.risk_score} "
                f"> threshold {gate.jcr_threshold} did not get dense prefill"
            )

    def test_critic_exactly_at_threshold_uses_reuse(self):
        """Threshold is strict: > threshold triggers dense, not >=."""
        gate = JCRSafetyGate(jcr_threshold=0.6)
        # Critic, 2 candidates, no shuffle, low reuse → exactly 0.6
        decision = gate.gate_decision(
            agent_role="critic",
            candidate_count=2,
            reuse_rate=0.5,
            layout_shuffled=False,
        )
        assert decision.risk_score == pytest.approx(0.6)
        assert decision.use_dense is False


class TestGateDecisionLogging:
    def test_gate_decision_returns_structured_record(self):
        gate = JCRSafetyGate()
        decision = gate.gate_decision("critic", 5, 0.9, True)
        assert isinstance(decision, JCRDecision)
        assert decision.agent_role == "critic"
        assert decision.use_dense is True
        assert "INV-15" in decision.reason
        assert decision.timestamp > 0

    def test_log_accumulates(self):
        gate = JCRSafetyGate()
        for _ in range(3):
            gate.gate_decision("critic", 5, 0.9, True)
        gate.gate_decision("retriever", 2, 0.1, False)
        assert len(gate.gate_log) == 4

    def test_summary_aggregates(self):
        gate = JCRSafetyGate()
        gate.gate_decision("critic", 5, 0.9, True)   # dense
        gate.gate_decision("critic", 2, 0.1, False)  # reuse
        gate.gate_decision("retriever", 2, 0.1, False)  # reuse
        s = gate.summary()
        assert s["total_decisions"] == 3
        assert s["dense_fallback_count"] == 1
        # 2 critic decisions, 1 dense → 0.5
        assert s["critic_dense_rate"] == pytest.approx(0.5)
        assert 0.0 <= s["avg_risk_score"] <= 1.0

    def test_summary_empty_safe(self):
        gate = JCRSafetyGate()
        s = gate.summary()
        assert s["total_decisions"] == 0
        assert s["dense_fallback_count"] == 0
        assert s["avg_risk_score"] == 0.0
        assert s["critic_dense_rate"] == 0.0

    def test_role_case_insensitive(self):
        gate = JCRSafetyGate()
        # Upper-case role still resolves to "critic".
        decision = gate.gate_decision("CRITIC", 5, 0.9, True)
        assert decision.agent_role == "critic"
        assert decision.use_dense is True