File size: 5,250 Bytes
03815d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""Bank–Analyzer negotiation protocol tests (C.6).

Verifies the optional Analyzer↔Bank consultation hook:

  1. Backward compat: ``enable_negotiation=False`` (default) is identical to
     pre-C.6 behavior — no consultation is sent, no bank decision changes.
  2. Forward effect: with ``enable_negotiation=True`` and a high-confidence
     analyzer flag, the bank's combined risk crosses the freeze threshold on
     borderline transactions.
  3. Containment: the consultation surface contains *no* chat content — only
     score/signals/threshold/flagged. This is the privacy invariant that
     keeps the on-device deployment story consistent.

Spec: ``docs/negotiation_protocol.md``.
"""

from __future__ import annotations

import pytest

from chakravyuh_env.agents.bank_monitor import ScriptedBankMonitor
from chakravyuh_env.openenv_environment import ChakravyuhOpenEnv
from chakravyuh_env.openenv_models import ChakravyuhAction
from chakravyuh_env.schemas import (
    AnalyzerConsultation,
    BankApprove,
    BankFlag,
    BankFreeze,
    Observation,
    TransactionMeta,
)


# --- consultation surface (privacy contract) ---------------------------------


@pytest.mark.unit
def test_consultation_carries_no_chat_content() -> None:
    """The consultation type only exposes score/signals/threshold/flagged.

    Both the declared schema and the serialized form of a concrete instance
    are checked against the same reviewed set of field names.
    """
    allowed = {"score", "signals", "threshold", "flagged"}
    consultation = AnalyzerConsultation(
        score=0.92,
        signals=("urgency", "info_request"),
        threshold=0.55,
        flagged=True,
    )
    fields = set(AnalyzerConsultation.model_fields.keys())
    assert fields == allowed, (
        "consultation surface must not grow chat-content fields without explicit review"
    )
    # Inspect what the instance actually serializes to, not just the class
    # schema: this is the payload shape a consumer of the consultation sees.
    assert set(consultation.model_dump().keys()) == allowed, (
        "serialized consultation must expose exactly the reviewed fields"
    )


# --- bank-side risk combiner -------------------------------------------------


def _borderline_tx() -> TransactionMeta:
    """Build the borderline transaction fixture used by the bank tests.

    The values are meant to leave bank-only risk just under the freeze
    threshold.
    """
    tx_fields = {
        "amount": 10000.0,
        "receiver_new": True,
        "receiver_id_hash": "rx_test",
        "frequency_24h": 0,
    }
    return TransactionMeta(**tx_fields)


def _bank_obs(tx: TransactionMeta) -> Observation:
    """Wrap ``tx`` in a bank-role observation at turn 8."""
    observation = Observation(transaction=tx, turn=8, agent_role="bank")
    return observation


@pytest.mark.unit
def test_bank_without_consultation_matches_act() -> None:
    """With ``consultation=None`` the decision path must reproduce plain ``act``."""
    monitor = ScriptedBankMonitor(seed=42)
    observation = _bank_obs(_borderline_tx())

    baseline = monitor.act(observation)
    unconsulted = monitor._decide(observation, consultation=None)

    # Same action class first, then the same serialized payload.
    assert type(baseline) is type(unconsulted)
    assert baseline.model_dump() == unconsulted.model_dump()


@pytest.mark.unit
def test_high_analyzer_score_escalates_bank_decision() -> None:
    """A confident analyzer flag must never weaken the bank's decision.

    Compared with the un-consulted baseline on the borderline transaction,
    the consulted action either moves to a stronger class or, when both are
    flags, carries at least the same confidence.
    """
    monitor = ScriptedBankMonitor(seed=42)
    observation = _bank_obs(_borderline_tx())

    baseline = monitor.act(observation)
    confident_flag = AnalyzerConsultation(
        score=0.95, signals=("urgency",), flagged=True
    )
    consulted = monitor.act_with_consultation(observation, confident_flag)

    # Baseline approved: consultation must lift it to Flag or Freeze.
    if isinstance(baseline, BankApprove):
        assert isinstance(consulted, (BankFlag, BankFreeze))
        return

    # Flag in both runs: confidence may only stay level or rise.
    both_flagged = isinstance(baseline, BankFlag) and isinstance(consulted, BankFlag)
    if both_flagged:
        assert consulted.confidence >= baseline.confidence
        return

    # Every remaining combination must end in a freeze.
    assert isinstance(consulted, BankFreeze)


@pytest.mark.unit
def test_low_analyzer_score_does_not_invent_risk() -> None:
    """A low, unflagged analyzer score leaves a clean transaction approved."""
    monitor = ScriptedBankMonitor(seed=42)
    observation = _bank_obs(
        TransactionMeta(
            amount=500.0,
            receiver_new=False,
            receiver_id_hash="rx_clean",
            frequency_24h=0,
        )
    )

    quiet_consultation = AnalyzerConsultation(score=0.05, signals=(), flagged=False)
    decision = monitor.act_with_consultation(observation, quiet_consultation)

    assert isinstance(decision, BankApprove)


# --- env-level integration ---------------------------------------------------


@pytest.mark.unit
def test_env_negotiation_disabled_is_default() -> None:
    """A default-constructed ``ChakravyuhOpenEnv`` has negotiation switched off."""
    default_env = ChakravyuhOpenEnv()
    negotiation_enabled = default_env._enable_negotiation
    assert negotiation_enabled is False


@pytest.mark.unit
def test_env_negotiation_flag_round_trip() -> None:
    """``enable_negotiation=True`` is still set on the env after ``reset()``."""
    negotiating_env = ChakravyuhOpenEnv(enable_negotiation=True)
    negotiating_env.reset(seed=7)
    flag_after_reset = negotiating_env._enable_negotiation
    assert flag_after_reset is True


@pytest.mark.unit
def test_env_with_negotiation_completes_episode() -> None:
    """End-to-end smoke: a negotiation-enabled episode is done within two steps."""
    env = ChakravyuhOpenEnv(enable_negotiation=True)
    initial_obs = env.reset(seed=11)
    assert initial_obs is not None

    # Two analyzer decisions (turn 3 + turn 6); the second triggers
    # transaction → bank consultation → outcome.
    first_obs = env.step(ChakravyuhAction(score=0.92, signals=["urgency"]))

    # Take the second step only if the first did not already end the episode.
    final_obs = (
        first_obs
        if first_obs.done
        else env.step(ChakravyuhAction(score=0.95, signals=["impersonation"]))
    )
    assert final_obs.done