"""Bank–Analyzer negotiation protocol tests (C.6).

Verifies the optional Analyzer↔Bank consultation hook:

1. Backward compat: ``enable_negotiation=False`` (default) is identical to
   pre-C.6 behavior — no consultation is sent, no bank decision changes.
2. Forward effect: with ``enable_negotiation=True`` and a high-confidence
   analyzer flag, the bank's combined risk crosses the freeze threshold on
   borderline transactions.
3. Containment: the consultation surface contains *no* chat content — only
   score/signals/threshold/flagged. This is the privacy invariant that
   keeps the on-device deployment story consistent.

Spec: ``docs/negotiation_protocol.md``.
"""
from __future__ import annotations
import pytest
from chakravyuh_env.agents.bank_monitor import ScriptedBankMonitor
from chakravyuh_env.openenv_environment import ChakravyuhOpenEnv
from chakravyuh_env.openenv_models import ChakravyuhAction
from chakravyuh_env.schemas import (
AnalyzerConsultation,
BankApprove,
BankFlag,
BankFreeze,
Observation,
TransactionMeta,
)
# --- consultation surface (privacy contract) ---------------------------------
@pytest.mark.unit
def test_consultation_carries_no_chat_content() -> None:
    """Pin the consultation surface to score/signals/threshold/flagged.

    Two checks are made: the declared model fields must be exactly the
    reviewed set, and a concrete instance must serialize to exactly that
    same set of keys.
    """
    allowed = {"score", "signals", "threshold", "flagged"}
    consultation = AnalyzerConsultation(
        score=0.92,
        signals=("urgency", "info_request"),
        threshold=0.55,
        flagged=True,
    )

    assert set(AnalyzerConsultation.model_fields) == allowed, (
        "consultation surface must not grow chat-content fields without explicit review"
    )
    # The instance used to be built and then ignored; inspect what it
    # actually serializes to so the check covers the emitted payload too.
    assert set(consultation.model_dump()) == allowed, (
        "serialized consultation must expose exactly the reviewed fields"
    )
# --- bank-side risk combiner -------------------------------------------------
def _borderline_tx() -> TransactionMeta:
    """Build the shared borderline transaction fixture.

    The values are intended to leave the bank-only risk just under the
    freeze threshold.
    """
    fields = {
        "amount": 10000.0,
        "receiver_new": True,
        "receiver_id_hash": "rx_test",
        "frequency_24h": 0,
    }
    return TransactionMeta(**fields)
def _bank_obs(tx: TransactionMeta) -> Observation:
    """Wrap ``tx`` in a bank-role observation fixed at turn 8."""
    observation = Observation(agent_role="bank", turn=8, transaction=tx)
    return observation
@pytest.mark.unit
def test_bank_without_consultation_matches_act() -> None:
    """Deciding with ``consultation=None`` must reproduce plain ``act``.

    Both calls use the same monitor and the same borderline observation;
    the resulting actions must share a concrete type and dump identically.
    """
    monitor = ScriptedBankMonitor(seed=42)
    observation = _bank_obs(_borderline_tx())

    baseline = monitor.act(observation)
    unconsulted = monitor._decide(observation, consultation=None)

    assert type(baseline) is type(unconsulted)
    assert baseline.model_dump() == unconsulted.model_dump()
@pytest.mark.unit
def test_high_analyzer_score_escalates_bank_decision() -> None:
    """A confident analyzer flag must never weaken the bank's decision.

    Compares the bank's action on a borderline transaction with and without
    a high-score, flagged consultation.
    """
    monitor = ScriptedBankMonitor(seed=42)
    observation = _bank_obs(_borderline_tx())

    baseline = monitor.act(observation)
    confident_flag = AnalyzerConsultation(
        score=0.95, signals=("urgency",), flagged=True
    )
    escalated = monitor.act_with_consultation(observation, confident_flag)

    # Baseline approved: the consultation must strengthen the action class.
    if isinstance(baseline, BankApprove):
        assert isinstance(escalated, (BankFlag, BankFreeze))
        return

    # Flag stays Flag: confidence must not drop.
    if isinstance(baseline, BankFlag) and isinstance(escalated, BankFlag):
        assert escalated.confidence >= baseline.confidence
        return

    # Everything else (baseline already Freeze, or Flag that changed class)
    # must end up as Freeze.
    assert isinstance(escalated, BankFreeze)
@pytest.mark.unit
def test_low_analyzer_score_does_not_invent_risk() -> None:
    """A low, unflagged analyzer score must leave a clean transaction approved."""
    monitor = ScriptedBankMonitor(seed=42)
    observation = _bank_obs(
        TransactionMeta(
            amount=500.0,
            receiver_new=False,
            receiver_id_hash="rx_clean",
            frequency_24h=0,
        )
    )
    quiet_signal = AnalyzerConsultation(score=0.05, signals=(), flagged=False)

    decision = monitor.act_with_consultation(observation, quiet_signal)

    assert isinstance(decision, BankApprove)
# --- env-level integration ---------------------------------------------------
@pytest.mark.unit
def test_env_negotiation_disabled_is_default() -> None:
    """An environment built with no arguments has negotiation switched off."""
    default_env = ChakravyuhOpenEnv()
    negotiation_flag = default_env._enable_negotiation
    assert negotiation_flag is False
@pytest.mark.unit
def test_env_negotiation_flag_round_trip() -> None:
    """Enabling negotiation at construction must still hold after ``reset()``."""
    negotiating_env = ChakravyuhOpenEnv(enable_negotiation=True)
    negotiating_env.reset(seed=7)

    flag_after_reset = negotiating_env._enable_negotiation
    assert flag_after_reset is True
@pytest.mark.unit
def test_env_with_negotiation_completes_episode() -> None:
    """End-to-end smoke: an episode with negotiation enabled terminates cleanly.

    Steps the environment with at most two analyzer actions and requires the
    last observation seen to report ``done``.
    """
    env = ChakravyuhOpenEnv(enable_negotiation=True)
    assert env.reset(seed=11) is not None

    # Two analyzer decisions (turn 3 + turn 6); the second triggers
    # transaction → bank consultation → outcome.
    final_obs = env.step(ChakravyuhAction(score=0.92, signals=["urgency"]))
    if not final_obs.done:
        final_obs = env.step(
            ChakravyuhAction(score=0.95, signals=["impersonation"])
        )

    # One assertion on whichever observation ended the run; the old
    # ``else: assert obs1.done`` branch could never fail.
    assert final_obs.done, "episode should terminate within two analyzer decisions"
|