chakravyuh / tests /test_negotiation.py
UjjwalPardeshi
deploy: latest main to HF Space
03815d6
"""Bank–Analyzer negotiation protocol tests (C.6).
Verifies the optional Analyzer↔Bank consultation hook:
1. Backward compat: ``enable_negotiation=False`` (default) is identical to
pre-C.6 behavior — no consultation is sent, no bank decision changes.
2. Forward effect: with ``enable_negotiation=True`` and a high-confidence
analyzer flag, the bank's combined risk crosses the freeze threshold on
borderline transactions.
3. Containment: the consultation surface contains *no* chat content — only
score/signals/threshold/flagged. This is the privacy invariant that
keeps the on-device deployment story consistent.
Spec: ``docs/negotiation_protocol.md``.
"""
from __future__ import annotations
import pytest
from chakravyuh_env.agents.bank_monitor import ScriptedBankMonitor
from chakravyuh_env.openenv_environment import ChakravyuhOpenEnv
from chakravyuh_env.openenv_models import ChakravyuhAction
from chakravyuh_env.schemas import (
AnalyzerConsultation,
BankApprove,
BankFlag,
BankFreeze,
Observation,
TransactionMeta,
)
# --- consultation surface (privacy contract) ---------------------------------
@pytest.mark.unit
def test_consultation_carries_no_chat_content() -> None:
"""The consultation type only exposes score/signals/threshold/flagged."""
consultation = AnalyzerConsultation(
score=0.92,
signals=("urgency", "info_request"),
threshold=0.55,
flagged=True,
)
fields = set(AnalyzerConsultation.model_fields.keys())
assert fields == {"score", "signals", "threshold", "flagged"}, (
"consultation surface must not grow chat-content fields without explicit review"
)
# --- bank-side risk combiner -------------------------------------------------
def _borderline_tx() -> TransactionMeta:
"""Borderline transaction — bank-only risk is just under the freeze threshold."""
return TransactionMeta(
amount=10000.0,
receiver_new=True,
receiver_id_hash="rx_test",
frequency_24h=0,
)
def _bank_obs(tx: TransactionMeta) -> Observation:
return Observation(agent_role="bank", turn=8, transaction=tx)
@pytest.mark.unit
def test_bank_without_consultation_matches_act() -> None:
"""`act_with_consultation` w/o consultation must equal the original `act`."""
bank = ScriptedBankMonitor(seed=42)
obs = _bank_obs(_borderline_tx())
a1 = bank.act(obs)
a2 = bank._decide(obs, consultation=None)
assert type(a1) is type(a2)
assert a1.model_dump() == a2.model_dump()
@pytest.mark.unit
def test_high_analyzer_score_escalates_bank_decision() -> None:
"""A confident analyzer flag pushes a borderline tx toward freeze/flag."""
bank = ScriptedBankMonitor(seed=42)
obs = _bank_obs(_borderline_tx())
no_consult = bank.act(obs)
with_consult = bank.act_with_consultation(
obs,
AnalyzerConsultation(score=0.95, signals=("urgency",), flagged=True),
)
# Either the action class strengthens (Approve→Flag, or Flag→Freeze)
# or the same-class confidence rises (Flag confidence increases).
if isinstance(no_consult, BankApprove):
assert isinstance(with_consult, (BankFlag, BankFreeze))
elif isinstance(no_consult, BankFlag) and isinstance(with_consult, BankFlag):
assert with_consult.confidence >= no_consult.confidence
else: # already Freeze in baseline → just stay Freeze
assert isinstance(with_consult, BankFreeze)
@pytest.mark.unit
def test_low_analyzer_score_does_not_invent_risk() -> None:
"""A low analyzer score must not push a clean tx into Flag/Freeze."""
bank = ScriptedBankMonitor(seed=42)
clean_tx = TransactionMeta(
amount=500.0,
receiver_new=False,
receiver_id_hash="rx_clean",
frequency_24h=0,
)
obs = _bank_obs(clean_tx)
action = bank.act_with_consultation(
obs,
AnalyzerConsultation(score=0.05, signals=(), flagged=False),
)
assert isinstance(action, BankApprove)
# --- env-level integration ---------------------------------------------------
@pytest.mark.unit
def test_env_negotiation_disabled_is_default() -> None:
"""`ChakravyuhOpenEnv()` must default to negotiation disabled."""
env = ChakravyuhOpenEnv()
assert env._enable_negotiation is False
@pytest.mark.unit
def test_env_negotiation_flag_round_trip() -> None:
"""The flag is settable and survives reset() without resetting state."""
env = ChakravyuhOpenEnv(enable_negotiation=True)
env.reset(seed=7)
assert env._enable_negotiation is True
@pytest.mark.unit
def test_env_with_negotiation_completes_episode() -> None:
"""End-to-end smoke: an episode with negotiation enabled terminates cleanly."""
env = ChakravyuhOpenEnv(enable_negotiation=True)
obs = env.reset(seed=11)
assert obs is not None
# Two analyzer decisions (turn 3 + turn 6); the second triggers
# transaction → bank consultation → outcome.
obs1 = env.step(ChakravyuhAction(score=0.92, signals=["urgency"]))
if not obs1.done:
obs2 = env.step(ChakravyuhAction(score=0.95, signals=["impersonation"]))
assert obs2.done
else:
assert obs1.done