Spaces:
Running
Running
| """Bank–Analyzer negotiation protocol tests (C.6). | |
| Verifies the optional Analyzer↔Bank consultation hook: | |
| 1. Backward compat: ``enable_negotiation=False`` (default) is identical to | |
| pre-C.6 behavior — no consultation is sent, no bank decision changes. | |
| 2. Forward effect: with ``enable_negotiation=True`` and a high-confidence | |
| analyzer flag, the bank's combined risk crosses the freeze threshold on | |
| borderline transactions. | |
| 3. Containment: the consultation surface contains *no* chat content — only | |
| score/signals/threshold/flagged. This is the privacy invariant that | |
| keeps the on-device deployment story consistent. | |
| Spec: ``docs/negotiation_protocol.md``. | |
| """ | |
| from __future__ import annotations | |
| import pytest | |
| from chakravyuh_env.agents.bank_monitor import ScriptedBankMonitor | |
| from chakravyuh_env.openenv_environment import ChakravyuhOpenEnv | |
| from chakravyuh_env.openenv_models import ChakravyuhAction | |
| from chakravyuh_env.schemas import ( | |
| AnalyzerConsultation, | |
| BankApprove, | |
| BankFlag, | |
| BankFreeze, | |
| Observation, | |
| TransactionMeta, | |
| ) | |
| # --- consultation surface (privacy contract) --------------------------------- | |
def test_consultation_carries_no_chat_content() -> None:
    """Pin the consultation surface to score/signals/threshold/flagged.

    Checks both the declared model fields and the keys an instance actually
    serializes to, so a chat-content field cannot be added without this test
    failing.
    """
    allowed = {"score", "signals", "threshold", "flagged"}
    consultation = AnalyzerConsultation(
        score=0.92,
        signals=("urgency", "info_request"),
        threshold=0.55,
        flagged=True,
    )
    declared = set(AnalyzerConsultation.model_fields)
    assert declared == allowed, (
        "consultation surface must not grow chat-content fields without explicit review"
    )
    # Also pin what an instance dumps to, not just the declared schema.
    assert set(consultation.model_dump()) == allowed, (
        "serialized consultation must expose only score/signals/threshold/flagged"
    )
| # --- bank-side risk combiner ------------------------------------------------- | |
def _borderline_tx() -> TransactionMeta:
    """Build the borderline fixture: 10000.0 sent to a new receiver.

    Intended to leave the bank-only risk just under the freeze threshold.
    """
    fields = {
        "amount": 10000.0,
        "receiver_new": True,
        "receiver_id_hash": "rx_test",
        "frequency_24h": 0,
    }
    return TransactionMeta(**fields)
def _bank_obs(tx: TransactionMeta) -> Observation:
    """Wrap ``tx`` in a bank-role observation at turn 8."""
    observation = Observation(agent_role="bank", turn=8, transaction=tx)
    return observation
def test_bank_without_consultation_matches_act() -> None:
    """Deciding with ``consultation=None`` must equal the original ``act``.

    Compares ``act(obs)`` against ``_decide(obs, consultation=None)`` on the
    same borderline observation: same action class, same dumped payload.
    """
    obs = _bank_obs(_borderline_tx())
    # NOTE(review): the monitor is seeded, so it presumably draws from an RNG.
    # Each call gets its own identically seeded instance so the first call
    # cannot shift the random state seen by the second.
    baseline = ScriptedBankMonitor(seed=42).act(obs)
    without_consult = ScriptedBankMonitor(seed=42)._decide(obs, consultation=None)
    assert type(baseline) is type(without_consult)
    assert baseline.model_dump() == without_consult.model_dump()
def test_high_analyzer_score_escalates_bank_decision() -> None:
    """A confident analyzer flag must never weaken a borderline decision.

    Accepted outcomes relative to the no-consultation baseline:
    Approve -> Flag or Freeze; Flag -> Flag with confidence not lower;
    anything else must end in Freeze.
    """
    obs = _bank_obs(_borderline_tx())
    # NOTE(review): the monitor is seeded, so it presumably draws from an RNG.
    # Baseline and consulted decisions use separate identically seeded
    # instances so they differ only by the consultation.
    baseline = ScriptedBankMonitor(seed=42).act(obs)
    consulted = ScriptedBankMonitor(seed=42).act_with_consultation(
        obs,
        AnalyzerConsultation(score=0.95, signals=("urgency",), flagged=True),
    )

    if isinstance(baseline, BankApprove):
        # Approve must strengthen to Flag or Freeze.
        assert isinstance(consulted, (BankFlag, BankFreeze))
        return

    if isinstance(baseline, BankFlag) and isinstance(consulted, BankFlag):
        # Same class: the confidence must not drop.
        assert consulted.confidence >= baseline.confidence
        return

    # Remaining cases (baseline Freeze, or baseline Flag that changed class)
    # are only acceptable when the consulted decision is Freeze.
    assert isinstance(consulted, BankFreeze)
def test_low_analyzer_score_does_not_invent_risk() -> None:
    """A low, unflagged analyzer score leaves a clean transaction approved."""
    monitor = ScriptedBankMonitor(seed=42)
    obs = _bank_obs(
        TransactionMeta(
            amount=500.0,
            receiver_new=False,
            receiver_id_hash="rx_clean",
            frequency_24h=0,
        )
    )
    quiet_consultation = AnalyzerConsultation(score=0.05, signals=(), flagged=False)
    decision = monitor.act_with_consultation(obs, quiet_consultation)
    assert isinstance(decision, BankApprove)
| # --- env-level integration --------------------------------------------------- | |
def test_env_negotiation_disabled_is_default() -> None:
    """A default-constructed ``ChakravyuhOpenEnv`` has negotiation turned off."""
    negotiation_enabled = ChakravyuhOpenEnv()._enable_negotiation
    assert negotiation_enabled is False
def test_env_negotiation_flag_round_trip() -> None:
    """``enable_negotiation=True`` is still set after a seeded ``reset()``."""
    env = ChakravyuhOpenEnv(enable_negotiation=True)
    env.reset(seed=7)
    negotiation_enabled = env._enable_negotiation
    assert negotiation_enabled is True
def test_env_with_negotiation_completes_episode() -> None:
    """End-to-end smoke: a negotiation-enabled episode reaches ``done``."""
    env = ChakravyuhOpenEnv(enable_negotiation=True)
    assert env.reset(seed=11) is not None

    # Up to two analyzer decisions (turn 3 + turn 6); the second triggers
    # transaction → bank consultation → outcome.
    last = env.step(ChakravyuhAction(score=0.92, signals=["urgency"]))
    if not last.done:
        last = env.step(ChakravyuhAction(score=0.95, signals=["impersonation"]))

    # Whichever step came last must have ended the episode.
    assert last.done