"""Tests for the ClaimSense Pro Adjudication Gym.

34 tests ported verbatim (semantically) from
``insurance_agent_rl/tests/test_environment.py`` plus 5 new ones that
exercise the VERIFY_PURCHASE verb, the Plaid fallback, the 6-component
score bound, and a constant-policy collapse across the 5-task mix.
"""

import os
import random
import sys

# Make the project root importable when the tests are run from this folder.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from server.claims_environment import (
    ClaimsEnvProEnvironment,
    Claim,
    DENSE_PLAID_DISCREPANCY_BONUS,
    TASKS,
    TASK_INDEX,
)
from server.plaid_mock import BankProbeStub, get_bank_probe
from models import ClaimsAction


def _play_perfect_episode(env):
    """Play every claim in *env* using its ground truth, then end the shift.

    For each claim the oracle opens it (or handles its appeal), reviews the
    documents, checks the policy, investigates fraud, and finally issues the
    decision stored on the claim itself (``correct_decision`` with either
    ``correct_payout`` or ``deny_reason``).

    Args:
        env: An environment that has already been ``reset``.

    Returns:
        The observation returned by the closing ``END_SHIFT`` step.
    """
    # Snapshot the ids up front so the loop does not depend on whether
    # stepping the environment mutates ``_claims``.
    for cid in list(env._claims):
        claim = env._claims[cid]
        opener = "HANDLE_APPEAL" if claim.appeal_pending else "OPEN_CLAIM"
        env.step(ClaimsAction(message=f"{opener} {cid}"))
        # NOTE(review): the investigation and decision steps are assumed to
        # run for appealed claims as well as freshly opened ones — confirm.
        env.step(ClaimsAction(message="REVIEW_DOCUMENTS; CHECK_POLICY"))
        env.step(ClaimsAction(message="INVESTIGATE_FRAUD"))
        if claim.correct_decision == "approve":
            env.step(ClaimsAction(message=f"APPROVE {claim.correct_payout:.2f}"))
        else:
            env.step(ClaimsAction(message=f"DENY {claim.deny_reason}"))
    return env.step(ClaimsAction(message="END_SHIFT"))


# =============================================================================
# 1. Reset (4)
# =============================================================================


class TestEnvironmentReset:
    """``reset`` returns a fresh, seed-determined initial observation."""

    def test_reset_returns_observation(self):
        env = ClaimsEnvProEnvironment()
        obs = env.reset(seed=42)
        assert obs is not None
        assert obs.step_number == 0
        assert obs.claims_processed == 0
        assert obs.correct_decisions == 0
        assert obs.wrong_decisions == 0

    def test_reset_deterministic_with_same_seed(self):
        env1 = ClaimsEnvProEnvironment()
        obs1 = env1.reset(seed=42)
        env2 = ClaimsEnvProEnvironment()
        obs2 = env2.reset(seed=42)
        assert obs1.dashboard == obs2.dashboard
        assert obs1.claims_in_queue == obs2.claims_in_queue

    def test_reset_different_seeds_different_states(self):
        env = ClaimsEnvProEnvironment()
        obs1 = env.reset(seed=2)
        d1 = obs1.dashboard
        obs2 = env.reset(seed=9)
        d2 = obs2.dashboard
        assert d1 != d2

    def test_reset_clears_previous_state(self):
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        env.step(ClaimsAction(message="VIEW_QUEUE"))
        env.step(ClaimsAction(message="OPEN_CLAIM 1"))
        # Resetting after some activity must wipe counters and the open claim.
        obs = env.reset(seed=2)
        assert obs.step_number == 0
        assert obs.claims_processed == 0
        assert obs.active_claim_id == -1


# =============================================================================
# 2. Task config (5)
# =============================================================================


class TestTasks:
    """Static checks on the ``TASKS`` table and seed-to-task coverage."""

    def test_five_tasks_exist(self):
        assert len(TASKS) == 5

    def test_task_names(self):
        expected = {
            "routine_monday",
            "storm_surge",
            "multi_vehicle_pileup",
            "fraud_ring_day",
            "catastrophe_weekend",
        }
        assert set(TASKS.keys()) == expected

    def test_difficulty_progression(self):
        difficulties = [TASKS[t]["difficulty"] for t in TASKS]
        assert "easy" in difficulties
        assert "medium" in difficulties
        assert "hard" in difficulties
        assert "expert" in difficulties

    def test_claim_counts_increase(self):
        assert (
            TASKS["catastrophe_weekend"]["num_claims"]
            > TASKS["routine_monday"]["num_claims"]
        )

    def test_each_task_loads_via_seed(self):
        env = ClaimsEnvProEnvironment()
        seen = set()
        # Stop as soon as every task has been observed at least once.
        for seed in range(1, 200):
            obs = env.reset(seed=seed)
            seen.add(obs.task_name)
            if len(seen) == 5:
                break
        assert len(seen) == 5


# =============================================================================
# 3. Commands (13)
# =============================================================================


class TestCommands:
    """Each text command produces the expected dashboard output."""

    def _setup_env(self, seed=2):
        """Return a freshly reset environment for *seed*."""
        env = ClaimsEnvProEnvironment()
        env.reset(seed=seed)
        return env

    def test_view_queue(self):
        env = self._setup_env()
        obs = env.step(ClaimsAction(message="VIEW_QUEUE"))
        assert "CLAIMS QUEUE" in obs.dashboard
        assert obs.step_number == 1

    def test_open_claim(self):
        env = self._setup_env()
        obs = env.step(ClaimsAction(message="OPEN_CLAIM 1"))
        assert "CLAIM #1" in obs.dashboard
        assert obs.active_claim_id == 1

    def test_open_nonexistent_claim(self):
        env = self._setup_env()
        obs = env.step(ClaimsAction(message="OPEN_CLAIM 999"))
        assert "not found" in obs.dashboard

    def test_review_documents_without_open_claim(self):
        env = self._setup_env()
        obs = env.step(ClaimsAction(message="REVIEW_DOCUMENTS"))
        assert "No claim open" in obs.dashboard

    def test_review_documents(self):
        env = self._setup_env()
        env.step(ClaimsAction(message="OPEN_CLAIM 1"))
        obs = env.step(ClaimsAction(message="REVIEW_DOCUMENTS"))
        assert "DOCUMENT REVIEW" in obs.dashboard

    def test_check_policy(self):
        env = self._setup_env()
        env.step(ClaimsAction(message="OPEN_CLAIM 1"))
        obs = env.step(ClaimsAction(message="CHECK_POLICY"))
        assert "POLICY CHECK" in obs.dashboard
        assert "APPROVE" in obs.dashboard or "DENY" in obs.dashboard

    def test_investigate_fraud(self):
        env = self._setup_env()
        env.step(ClaimsAction(message="OPEN_CLAIM 1"))
        obs = env.step(ClaimsAction(message="INVESTIGATE_FRAUD"))
        assert "FRAUD INVESTIGATION" in obs.dashboard
        assert "FRAUD RISK SCORE" in obs.dashboard

    def test_approve_claim(self):
        env = self._setup_env()
        env.step(ClaimsAction(message="OPEN_CLAIM 1"))
        env.step(ClaimsAction(message="REVIEW_DOCUMENTS"))
        env.step(ClaimsAction(message="CHECK_POLICY"))
        env.step(ClaimsAction(message="INVESTIGATE_FRAUD"))
        obs = env.step(ClaimsAction(message="APPROVE 10000"))
        assert "DECISION" in obs.dashboard
        assert "APPROVED" in obs.dashboard
        assert obs.claims_processed == 1

    def test_deny_claim(self):
        env = self._setup_env()
        env.step(ClaimsAction(message="OPEN_CLAIM 1"))
        obs = env.step(ClaimsAction(message="DENY fraud_detected"))
        assert "DENIED" in obs.dashboard
        assert obs.claims_processed == 1

    def test_end_shift(self):
        env = self._setup_env()
        obs = env.step(ClaimsAction(message="END_SHIFT"))
        assert obs.done is True
        assert "SHIFT COMPLETE" in obs.dashboard

    def test_semicolon_multi_command(self):
        env = self._setup_env()
        obs = env.step(ClaimsAction(
            message="OPEN_CLAIM 1; REVIEW_DOCUMENTS; CHECK_POLICY"
        ))
        assert "CLAIM #1" in obs.dashboard
        assert "DOCUMENT REVIEW" in obs.dashboard
        assert "POLICY CHECK" in obs.dashboard

    def test_max_three_commands_per_step(self):
        env = self._setup_env()
        obs = env.step(ClaimsAction(
            message="VIEW_QUEUE; OPEN_CLAIM 1; REVIEW_DOCUMENTS; CHECK_POLICY"
        ))
        # 4th command (CHECK_POLICY) dropped.
        assert "CLAIMS QUEUE" in obs.dashboard
        assert "CLAIM #1" in obs.dashboard
        assert "DOCUMENT REVIEW" in obs.dashboard
        assert "POLICY CHECK" not in obs.dashboard

    def test_unknown_command(self):
        env = self._setup_env()
        obs = env.step(ClaimsAction(message="FOOBAR"))
        assert "Unknown command" in obs.dashboard


# =============================================================================
# 4. Grading (4)
# =============================================================================


class TestGrading:
    """Reward values stay in [0, 1] and rank good play above idle play."""

    def test_reward_between_0_and_1(self):
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        for _ in range(5):
            obs = env.step(ClaimsAction(message="VIEW_QUEUE"))
            assert 0.0 <= obs.reward <= 1.0

    def test_final_reward_on_end_shift(self):
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        obs = env.step(ClaimsAction(message="END_SHIFT"))
        assert obs.done is True
        assert 0.0 <= obs.reward <= 1.0

    def test_perfect_run_scores_high(self):
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        obs = _play_perfect_episode(env)
        assert obs.done is True
        # Perfect play across all 6 components should score very high.
        assert obs.reward >= 0.85, f"Perfect play scored {obs.reward}"

    def test_doing_nothing_scores_low(self):
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        obs = env.step(ClaimsAction(message="END_SHIFT"))
        assert obs.reward < 0.50


# =============================================================================
# 5. Episode boundaries (4)
# =============================================================================


class TestEpisodeBoundaries:
    """The ``done`` flag flips on END_SHIFT and at the step cap."""

    def test_done_false_initially(self):
        env = ClaimsEnvProEnvironment()
        obs = env.reset(seed=2)
        assert obs.done is False

    def test_done_on_end_shift(self):
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        obs = env.step(ClaimsAction(message="END_SHIFT"))
        assert obs.done is True

    def test_done_at_max_steps(self):
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        obs = None
        # Step past the expected cap; the episode must end on its own.
        for _ in range(60):
            obs = env.step(ClaimsAction(message="VIEW_QUEUE"))
            if obs.done:
                break
        assert obs.done is True
        assert obs.step_number <= 50

    def test_step_after_done_not_possible(self):
        # NOTE(review): despite its name this test only checks that
        # END_SHIFT sets ``done``; it never issues a step afterwards.
        # Extend it once the intended post-done behaviour is confirmed.
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        obs1 = env.step(ClaimsAction(message="END_SHIFT"))
        assert obs1.done is True


# =============================================================================
# 6. Claim generation (4)
# =============================================================================


class TestClaimGeneration:
    """Generated ``Claim`` objects are well-formed and self-consistent."""

    def test_claim_has_required_fields(self):
        rng = random.Random(42)
        c = Claim(1, rng, "easy")
        assert c.id == 1
        assert c.claimant != ""
        assert c.claim_type in [
            "auto_collision",
            "auto_theft",
            "health_emergency",
            "health_procedure",
            "property_fire",
            "property_water",
            "property_theft",
            "liability_slip_fall",
            "liability_product",
            "workers_comp",
        ]
        assert c.priority in ["low", "medium", "high", "urgent"]
        assert c.claimed_amount > 0
        assert c.policy_limit > 0
        assert c.correct_decision in ["approve", "deny"]

    def test_fraudulent_claim_should_be_denied(self):
        # Check the first fraudulent claim found, then stop.
        for seed in range(100):
            rng = random.Random(seed)
            c = Claim(1, rng, "hard")
            if c.is_fraudulent:
                assert c.correct_decision == "deny"
                assert c.correct_payout == 0.0
                assert len(c.fraud_signals) > 0
                return
        # Explicit raise instead of ``assert False`` so the failure is
        # unconditional.
        raise AssertionError("No fraudulent claim found in 100 seeds")

    def test_lapsed_policy_should_be_denied(self):
        # Check the first claim with an inactive policy, then stop.
        for seed in range(1000):
            rng = random.Random(seed)
            c = Claim(1, rng, "easy")
            if not c.policy_active:
                assert c.correct_decision == "deny"
                assert c.correct_payout == 0.0
                return
        raise AssertionError("No lapsed policy found in 1000 seeds")

    def test_valid_claim_payout_calculation(self):
        # Check the first approvable claim, then stop.
        for seed in range(100):
            rng = random.Random(seed)
            c = Claim(1, rng, "easy")
            if c.correct_decision == "approve":
                expected = max(
                    0, min(c.claimed_amount, c.policy_limit) - c.deductible
                )
                assert abs(c.correct_payout - expected) < 0.01
                return
        raise AssertionError("No valid claim found")


# =============================================================================
# 7. NEW — VERIFY_PURCHASE + Plaid + 6-component bound + constant-policy (5)
# =============================================================================


class TestPlaidVerifyPurchase:
    """5 new tests exclusive to claims-env-pro."""

    def _find_seed_with_fraud(self) -> int:
        """Find a seed whose first claim is fraudulent so VERIFY surfaces it.

        Raises:
            AssertionError: If no seed in 1..499 yields such a claim. Falling
                back to an arbitrary seed would make the calling test run
                against a claim without the property it relies on.
        """
        for seed in range(1, 500):
            env = ClaimsEnvProEnvironment()
            env.reset(seed=seed)
            first = env._claims.get(1)
            if first and first.is_fraudulent:
                return seed
        raise AssertionError(
            "No seed in 1..499 produced a fraudulent first claim"
        )

    def _find_seed_with_clean_claim(self) -> int:
        """Find a seed whose first claim is non-fraudulent and active.

        Raises:
            AssertionError: If no seed in 1..499 yields such a claim.
        """
        for seed in range(1, 500):
            env = ClaimsEnvProEnvironment()
            env.reset(seed=seed)
            first = env._claims.get(1)
            if first and not first.is_fraudulent and first.policy_active:
                return seed
        raise AssertionError(
            "No seed in 1..499 produced a clean, active first claim"
        )

    def test_verify_purchase_bonus_on_discrepancy(self):
        """+2 dense bonus when Plaid surfaces a discrepancy on a fraud claim."""
        seed = self._find_seed_with_fraud()
        env = ClaimsEnvProEnvironment()
        env.reset(seed=seed)
        env.step(ClaimsAction(message="OPEN_CLAIM 1"))
        obs = env.step(ClaimsAction(message="VERIFY_PURCHASE"))
        # The mock biases discrepancy when fraud=True so we expect a hit.
        # Dense reward = -0.30 (cost) + 2.00 (bonus) = +1.70 on first surface.
        assert obs.dense_step_reward >= DENSE_PLAID_DISCREPANCY_BONUS - 0.50, (
            f"expected dense bonus >= {DENSE_PLAID_DISCREPANCY_BONUS - 0.5}, "
            f"got {obs.dense_step_reward}"
        )
        # And revealed_info should contain the plaid hit.
        assert "plaid" in obs.revealed_info
        assert obs.revealed_info["plaid"]["discrepancy"] is True

    def test_verify_purchase_no_bonus_when_no_discrepancy(self):
        """No +2 bonus when claim is clean (mock returns no discrepancy)."""
        seed = self._find_seed_with_clean_claim()
        env = ClaimsEnvProEnvironment()
        env.reset(seed=seed)
        env.step(ClaimsAction(message="OPEN_CLAIM 1"))
        obs = env.step(ClaimsAction(message="VERIFY_PURCHASE"))
        # Dense reward should be just the cost (-0.30) — no +2 bonus.
        assert obs.dense_step_reward < DENSE_PLAID_DISCREPANCY_BONUS / 2.0
        assert obs.dense_step_reward <= 0.0  # no bonus on clean
        assert "plaid" in obs.revealed_info

    def test_plaid_falls_back_to_mock_when_unset(self):
        """get_bank_probe() returns BankProbeStub when PLAID_CLIENT_ID missing."""
        # Remove the variable for the duration of the test and restore it
        # afterwards so other tests see the original environment.
        original = os.environ.pop("PLAID_CLIENT_ID", None)
        try:
            client = get_bank_probe()
            assert isinstance(client, BankProbeStub)
        finally:
            if original is not None:
                os.environ["PLAID_CLIENT_ID"] = original

    def test_six_component_score_sums_le_1(self):
        """The 6-component graded final score must be in [0, 1]."""
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        # Drive a perfect episode (same as test_perfect_run_scores_high).
        obs = _play_perfect_episode(env)
        # final_score field must exist and be bounded.
        assert obs.final_score >= 0.0
        assert obs.final_score <= 1.0 + 1e-9, f"final_score {obs.final_score} > 1"

    def test_constant_approve_policy_collapses_across_tasks(self):
        """An 'always APPROVE 0' agent gets shredded across the 5-task mix.

        Compared against a thinking-policy upper bound of ~0.85 on perfect
        play (test_perfect_run_scores_high). The constant policy clears
        decision_accuracy on the easy task (because most claims are valid),
        which floors the mean at ~0.50 — well below the perfect-play ceiling.

        NOTE(review): the original note also described this mean as "well
        above" a no-thinking baseline of the shipped agent; that wording was
        ambiguous — confirm the intended comparison.

        Asserted bound: mean < 0.55 over the 30 episodes run here.
        """
        scores = []
        # 5 scenarios x 6 seeds each = 30 episodes.
        for sidx in range(5):
            for s in range(6):
                env = ClaimsEnvProEnvironment()
                env.reset(seed=sidx * 13 + s + 7, scenario_index=sidx)
                for cid in list(env._claims.keys()):
                    claim = env._claims[cid]
                    if claim.appeal_pending:
                        env.step(ClaimsAction(message=f"HANDLE_APPEAL {cid}"))
                    else:
                        env.step(ClaimsAction(message=f"OPEN_CLAIM {cid}"))
                    # Same decision for every claim, regardless of its facts.
                    env.step(ClaimsAction(message="APPROVE 0"))
                obs = env.step(ClaimsAction(message="END_SHIFT"))
                scores.append(obs.final_score)
        mean_score = sum(scores) / len(scores)
        assert mean_score < 0.55, (
            f"Constant-APPROVE-0 policy averaged {mean_score:.3f} across "
            f"30 episodes (expected <0.55, far from the 0.85+ perfect-play "
            f"ceiling)."
        )