# claims-env-pro/tests/test_environment.py
# akhiilll's picture
# claims-env-pro v2.0.0 — initial port (12 verbs / 5 tasks / 18 obs / 6-comp reward / Plaid)
# 027ea1a verified
"""Tests for the ClaimSense Pro Adjudication Gym.
34 tests ported verbatim (semantically) from
``insurance_agent_rl/tests/test_environment.py`` plus 5 new ones that
exercise the VERIFY_PURCHASE verb, the Plaid fallback, the 6-component
score bound, and a constant-policy collapse across the 5-task mix.
"""
import os
import random
import sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from server.claims_environment import (
ClaimsEnvProEnvironment,
Claim,
DENSE_PLAID_DISCREPANCY_BONUS,
TASKS,
TASK_INDEX,
)
from server.plaid_mock import BankProbeStub, get_bank_probe
from models import ClaimsAction
# =============================================================================
# 1. Reset (4)
# =============================================================================
class TestEnvironmentReset:
    """Checks on the observation produced by ``reset``."""

    def test_reset_returns_observation(self):
        """A fresh reset yields an observation whose counters are all zero."""
        initial = ClaimsEnvProEnvironment().reset(seed=42)
        assert initial is not None
        for counter in (
            "step_number",
            "claims_processed",
            "correct_decisions",
            "wrong_decisions",
        ):
            assert getattr(initial, counter) == 0

    def test_reset_deterministic_with_same_seed(self):
        """Two separate environments reset with one seed agree."""
        first, second = [
            ClaimsEnvProEnvironment().reset(seed=42) for _ in range(2)
        ]
        assert first.dashboard == second.dashboard
        assert first.claims_in_queue == second.claims_in_queue

    def test_reset_different_seeds_different_states(self):
        """Resetting one environment with seeds 2 and 9 changes the dashboard."""
        env = ClaimsEnvProEnvironment()
        dashboards = [env.reset(seed=seed).dashboard for seed in (2, 9)]
        assert dashboards[0] != dashboards[1]

    def test_reset_clears_previous_state(self):
        """Progress made before a second reset is not carried over."""
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        for command in ("VIEW_QUEUE", "OPEN_CLAIM 1"):
            env.step(ClaimsAction(message=command))

        fresh = env.reset(seed=2)
        assert fresh.step_number == 0
        assert fresh.claims_processed == 0
        # -1 is the value asserted here for "no claim currently open".
        assert fresh.active_claim_id == -1
# =============================================================================
# 2. Task config (5)
# =============================================================================
class TestTasks:
    """Checks on the static ``TASKS`` configuration table."""

    def test_five_tasks_exist(self):
        """Exactly five tasks are configured."""
        assert len(TASKS) == 5

    def test_task_names(self):
        """The configured task keys are exactly the five expected names."""
        wanted = {
            "routine_monday",
            "storm_surge",
            "multi_vehicle_pileup",
            "fraud_ring_day",
            "catastrophe_weekend",
        }
        assert set(TASKS.keys()) == wanted

    def test_difficulty_progression(self):
        """Every difficulty tier is used by at least one task."""
        tiers = [config["difficulty"] for config in TASKS.values()]
        for tier in ("easy", "medium", "hard", "expert"):
            assert tier in tiers

    def test_claim_counts_increase(self):
        """catastrophe_weekend carries more claims than routine_monday."""
        weekend_claims = TASKS["catastrophe_weekend"]["num_claims"]
        monday_claims = TASKS["routine_monday"]["num_claims"]
        assert weekend_claims > monday_claims

    def test_each_task_loads_via_seed(self):
        """Seeds 1..199 between them surface five distinct task names."""
        env = ClaimsEnvProEnvironment()
        task_names = set()
        for seed in range(1, 200):
            task_names.add(env.reset(seed=seed).task_name)
            if len(task_names) == 5:
                # All tasks observed; no need to keep resetting.
                break
        assert len(task_names) == 5
# =============================================================================
# 3. Commands (13)
# =============================================================================
class TestCommands:
    """One test per dashboard command, plus multi-command message parsing."""

    def _setup_env(self, seed=2):
        """Return an environment that has already been reset with ``seed``."""
        environment = ClaimsEnvProEnvironment()
        environment.reset(seed=seed)
        return environment

    @staticmethod
    def _send(env, text):
        """Step ``env`` with ``text`` as the action message; return the observation."""
        return env.step(ClaimsAction(message=text))

    def test_view_queue(self):
        """VIEW_QUEUE renders the queue and counts as one step."""
        result = self._send(self._setup_env(), "VIEW_QUEUE")
        assert "CLAIMS QUEUE" in result.dashboard
        assert result.step_number == 1

    def test_open_claim(self):
        """OPEN_CLAIM 1 shows claim #1 and marks it active."""
        result = self._send(self._setup_env(), "OPEN_CLAIM 1")
        assert "CLAIM #1" in result.dashboard
        assert result.active_claim_id == 1

    def test_open_nonexistent_claim(self):
        """Opening an unknown claim id reports it as not found."""
        result = self._send(self._setup_env(), "OPEN_CLAIM 999")
        assert "not found" in result.dashboard

    def test_review_documents_without_open_claim(self):
        """REVIEW_DOCUMENTS with nothing open reports that no claim is open."""
        result = self._send(self._setup_env(), "REVIEW_DOCUMENTS")
        assert "No claim open" in result.dashboard

    def test_review_documents(self):
        """REVIEW_DOCUMENTS on an open claim renders the document review."""
        env = self._setup_env()
        self._send(env, "OPEN_CLAIM 1")
        result = self._send(env, "REVIEW_DOCUMENTS")
        assert "DOCUMENT REVIEW" in result.dashboard

    def test_check_policy(self):
        """CHECK_POLICY renders the policy check with an APPROVE or DENY mention."""
        env = self._setup_env()
        self._send(env, "OPEN_CLAIM 1")
        text = self._send(env, "CHECK_POLICY").dashboard
        assert "POLICY CHECK" in text
        assert "APPROVE" in text or "DENY" in text

    def test_investigate_fraud(self):
        """INVESTIGATE_FRAUD renders the investigation and a risk score."""
        env = self._setup_env()
        self._send(env, "OPEN_CLAIM 1")
        text = self._send(env, "INVESTIGATE_FRAUD").dashboard
        assert "FRAUD INVESTIGATION" in text
        assert "FRAUD RISK SCORE" in text

    def test_approve_claim(self):
        """APPROVE after the full workup records one processed claim."""
        env = self._setup_env()
        for command in (
            "OPEN_CLAIM 1",
            "REVIEW_DOCUMENTS",
            "CHECK_POLICY",
            "INVESTIGATE_FRAUD",
        ):
            self._send(env, command)

        result = self._send(env, "APPROVE 10000")
        assert "DECISION" in result.dashboard
        assert "APPROVED" in result.dashboard
        assert result.claims_processed == 1

    def test_deny_claim(self):
        """DENY on an open claim records one processed claim."""
        env = self._setup_env()
        self._send(env, "OPEN_CLAIM 1")
        result = self._send(env, "DENY fraud_detected")
        assert "DENIED" in result.dashboard
        assert result.claims_processed == 1

    def test_end_shift(self):
        """END_SHIFT finishes the episode."""
        result = self._send(self._setup_env(), "END_SHIFT")
        assert result.done is True
        assert "SHIFT COMPLETE" in result.dashboard

    def test_semicolon_multi_command(self):
        """Three semicolon-separated commands all run within one step."""
        result = self._send(
            self._setup_env(),
            "OPEN_CLAIM 1; REVIEW_DOCUMENTS; CHECK_POLICY",
        )
        for marker in ("CLAIM #1", "DOCUMENT REVIEW", "POLICY CHECK"):
            assert marker in result.dashboard

    def test_max_three_commands_per_step(self):
        """Only the first three commands of a four-command message run."""
        result = self._send(
            self._setup_env(),
            "VIEW_QUEUE; OPEN_CLAIM 1; REVIEW_DOCUMENTS; CHECK_POLICY",
        )
        for marker in ("CLAIMS QUEUE", "CLAIM #1", "DOCUMENT REVIEW"):
            assert marker in result.dashboard
        # The 4th command (CHECK_POLICY) is expected to be dropped.
        assert "POLICY CHECK" not in result.dashboard

    def test_unknown_command(self):
        """An unrecognised verb is reported as an unknown command."""
        result = self._send(self._setup_env(), "FOOBAR")
        assert "Unknown command" in result.dashboard
# =============================================================================
# 4. Grading (4)
# =============================================================================
class TestGrading:
    """Bounds and ordering checks on the graded reward."""

    def test_reward_between_0_and_1(self):
        """The reward reported after each VIEW_QUEUE step lies in [0, 1]."""
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        for _ in range(5):
            step_obs = env.step(ClaimsAction(message="VIEW_QUEUE"))
            assert 0.0 <= step_obs.reward <= 1.0

    def test_final_reward_on_end_shift(self):
        """END_SHIFT ends the episode with a reward in [0, 1]."""
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        closing = env.step(ClaimsAction(message="END_SHIFT"))
        assert closing.done is True
        assert 0.0 <= closing.reward <= 1.0

    def test_perfect_run_scores_high(self):
        """Investigating every claim and deciding it correctly scores >= 0.85."""
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)

        def act(text):
            return env.step(ClaimsAction(message=text))

        for claim_id in list(env._claims.keys()):
            record = env._claims[claim_id]
            # Appeals are entered through HANDLE_APPEAL instead of OPEN_CLAIM.
            opener = "HANDLE_APPEAL" if record.appeal_pending else "OPEN_CLAIM"
            act(f"{opener} {claim_id}")
            act("REVIEW_DOCUMENTS; CHECK_POLICY")
            act("INVESTIGATE_FRAUD")
            # Decide using the ground truth stored on the claim itself.
            if record.correct_decision == "approve":
                act(f"APPROVE {record.correct_payout:.2f}")
            else:
                act(f"DENY {record.deny_reason}")

        closing = act("END_SHIFT")
        assert closing.done is True
        # Perfect play across all 6 components should score very high.
        assert closing.reward >= 0.85, f"Perfect play scored {closing.reward}"

    def test_doing_nothing_scores_low(self):
        """Ending the shift immediately scores below 0.50."""
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        closing = env.step(ClaimsAction(message="END_SHIFT"))
        assert closing.reward < 0.50
# =============================================================================
# 5. Episode boundaries (4)
# =============================================================================
class TestEpisodeBoundaries:
    """Checks on when an episode reports ``done``."""

    def test_done_false_initially(self):
        """A freshly reset episode is not done."""
        opening = ClaimsEnvProEnvironment().reset(seed=2)
        assert opening.done is False

    def test_done_on_end_shift(self):
        """END_SHIFT marks the episode done."""
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        closing = env.step(ClaimsAction(message="END_SHIFT"))
        assert closing.done is True

    def test_done_at_max_steps(self):
        """Repeated VIEW_QUEUE ends the episode within 50 steps."""
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        latest = None
        # 60 attempts gives headroom over the 50-step bound asserted below.
        for _ in range(60):
            latest = env.step(ClaimsAction(message="VIEW_QUEUE"))
            if latest.done:
                break
        assert latest.done is True
        assert latest.step_number <= 50

    def test_step_after_done_not_possible(self):
        """END_SHIFT leaves the episode in the done state."""
        # NOTE(review): despite the name, no step is issued after the episode
        # finishes; only the END_SHIFT observation is checked. Confirm the
        # intended post-done behaviour before extending this test.
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        closing = env.step(ClaimsAction(message="END_SHIFT"))
        assert closing.done is True
# =============================================================================
# 6. Claim generation (4)
# =============================================================================
class TestClaimGeneration:
    """Checks on individual ``Claim`` objects built from a seeded RNG."""

    @staticmethod
    def _first_matching_claim(difficulty, seed_count, predicate, failure_message):
        """Return the first claim over seeds ``0..seed_count-1`` satisfying ``predicate``.

        Each candidate is built as ``Claim(1, random.Random(seed), difficulty)``.

        Raises:
            AssertionError: with ``failure_message`` when no seed matches. An
                explicit raise is used instead of ``assert False`` so the
                failure is still reported when Python runs with ``-O``, which
                strips assert statements.
        """
        for seed in range(seed_count):
            candidate = Claim(1, random.Random(seed), difficulty)
            if predicate(candidate):
                return candidate
        raise AssertionError(failure_message)

    def test_claim_has_required_fields(self):
        """A generated claim has an id, a claimant and values in the known sets."""
        rng = random.Random(42)
        c = Claim(1, rng, "easy")
        assert c.id == 1
        assert c.claimant != ""
        assert c.claim_type in [
            "auto_collision", "auto_theft", "health_emergency",
            "health_procedure", "property_fire", "property_water",
            "property_theft", "liability_slip_fall", "liability_product",
            "workers_comp",
        ]
        assert c.priority in ["low", "medium", "high", "urgent"]
        assert c.claimed_amount > 0
        assert c.policy_limit > 0
        assert c.correct_decision in ["approve", "deny"]

    def test_fraudulent_claim_should_be_denied(self):
        """The first fraudulent 'hard' claim found must be a zero-payout denial."""
        c = self._first_matching_claim(
            "hard",
            100,
            lambda claim: claim.is_fraudulent,
            "No fraudulent claim found in 100 seeds",
        )
        assert c.correct_decision == "deny"
        assert c.correct_payout == 0.0
        assert len(c.fraud_signals) > 0

    def test_lapsed_policy_should_be_denied(self):
        """The first 'easy' claim with an inactive policy must be a zero-payout denial."""
        c = self._first_matching_claim(
            "easy",
            1000,
            lambda claim: not claim.policy_active,
            "No lapsed policy found in 1000 seeds",
        )
        assert c.correct_decision == "deny"
        assert c.correct_payout == 0.0

    def test_valid_claim_payout_calculation(self):
        """An approvable claim pays min(claimed, limit) minus deductible, floored at 0."""
        c = self._first_matching_claim(
            "easy",
            100,
            lambda claim: claim.correct_decision == "approve",
            "No valid claim found",
        )
        expected = max(0, min(c.claimed_amount, c.policy_limit) - c.deductible)
        assert abs(c.correct_payout - expected) < 0.01
# =============================================================================
# 7. NEW — VERIFY_PURCHASE + Plaid + 6-component bound + constant-policy (5)
# =============================================================================
class TestPlaidVerifyPurchase:
    """5 new tests exclusive to claims-env-pro."""

    @staticmethod
    def _find_seed(predicate, description):
        """Return the first seed in 1..499 whose claim #1 satisfies ``predicate``.

        Raises:
            AssertionError: when no seed qualifies. Failing here, rather than
                returning an arbitrary seed, stops the calling test from
                running against the wrong kind of claim and reporting a
                misleading reward mismatch.
        """
        for seed in range(1, 500):
            env = ClaimsEnvProEnvironment()
            env.reset(seed=seed)
            first = env._claims.get(1)
            if first and predicate(first):
                return seed
        raise AssertionError(
            f"No seed in range(1, 500) produced a {description} first claim"
        )

    @staticmethod
    def _open_claim_or_appeal(env, claim_id):
        """Enter ``claim_id`` and return its claim object.

        Sends HANDLE_APPEAL when the claim has ``appeal_pending`` set and
        OPEN_CLAIM otherwise.
        """
        claim = env._claims[claim_id]
        verb = "HANDLE_APPEAL" if claim.appeal_pending else "OPEN_CLAIM"
        env.step(ClaimsAction(message=f"{verb} {claim_id}"))
        return claim

    def _find_seed_with_fraud(self) -> int:
        """Find a seed whose first claim is fraudulent so VERIFY surfaces it."""
        return self._find_seed(lambda claim: claim.is_fraudulent, "fraudulent")

    def _find_seed_with_clean_claim(self) -> int:
        """Find a seed whose first claim is non-fraudulent and active."""
        return self._find_seed(
            lambda claim: not claim.is_fraudulent and claim.policy_active,
            "non-fraudulent, active-policy",
        )

    def test_verify_purchase_bonus_on_discrepancy(self):
        """Dense bonus is paid when Plaid surfaces a discrepancy on a fraud claim."""
        seed = self._find_seed_with_fraud()
        env = ClaimsEnvProEnvironment()
        env.reset(seed=seed)
        env.step(ClaimsAction(message="OPEN_CLAIM 1"))
        obs = env.step(ClaimsAction(message="VERIFY_PURCHASE"))
        # The mock biases discrepancy when fraud=True so we expect a hit.
        # Dense reward = -0.30 (cost) + 2.00 (bonus) = +1.70 on first surface;
        # the 0.50 slack below leaves room for that cost.
        threshold = DENSE_PLAID_DISCREPANCY_BONUS - 0.50
        assert obs.dense_step_reward >= threshold, (
            f"expected dense bonus >= {threshold}, "
            f"got {obs.dense_step_reward}"
        )
        # And revealed_info should contain the plaid hit.
        assert "plaid" in obs.revealed_info
        assert obs.revealed_info["plaid"]["discrepancy"] is True

    def test_verify_purchase_no_bonus_when_no_discrepancy(self):
        """No discrepancy bonus when the claim is clean (mock returns no discrepancy)."""
        seed = self._find_seed_with_clean_claim()
        env = ClaimsEnvProEnvironment()
        env.reset(seed=seed)
        env.step(ClaimsAction(message="OPEN_CLAIM 1"))
        obs = env.step(ClaimsAction(message="VERIFY_PURCHASE"))
        # Dense reward should be just the cost (-0.30) — no +2 bonus.
        assert obs.dense_step_reward < DENSE_PLAID_DISCREPANCY_BONUS / 2.0
        assert obs.dense_step_reward <= 0.0  # no bonus on clean
        assert "plaid" in obs.revealed_info

    def test_plaid_falls_back_to_mock_when_unset(self):
        """get_bank_probe() returns BankProbeStub when PLAID_CLIENT_ID missing."""
        original = os.environ.pop("PLAID_CLIENT_ID", None)
        try:
            client = get_bank_probe()
            assert isinstance(client, BankProbeStub)
        finally:
            # Restore the variable so later tests see the caller's environment.
            if original is not None:
                os.environ["PLAID_CLIENT_ID"] = original

    def test_six_component_score_sums_le_1(self):
        """The 6-component graded final score must be in [0, 1]."""
        env = ClaimsEnvProEnvironment()
        env.reset(seed=2)
        # Drive a perfect episode (same as test_perfect_run_scores_high).
        for cid in list(env._claims.keys()):
            claim = self._open_claim_or_appeal(env, cid)
            env.step(ClaimsAction(message="REVIEW_DOCUMENTS; CHECK_POLICY"))
            env.step(ClaimsAction(message="INVESTIGATE_FRAUD"))
            if claim.correct_decision == "approve":
                env.step(ClaimsAction(message=f"APPROVE {claim.correct_payout:.2f}"))
            else:
                env.step(ClaimsAction(message=f"DENY {claim.deny_reason}"))
        obs = env.step(ClaimsAction(message="END_SHIFT"))
        # final_score field must exist and be bounded.
        assert obs.final_score >= 0.0
        assert obs.final_score <= 1.0 + 1e-9, f"final_score {obs.final_score} > 1"

    def test_constant_approve_policy_collapses_across_tasks(self):
        """An 'always APPROVE 0' agent scores poorly across the 5-task mix.

        Runs 6 seeds for each of the 5 scenario indices (30 episodes) and
        averages ``final_score``. Perfect play is expected to reach at least
        0.85 (see test_perfect_run_scores_high). The constant policy still
        earns decision-accuracy credit on the easy task, where most claims
        are valid, which keeps its mean around 0.50. Asserted bound on the
        computed mean: < 0.55.
        """
        scores = []
        for sidx in range(5):
            for s in range(6):
                env = ClaimsEnvProEnvironment()
                # Seed formula kept as-is so the sampled episodes are unchanged.
                env.reset(seed=sidx * 13 + s + 7, scenario_index=sidx)
                for cid in list(env._claims.keys()):
                    self._open_claim_or_appeal(env, cid)
                    env.step(ClaimsAction(message="APPROVE 0"))
                obs = env.step(ClaimsAction(message="END_SHIFT"))
                scores.append(obs.final_score)
        mean_score = sum(scores) / len(scores)
        assert mean_score < 0.55, (
            f"Constant-APPROVE-0 policy averaged {mean_score:.3f} across "
            f"30 episodes (expected <0.55, far from the 0.85+ perfect-play "
            f"ceiling)."
        )