Spaces:
Running
Running
| from __future__ import annotations | |
| from copy import deepcopy | |
| from dataclasses import dataclass | |
| from datetime import datetime, timedelta | |
| from typing import Any, Dict, List, Optional | |
| from .models import InsuranceClaimReward | |
# Investigation-budget units charged for each action type.
# Terminal decisions (approve / deny / investigate / escalate) cost nothing.
ACTION_COSTS: Dict[str, int] = dict(
    validate_document=1,
    request_information=2,
    lookup_policy_history=1,
    compare_documents=1,
    flag_fraud_signal=1,
    estimate_payout=1,
    query_linked_claim=1,
    verify_identity=2,
    query_historical_data=1,
    verify_provider_registration=1,
    convene_debate_panel=2,  # multi-agent deliberation costs 2 budget units
    approve_claim=0,
    deny_claim=0,
    request_investigation=0,
    escalate_to_human=0,
)
@dataclass
class TaskDefinition:
    """Authored, static description of one claim-review task.

    Instances are created with keyword arguments (see the ``TASKS`` registry),
    so the class must be a dataclass to get a generated ``__init__``.
    """

    task_id: str  # registry key, e.g. "clean_claim"
    title: str  # short human-readable title
    difficulty: str  # "easy" / "medium" / "hard" in the visible registry
    max_steps: int  # step limit for an episode
    investigation_budget: int  # soft budget; overage adds 0.02 penalty per unit
    claim_id: str  # identifier of the primary claim under review
    claimant: Dict[str, Any]  # name, policy_number, contact, claim_date, ...
    incident: Dict[str, Any]  # date, location, type, description
    documents: List[Dict[str, Any]]  # each has doc_id, doc_type, content, metadata
    linked_claims: List[Dict[str, Any]]  # related claims; may carry "_hidden_until_queries"
    expected_signals: List[str]  # fraud-signal ids the agent is expected to find
    allowed_final_decisions: List[str]  # final actions counted as correct
    payout_band: Optional[tuple[float, float]]  # (low, high) payout range, or None
    consistency_group_claim_ids: List[str]  # claims expected as investigation targets
    policy_history: Dict[str, Any]  # prior_claims, policy_age_days, risk_score, note, ...
    ground_truth_confidence: float  # reference confidence used for calibration scoring
@dataclass
class RuntimeTask:
    """Per-episode, mutable copy of a task definition plus its variant index.

    Instances are created with keyword arguments and their fields are
    reassigned after construction, so this is a plain (non-frozen) dataclass.
    """

    task_id: str  # registry key of the source task
    title: str  # short human-readable title
    difficulty: str  # difficulty label copied from the definition
    max_steps: int  # step limit for the episode
    investigation_budget: int  # budget units available before overage
    claim_id: str  # identifier of the primary claim under review
    claimant: Dict[str, Any]  # claimant details
    incident: Dict[str, Any]  # incident details
    documents: List[Dict[str, Any]]  # documents; metadata may be varied per variant
    linked_claims: List[Dict[str, Any]]  # related claims, including hidden ones
    expected_signals: List[str]  # fraud-signal ids the agent is expected to find
    allowed_final_decisions: List[str]  # final actions counted as correct
    payout_band: Optional[tuple[float, float]]  # (low, high) payout range, or None
    consistency_group_claim_ids: List[str]  # claims expected as investigation targets
    policy_history: Dict[str, Any]  # policy history record
    ground_truth_confidence: float  # reference confidence for calibration scoring
    variant_id: int  # index of the seeded variant applied to this copy
| def _base_available_actions(task_id: str = "") -> List[str]: | |
| actions = [ | |
| "validate_document", | |
| "request_information", | |
| "lookup_policy_history", | |
| "compare_documents", | |
| "flag_fraud_signal", | |
| "estimate_payout", | |
| "approve_claim", | |
| "deny_claim", | |
| "request_investigation", | |
| ] | |
| if task_id in ("coordinated_fraud", "distribution_shift_claim"): | |
| actions.append("query_linked_claim") | |
| actions.append("query_historical_data") | |
| actions.append("escalate_to_human") | |
| if task_id == "distribution_shift_claim": | |
| actions.append("verify_provider_registration") | |
| if task_id == "identity_fraud": | |
| actions.append("verify_identity") | |
| if task_id != "clean_claim": | |
| actions.append("convene_debate_panel") | |
| return actions | |
# Registry of all authored tasks, keyed by task_id. Each entry is the static
# baseline; build_runtime_task() copies an entry and applies seeded variations.
TASKS: Dict[str, TaskDefinition] = {
    # Easy: consistent documents, no fraud signals, approval with a payout band.
    "clean_claim": TaskDefinition(
        task_id="clean_claim",
        title="Clean auto claim with complete evidence",
        difficulty="easy",
        max_steps=8,
        investigation_budget=8,  # validate×3 + estimate + approve = 5; 3 units slack
        claim_id="CLM-AUTO-001",
        claimant={
            "name": "Rajesh Verma",
            "policy_number": "POL-AUTO-8821",
            "contact": "+91-9810012345",
            "claim_date": "2026-03-02",
        },
        incident={
            "date": "2026-02-27",
            "location": "Pune, Maharashtra",
            "type": "auto_collision",
            "description": "Rear-end collision at a traffic signal, bumper and tail-light damage.",
        },
        documents=[
            {
                "doc_id": "DOC-1",
                "doc_type": "claim_form",
                "content": "Claim submitted for rear-end collision on 2026-02-27.",
                "metadata": {"incident_date": "2026-02-27", "declared_cost_inr": 51000},
            },
            {
                "doc_id": "DOC-2",
                "doc_type": "garage_estimate",
                "content": "Repair estimate from authorized center.",
                "metadata": {"estimate_inr": 50500, "garage": "Pune Auto Care"},
            },
            {
                "doc_id": "DOC-3",
                "doc_type": "police_report",
                "content": "Minor collision report with matching date and location.",
                "metadata": {"incident_date": "2026-02-27", "report_id": "PR-112"},
            },
        ],
        linked_claims=[],
        expected_signals=[],
        allowed_final_decisions=["approve_claim"],
        payout_band=(45000, 55000),
        consistency_group_claim_ids=[],
        policy_history={
            "prior_claims": [],
            "years_as_customer": 6,
            "policy_age_days": 2190,
            "risk_score": "low",
            "note": "Long-standing customer with no prior claims. Low risk.",
        },
        ground_truth_confidence=0.95,
    ),
    # Medium: documents disagree on dates, cost and signatures; a prior claim
    # for the same procedure appears in the policy history.
    "contradictory_claim": TaskDefinition(
        task_id="contradictory_claim",
        title="Medical claim with contradictory evidence",
        difficulty="medium",
        max_steps=12,
        investigation_budget=12,  # validate×4 + lookup + flag×4 + deny = 10; 2 units slack
        claim_id="CLM-MED-017",
        claimant={
            "name": "Neha Kapoor",
            "policy_number": "POL-HEALTH-2190",
            "contact": "+91-9822211188",
            "claim_date": "2026-03-05",
        },
        incident={
            "date": "2026-02-16",
            "location": "Ahmedabad, Gujarat",
            "type": "medical_procedure",
            "description": "Emergency appendectomy claim with post-op hospitalization.",
        },
        documents=[
            {
                "doc_id": "DOC-10",
                "doc_type": "claim_form",
                "content": "Claim incident date recorded as 2026-02-20.",
                "metadata": {"incident_date": "2026-02-20", "claimed_cost_inr": 240000},
            },
            {
                "doc_id": "DOC-11",
                "doc_type": "hospital_admission",
                "content": "Patient admitted on 2026-02-17 for emergency surgery.",
                "metadata": {"admission_date": "2026-02-17", "procedure": "appendectomy"},
            },
            {
                "doc_id": "DOC-12",
                "doc_type": "billing_summary",
                "content": "Total treatment cost billed: INR 240000.",
                "metadata": {"claimed_cost_inr": 240000, "standard_rate_inr": 100000},
            },
            {
                "doc_id": "DOC-13",
                "doc_type": "discharge_summary",
                "content": "Digitally scanned discharge summary.",
                "metadata": {
                    "doctor_signature": "DR-XYZ-SIGN-ALPHA",
                    "clinic_reference_signature": "DR-XYZ-SIGN-BETA",
                },
            },
        ],
        linked_claims=[],
        expected_signals=[
            "date_mismatch",
            "cost_inflation",
            "signature_mismatch",
            "prior_similar_claim",
        ],
        allowed_final_decisions=["deny_claim", "request_investigation"],
        payout_band=None,
        consistency_group_claim_ids=[],
        policy_history={
            "prior_claims": [
                {
                    "claim_id": "CLM-MED-008",
                    "date": "2025-06-14",
                    "type": "medical_procedure",
                    "procedure": "appendectomy",
                    "paid_inr": 95000,
                    "note": "Claim paid for same procedure 8 months ago at a different hospital.",
                }
            ],
            "years_as_customer": 3,
            "policy_age_days": 1095,
            "risk_score": "medium",
            "note": "Prior claim for identical procedure 8 months ago — statistical anomaly.",
        },
        ground_truth_confidence=0.80,
    ),
    # Hard: a ring of linked claims sharing a repair shop, emergency contact,
    # narrative and broker; one linked claim starts hidden.
    "coordinated_fraud": TaskDefinition(
        task_id="coordinated_fraud",
        title="Coordinated multi-claim fraud ring",
        difficulty="hard",
        max_steps=20,
        investigation_budget=18,  # validate×3 + query×4 + flag×5 + escalate = 15; 3 units slack
        claim_id="CLM-GROUP-301",
        claimant={
            "name": "Primary: Arjun Saini",
            "policy_number": "POL-MOTOR-9001",
            "contact": "+91-9898001122",
            "claim_date": "2026-03-09",
        },
        incident={
            "date": "2026-03-01",
            "location": "Jaipur, Rajasthan",
            "type": "multi_vehicle_damage",
            "description": "Three separate claims likely linked by staged repairs and copied narratives.",
        },
        documents=[
            {
                "doc_id": "DOC-21",
                "doc_type": "primary_claim_packet",
                "content": "Primary claim references repair at RapidFix Motors in Kota (340km away).",
                "metadata": {"repair_shop": "RapidFix Motors", "distance_km": 340},
            },
            {
                "doc_id": "DOC-22",
                "doc_type": "narrative",
                "content": "Accident description text is nearly identical to two linked claims.",
                "metadata": {"template_similarity": 0.93},
            },
            {
                "doc_id": "DOC-23",
                "doc_type": "policy_timeline",
                "content": "All related policies purchased within 30 days of incident.",
                "metadata": {"days_since_purchase": [18, 24, 29]},
            },
        ],
        linked_claims=[
            {
                "claim_id": "CLM-GROUP-302",
                "claimant": "Rohit Jain",
                "contact": "+91-9898004455",
                "emergency_contact": "+91-9000002222",
                "repair_shop": "RapidFix Motors",
                "accident_description": "A truck abruptly stopped causing chain collision near city bypass.",
                "policy_purchase_date": "2026-02-06",
            },
            {
                "claim_id": "CLM-GROUP-303",
                "claimant": "Pooja Nair",
                "contact": "+91-9845509988",
                "emergency_contact": "+91-9000002222",
                "repair_shop": "RapidFix Motors",
                "accident_description": "A truck abruptly stopped causing chain collision near city bypass.",
                "policy_purchase_date": "2026-02-11",
            },
            {
                "claim_id": "CLM-GROUP-301",
                "claimant": "Arjun Saini",
                "contact": "+91-9898001122",
                "emergency_contact": "+91-9000003333",
                "repair_shop": "RapidFix Motors",
                "accident_description": "A truck abruptly stopped causing chain collision near city bypass.",
                "policy_purchase_date": "2026-02-02",
                "broker_id": "BRK-441",
            },
            # 4th claim — hidden until agent queries 2 linked claims (dynamic ring expansion)
            {
                "claim_id": "CLM-GROUP-304",
                "claimant": "Vikram Sharma",
                "contact": "+91-9011009988",
                "emergency_contact": "+91-9000002222",
                "repair_shop": "RapidFix Motors",
                "accident_description": "A truck abruptly stopped causing chain collision near city bypass.",
                "policy_purchase_date": "2026-02-08",
                "broker_id": "BRK-441",
                "_hidden_until_queries": 2,  # surfaced only after 2 existing claims are queried
            },
        ],
        expected_signals=[
            "shared_repair_shop_far",
            "shared_emergency_contact",
            "near_identical_descriptions",
            "recent_policy_cluster",
            "clustered_policy_broker",  # discoverable by querying the 4th linked claim
        ],
        allowed_final_decisions=["request_investigation"],
        payout_band=None,
        consistency_group_claim_ids=["CLM-GROUP-301", "CLM-GROUP-302", "CLM-GROUP-303", "CLM-GROUP-304"],
        policy_history={
            "prior_claims": [],
            "years_as_customer": 0,
            "policy_age_days": 18,
            "risk_score": "high",
            "note": "Policy purchased only 18 days before incident. No claim history — all three claimants opened policies within 30 days of each other.",
        },
        ground_truth_confidence=0.90,
    ),
    # Hard: the primary claim looks routine; the ring is visible only through
    # the linked claims. One linked claim starts hidden.
    "distribution_shift_claim": TaskDefinition(
        task_id="distribution_shift_claim",
        title="Cross-claim coordinated ring with distribution shift",
        difficulty="hard",
        max_steps=28,
        investigation_budget=20,
        claim_id="CLM-DIST-601",
        claimant={
            "name": "Suresh Pillai",
            "policy_number": "POL-MOTOR-5541",
            "contact": "+91-9876543210",
            "claim_date": "2026-03-15",
        },
        incident={
            "date": "2026-03-08",
            "location": "Bengaluru, Karnataka",
            "type": "auto_collision",
            "description": "Minor collision at junction. Claim appears routine on surface but cross-claim analysis reveals coordinated ring.",
        },
        documents=[
            {
                "doc_id": "DOC-41",
                "doc_type": "claim_form",
                "content": "Standard auto collision claim submitted on 2026-03-15 for incident on 2026-03-08.",
                "metadata": {"incident_date": "2026-03-08", "declared_cost_inr": 85000},
            },
            {
                "doc_id": "DOC-42",
                "doc_type": "garage_estimate",
                "content": "Repair estimate from FastRepair Hub, Whitefield.",
                "metadata": {"estimate_inr": 84000, "garage": "FastRepair Hub"},
            },
            {
                "doc_id": "DOC-43",
                "doc_type": "police_report",
                "content": "Minor collision report filed. No independent witnesses.",
                "metadata": {"incident_date": "2026-03-08", "witnesses": 0},
            },
        ],
        linked_claims=[
            {
                "claim_id": "CLM-DIST-602",
                "claimant": "Meera Iyer",
                "contact": "+91-9845501234",
                "emergency_contact": "+91-9000005555",
                "repair_shop": "FastRepair Hub",
                "accident_description": "Minor collision at junction. No injuries.",
                "policy_purchase_date": "2026-02-12",
                "broker_id": "BRK-882",
            },
            {
                "claim_id": "CLM-DIST-603",
                "claimant": "Ravi Shankar",
                "contact": "+91-9741200099",
                "emergency_contact": "+91-9000005555",
                "repair_shop": "FastRepair Hub",
                "accident_description": "Minor collision at junction. No injuries.",
                "policy_purchase_date": "2026-02-18",
                "broker_id": "BRK-882",
            },
            {
                "claim_id": "CLM-DIST-604",
                "claimant": "Deepa Nair",
                "contact": "+91-9911200033",
                "emergency_contact": "+91-9000005555",
                "repair_shop": "FastRepair Hub",
                "accident_description": "Minor collision at junction. No injuries.",
                "policy_purchase_date": "2026-02-20",
                "broker_id": "BRK-882",
                "_hidden_until_queries": 2,
            },
        ],
        expected_signals=[
            "shared_repair_shop_far",
            "shared_emergency_contact",
            "recent_policy_cluster",
            "clustered_policy_broker",
            "near_identical_descriptions",
        ],
        allowed_final_decisions=["escalate_to_human", "request_investigation"],
        payout_band=None,
        consistency_group_claim_ids=["CLM-DIST-601", "CLM-DIST-602", "CLM-DIST-603", "CLM-DIST-604"],
        policy_history={
            "prior_claims": [],
            "years_as_customer": 0,
            "policy_age_days": 24,
            "risk_score": "high",
            "note": "Policy purchased 24 days before incident. All 3 linked claimants share broker BRK-882 and same repair shop. Cross-claim cluster detected in historical data.",
        },
        ground_truth_confidence=0.70,
    ),
    # Hard: claimant identity cannot be matched against hospital or registry
    # records, and the policy was issued days before the incident.
    "identity_fraud": TaskDefinition(
        task_id="identity_fraud",
        title="Ghost claimant identity fraud",
        difficulty="hard",
        max_steps=15,
        investigation_budget=14,  # verify(2)+lookup+validate×4+flag×4+deny = 11; 3 units slack
        claim_id="CLM-ID-501",
        claimant={
            "name": "Aarav Mehta",
            "policy_number": "POL-HEALTH-7734",
            "contact": "+91-9711100045",
            "claim_date": "2026-03-12",
            "national_id": "XXXX-7821",
        },
        incident={
            "date": "2026-03-07",
            "location": "Mumbai, Maharashtra",
            "type": "medical_procedure",
            "description": "Knee replacement surgery claim with post-op physiotherapy.",
        },
        documents=[
            {
                "doc_id": "DOC-31",
                "doc_type": "claim_form",
                "content": "Claim submitted for knee replacement on 2026-03-07. National ID: XXXX-7821.",
                "metadata": {
                    "incident_date": "2026-03-07",
                    "claimed_cost_inr": 320000,
                    "national_id_suffix": "7821",
                },
            },
            {
                "doc_id": "DOC-32",
                "doc_type": "hospital_record",
                "content": "Hospital system query: No patient named Aarav Mehta with DOB matching policy found. Record shows admission under a different name with similar ID.",
                "metadata": {
                    "patient_found": False,
                    "name_on_record": "Aarav Kumar",
                    "dob_mismatch": True,
                },
            },
            {
                "doc_id": "DOC-33",
                "doc_type": "policy_inception",
                "content": "Policy POL-HEALTH-7734 issued on 2026-03-02. Incident date 2026-03-07 falls within the 30-day exclusion window.",
                "metadata": {
                    "policy_issue_date": "2026-03-02",
                    "incident_date": "2026-03-07",
                    "days_to_claim": 5,
                    "exclusion_window_days": 30,
                },
            },
            {
                "doc_id": "DOC-34",
                "doc_type": "id_proof",
                "content": "Submitted ID proof shows date of birth 1988-04-15. Policy application on file states DOB 1986-11-22. The national registry has no record matching either entry for this ID number.",
                "metadata": {
                    "dob_on_id": "1988-04-15",
                    "dob_on_policy": "1986-11-22",
                    "registry_match": False,
                },
            },
        ],
        linked_claims=[],
        expected_signals=[
            "identity_mismatch",
            "hospital_no_record",
            "recent_policy_purchase",
            "dob_inconsistency",
        ],
        allowed_final_decisions=["deny_claim", "request_investigation"],
        payout_band=None,
        consistency_group_claim_ids=[],
        policy_history={
            "prior_claims": [],
            "years_as_customer": 0,
            "policy_age_days": 5,
            "risk_score": "critical",
            "note": "Policy opened only 5 days before incident. Claimant identity could not be verified at onboarding. KYC status: PENDING.",
        },
        ground_truth_confidence=0.90,
    ),
}
def get_task_definition(task_id: str) -> TaskDefinition:
    """Look up a task in the ``TASKS`` registry.

    Raises:
        ValueError: if ``task_id`` is not a registered task; the message
            lists the available ids.
    """
    try:
        return TASKS[task_id]
    except KeyError:
        raise ValueError(f"Unknown task_id '{task_id}'. Available: {list(TASKS)}") from None
def list_tasks_summary() -> List[Dict[str, Any]]:
    """Return one summary dict per registered task, in registry order.

    Each summary carries the task id, title, difficulty, step limit and the
    list of accepted final decisions (under the key ``expected_decisions``).
    """
    return [
        {
            "task_id": definition.task_id,
            "title": definition.title,
            "difficulty": definition.difficulty,
            "max_steps": definition.max_steps,
            "expected_decisions": definition.allowed_final_decisions,
        }
        for definition in TASKS.values()
    ]
def _copy_runtime_from_task(task: TaskDefinition, variant_id: int) -> RuntimeTask:
    """Create a ``RuntimeTask`` from ``task`` that shares no mutable state with it.

    Scalar fields are passed through; every container field is deep-copied so
    that per-variant edits never touch the registry entry.
    """
    passthrough = (
        "task_id",
        "title",
        "difficulty",
        "max_steps",
        "investigation_budget",
        "claim_id",
        "ground_truth_confidence",
    )
    isolated = (
        "claimant",
        "incident",
        "documents",
        "linked_claims",
        "expected_signals",
        "allowed_final_decisions",
        "payout_band",
        "consistency_group_claim_ids",
        "policy_history",
    )
    fields: Dict[str, Any] = {name: getattr(task, name) for name in passthrough}
    for name in isolated:
        fields[name] = deepcopy(getattr(task, name))
    return RuntimeTask(variant_id=variant_id, **fields)
def build_runtime_task(task_id: str, seed: Optional[int] = None) -> RuntimeTask:
    """Build a per-episode copy of a task with seeded, deterministic variation.

    The variant index is ``abs(seed) % 5`` (0 when ``seed`` is None). Variant 0
    reproduces the baseline values from the registry. Tasks without a branch
    below (e.g. ``distribution_shift_claim``) are returned as an unmodified copy.

    Args:
        task_id: key of the task in the registry.
        seed: optional integer selecting one of five variants.

    Returns:
        A ``RuntimeTask`` whose containers are independent of the registry.

    Raises:
        ValueError: if ``task_id`` is unknown (from ``get_task_definition``).
    """
    task = get_task_definition(task_id)
    variant_id = 0 if seed is None else abs(seed) % 5
    runtime = _copy_runtime_from_task(task, variant_id)

    if task_id == "clean_claim":
        # Shift declared cost, garage estimate and payout band together.
        offsets = [-2000, -1000, 0, 1000, 2000]
        offset = offsets[variant_id]
        declared_cost = 51000 + offset
        estimate = 50500 + offset
        runtime.documents[0]["metadata"]["declared_cost_inr"] = declared_cost
        runtime.documents[1]["metadata"]["estimate_inr"] = estimate
        center = 50000 + offset
        runtime.payout_band = (float(center - 5000), float(center + 5000))
    elif task_id == "contradictory_claim":
        # The claim form's incident date trails the hospital admission date
        # by a variant-specific gap.
        admission_date_str = runtime.documents[1]["metadata"]["admission_date"]
        admission_date = datetime.strptime(admission_date_str, "%Y-%m-%d")
        date_gap_days = [3, 4, 2, 5, 3][variant_id]
        incident_date = (admission_date + timedelta(days=date_gap_days)).strftime("%Y-%m-%d")
        runtime.documents[0]["metadata"]["incident_date"] = incident_date
        runtime.documents[0]["content"] = f"Claim incident date recorded as {incident_date}."
        # Billed cost is always 2.4x the variant's standard rate.
        standard_rates = [100000, 105000, 95000, 110000, 98000]
        standard_rate = standard_rates[variant_id]
        claimed_cost = int(standard_rate * 2.4)
        runtime.documents[0]["metadata"]["claimed_cost_inr"] = claimed_cost
        runtime.documents[2]["metadata"]["claimed_cost_inr"] = claimed_cost
        runtime.documents[2]["metadata"]["standard_rate_inr"] = standard_rate
        runtime.documents[2]["content"] = f"Total treatment cost billed: INR {claimed_cost}."
    elif task_id == "coordinated_fraud":
        distances = [340, 360, 320, 380, 300]
        distance = distances[variant_id]
        runtime.documents[0]["metadata"]["distance_km"] = distance
        runtime.documents[0]["content"] = (
            f"Primary claim references repair at RapidFix Motors in Kota ({distance}km away)."
        )
        similarity = [0.93, 0.91, 0.95, 0.9, 0.94][variant_id]
        runtime.documents[1]["metadata"]["template_similarity"] = similarity
        purchase_sets = [
            [18, 24, 29],
            [12, 22, 27],
            [9, 19, 28],
            [16, 21, 26],
            [14, 25, 30],
        ]
        runtime.documents[2]["metadata"]["days_since_purchase"] = purchase_sets[variant_id]
    elif task_id == "identity_fraud":
        # Vary days_to_claim and policy inception date across variants.
        days_to_claim_variants = [5, 7, 3, 8, 6]
        days_to_claim = days_to_claim_variants[variant_id]
        inception_meta = runtime.documents[2]["metadata"]
        incident_date_str = inception_meta["incident_date"]
        # The baseline counts days_to_claim back from the incident date
        # (issued 2026-03-02, incident 2026-03-07, 5 days). Use real date
        # arithmetic so the issue date stays before the incident and can
        # cross a month boundary.
        incident_dt = datetime.strptime(incident_date_str, "%Y-%m-%d")
        issue_date = (incident_dt - timedelta(days=days_to_claim)).strftime("%Y-%m-%d")
        inception_meta["days_to_claim"] = days_to_claim
        inception_meta["policy_issue_date"] = issue_date
        runtime.documents[2]["content"] = (
            f"Policy POL-HEALTH-7734 issued on {issue_date}. "
            f"Incident date {incident_date_str} falls within the 30-day exclusion window."
        )
        # runtime.policy_history is already a private deep copy, so it can be
        # edited in place. Keep the free-text note in step with the age.
        base_age = task.policy_history["policy_age_days"]
        runtime.policy_history["policy_age_days"] = days_to_claim
        note = runtime.policy_history.get("note")
        if isinstance(note, str):
            runtime.policy_history["note"] = note.replace(
                f"only {base_age} days before incident",
                f"only {days_to_claim} days before incident",
            )

    return runtime
| def _stub_linked_claims(linked_claims: List[Dict[str, Any]]) -> List[Dict[str, Any]]: | |
| """Return only claim_id and claimant. Hidden claims (with _hidden_until_queries > 0) | |
| are excluded from the initial list — they surface dynamically in the environment.""" | |
| return [ | |
| {"claim_id": c["claim_id"], "claimant": c["claimant"]} | |
| for c in linked_claims | |
| if "claim_id" in c and c.get("_hidden_until_queries", 0) == 0 | |
| ] | |
def build_initial_payload(runtime_task: RuntimeTask) -> Dict[str, Any]:
    """Assemble the initial episode payload for ``runtime_task``.

    ``linked_claims`` is the initially visible view:

    * for ``coordinated_fraud`` only ``claim_id`` / ``claimant`` stubs of the
      non-hidden claims are listed;
    * for every other task the non-hidden claims are listed in full.

    Claims marked with a non-zero ``_hidden_until_queries`` never appear in the
    visible view. The complete list, hidden entries included, is kept under
    ``_full_linked_claims`` (presumably for the environment's own bookkeeping;
    confirm against the consumer).

    All containers in the result are deep copies, so mutating the payload
    leaves ``runtime_task`` untouched.
    """
    if runtime_task.task_id == "coordinated_fraud":
        linked_claims_visible = _stub_linked_claims(runtime_task.linked_claims)
    else:
        # Previously the full list was copied here, exposing claims that are
        # supposed to stay hidden until enough linked claims were queried.
        linked_claims_visible = [
            deepcopy(claim)
            for claim in runtime_task.linked_claims
            if claim.get("_hidden_until_queries", 0) == 0
        ]

    return {
        "task_id": runtime_task.task_id,
        "claim_id": runtime_task.claim_id,
        "claimant": deepcopy(runtime_task.claimant),
        "incident": deepcopy(runtime_task.incident),
        "documents": deepcopy(runtime_task.documents),
        "linked_claims": linked_claims_visible,
        "_full_linked_claims": deepcopy(runtime_task.linked_claims),
        "max_steps": runtime_task.max_steps,
        "investigation_budget": runtime_task.investigation_budget,
        "variant_id": runtime_task.variant_id,
        "available_actions": _base_available_actions(runtime_task.task_id),
    }
def get_evidence_keyword_hints(task_id: str, flag_id: str) -> List[str]:
    """Return the keyword anchors for evidence supporting ``flag_id``.

    An empty list is returned when the task has no hint table or the flag is
    not listed for that task.
    """
    keyword_table: Dict[str, Dict[str, List[str]]] = {
        "contradictory_claim": {
            "date_mismatch": ["date", "admission", "mismatch", "incident"],
            "cost_inflation": ["cost", "rate", "2.4", "inflation", "overbilled"],
            "signature_mismatch": ["signature", "doctor", "clinic", "dr-xyz"],
            "prior_similar_claim": ["prior", "previous", "history", "appendectomy", "procedure", "8 months", "clm-med-008"],
        },
        "coordinated_fraud": {
            "shared_repair_shop_far": ["repair", "shop", "distance", "km", "kota", "rapidfix"],
            "shared_emergency_contact": ["contact", "phone", "emergency", "shared", "9000002222"],
            "near_identical_descriptions": ["identical", "description", "narrative", "template", "similarity"],
            "recent_policy_cluster": ["policy", "purchase", "days", "cluster", "30"],
            "clustered_policy_broker": ["broker", "brk-441", "same broker", "policy broker", "issued"],
        },
        "identity_fraud": {
            "identity_mismatch": ["identity", "registry", "national", "id", "mismatch", "no record", "7821"],
            "hospital_no_record": ["hospital", "record", "patient", "not found", "name", "admission"],
            "recent_policy_purchase": ["policy", "days", "exclusion", "window", "inception", "5", "30"],
            "dob_inconsistency": ["dob", "date of birth", "1988", "1986", "inconsistency", "mismatch"],
        },
        # NEW-7 fix: this task used to have no entry, so the lookup returned []
        # and the caller's keyword check (which treats an empty hint list as
        # "anything passes") accepted any evidence. The anchors below come
        # straight from the task data: FastRepair Hub Whitefield (DOC-42), the
        # shared +91-9000005555 contact, broker BRK-882, the identical "Minor
        # collision at junction. No injuries." narrative on CLM-DIST-602/603/604,
        # and policies bought roughly 30 days before the incident.
        "distribution_shift_claim": {
            "shared_repair_shop_far": ["repair", "shop", "fastrepair", "whitefield", "garage"],
            "shared_emergency_contact": ["contact", "phone", "emergency", "9000005555", "shared"],
            "recent_policy_cluster": ["policy", "purchase", "days", "cluster", "24", "30"],
            "clustered_policy_broker": ["broker", "brk-882", "same broker", "policy broker"],
            "near_identical_descriptions": ["identical", "description", "narrative", "template", "minor collision"],
        },
    }

    per_task = keyword_table.get(task_id)
    if per_task is None:
        return []
    return per_task.get(flag_id, [])
# Document pairs whose comparison reveals a signal, listed once per task as
# (doc_a, doc_b, signal). The public table below is derived from this.
_COMPARE_PAIR_SIGNALS: Dict[str, List[tuple]] = {
    "contradictory_claim": [
        ("DOC-10", "DOC-11", "date_mismatch"),
        ("DOC-10", "DOC-12", "cost_inflation"),
    ],
    "coordinated_fraud": [
        ("DOC-21", "DOC-22", "near_identical_descriptions"),
    ],
    "identity_fraud": [
        ("DOC-31", "DOC-34", "dob_inconsistency"),
        ("DOC-32", "DOC-33", "hospital_no_record"),
    ],
}

# Cross-document comparison signal mapping: (doc_a, doc_b) → signals discovered.
# Every pair is registered in both argument orders.
COMPARE_DOCUMENT_SIGNALS: Dict[str, Dict[tuple, List[str]]] = {
    task_name: {
        ordered_pair: [signal]
        for first, second, signal in pair_rows
        for ordered_pair in ((first, second), (second, first))
    }
    for task_name, pair_rows in _COMPARE_PAIR_SIGNALS.items()
}
def get_compare_signals(task_id: str, doc_id_a: str, doc_id_b: str) -> List[str]:
    """Return the signals revealed by comparing two documents in a task.

    Returns an empty list when the task has no comparison table or the
    ordered pair ``(doc_id_a, doc_id_b)`` is not registered.
    """
    pair_table = COMPARE_DOCUMENT_SIGNALS.get(task_id)
    if pair_table is None:
        return []
    return pair_table.get((doc_id_a, doc_id_b), [])
def clamp01(value: float) -> float:
    """Limit ``value`` to the closed interval [0.0, 1.0].

    Values already inside the interval are returned unchanged.
    """
    return 0.0 if value < 0.0 else (1.0 if value > 1.0 else value)
def score_payout_accuracy(amount: Optional[float], payout_band: Optional[tuple[float, float]]) -> float:
    """Score a payout estimate against the acceptable band.

    * No band: 1.0 if no estimate was given, otherwise 0.0.
    * Band but no estimate: 0.0.
    * Estimate inside the band (inclusive): 1.0.
    * Otherwise the score falls off linearly with distance from the band
      centre, reaching 0.0 at 2.5 half-widths (the half-width is floored at 1.0).
    """
    no_estimate = amount is None
    if payout_band is None:
        return 1.0 if no_estimate else 0.0
    if no_estimate:
        return 0.0

    lower, upper = payout_band
    if lower <= amount <= upper:
        return 1.0

    midpoint = (lower + upper) / 2.0
    half_width = max((upper - lower) / 2.0, 1.0)
    relative_miss = abs(amount - midpoint) / (2.5 * half_width)
    return clamp01(1.0 - relative_miss)
def score_calibration(agent_confidence: Optional[float], ground_truth_confidence: float) -> float:
    """Brier-style calibration score.

    The agent's confidence is first clamped to [0, 1]; the result is
    ``1 - (confidence - ground_truth)^2``, itself clamped to [0, 1].
    A missing confidence scores 0.0.
    """
    if agent_confidence is None:
        return 0.0
    bounded_confidence = clamp01(float(agent_confidence))
    error = bounded_confidence - ground_truth_confidence
    return clamp01(1.0 - error ** 2)
def score_consistency(
    task_id: str,
    raised_flags: List[str],
    investigation_targets: List[str],
    queried_claims: Optional[set] = None,
) -> float:
    """Score how well the investigation targets match the expected claim group.

    Only ``coordinated_fraud`` is scored; every other task returns 0.0.
    Within that task:

    * no flags raised                      -> 1.0
    * targets equal the expected group     -> 1.0
    * flags raised but no targets          -> 0.0
    * any other (partial or wrong) targets -> 0.2

    ``queried_claims`` is accepted but not used by this function.
    """
    if task_id != "coordinated_fraud":
        return 0.0

    named_targets = set(investigation_targets)
    expected_group = set(get_task_definition(task_id).consistency_group_claim_ids)

    if len(raised_flags) == 0 or named_targets == expected_group:
        return 1.0
    return 0.0 if len(named_targets) == 0 else 0.2
def compute_reward_breakdown(
    task_id: str,
    expected_signals: List[str],
    found_signals: List[str],
    false_flags: int,
    step_number: int,
    max_steps: int,
    final_decision: Optional[str],
    allowed_decisions: List[str],
    payout_estimate_inr: Optional[float],
    payout_band: Optional[tuple[float, float]],
    investigation_targets: List[str],
    evidence_quality_score: float,
    exploit_penalty: float,
    penalty_total: float,
    queried_claims: Optional[set] = None,
    agent_confidence: Optional[float] = None,
    ground_truth_confidence: float = 1.0,
    calibration_override: Optional[float] = None,
) -> InsuranceClaimReward:
    """Compute the per-step reward components and the clamped total.

    Components (each in [0, 1]) and their weights in the total:

    * fraud detection (0.28): share of expected signals found. When no
      signals are expected, 1.0 only if none were found. 0.0 at step 0.
    * decision accuracy (0.20): 1.0 if the final decision is allowed.
    * payout accuracy (0.11): band score for payout tasks. For tasks without
      a band, 1.0 once a final decision exists. 0.0 at step 0.
    * efficiency (0.10): ``1 - (step_number - 1) / max_steps``, granted only
      after some progress or a final decision.
    * consistency (0.09): scored only for a ``request_investigation`` decision.
    * evidence quality (0.14): caller-supplied, clamped.
    * calibration (0.08): scored only once a final decision exists.
      ``calibration_override`` replaces the computed score when given.

    Penalties (``penalty_total``, false flags, wrong decision, partial
    consistency, skipped linked-claim queries, ``exploit_penalty``) are
    subtracted from the weighted sum, and the result is clamped to [0, 1].

    Returns:
        An ``InsuranceClaimReward`` with every component, the summed penalty
        and the rounded total.
    """
    expected = set(expected_signals)
    found = set(found_signals)

    # --- Fraud detection ---
    if step_number == 0:
        fraud_detection_score = 0.0
    elif len(expected) == 0:
        fraud_detection_score = 1.0 if len(found) == 0 else 0.0
    else:
        fraud_detection_score = clamp01(len(found.intersection(expected)) / float(len(expected)))

    # --- Decision accuracy ---
    if final_decision is None:
        decision_accuracy = 0.0
    else:
        decision_accuracy = 1.0 if final_decision in allowed_decisions else 0.0

    # --- Payout accuracy ---
    if step_number == 0:
        payout_accuracy = 0.0
    elif payout_band is None:
        # Non-payout tasks should not receive a free reward bump before a final decision.
        payout_accuracy = 1.0 if final_decision is not None else 0.0
    else:
        payout_accuracy = score_payout_accuracy(payout_estimate_inr, payout_band)

    # --- Efficiency ---
    has_queried = queried_claims is not None and len(queried_claims) > 0
    has_progress = len(found) > 0 or payout_estimate_inr is not None or has_queried
    if has_progress or final_decision is not None:
        # Guard the divisor so a zero step limit cannot raise ZeroDivisionError.
        efficiency_score = clamp01(1.0 - (max(step_number - 1, 0) / float(max(max_steps, 1))))
    else:
        efficiency_score = 0.0

    # --- Consistency: only meaningful for an investigation request ---
    consistency_score = 0.0
    if step_number > 0 and final_decision == "request_investigation":
        consistency_score = score_consistency(task_id, found_signals, investigation_targets, queried_claims)

    evidence_quality_score = clamp01(evidence_quality_score)

    # --- Calibration: only scored when a final decision is made ---
    if final_decision is not None:
        if calibration_override is not None:
            # Clamp the override like the other caller-supplied scores, so
            # the weighted total agrees with the reported calibration_score.
            calibration_score = clamp01(calibration_override)
        else:
            calibration_score = score_calibration(agent_confidence, ground_truth_confidence)
    else:
        calibration_score = 0.0

    # --- Penalties ---
    exploit_penalty = max(exploit_penalty, 0.0)
    # False flags on the clean claim are penalised more heavily.
    false_flag_penalty = 0.25 * false_flags if task_id == "clean_claim" else 0.1 * false_flags
    decision_penalty = 0.35 if (final_decision is not None and decision_accuracy == 0.0) else 0.0
    partial_consistency_penalty = 0.2 if (task_id == "coordinated_fraud" and 0.0 < consistency_score < 1.0) else 0.0
    query_skip_penalty = 0.0
    if (
        task_id == "coordinated_fraud"
        and final_decision == "request_investigation"
        and (queried_claims is None or len(queried_claims) < 2)
    ):
        # Requesting an investigation after querying fewer than two linked claims.
        query_skip_penalty = 0.15

    penalty = (
        penalty_total
        + false_flag_penalty
        + decision_penalty
        + partial_consistency_penalty
        + query_skip_penalty
        + exploit_penalty
    )

    # Weights: sum = 1.00
    # Reduced fraud/decision/evidence slightly to make room for calibration (0.08)
    weighted = (
        0.28 * fraud_detection_score
        + 0.20 * decision_accuracy
        + 0.11 * payout_accuracy
        + 0.10 * efficiency_score
        + 0.09 * consistency_score
        + 0.14 * evidence_quality_score
        + 0.08 * calibration_score
    )
    total = clamp01(weighted - penalty)

    return InsuranceClaimReward(
        fraud_detection_score=clamp01(fraud_detection_score),
        decision_accuracy=clamp01(decision_accuracy),
        payout_accuracy=clamp01(payout_accuracy),
        efficiency_score=clamp01(efficiency_score),
        consistency_score=clamp01(consistency_score),
        evidence_quality_score=evidence_quality_score,
        calibration_score=clamp01(calibration_score),
        exploit_penalty=round(exploit_penalty, 4),
        penalty=round(penalty, 4),
        total=round(total, 4),
    )