# debatefloor/app/tasks.py
# Uploaded by AniketAsla — sync: mirror git d05fcb5 to Space (commit b4ac377, verified)
from __future__ import annotations
from copy import deepcopy
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
from .models import InsuranceClaimReward
# Price, in investigation-budget units, of every action the agent can take.
# Evidence-gathering actions are charged per use; the four final decisions
# listed last are free.
ACTION_COSTS: Dict[str, int] = {
    "validate_document": 1,
    "request_information": 2,
    "lookup_policy_history": 1,
    "compare_documents": 1,
    "flag_fraud_signal": 1,
    "estimate_payout": 1,
    "query_linked_claim": 1,
    "verify_identity": 2,
    "query_historical_data": 1,
    "verify_provider_registration": 1,
    "convene_debate_panel": 2,  # multi-agent deliberation costs 2 budget units
    **dict.fromkeys(
        ("approve_claim", "deny_claim", "request_investigation", "escalate_to_human"),
        0,
    ),
}
@dataclass(frozen=True)
class TaskDefinition:
    """Read-only template for one claim-review task.

    Instances live in the module-level ``TASKS`` registry. The dataclass is
    frozen, so attributes cannot be reassigned; the dict/list values it holds
    are still ordinary mutable containers, which is why the rest of this
    module deep-copies them before handing them out.
    """

    task_id: str  # identifier; equals the key the task is stored under in TASKS
    title: str  # short human-readable description
    difficulty: str  # "easy", "medium" or "hard" in the tasks defined in this file
    max_steps: int  # step limit for an episode (presumably enforced by the environment — confirm)
    investigation_budget: int  # soft budget; overage adds 0.02 penalty per unit
    claim_id: str  # id of the primary claim under review
    claimant: Dict[str, Any]  # name, policy_number, contact, claim_date (+ national_id for identity_fraud)
    incident: Dict[str, Any]  # date, location, type, description
    documents: List[Dict[str, Any]]  # each has doc_id, doc_type, content, metadata
    linked_claims: List[Dict[str, Any]]  # related claims; entries may carry "_hidden_until_queries"
    expected_signals: List[str]  # fraud-signal ids the task expects to be found (empty for a clean claim)
    allowed_final_decisions: List[str]  # final-decision action names accepted for this task
    payout_band: Optional[tuple[float, float]]  # (low, high) acceptable payout, or None when no payout is expected
    consistency_group_claim_ids: List[str]  # claim ids score_consistency compares investigation targets against
    policy_history: Dict[str, Any]  # prior_claims, years_as_customer, policy_age_days, risk_score, note
    ground_truth_confidence: float  # reference confidence; presumably what callers pass to score_calibration — confirm
@dataclass
class RuntimeTask:
    """Mutable per-episode instance of a task.

    Carries the same fields as ``TaskDefinition`` plus ``variant_id``.
    ``_copy_runtime_from_task`` fills it with deep copies of the template's
    containers, and ``build_runtime_task`` then edits those copies in place to
    produce the selected variant.
    """

    task_id: str  # identifier of the template this instance was built from
    title: str  # short human-readable description
    difficulty: str  # "easy", "medium" or "hard" in the tasks defined in this file
    max_steps: int  # step limit for an episode (presumably enforced by the environment — confirm)
    investigation_budget: int  # budget units available; copied unchanged from the template
    claim_id: str  # id of the primary claim under review
    claimant: Dict[str, Any]  # private copy of the template's claimant record
    incident: Dict[str, Any]  # private copy of the template's incident record
    documents: List[Dict[str, Any]]  # private copy; variant edits are applied to these dicts
    linked_claims: List[Dict[str, Any]]  # private copy; entries may carry "_hidden_until_queries"
    expected_signals: List[str]  # fraud-signal ids the task expects to be found
    allowed_final_decisions: List[str]  # final-decision action names accepted for this task
    payout_band: Optional[tuple[float, float]]  # (low, high) acceptable payout or None; shifted per variant for clean_claim
    consistency_group_claim_ids: List[str]  # claim ids that belong to the task's consistency group
    policy_history: Dict[str, Any]  # private copy of the template's policy history
    ground_truth_confidence: float  # reference confidence copied from the template
    variant_id: int  # 0-4; build_runtime_task derives it as abs(seed) % 5, or 0 when no seed is given
def _base_available_actions(task_id: str = "") -> List[str]:
    """Return the ordered list of action names offered for ``task_id``.

    Every task gets the nine core actions. Task-specific extras are appended
    afterwards, in a fixed order, so the resulting list is deterministic.
    """
    on_ring_task = task_id in ("coordinated_fraud", "distribution_shift_claim")
    # (condition, actions appended when the condition holds), in append order.
    conditional_extras = (
        (on_ring_task, ["query_linked_claim", "query_historical_data", "escalate_to_human"]),
        (task_id == "distribution_shift_claim", ["verify_provider_registration"]),
        (task_id == "identity_fraud", ["verify_identity"]),
        # The debate panel is offered everywhere except on the clean claim.
        (task_id != "clean_claim", ["convene_debate_panel"]),
    )

    available = [
        "validate_document",
        "request_information",
        "lookup_policy_history",
        "compare_documents",
        "flag_fraud_signal",
        "estimate_payout",
        "approve_claim",
        "deny_claim",
        "request_investigation",
    ]
    for applies, extras in conditional_extras:
        if applies:
            available.extend(extras)
    return available
# Registry of every task template, keyed by task_id (each key equals the
# task_id of the TaskDefinition it maps to).
# NOTE(review): the budget tallies in the inline investigation_budget comments
# do not all agree with ACTION_COSTS, where final decisions cost 0 (e.g.
# validate×3 + estimate + approve is 4 units there, not 5) — confirm whether
# they count steps or budget units.
TASKS: Dict[str, TaskDefinition] = {
    # Easy: consistent documents, no fraud signals, approval with a payout band.
    "clean_claim": TaskDefinition(
        task_id="clean_claim",
        title="Clean auto claim with complete evidence",
        difficulty="easy",
        max_steps=8,
        investigation_budget=8,  # validate×3 + estimate + approve = 5; 3 units slack
        claim_id="CLM-AUTO-001",
        claimant={
            "name": "Rajesh Verma",
            "policy_number": "POL-AUTO-8821",
            "contact": "+91-9810012345",
            "claim_date": "2026-03-02",
        },
        incident={
            "date": "2026-02-27",
            "location": "Pune, Maharashtra",
            "type": "auto_collision",
            "description": "Rear-end collision at a traffic signal, bumper and tail-light damage.",
        },
        documents=[
            {
                "doc_id": "DOC-1",
                "doc_type": "claim_form",
                "content": "Claim submitted for rear-end collision on 2026-02-27.",
                "metadata": {"incident_date": "2026-02-27", "declared_cost_inr": 51000},
            },
            {
                "doc_id": "DOC-2",
                "doc_type": "garage_estimate",
                "content": "Repair estimate from authorized center.",
                "metadata": {"estimate_inr": 50500, "garage": "Pune Auto Care"},
            },
            {
                "doc_id": "DOC-3",
                "doc_type": "police_report",
                "content": "Minor collision report with matching date and location.",
                "metadata": {"incident_date": "2026-02-27", "report_id": "PR-112"},
            },
        ],
        linked_claims=[],
        expected_signals=[],
        allowed_final_decisions=["approve_claim"],
        payout_band=(45000, 55000),
        consistency_group_claim_ids=[],
        policy_history={
            "prior_claims": [],
            "years_as_customer": 6,
            "policy_age_days": 2190,
            "risk_score": "low",
            "note": "Long-standing customer with no prior claims. Low risk.",
        },
        ground_truth_confidence=0.95,
    ),
    # Medium: the documents disagree on date, cost and signature, and the
    # policy history holds an earlier claim for the same procedure.
    "contradictory_claim": TaskDefinition(
        task_id="contradictory_claim",
        title="Medical claim with contradictory evidence",
        difficulty="medium",
        max_steps=12,
        investigation_budget=12,  # validate×4 + lookup + flag×4 + deny = 10; 2 units slack
        claim_id="CLM-MED-017",
        claimant={
            "name": "Neha Kapoor",
            "policy_number": "POL-HEALTH-2190",
            "contact": "+91-9822211188",
            "claim_date": "2026-03-05",
        },
        incident={
            "date": "2026-02-16",
            "location": "Ahmedabad, Gujarat",
            "type": "medical_procedure",
            "description": "Emergency appendectomy claim with post-op hospitalization.",
        },
        documents=[
            {
                "doc_id": "DOC-10",
                "doc_type": "claim_form",
                "content": "Claim incident date recorded as 2026-02-20.",
                "metadata": {"incident_date": "2026-02-20", "claimed_cost_inr": 240000},
            },
            {
                "doc_id": "DOC-11",
                "doc_type": "hospital_admission",
                "content": "Patient admitted on 2026-02-17 for emergency surgery.",
                "metadata": {"admission_date": "2026-02-17", "procedure": "appendectomy"},
            },
            {
                "doc_id": "DOC-12",
                "doc_type": "billing_summary",
                "content": "Total treatment cost billed: INR 240000.",
                "metadata": {"claimed_cost_inr": 240000, "standard_rate_inr": 100000},
            },
            {
                "doc_id": "DOC-13",
                "doc_type": "discharge_summary",
                "content": "Digitally scanned discharge summary.",
                "metadata": {
                    "doctor_signature": "DR-XYZ-SIGN-ALPHA",
                    "clinic_reference_signature": "DR-XYZ-SIGN-BETA",
                },
            },
        ],
        linked_claims=[],
        expected_signals=[
            "date_mismatch",
            "cost_inflation",
            "signature_mismatch",
            "prior_similar_claim",
        ],
        allowed_final_decisions=["deny_claim", "request_investigation"],
        payout_band=None,
        consistency_group_claim_ids=[],
        policy_history={
            "prior_claims": [
                {
                    "claim_id": "CLM-MED-008",
                    "date": "2025-06-14",
                    "type": "medical_procedure",
                    "procedure": "appendectomy",
                    "paid_inr": 95000,
                    "note": "Claim paid for same procedure 8 months ago at a different hospital.",
                }
            ],
            "years_as_customer": 3,
            "policy_age_days": 1095,
            "risk_score": "medium",
            "note": "Prior claim for identical procedure 8 months ago — statistical anomaly.",
        },
        ground_truth_confidence=0.80,
    ),
    # Hard: a ring of linked claims sharing a repair shop, emergency contact,
    # narrative and broker; one linked claim starts out hidden.
    "coordinated_fraud": TaskDefinition(
        task_id="coordinated_fraud",
        title="Coordinated multi-claim fraud ring",
        difficulty="hard",
        max_steps=20,
        investigation_budget=18,  # validate×3 + query×4 + flag×5 + escalate = 15; 3 units slack
        claim_id="CLM-GROUP-301",
        claimant={
            "name": "Primary: Arjun Saini",
            "policy_number": "POL-MOTOR-9001",
            "contact": "+91-9898001122",
            "claim_date": "2026-03-09",
        },
        incident={
            "date": "2026-03-01",
            "location": "Jaipur, Rajasthan",
            "type": "multi_vehicle_damage",
            "description": "Three separate claims likely linked by staged repairs and copied narratives.",
        },
        documents=[
            {
                "doc_id": "DOC-21",
                "doc_type": "primary_claim_packet",
                "content": "Primary claim references repair at RapidFix Motors in Kota (340km away).",
                "metadata": {"repair_shop": "RapidFix Motors", "distance_km": 340},
            },
            {
                "doc_id": "DOC-22",
                "doc_type": "narrative",
                "content": "Accident description text is nearly identical to two linked claims.",
                "metadata": {"template_similarity": 0.93},
            },
            {
                "doc_id": "DOC-23",
                "doc_type": "policy_timeline",
                "content": "All related policies purchased within 30 days of incident.",
                "metadata": {"days_since_purchase": [18, 24, 29]},
            },
        ],
        linked_claims=[
            {
                "claim_id": "CLM-GROUP-302",
                "claimant": "Rohit Jain",
                "contact": "+91-9898004455",
                "emergency_contact": "+91-9000002222",
                "repair_shop": "RapidFix Motors",
                "accident_description": "A truck abruptly stopped causing chain collision near city bypass.",
                "policy_purchase_date": "2026-02-06",
            },
            {
                "claim_id": "CLM-GROUP-303",
                "claimant": "Pooja Nair",
                "contact": "+91-9845509988",
                "emergency_contact": "+91-9000002222",
                "repair_shop": "RapidFix Motors",
                "accident_description": "A truck abruptly stopped causing chain collision near city bypass.",
                "policy_purchase_date": "2026-02-11",
            },
            # The primary claim (same claim_id as the task) is listed among its own linked claims.
            {
                "claim_id": "CLM-GROUP-301",
                "claimant": "Arjun Saini",
                "contact": "+91-9898001122",
                "emergency_contact": "+91-9000003333",
                "repair_shop": "RapidFix Motors",
                "accident_description": "A truck abruptly stopped causing chain collision near city bypass.",
                "policy_purchase_date": "2026-02-02",
                "broker_id": "BRK-441",
            },
            # 4th claim — hidden until agent queries 2 linked claims (dynamic ring expansion)
            {
                "claim_id": "CLM-GROUP-304",
                "claimant": "Vikram Sharma",
                "contact": "+91-9011009988",
                "emergency_contact": "+91-9000002222",
                "repair_shop": "RapidFix Motors",
                "accident_description": "A truck abruptly stopped causing chain collision near city bypass.",
                "policy_purchase_date": "2026-02-08",
                "broker_id": "BRK-441",
                "_hidden_until_queries": 2,  # surfaced only after 2 existing claims are queried
            },
        ],
        expected_signals=[
            "shared_repair_shop_far",
            "shared_emergency_contact",
            "near_identical_descriptions",
            "recent_policy_cluster",
            "clustered_policy_broker",  # discoverable by querying the 4th linked claim
        ],
        allowed_final_decisions=["request_investigation"],
        payout_band=None,
        consistency_group_claim_ids=["CLM-GROUP-301", "CLM-GROUP-302", "CLM-GROUP-303", "CLM-GROUP-304"],
        policy_history={
            "prior_claims": [],
            "years_as_customer": 0,
            "policy_age_days": 18,
            "risk_score": "high",
            "note": "Policy purchased only 18 days before incident. No claim history — all three claimants opened policies within 30 days of each other.",
        },
        ground_truth_confidence=0.90,
    ),
    # Hard: looks routine on its own documents; the ring only shows up across
    # the linked claims (shared broker, repair shop and emergency contact).
    "distribution_shift_claim": TaskDefinition(
        task_id="distribution_shift_claim",
        title="Cross-claim coordinated ring with distribution shift",
        difficulty="hard",
        max_steps=28,
        investigation_budget=20,
        claim_id="CLM-DIST-601",
        claimant={
            "name": "Suresh Pillai",
            "policy_number": "POL-MOTOR-5541",
            "contact": "+91-9876543210",
            "claim_date": "2026-03-15",
        },
        incident={
            "date": "2026-03-08",
            "location": "Bengaluru, Karnataka",
            "type": "auto_collision",
            "description": "Minor collision at junction. Claim appears routine on surface but cross-claim analysis reveals coordinated ring.",
        },
        documents=[
            {
                "doc_id": "DOC-41",
                "doc_type": "claim_form",
                "content": "Standard auto collision claim submitted on 2026-03-15 for incident on 2026-03-08.",
                "metadata": {"incident_date": "2026-03-08", "declared_cost_inr": 85000},
            },
            {
                "doc_id": "DOC-42",
                "doc_type": "garage_estimate",
                "content": "Repair estimate from FastRepair Hub, Whitefield.",
                "metadata": {"estimate_inr": 84000, "garage": "FastRepair Hub"},
            },
            {
                "doc_id": "DOC-43",
                "doc_type": "police_report",
                "content": "Minor collision report filed. No independent witnesses.",
                "metadata": {"incident_date": "2026-03-08", "witnesses": 0},
            },
        ],
        linked_claims=[
            {
                "claim_id": "CLM-DIST-602",
                "claimant": "Meera Iyer",
                "contact": "+91-9845501234",
                "emergency_contact": "+91-9000005555",
                "repair_shop": "FastRepair Hub",
                "accident_description": "Minor collision at junction. No injuries.",
                "policy_purchase_date": "2026-02-12",
                "broker_id": "BRK-882",
            },
            {
                "claim_id": "CLM-DIST-603",
                "claimant": "Ravi Shankar",
                "contact": "+91-9741200099",
                "emergency_contact": "+91-9000005555",
                "repair_shop": "FastRepair Hub",
                "accident_description": "Minor collision at junction. No injuries.",
                "policy_purchase_date": "2026-02-18",
                "broker_id": "BRK-882",
            },
            # Carries the same hidden-claim marker as CLM-GROUP-304 in coordinated_fraud.
            {
                "claim_id": "CLM-DIST-604",
                "claimant": "Deepa Nair",
                "contact": "+91-9911200033",
                "emergency_contact": "+91-9000005555",
                "repair_shop": "FastRepair Hub",
                "accident_description": "Minor collision at junction. No injuries.",
                "policy_purchase_date": "2026-02-20",
                "broker_id": "BRK-882",
                "_hidden_until_queries": 2,
            },
        ],
        expected_signals=[
            "shared_repair_shop_far",
            "shared_emergency_contact",
            "recent_policy_cluster",
            "clustered_policy_broker",
            "near_identical_descriptions",
        ],
        allowed_final_decisions=["escalate_to_human", "request_investigation"],
        payout_band=None,
        consistency_group_claim_ids=["CLM-DIST-601", "CLM-DIST-602", "CLM-DIST-603", "CLM-DIST-604"],
        policy_history={
            "prior_claims": [],
            "years_as_customer": 0,
            "policy_age_days": 24,
            "risk_score": "high",
            "note": "Policy purchased 24 days before incident. All 3 linked claimants share broker BRK-882 and same repair shop. Cross-claim cluster detected in historical data.",
        },
        ground_truth_confidence=0.70,
    ),
    # Hard: claimant identity cannot be matched to hospital or registry
    # records, and the policy was issued days before the incident.
    "identity_fraud": TaskDefinition(
        task_id="identity_fraud",
        title="Ghost claimant identity fraud",
        difficulty="hard",
        max_steps=15,
        investigation_budget=14,  # verify(2)+lookup+validate×4+flag×4+deny = 11; 3 units slack
        claim_id="CLM-ID-501",
        claimant={
            "name": "Aarav Mehta",
            "policy_number": "POL-HEALTH-7734",
            "contact": "+91-9711100045",
            "claim_date": "2026-03-12",
            "national_id": "XXXX-7821",
        },
        incident={
            "date": "2026-03-07",
            "location": "Mumbai, Maharashtra",
            "type": "medical_procedure",
            "description": "Knee replacement surgery claim with post-op physiotherapy.",
        },
        documents=[
            {
                "doc_id": "DOC-31",
                "doc_type": "claim_form",
                "content": "Claim submitted for knee replacement on 2026-03-07. National ID: XXXX-7821.",
                "metadata": {
                    "incident_date": "2026-03-07",
                    "claimed_cost_inr": 320000,
                    "national_id_suffix": "7821",
                },
            },
            {
                "doc_id": "DOC-32",
                "doc_type": "hospital_record",
                "content": "Hospital system query: No patient named Aarav Mehta with DOB matching policy found. Record shows admission under a different name with similar ID.",
                "metadata": {
                    "patient_found": False,
                    "name_on_record": "Aarav Kumar",
                    "dob_mismatch": True,
                },
            },
            {
                "doc_id": "DOC-33",
                "doc_type": "policy_inception",
                "content": "Policy POL-HEALTH-7734 issued on 2026-03-02. Incident date 2026-03-07 falls within the 30-day exclusion window.",
                "metadata": {
                    "policy_issue_date": "2026-03-02",
                    "incident_date": "2026-03-07",
                    "days_to_claim": 5,
                    "exclusion_window_days": 30,
                },
            },
            {
                "doc_id": "DOC-34",
                "doc_type": "id_proof",
                "content": "Submitted ID proof shows date of birth 1988-04-15. Policy application on file states DOB 1986-11-22. The national registry has no record matching either entry for this ID number.",
                "metadata": {
                    "dob_on_id": "1988-04-15",
                    "dob_on_policy": "1986-11-22",
                    "registry_match": False,
                },
            },
        ],
        linked_claims=[],
        expected_signals=[
            "identity_mismatch",
            "hospital_no_record",
            "recent_policy_purchase",
            "dob_inconsistency",
        ],
        allowed_final_decisions=["deny_claim", "request_investigation"],
        payout_band=None,
        consistency_group_claim_ids=[],
        policy_history={
            "prior_claims": [],
            "years_as_customer": 0,
            "policy_age_days": 5,
            "risk_score": "critical",
            "note": "Policy opened only 5 days before incident. Claimant identity could not be verified at onboarding. KYC status: PENDING.",
        },
        ground_truth_confidence=0.90,
    ),
}
def get_task_definition(task_id: str) -> TaskDefinition:
    """Return the registered template for ``task_id``.

    Raises:
        ValueError: if ``task_id`` is not a key of ``TASKS``; the message
            lists the available ids.
    """
    if task_id in TASKS:
        return TASKS[task_id]
    raise ValueError(f"Unknown task_id '{task_id}'. Available: {list(TASKS)}")
def list_tasks_summary() -> List[Dict[str, Any]]:
    """Return one summary dict per registered task, in registry order.

    Each summary holds ``task_id``, ``title``, ``difficulty``, ``max_steps``
    and ``expected_decisions`` (the task's allowed final decisions).
    """
    return [
        {
            "task_id": task.task_id,
            "title": task.title,
            "difficulty": task.difficulty,
            "max_steps": task.max_steps,
            # Hand out a copy: returning the template's own list would let a
            # caller that edits the summary silently change the TASKS registry.
            "expected_decisions": list(task.allowed_final_decisions),
        }
        for task in TASKS.values()
    ]
def _copy_runtime_from_task(task: TaskDefinition, variant_id: int) -> RuntimeTask:
    """Build a ``RuntimeTask`` from ``task`` that shares no containers with it.

    Scalar fields are passed through; every dict, list and the payout band is
    deep-copied so later in-place edits never reach the template.
    """
    passthrough_fields = (
        "task_id",
        "title",
        "difficulty",
        "max_steps",
        "investigation_budget",
        "claim_id",
        "ground_truth_confidence",
    )
    deep_copied_fields = (
        "claimant",
        "incident",
        "documents",
        "linked_claims",
        "expected_signals",
        "allowed_final_decisions",
        "payout_band",
        "consistency_group_claim_ids",
        "policy_history",
    )
    field_values = {name: getattr(task, name) for name in passthrough_fields}
    for name in deep_copied_fields:
        field_values[name] = deepcopy(getattr(task, name))
    return RuntimeTask(variant_id=variant_id, **field_values)
def build_runtime_task(task_id: str, seed: Optional[int] = None) -> RuntimeTask:
    """Create a mutable, seed-selected variant of a registered task.

    The seed picks one of five variants (``abs(seed) % 5``; variant 0 when no
    seed is given). Variant 0 reproduces the template's own figures. Only the
    copied runtime data is edited; the template in ``TASKS`` is never touched.
    ``distribution_shift_claim`` has no variant-specific edits.

    Args:
        task_id: key of the task in ``TASKS``.
        seed: optional integer used to select the variant.

    Returns:
        The populated ``RuntimeTask``.

    Raises:
        ValueError: if ``task_id`` is unknown (raised by ``get_task_definition``).
    """
    task = get_task_definition(task_id)
    variant_id = 0 if seed is None else abs(seed) % 5
    runtime = _copy_runtime_from_task(task, variant_id)

    if task_id == "clean_claim":
        # Shift declared cost, garage estimate and payout band by the same offset
        # so the claim stays internally consistent.
        offsets = [-2000, -1000, 0, 1000, 2000]
        offset = offsets[variant_id]
        declared_cost = 51000 + offset
        estimate = 50500 + offset
        runtime.documents[0]["metadata"]["declared_cost_inr"] = declared_cost
        runtime.documents[1]["metadata"]["estimate_inr"] = estimate
        center = 50000 + offset
        runtime.payout_band = (float(center - 5000), float(center + 5000))

    elif task_id == "contradictory_claim":
        # The claim form's incident date trails the hospital admission date by a
        # variant-specific gap, which is what produces the date mismatch.
        admission_date_str = runtime.documents[1]["metadata"]["admission_date"]
        admission_date = datetime.strptime(admission_date_str, "%Y-%m-%d")
        date_gap_days = [3, 4, 2, 5, 3][variant_id]
        incident_date = (admission_date + timedelta(days=date_gap_days)).strftime("%Y-%m-%d")
        runtime.documents[0]["metadata"]["incident_date"] = incident_date
        runtime.documents[0]["content"] = f"Claim incident date recorded as {incident_date}."

        # The billed amount is always 2.4x the variant's standard rate.
        standard_rates = [100000, 105000, 95000, 110000, 98000]
        standard_rate = standard_rates[variant_id]
        claimed_cost = int(standard_rate * 2.4)
        runtime.documents[0]["metadata"]["claimed_cost_inr"] = claimed_cost
        runtime.documents[2]["metadata"]["claimed_cost_inr"] = claimed_cost
        runtime.documents[2]["metadata"]["standard_rate_inr"] = standard_rate
        runtime.documents[2]["content"] = f"Total treatment cost billed: INR {claimed_cost}."

    elif task_id == "coordinated_fraud":
        distances = [340, 360, 320, 380, 300]
        distance = distances[variant_id]
        runtime.documents[0]["metadata"]["distance_km"] = distance
        runtime.documents[0]["content"] = (
            f"Primary claim references repair at RapidFix Motors in Kota ({distance}km away)."
        )
        similarity = [0.93, 0.91, 0.95, 0.9, 0.94][variant_id]
        runtime.documents[1]["metadata"]["template_similarity"] = similarity
        purchase_sets = [
            [18, 24, 29],
            [12, 22, 27],
            [9, 19, 28],
            [16, 21, 26],
            [14, 25, 30],
        ]
        runtime.documents[2]["metadata"]["days_since_purchase"] = purchase_sets[variant_id]

    elif task_id == "identity_fraud":
        # Vary how long before the incident the policy was issued.
        days_to_claim_variants = [5, 7, 3, 8, 6]
        days_to_claim = days_to_claim_variants[variant_id]
        inception = runtime.documents[2]["metadata"]
        incident_date_str = inception["incident_date"]
        # Derive the issue date from the incident date with real date arithmetic.
        # The template pairs days_to_claim=5 with issue date 2026-03-02 and
        # incident date 2026-03-07, so the gap is measured from the incident,
        # and larger gaps must be able to roll back into February.
        issue_date = (
            datetime.strptime(incident_date_str, "%Y-%m-%d") - timedelta(days=days_to_claim)
        ).strftime("%Y-%m-%d")
        # Keep every place that states the issue date or the gap in agreement.
        inception["days_to_claim"] = days_to_claim
        inception["policy_issue_date"] = issue_date
        runtime.documents[2]["content"] = (
            f"Policy POL-HEALTH-7734 issued on {issue_date}. "
            f"Incident date {incident_date_str} falls within the 30-day exclusion window."
        )
        # policy_history was already deep-copied by _copy_runtime_from_task.
        runtime.policy_history["policy_age_days"] = days_to_claim
        runtime.policy_history["note"] = (
            f"Policy opened only {days_to_claim} days before incident. "
            "Claimant identity could not be verified at onboarding. KYC status: PENDING."
        )

    return runtime
def _stub_linked_claims(linked_claims: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Reduce linked claims to ``claim_id`` + ``claimant`` stubs.

    Entries without a ``claim_id`` are skipped, and so is any entry whose
    ``_hidden_until_queries`` value is not 0 — those are left out of the
    initial list and surface dynamically in the environment.
    """
    stubs: List[Dict[str, Any]] = []
    for claim in linked_claims:
        if "claim_id" not in claim:
            continue
        if claim.get("_hidden_until_queries", 0) != 0:
            continue
        stubs.append({"claim_id": claim["claim_id"], "claimant": claim["claimant"]})
    return stubs
def build_initial_payload(runtime_task: RuntimeTask) -> Dict[str, Any]:
    """Assemble the initial payload dict for an episode of ``runtime_task``.

    ``linked_claims`` is the initially visible view: claim_id/claimant stubs
    for ``coordinated_fraud``, full records for every other task. In both
    cases claims marked with a non-zero ``_hidden_until_queries`` are left
    out. ``_full_linked_claims`` always carries the complete, unfiltered list
    (presumably for environment-internal use — confirm against the consumer).
    All containers are copies, so the payload can be mutated freely.
    """
    if runtime_task.task_id == "coordinated_fraud":
        linked_claims_visible = _stub_linked_claims(runtime_task.linked_claims)
    else:
        # Honour the hidden-claim marker for every task. Previously only
        # coordinated_fraud filtered it, so distribution_shift_claim exposed its
        # hidden ring member (and the internal marker key) from the first step.
        linked_claims_visible = [
            deepcopy(claim)
            for claim in runtime_task.linked_claims
            if claim.get("_hidden_until_queries", 0) == 0
        ]
    return {
        "task_id": runtime_task.task_id,
        "claim_id": runtime_task.claim_id,
        "claimant": deepcopy(runtime_task.claimant),
        "incident": deepcopy(runtime_task.incident),
        "documents": deepcopy(runtime_task.documents),
        "linked_claims": linked_claims_visible,
        "_full_linked_claims": deepcopy(runtime_task.linked_claims),
        "max_steps": runtime_task.max_steps,
        "investigation_budget": runtime_task.investigation_budget,
        "variant_id": runtime_task.variant_id,
        "available_actions": _base_available_actions(runtime_task.task_id),
    }
def get_evidence_keyword_hints(task_id: str, flag_id: str) -> List[str]:
    """Return the evidence keywords associated with ``flag_id`` on ``task_id``.

    An unknown task or an unknown flag yields an empty list. The table is
    rebuilt on every call, so the returned list is never shared between
    callers.
    """
    keywords_by_task: Dict[str, Dict[str, List[str]]] = {
        "contradictory_claim": {
            "date_mismatch": ["date", "admission", "mismatch", "incident"],
            "cost_inflation": ["cost", "rate", "2.4", "inflation", "overbilled"],
            "signature_mismatch": ["signature", "doctor", "clinic", "dr-xyz"],
            "prior_similar_claim": ["prior", "previous", "history", "appendectomy", "procedure", "8 months", "clm-med-008"],
        },
        "coordinated_fraud": {
            "shared_repair_shop_far": ["repair", "shop", "distance", "km", "kota", "rapidfix"],
            "shared_emergency_contact": ["contact", "phone", "emergency", "shared", "9000002222"],
            "near_identical_descriptions": ["identical", "description", "narrative", "template", "similarity"],
            "recent_policy_cluster": ["policy", "purchase", "days", "cluster", "30"],
            "clustered_policy_broker": ["broker", "brk-441", "same broker", "policy broker", "issued"],
        },
        "identity_fraud": {
            "identity_mismatch": ["identity", "registry", "national", "id", "mismatch", "no record", "7821"],
            "hospital_no_record": ["hospital", "record", "patient", "not found", "name", "admission"],
            "recent_policy_purchase": ["policy", "days", "exclusion", "window", "inception", "5", "30"],
            "dob_inconsistency": ["dob", "date of birth", "1988", "1986", "inconsistency", "mismatch"],
        },
        # NEW-7 fix: this task used to have no entry, so lookups returned []
        # and, per the original fix note, the keyword check in
        # flag_fraud_signal then accepted any evidence. The anchors below are
        # taken from the task data: FastRepair Hub, Whitefield (DOC-42), the
        # shared +91-9000005555 contact, broker BRK-882, the repeated "Minor
        # collision at junction. No injuries." narrative on CLM-DIST-602/603/604,
        # and policies bought roughly 30 days before the incident.
        "distribution_shift_claim": {
            "shared_repair_shop_far": ["repair", "shop", "fastrepair", "whitefield", "garage"],
            "shared_emergency_contact": ["contact", "phone", "emergency", "9000005555", "shared"],
            "recent_policy_cluster": ["policy", "purchase", "days", "cluster", "24", "30"],
            "clustered_policy_broker": ["broker", "brk-882", "same broker", "policy broker"],
            "near_identical_descriptions": ["identical", "description", "narrative", "template", "minor collision"],
        },
    }
    try:
        return keywords_by_task[task_id][flag_id]
    except KeyError:
        # Either the task or the flag has no keyword list.
        return []
# Signals revealed by comparing two documents, per task:
# task_id -> {(doc_a, doc_b): [signal ids]}. Each pairing is registered in
# both orders, so a lookup works whichever document is named first.
COMPARE_DOCUMENT_SIGNALS: Dict[str, Dict[tuple, List[str]]] = {
    task_id: {
        ordered_pair: list(signals)
        for doc_a, doc_b, signals in pairings
        for ordered_pair in ((doc_a, doc_b), (doc_b, doc_a))
    }
    for task_id, pairings in (
        (
            "contradictory_claim",
            (
                ("DOC-10", "DOC-11", ("date_mismatch",)),
                ("DOC-10", "DOC-12", ("cost_inflation",)),
            ),
        ),
        (
            "coordinated_fraud",
            (("DOC-21", "DOC-22", ("near_identical_descriptions",)),),
        ),
        (
            "identity_fraud",
            (
                ("DOC-31", "DOC-34", ("dob_inconsistency",)),
                ("DOC-32", "DOC-33", ("hospital_no_record",)),
            ),
        ),
    )
}
def get_compare_signals(task_id: str, doc_id_a: str, doc_id_b: str) -> List[str]:
    """Return the signals registered for comparing ``doc_id_a`` with ``doc_id_b``.

    Looks the ordered pair up in ``COMPARE_DOCUMENT_SIGNALS``; an unknown task
    or an unregistered pair yields an empty list.
    """
    signals_for_task = COMPARE_DOCUMENT_SIGNALS.get(task_id, {})
    return signals_for_task.get((doc_id_a, doc_id_b), [])
def clamp01(value: float) -> float:
    """Clamp ``value`` into the closed interval [0.0, 1.0].

    Values already inside the interval are returned unchanged. NaN is mapped
    to 0.0: it fails both range comparisons below, so without the explicit
    check it would pass straight through and poison every score and reward
    total built on this helper.
    """
    if value != value:  # true only for NaN
        return 0.0
    if value < 0.0:
        return 0.0
    if value > 1.0:
        return 1.0
    return value
def score_payout_accuracy(amount: Optional[float], payout_band: Optional[tuple[float, float]]) -> float:
    """Score a payout estimate against the task's acceptable band.

    With no band, the only correct answer is to give no estimate (1.0, else
    0.0). With a band, a missing estimate scores 0.0 and any amount inside the
    inclusive band scores 1.0. Outside the band the score falls off linearly
    with distance from the band centre, reaching 0.0 at 2.5 half-widths.
    """
    if payout_band is None:
        return 0.0 if amount is not None else 1.0
    if amount is None:
        return 0.0

    lower, upper = payout_band
    if not (lower <= amount <= upper):
        # Half-width is floored at 1.0 so a zero-width band cannot divide by zero.
        half_width = max((upper - lower) / 2.0, 1.0)
        midpoint = (lower + upper) / 2.0
        miss = abs(amount - midpoint)
        return clamp01(1.0 - (miss / (2.5 * half_width)))
    return 1.0
def score_calibration(agent_confidence: Optional[float], ground_truth_confidence: float) -> float:
    """Brier-style calibration score in [0, 1].

    Computes ``1 - (confidence - ground_truth) ** 2`` after clamping the
    agent's confidence into [0, 1]. An agent that reports no confidence gets
    0.0.
    """
    if agent_confidence is not None:
        reported = clamp01(float(agent_confidence))
        squared_error = (reported - ground_truth_confidence) ** 2
        return clamp01(1.0 - squared_error)
    return 0.0
def score_consistency(
    task_id: str,
    raised_flags: List[str],
    investigation_targets: List[str],
    queried_claims: Optional[set] = None,
) -> float:
    """Score how well the investigation targets match the fraud ring.

    Only ``coordinated_fraud`` is scored; every other task gets 0.0. For that
    task: 1.0 when no flags were raised or the targets are exactly the task's
    consistency group, 0.0 when flags were raised but no targets were named,
    and 0.2 for any other (partial or mismatched) target set.

    ``queried_claims`` is accepted but not used by this function.
    """
    if task_id != "coordinated_fraud":
        return 0.0

    named_targets = set(investigation_targets)
    ring_members = set(get_task_definition(task_id).consistency_group_claim_ids)

    if not raised_flags or named_targets == ring_members:
        return 1.0
    return 0.2 if named_targets else 0.0
def compute_reward_breakdown(
    task_id: str,
    expected_signals: List[str],
    found_signals: List[str],
    false_flags: int,
    step_number: int,
    max_steps: int,
    final_decision: Optional[str],
    allowed_decisions: List[str],
    payout_estimate_inr: Optional[float],
    payout_band: Optional[tuple[float, float]],
    investigation_targets: List[str],
    evidence_quality_score: float,
    exploit_penalty: float,
    penalty_total: float,
    queried_claims: Optional[set] = None,
    agent_confidence: Optional[float] = None,
    ground_truth_confidence: float = 1.0,
    calibration_override: Optional[float] = None,
) -> InsuranceClaimReward:
    """Combine component scores and penalties into one reward record.

    Seven component scores are weighted (fraud 0.28, decision 0.20, payout
    0.11, efficiency 0.10, consistency 0.09, evidence 0.14, calibration 0.08),
    the penalties are subtracted, and the total is clamped to [0, 1] and
    rounded to 4 decimals.

    Args:
        task_id: task being scored; selects task-specific penalties.
        expected_signals: fraud signals the task expects.
        found_signals: fraud signals found so far.
        false_flags: number of incorrect flags; penalised at 0.25 each on
            ``clean_claim`` and 0.1 each elsewhere.
        step_number: current step; at step 0 the fraud and payout scores are 0.
        max_steps: step limit used to scale the efficiency score.
        final_decision: final decision taken, or None if none yet.
        allowed_decisions: decisions counted as correct.
        payout_estimate_inr: payout estimate, or None.
        payout_band: acceptable (low, high) band, or None for non-payout tasks.
        investigation_targets: claim ids named for investigation.
        evidence_quality_score: externally computed score, clamped to [0, 1].
        exploit_penalty: externally computed penalty, floored at 0.
        penalty_total: penalties already accumulated by the caller.
        queried_claims: linked claims queried so far, if tracked.
        agent_confidence: confidence stated with the decision, if any.
        ground_truth_confidence: reference confidence for calibration.
        calibration_override: when given, replaces the computed calibration
            score once a final decision exists.

    Returns:
        An ``InsuranceClaimReward`` holding each component, the penalties and
        the total.
    """
    expected = set(expected_signals)
    found = set(found_signals)

    # --- Fraud detection ---
    if step_number == 0:
        fraud_detection_score = 0.0
    elif len(expected) == 0:
        # A clean task is only "detected" correctly when nothing was flagged.
        fraud_detection_score = 1.0 if len(found) == 0 else 0.0
    else:
        fraud_detection_score = clamp01(len(found.intersection(expected)) / float(len(expected)))

    # --- Decision accuracy ---
    if final_decision is None:
        decision_accuracy = 0.0
    else:
        decision_accuracy = 1.0 if final_decision in allowed_decisions else 0.0

    # --- Payout accuracy ---
    if step_number == 0:
        payout_accuracy = 0.0
    elif payout_band is None:
        # Non-payout tasks should not receive a free reward bump before a final decision.
        payout_accuracy = 1.0 if final_decision is not None else 0.0
    else:
        payout_accuracy = score_payout_accuracy(payout_estimate_inr, payout_band)

    # --- Efficiency ---
    has_queried = queried_claims is not None and len(queried_claims) > 0
    has_progress = len(found) > 0 or payout_estimate_inr is not None or has_queried
    if has_progress or final_decision is not None:
        # Guard the denominator: a task configured with max_steps == 0 used to
        # raise ZeroDivisionError here.
        efficiency_score = clamp01(1.0 - (max(step_number - 1, 0) / float(max(max_steps, 1))))
    else:
        efficiency_score = 0.0

    # --- Consistency: only scored for a request_investigation decision ---
    consistency_score = 0.0
    if step_number > 0 and final_decision == "request_investigation":
        consistency_score = score_consistency(task_id, found_signals, investigation_targets, queried_claims)

    evidence_quality_score = clamp01(evidence_quality_score)

    # --- Calibration: only scored when a final decision is made ---
    if final_decision is not None:
        if calibration_override is not None:
            # NOTE(review): the override enters the weighted sum unclamped while
            # the reported calibration_score field is clamped — confirm intended.
            calibration_score = calibration_override
        else:
            calibration_score = score_calibration(agent_confidence, ground_truth_confidence)
    else:
        calibration_score = 0.0

    # --- Penalties ---
    exploit_penalty = max(exploit_penalty, 0.0)
    false_flag_penalty = 0.25 * false_flags if task_id == "clean_claim" else 0.1 * false_flags
    decision_penalty = 0.35 if (final_decision is not None and decision_accuracy == 0.0) else 0.0
    partial_consistency_penalty = 0.2 if (task_id == "coordinated_fraud" and 0.0 < consistency_score < 1.0) else 0.0
    # Requesting an investigation on the ring task after querying fewer than
    # two linked claims is penalised.
    query_skip_penalty = 0.0
    if (
        task_id == "coordinated_fraud"
        and final_decision == "request_investigation"
        and (queried_claims is None or len(queried_claims) < 2)
    ):
        query_skip_penalty = 0.15
    penalty = (
        penalty_total
        + false_flag_penalty
        + decision_penalty
        + partial_consistency_penalty
        + query_skip_penalty
        + exploit_penalty
    )

    # Weights: sum = 1.00
    # Reduced fraud/decision/evidence slightly to make room for calibration (0.08)
    weighted = (
        0.28 * fraud_detection_score
        + 0.20 * decision_accuracy
        + 0.11 * payout_accuracy
        + 0.10 * efficiency_score
        + 0.09 * consistency_score
        + 0.14 * evidence_quality_score
        + 0.08 * calibration_score
    )
    total = clamp01(weighted - penalty)

    return InsuranceClaimReward(
        fraud_detection_score=clamp01(fraud_detection_score),
        decision_accuracy=clamp01(decision_accuracy),
        payout_accuracy=clamp01(payout_accuracy),
        efficiency_score=clamp01(efficiency_score),
        consistency_score=clamp01(consistency_score),
        evidence_quality_score=evidence_quality_score,
        calibration_score=clamp01(calibration_score),
        exploit_penalty=round(exploit_penalty, 4),
        penalty=round(penalty, 4),
        total=round(total, 4),
    )