""" Task definitions for the Invoice Exception Handler environment. Each task defines a scenario with documents, simulator logic for every action type, and a grader that produces sub-scores in [0.0, 1.0]. This is the biggest file in the project — it contains all the business logic the environment needs. """ from __future__ import annotations import time from typing import Any, Dict, List, Optional, Tuple from .models import ( ActionType, CheckResult, ExceptionFlag, GoodsReceiptNote, InspectionResult, Invoice, LineItem, PurchaseOrder, QueryResult, SupplierMaster, ) # --------------------------------------------------------------------------- # EpisodeData — mutable state for one episode # --------------------------------------------------------------------------- class EpisodeData: """Tracks the full history of one episode for grading and state building.""" def __init__(self) -> None: self.inspections: List[InspectionResult] = [] self.checks: List[CheckResult] = [] self.queries: List[QueryResult] = [] self.rules_applied: List[str] = [] self.decision: Optional[str] = None self.decision_reason: Optional[str] = None self.routed_to: List[str] = [] self.closed: bool = False self.close_summary: Optional[str] = None self.step_count: int = 0 self.cumulative_reward: float = 0.0 def has_inspected(self, doc: str, field: str) -> bool: """Check if we already looked at this field in this document.""" return any(i.document == doc and i.field == field for i in self.inspections) def has_checked(self, name: str) -> bool: """Check if this validation check has already been run.""" return any(c.check_name == name for c in self.checks) def has_queried(self, target: str) -> bool: """Check if we already queried this person or department.""" return any(q.target == target for q in self.queries) # --------------------------------------------------------------------------- # BaseTask — abstract interface # --------------------------------------------------------------------------- class BaseTask: """Abstract base that all task classes inherit from.""" task_id: str = "base" max_steps: int = 20 difficulty: str = "easy" def get_purchase_order(self) -> PurchaseOrder: raise NotImplementedError def get_invoice(self) -> Invoice: raise NotImplementedError def get_grn(self) -> GoodsReceiptNote: raise NotImplementedError def get_supplier_master(self) -> SupplierMaster: raise NotImplementedError def get_exception_flag(self) -> ExceptionFlag: raise NotImplementedError def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]: raise NotImplementedError def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]: raise NotImplementedError def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]: raise NotImplementedError def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]: raise NotImplementedError def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]: raise NotImplementedError def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]: raise NotImplementedError def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float: raise NotImplementedError def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float: raise NotImplementedError def simulate_close(self, summary: str, ep: EpisodeData) -> float: raise NotImplementedError def grade(self, ep: EpisodeData) -> Dict[str, float]: raise NotImplementedError @property def available_checks(self) -> List[str]: return [] @property def available_rules(self) -> List[str]: return [] @property def knowledge_base(self) -> List[str]: return [] # --------------------------------------------------------------------------- # Task 1 — Price Variance Exception (Easy) # --------------------------------------------------------------------------- class PriceVarianceTask(BaseTask): """ Office stationery invoice arrives 3.08% above the PO. Company tolerance is +/-2% auto-approval. Supplier had verbal approval from procurement for the price increase but the PO was never updated. Optimal path: check tolerance -> cross-check prices -> verify GRN -> query supplier -> query procurement -> apply exception rule -> approve -> route to procurement for PO amendment -> close. """ task_id = "task1_price_variance" max_steps = 18 difficulty = "easy" def get_purchase_order(self) -> PurchaseOrder: return PurchaseOrder( po_number="PO-2024-1041", vendor_name="OfficeNeed Supplies", po_date="2024-02-15", line_items=[ LineItem(description="A4 Paper", quantity=100, unit_price=220.0, total=22000.0, tax_rate=18.0), LineItem(description="Ballpoint Pens", quantity=20, unit_price=450.0, total=9000.0, tax_rate=18.0), LineItem(description="Staplers", quantity=10, unit_price=1900.0, total=19000.0, tax_rate=18.0), ], total_amount=50000.0, payment_terms="Net-30", ) def get_invoice(self) -> Invoice: return Invoice( invoice_number="INV-ON-8821", supplier_name="OfficeNeed Supplies", invoice_date="2024-03-05", due_date="2024-04-04", po_reference="PO-2024-1041", line_items=[ LineItem(description="A4 Paper", quantity=100, unit_price=231.0, total=23100.0, tax_rate=18.0), LineItem(description="Ballpoint Pens", quantity=20, unit_price=472.0, total=9440.0, tax_rate=18.0), LineItem(description="Staplers", quantity=10, unit_price=1900.0, total=19000.0, tax_rate=18.0), ], subtotal=51540.0, tax_amount=9277.20, tax_rate=18.0, total_amount=60817.20, bank_account="9876543210", bank_name="HDFC Bank", ifsc_code="HDFC0001234", supplier_gstin="29AABCO1234F1Z5", supplier_email="accounts@officeneed.com", ) def get_grn(self) -> GoodsReceiptNote: return GoodsReceiptNote( grn_number="GRN-2024-0892", po_reference="PO-2024-1041", receipt_date="2024-03-01", items_received=[ {"description": "A4 Paper", "quantity_received": 100, "quantity_pending": 0, "quantity_rejected": 0}, {"description": "Ballpoint Pens", "quantity_received": 20, "quantity_pending": 0, "quantity_rejected": 0}, {"description": "Staplers", "quantity_received": 10, "quantity_pending": 0, "quantity_rejected": 0}, ], receiving_officer="Ramesh Kumar", notes="All items received in good condition.", ) def get_supplier_master(self) -> SupplierMaster: return SupplierMaster( supplier_id="SUP-0441", supplier_name="OfficeNeed Supplies", registered_address="45 MG Road, Bengaluru 560001", gstin="29AABCO1234F1Z5", bank_account="9876543210", bank_name="HDFC Bank", ifsc_code="HDFC0001234", contact_email="sales@officeneed.com", contact_phone="+91-80-4567-8901", registered_domain="officeneed.com", pan_number="AABCO1234F", status="active", ) def get_exception_flag(self) -> ExceptionFlag: return ExceptionFlag( flag_code="PRICE_MISMATCH", flag_description=( "Invoice total ₹51,540 exceeds PO ₹50,000 by ₹1,540 (3.08%). " "Above auto-approval threshold." ), auto_hold=True, flagged_date="2024-03-06", severity="medium", ) @property def available_checks(self) -> List[str]: return ["tolerance_rule", "grn_match", "duplicate_detection", "bank_account_verification", "gst_verification", "po_match"] @property def available_rules(self) -> List[str]: return ["tolerance_2pct_auto_approve", "tolerance_exception_approval", "rejection_with_reason", "partial_approval"] @property def knowledge_base(self) -> List[str]: return [ "POL-001: Price variance ≤±2% may be auto-approved. Above 2% requires exception approval.", "POL-002: Exception approval requires confirmation from originating department.", "POL-003: Any approved invoice with a price change must be followed by a PO amendment request.", "POL-004: Bank account on invoice must match supplier master.", ] # --- Simulators --- def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]: """Return meaningful values for key fields, small reward for others.""" key_fields = { ("invoice", "line_items"): ("A4 Paper @₹231 (+5%), Pens @₹472 (+4.9%), Staplers @₹1900 (unchanged)", 0.10), ("invoice", "total_amount"): ("₹51,540 (subtotal) + ₹9,277.20 (GST 18%) = ₹60,817.20", 0.08), ("po", "line_items"): ("A4 Paper @₹220, Pens @₹450, Staplers @₹1900. Total: ₹50,000", 0.06), ("grn", "items_received"): ("All 3 items fully received. No pending, no rejected.", 0.05), ("invoice", "bank_account"): ("9876543210 — HDFC Bank, IFSC HDFC0001234", 0.02), ("invoice", "supplier_gstin"): ("29AABCO1234F1Z5", 0.02), } key = (document.lower(), field.lower()) value, reward = key_fields.get(key, (f"{document}.{field} — no anomaly detected", 0.01)) result = InspectionResult(document=document, field=field, value=value, note="") return result, reward def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]: """Cross-check a field between two documents.""" checks = { ("unit_price", "invoice", "po"): (False, "MISMATCH: A4 Paper ₹231 vs ₹220 (+5.0%), Pens ₹472 vs ₹450 (+4.9%). Staplers match.", 0.12), ("total_amount", "invoice", "po"): (False, "Invoice subtotal ₹51,540 vs PO ₹50,000. Variance: +₹1,540 (+3.08%).", 0.10), ("bank_account", "invoice", "supplier_master"): (True, "Bank account 9876543210 matches supplier master.", 0.03), ("gstin", "invoice", "supplier_master"): (True, "GSTIN 29AABCO1234F1Z5 matches supplier master.", 0.02), ("quantity", "invoice", "grn"): (True, "All quantities match: 100 reams, 20 boxes, 10 units.", 0.04), } key = (field.lower(), doc_a.lower(), doc_b.lower()) passed, detail, reward = checks.get(key, (True, f"No mismatch found for {field} between {doc_a} and {doc_b}.", 0.01)) result = CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail) return result, reward def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]: """Run a named validation check.""" checks = { "tolerance_rule": (False, "Price variance 3.08% exceeds ±2% auto-approval threshold. Manual exception approval required.", 0.14), "grn_match": (True, "All items fully received. GRN matches invoice quantities.", 0.06), "duplicate_detection": (True, "No duplicate invoice found in payment history.", 0.02), "bank_account_verification": (True, "Bank account matches supplier master record.", 0.02), "gst_verification": (True, "GSTIN matches supplier master. GST calculation correct.", 0.02), "po_match": (False, "PO match FAILED on unit prices: 2 of 3 line items have price variance.", 0.08), } passed, detail, reward = checks.get(check_name, (True, f"Check '{check_name}' passed — no issues found.", 0.01)) result = CheckResult(check_name=check_name, passed=passed, detail=detail) return result, reward def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]: """Query the supplier — returns email explaining the price increase.""" response = ( "Dear Sir/Madam, due to a 12% increase in raw material costs effective January 2024, " "we revised prices for A4 Paper and Ballpoint Pens. This was communicated to Mr. Arjun Mehta " "in your Procurement team via email on Feb 20, 2024. He acknowledged and verbally approved " "the revised pricing. We can provide the email trail if needed. — OfficeNeed Supplies" ) result = QueryResult(target="supplier", question=question, response=response, channel=channel) return result, 0.10 def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]: """Query an internal department.""" if department.lower() == "procurement": response = ( "Hi, this is Arjun Mehta from Procurement. Yes, I received the price revision email " "from OfficeNeed on Feb 20. I verbally approved it as the increase was reasonable " "(raw material cost pass-through). I should have raised a PO amendment but it slipped. " "I'll raise the amendment today. Please go ahead and approve the invoice." ) return QueryResult(target="procurement", question=question, response=response, channel="internal"), 0.12 response = f"{department.title()} department: We don't have specific information about this invoice exception." return QueryResult(target=department.lower(), question=question, response=response, channel="internal"), 0.03 def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]: """Apply a business rule.""" rules = { "tolerance_2pct_auto_approve": ("BLOCKED: Cannot auto-approve. Price variance 3.08% exceeds ±2% threshold.", -0.05), "tolerance_exception_approval": ("APPLIED: Exception approval pathway activated. Requires department confirmation (obtained from procurement).", 0.10), "rejection_with_reason": ("APPLIED: Rejection rule activated. Invoice will be returned to supplier.", -0.08), "partial_approval": ("NOT APPLICABLE: All items received in full. Partial approval not warranted.", -0.05), } detail, reward = rules.get(rule_id, (f"Rule '{rule_id}' not found in policy database.", -0.03)) return detail, reward def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float: """Score the agent's decision based on evidence gathered.""" checks_run = {c.check_name for c in ep.checks} queries_to = {q.target for q in ep.queries} if decision == "approve": if "tolerance_rule" in checks_run and "procurement" in queries_to: return 0.25 elif "tolerance_rule" in checks_run: return 0.18 else: return 0.05 elif decision == "reject": return -0.10 elif decision == "hold": return 0.08 return 0.0 def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float: """Score routing decisions.""" routes = {"procurement": 0.12, "finance": 0.03, "legal": -0.05} return routes.get(team.lower(), 0.0) def simulate_close(self, summary: str, ep: EpisodeData) -> float: """Score case closure.""" checks_run = {c.check_name for c in ep.checks} if ep.decision == "approve" and "tolerance_rule" in checks_run and "procurement" in set(ep.routed_to): return 0.12 elif ep.decision is not None: return 0.06 return 0.0 def grade(self, ep: EpisodeData) -> Dict[str, float]: """Final grader producing sub-scores.""" checks_run = {c.check_name for c in ep.checks} queries_to = {q.target for q in ep.queries} # Diagnosis d = 0.0 if any("unit_price" in c.check_name or "total" in c.check_name for c in ep.checks): d += 0.12 if "tolerance_rule" in checks_run: d += 0.14 if "grn_match" in checks_run: d += 0.06 # Investigation i = 0.0 if "supplier" in queries_to: i += 0.10 if "procurement" in queries_to: i += 0.12 if "tolerance_exception_approval" in ep.rules_applied: i += 0.08 # Decision dec = 0.0 if ep.decision == "approve": dec += 0.18 elif ep.decision == "hold": dec += 0.06 elif ep.decision == "reject": dec -= 0.10 # Routing route = 0.12 if "procurement" in ep.routed_to else 0.0 # Closure closure = 0.08 if ep.closed else 0.0 # Efficiency eff = max(0.0, 0.06 - 0.004 * max(0, ep.step_count - 9)) total = d + i + dec + route + closure + eff return { "score": round(max(0.0, min(1.0, total)), 4), "diagnosis_score": round(d, 4), "investigation_score": round(i, 4), "decision_score": round(dec, 4), "routing_score": round(route, 4), "closure_score": round(closure, 4), "efficiency_score": round(eff, 4), } # --------------------------------------------------------------------------- # Task 2 — Duplicate Invoice with Hidden Tax Error (Medium) # --------------------------------------------------------------------------- class DuplicateTaxErrorTask(BaseTask): """ Logistics supplier submits INV-2024-891 which is a duplicate of already-paid INV-2024-819 (digit transposition). The original invoice applied 15% GST (wrong), correct rate is 18%. Company overpaid ₹3,240. The new invoice has the correct rate. It's both a duplicate AND a legitimate correction. """ task_id = "task2_duplicate_tax" max_steps = 20 difficulty = "medium" def get_purchase_order(self) -> PurchaseOrder: return PurchaseOrder( po_number="PO-2024-0778", vendor_name="FastMove Logistics", po_date="2024-01-25", line_items=[ LineItem(description="Mumbai-Pune Transport", quantity=20, unit_price=4500.0, total=90000.0, tax_rate=18.0), LineItem(description="Warehousing charges Feb 2024", quantity=1, unit_price=18000.0, total=18000.0, tax_rate=18.0), ], total_amount=108000.0, payment_terms="Net-15", ) def get_invoice(self) -> Invoice: return Invoice( invoice_number="INV-2024-891", supplier_name="FastMove Logistics", invoice_date="2024-03-12", due_date="2024-03-27", po_reference="PO-2024-0778", line_items=[ LineItem(description="Mumbai-Pune Transport", quantity=20, unit_price=4500.0, total=90000.0, tax_rate=18.0), LineItem(description="Warehousing charges Feb 2024", quantity=1, unit_price=18000.0, total=18000.0, tax_rate=18.0), ], subtotal=108000.0, tax_amount=19440.0, tax_rate=18.0, total_amount=127440.0, bank_account="1122334455", bank_name="ICICI Bank", ifsc_code="ICIC0005678", supplier_gstin="27AABCF5678G1Z3", supplier_email="billing@fastmove.in", ) def get_grn(self) -> GoodsReceiptNote: return GoodsReceiptNote( grn_number="GRN-2024-0740", po_reference="PO-2024-0778", receipt_date="2024-02-28", items_received=[ {"description": "Mumbai-Pune Transport", "quantity_received": 20, "quantity_pending": 0, "quantity_rejected": 0, "service_confirmed": True}, {"description": "Warehousing charges Feb 2024", "quantity_received": 1, "quantity_pending": 0, "quantity_rejected": 0, "service_confirmed": True}, ], receiving_officer="Priya Sharma", notes="All transport trips completed. Warehousing service confirmed for February.", ) def get_supplier_master(self) -> SupplierMaster: return SupplierMaster( supplier_id="SUP-0229", supplier_name="FastMove Logistics", registered_address="12 Logistics Park, Navi Mumbai 400710", gstin="27AABCF5678G1Z3", bank_account="1122334455", bank_name="ICICI Bank", ifsc_code="ICIC0005678", contact_email="accounts@fastmove.in", contact_phone="+91-22-3456-7890", registered_domain="fastmove.in", pan_number="AABCF5678G", status="active", ) def get_exception_flag(self) -> ExceptionFlag: return ExceptionFlag( flag_code="POSSIBLE_DUPLICATE", flag_description="Invoice INV-2024-891 closely matches previously processed invoice INV-2024-819. Possible duplicate submission.", auto_hold=True, flagged_date="2024-03-13", severity="high", ) @property def available_checks(self) -> List[str]: return ["duplicate_detection", "tax_calculation_verify", "grn_match", "bank_account_verification", "gst_verification", "po_match"] @property def available_rules(self) -> List[str]: return ["partial_approval", "credit_note_request", "full_rejection", "duplicate_block", "tax_correction"] @property def knowledge_base(self) -> List[str]: return [ "POL-005: Duplicate invoices must be rejected unless they represent a legitimate correction.", "POL-006: Tax calculation errors on paid invoices require a credit note and correction entry.", "POL-007: Partial approval may be used when only a portion of the invoice amount is valid.", "POL-008: Any tax correction must be documented with the original invoice reference.", ] def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]: key_fields = { ("invoice", "invoice_number"): ("INV-2024-891 — note digit transposition vs INV-2024-819 (891 vs 819)", 0.10), ("invoice", "tax_amount"): ("₹19,440 (18% GST on ₹1,08,000) — this is the CORRECT rate", 0.08), ("invoice", "total_amount"): ("₹1,27,440 (subtotal ₹1,08,000 + 18% GST ₹19,440)", 0.05), ("invoice", "line_items"): ("Transport 20×₹4,500 = ₹90,000 + Warehousing ₹18,000 = ₹1,08,000", 0.04), } key = (document.lower(), field.lower()) value, reward = key_fields.get(key, (f"{document}.{field} — no anomaly detected", 0.01)) return InspectionResult(document=document, field=field, value=value, note=""), reward def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]: checks = { ("invoice_number", "invoice", "payment_history"): (False, "MATCH FOUND: INV-2024-819 paid 12 days ago for ₹1,24,200. Digit transposition: 891 vs 819.", 0.15), ("tax_amount", "invoice", "payment_history"): (False, "TAX DISCREPANCY: Original INV-2024-819 had 15% GST (₹16,200). Current INV-2024-891 has 18% GST (₹19,440). Delta: ₹3,240.", 0.14), ("total_amount", "invoice", "po"): (True, "Invoice subtotal ₹1,08,000 matches PO total ₹1,08,000.", 0.03), ("bank_account", "invoice", "supplier_master"): (True, "Bank account matches supplier master.", 0.02), } key = (field.lower(), doc_a.lower(), doc_b.lower()) passed, detail, reward = checks.get(key, (True, f"No mismatch for {field}.", 0.01)) return CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail), reward def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]: checks = { "duplicate_detection": (False, "DUPLICATE FOUND: INV-2024-891 matches INV-2024-819 (paid 12 days ago, ₹1,24,200). Invoice numbers differ by digit transposition (891 vs 819).", 0.18), "tax_calculation_verify": (False, "TAX ERROR on ORIGINAL: INV-2024-819 applied 15% GST (₹16,200) instead of correct 18% (₹19,440). Company overpaid ₹3,240 in tax on already-paid invoice.", 0.16), "grn_match": (True, "Services fully confirmed. GRN matches invoice.", 0.04), "bank_account_verification": (True, "Bank account matches supplier master.", 0.02), "gst_verification": (True, "GSTIN matches supplier master.", 0.02), "po_match": (True, "PO amounts and line items match current invoice.", 0.03), } passed, detail, reward = checks.get(check_name, (True, f"Check '{check_name}' passed.", 0.01)) return CheckResult(check_name=check_name, passed=passed, detail=detail), reward def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]: response = ( "We are aware that INV-2024-819 was submitted with incorrect 15% GST. The correct rate " "is 18%. INV-2024-891 is a corrected resubmission. We request partial approval for the " "₹3,240 tax differential only, not the full invoice amount. We will issue a credit note " "for the remaining amount." ) return QueryResult(target="supplier", question=question, response=response, channel=channel), 0.10 def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]: if department.lower() == "finance": response = ( "Confirmed: INV-2024-819 was paid on March 1 for ₹1,24,200 (₹1,08,000 + 15% GST of " "₹16,200). The correct GST rate for logistics services is 18%. We overpaid — the " "correct total should have been ₹1,27,440. The tax differential is ₹3,240. This " "can be corrected via partial approval of the new invoice for ₹3,240 only." ) return QueryResult(target="finance", question=question, response=response, channel="internal"), 0.12 response = f"{department.title()}: No specific information available." return QueryResult(target=department.lower(), question=question, response=response, channel="internal"), 0.03 def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]: rules = { "partial_approval": ("APPLIED: Partial approval for ₹3,240 (tax correction delta). Main invoice amount blocked as duplicate.", 0.12), "credit_note_request": ("APPLIED: Credit note requested from supplier for balance amount. Reference: INV-2024-819 tax correction.", 0.10), "full_rejection": ("APPLIED: Full rejection. Invoice returned to supplier.", -0.05), "duplicate_block": ("APPLIED: Duplicate block activated. Full payment prevented.", 0.04), "tax_correction": ("APPLIED: Tax correction entry created referencing original INV-2024-819.", 0.08), } detail, reward = rules.get(rule_id, (f"Rule '{rule_id}' not found.", -0.03)) return detail, reward def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float: checks_run = {c.check_name for c in ep.checks} dup_found = "duplicate_detection" in checks_run tax_found = "tax_calculation_verify" in checks_run if decision == "partial_approve": if dup_found and tax_found: return 0.28 elif dup_found: return 0.14 return 0.06 elif decision == "reject": if dup_found: return 0.08 return 0.02 elif decision == "approve": return -0.15 elif decision == "hold": return 0.06 return 0.0 def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float: routes = {"finance": 0.08, "procurement": 0.03, "legal": 0.02} return routes.get(team.lower(), 0.0) def simulate_close(self, summary: str, ep: EpisodeData) -> float: if ep.decision == "partial_approve" and ep.closed is False: return 0.06 elif ep.decision is not None: return 0.03 return 0.0 def grade(self, ep: EpisodeData) -> Dict[str, float]: checks_run = {c.check_name for c in ep.checks} queries_to = {q.target for q in ep.queries} # Diagnosis (max 0.30) d = 0.0 if "duplicate_detection" in checks_run: d += 0.16 if "tax_calculation_verify" in checks_run: d += 0.14 # Investigation (max 0.32) i = 0.0 if "finance" in queries_to: i += 0.12 if "supplier" in queries_to: i += 0.10 if "partial_approval" in ep.rules_applied: i += 0.06 if "credit_note_request" in ep.rules_applied: i += 0.04 # Decision (max 0.20) dec = 0.0 if ep.decision == "partial_approve": dec = 0.20 elif ep.decision == "reject": dec = 0.05 elif ep.decision == "approve": dec = -0.15 elif ep.decision == "hold": dec = 0.04 # Routing (max 0.08) route = 0.08 if "finance" in ep.routed_to else 0.0 # Closure (max 0.06) closure = 0.06 if ep.closed else 0.0 # Efficiency eff = max(0.0, 0.04 - 0.003 * max(0, ep.step_count - 10)) total = d + i + dec + route + closure + eff return { "score": round(max(0.0, min(1.0, total)), 4), "diagnosis_score": round(d, 4), "investigation_score": round(i, 4), "decision_score": round(dec, 4), "routing_score": round(route, 4), "closure_score": round(closure, 4), "efficiency_score": round(eff, 4), } # --------------------------------------------------------------------------- # Task 3 — Compound Fraud Signals (Hard) # --------------------------------------------------------------------------- class CompoundFraudTask(BaseTask): """ IT supplier submits ₹8,47,500 invoice for 15 laptops. System flags a bank account change. But there are FOUR simultaneous fraud signals: bank BEC, GSTIN mismatch, quantity mismatch (13 vs 15), and price inflation (8.65%). Critical trap: querying supplier via email contacts the fraudster (-0.15). Must use phone to reach real supplier (+0.15). """ task_id = "task3_compound_fraud" max_steps = 25 difficulty = "hard" def get_purchase_order(self) -> PurchaseOrder: return PurchaseOrder( po_number="PO-2024-0955", vendor_name="TechCore Solutions", po_date="2024-03-08", line_items=[ LineItem(description="Business Laptop (14-inch, i7, 16GB)", quantity=15, unit_price=52000.0, total=780000.0, tax_rate=18.0), ], total_amount=780000.0, payment_terms="Net-30", ) def get_invoice(self) -> Invoice: return Invoice( invoice_number="INV-TC-2024-0312", supplier_name="TechCore Solutions", invoice_date="2024-03-10", due_date="2024-04-09", po_reference="PO-2024-0955", line_items=[ LineItem(description="Business Laptop (14-inch, i7, 16GB)", quantity=15, unit_price=56500.0, total=847500.0, tax_rate=18.0), ], subtotal=847500.0, tax_amount=152550.0, tax_rate=18.0, total_amount=1000050.0, bank_account="5566778899", bank_name="Yes Bank", ifsc_code="YESB0000999", supplier_gstin="07AABCT9999X1Z8", supplier_email="accounts@techcore-solutions.com", ) def get_grn(self) -> GoodsReceiptNote: return GoodsReceiptNote( grn_number="GRN-2024-0901", po_reference="PO-2024-0955", receipt_date="2024-03-15", items_received=[ {"description": "Business Laptop (14-inch, i7, 16GB)", "quantity_received": 13, "quantity_pending": 2, "quantity_rejected": 0}, ], receiving_officer="Vikram Singh", notes="13 of 15 laptops received. 2 units still in transit.", ) def get_supplier_master(self) -> SupplierMaster: return SupplierMaster( supplier_id="SUP-0187", supplier_name="TechCore Solutions", registered_address="88 Tech Park, Sector 62, Noida 201301", gstin="07AABCT1234Y1Z5", bank_account="1234567890", bank_name="State Bank of India", ifsc_code="SBIN0001234", contact_email="sales@techcore-solutions.in", contact_phone="+91-120-456-7890", registered_domain="techcore-solutions.in", pan_number="AABCT1234Y", status="active", ) def get_exception_flag(self) -> ExceptionFlag: return ExceptionFlag( flag_code="BANK_ACCOUNT_CHANGE", flag_description=( "Invoice bank account (5566778899, Yes Bank) does not match supplier master " "(1234567890, SBI). Bank account change request received from " "accounts@techcore-solutions.com." ), auto_hold=True, flagged_date="2024-03-16", severity="critical", ) @property def available_checks(self) -> List[str]: return ["bank_account_verification", "gst_verification", "grn_match", "email_domain_verification", "invoice_date_validation", "quantity_check", "price_check", "duplicate_detection", "po_match"] @property def available_rules(self) -> List[str]: return ["fraud_hold", "rejection_with_reason", "bank_change_verification", "escalate_to_security"] @property def knowledge_base(self) -> List[str]: return [ "POL-004: Bank account on invoice must match supplier master.", "POL-009: Bank account change must be verified via registered phone number — NEVER via email.", "POL-010: GSTIN on invoice must match supplier master. Mismatch is a fraud indicator.", "POL-011: Invoice quantities must not exceed GRN quantities.", "POL-012: Any suspected fraud must be escalated to Legal and Security teams.", "POL-013: Do not process payment while fraud investigation is pending.", ] def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]: key_fields = { ("invoice", "bank_account"): ("5566778899 (Yes Bank) — DOES NOT MATCH supplier master (1234567890, SBI)", 0.12), ("invoice", "supplier_gstin"): ("07AABCT9999X1Z8 — DOES NOT MATCH supplier master (07AABCT1234Y1Z5)", 0.10), ("invoice", "supplier_email"): ("accounts@techcore-solutions.com — domain is .com, registered domain is .in", 0.08), ("grn", "items_received"): ("13 of 15 laptops received. 2 pending delivery.", 0.08), ("invoice", "line_items"): ("15 laptops @ ₹56,500 = ₹8,47,500. PO price was ₹52,000/unit.", 0.06), ("invoice", "invoice_date"): ("2024-03-10 (Sunday) — unusual for B2B invoicing", 0.04), ("invoice", "total_amount"): ("₹10,00,050 (₹8,47,500 + 18% GST ₹1,52,550)", 0.03), } key = (document.lower(), field.lower()) value, reward = key_fields.get(key, (f"{document}.{field} — value noted", 0.01)) return InspectionResult(document=document, field=field, value=value, note=""), reward def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]: checks = { ("bank_account", "invoice", "supplier_master"): (False, "MISMATCH: Invoice has 5566778899 (Yes Bank). Supplier master has 1234567890 (SBI). Change request from lookalike domain.", 0.14), ("gstin", "invoice", "supplier_master"): (False, "MISMATCH: Invoice GSTIN 07AABCT9999X1Z8 belongs to 'TechCore Trading Pvt Ltd' (different entity). Supplier master: 07AABCT1234Y1Z5.", 0.14), ("quantity", "invoice", "grn"): (False, "MISMATCH: Invoice claims 15 units. GRN shows only 13 received, 2 pending.", 0.10), ("unit_price", "invoice", "po"): (False, "MISMATCH: Invoice ₹56,500/unit vs PO ₹52,000/unit. Variance: +8.65%. No approved revision.", 0.08), } key = (field.lower(), doc_a.lower(), doc_b.lower()) passed, detail, reward = checks.get(key, (True, f"No mismatch for {field}.", 0.01)) return CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail), reward def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]: checks = { "bank_account_verification": (False, "FAILED: Bank account mismatch. Change request from techcore-solutions.com (lookalike of registered .in domain). Suspected BEC attack.", 0.18), "gst_verification": (False, "FAILED: GSTIN 07AABCT9999X1Z8 registered to 'TechCore Trading Pvt Ltd' in Delhi — a DIFFERENT entity. Supplier master shows 07AABCT1234Y1Z5 for 'TechCore Solutions'.", 0.18), "grn_match": (False, "FAILED: Invoice claims 15 laptops. GRN confirms only 13 received. 2 units still in transit.", 0.14), "email_domain_verification": (False, "FAILED: Invoice email domain techcore-solutions.com does not match registered domain techcore-solutions.in. Lookalike domain — possible BEC.", 0.16), "invoice_date_validation": (False, "WARNING: Invoice dated 2024-03-10 is a Sunday. Unusual for B2B invoicing.", 0.08), "quantity_check": (False, "FAILED: Invoiced quantity (15) exceeds received quantity (13). 2 units undelivered.", 0.12), "price_check": (False, "FAILED: Unit price ₹56,500 is 8.65% above PO price ₹52,000. No price revision approved.", 0.10), "duplicate_detection": (True, "No duplicate invoice found.", 0.02), "po_match": (False, "FAILED: Multiple mismatches — GSTIN, quantity, and unit price all differ from PO/supplier master.", 0.08), } passed, detail, reward = checks.get(check_name, (True, f"Check '{check_name}' passed.", 0.01)) return CheckResult(check_name=check_name, passed=passed, detail=detail), reward def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]: """Critical trap: email contacts the fraudster, phone reaches real supplier.""" if channel.lower() in ("email", "mail"): response = ( "Dear Customer, thank you for reaching out. We recently changed our banking details " "due to a corporate restructuring. The new account (5566778899, Yes Bank) is correct. " "Please process the payment at your earliest convenience. We can provide updated " "KYC documents if needed. — TechCore Solutions Finance Team" ) return QueryResult(target="supplier", question=question, response=response, channel=channel), -0.15 else: response = ( "This is Rajesh from TechCore Solutions. We have NOT sent any bank account change " "request. Our bank account is still 1234567890 with SBI. We also have not sent any " "invoice from techcore-solutions.com — our domain is techcore-solutions.in. This " "looks like a fraud attempt. Please do NOT process payment to the new account. " "We will file a complaint with our legal team." ) return QueryResult(target="supplier", question=question, response=response, channel=channel), 0.15 def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]: dept = department.lower() if dept == "security": response = ( "Security Team: We will initiate a BEC investigation. The lookalike domain " "techcore-solutions.com was registered 3 days before the invoice date. This is " "a classic Business Email Compromise pattern. Do NOT process any payment. " "We are preserving email headers for forensic analysis." ) return QueryResult(target="security", question=question, response=response, channel="internal"), 0.10 elif dept == "legal": response = ( "Legal Team: Based on the fraud indicators you've documented, we recommend: " "1) Immediate payment block, 2) Formal complaint to cybercrime authorities, " "3) Supplier audit of TechCore Solutions, 4) Review of all recent invoices " "from this supplier." ) return QueryResult(target="legal", question=question, response=response, channel="internal"), 0.08 elif dept == "finance": response = "Finance: Payment has been blocked pending investigation. No funds released." return QueryResult(target="finance", question=question, response=response, channel="internal"), 0.04 elif dept == "procurement": response = "Procurement: PO-2024-0955 was raised on March 8. Standard 2-day processing for IT equipment." return QueryResult(target="procurement", question=question, response=response, channel="internal"), 0.03 response = f"{department.title()}: No specific information available." return QueryResult(target=dept, question=question, response=response, channel="internal"), 0.02 def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]: rules = { "fraud_hold": ("APPLIED: Fraud hold activated. All payments to this supplier frozen pending investigation.", 0.10), "rejection_with_reason": ("APPLIED: Invoice rejected with documented fraud signals.", 0.06), "bank_change_verification": ("APPLIED: Bank change verification protocol activated. Phone verification required per POL-009.", 0.08), "escalate_to_security": ("APPLIED: Case escalated to Information Security team for BEC investigation.", 0.08), } detail, reward = rules.get(rule_id, (f"Rule '{rule_id}' not applicable.", -0.03)) return detail, reward def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float: checks_run = {c.check_name for c in ep.checks} signals = sum([ "bank_account_verification" in checks_run, "gst_verification" in checks_run, "grn_match" in checks_run, "email_domain_verification" in checks_run, ]) if decision == "reject": return 0.10 + 0.05 * signals elif decision == "approve": return -0.40 elif decision == "partial_approve": return -0.20 elif decision == "hold": return 0.08 + 0.03 * signals return 0.0 def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float: routes = {"legal": 0.14, "security": 0.12, "finance": 0.08, "procurement": 0.06} return routes.get(team.lower(), 0.0) def simulate_close(self, summary: str, ep: EpisodeData) -> float: if ep.closed is False and ep.decision == "reject": return 0.06 return 0.0 def grade(self, ep: EpisodeData) -> Dict[str, float]: checks_run = {c.check_name for c in ep.checks} bank_found = "bank_account_verification" in checks_run gst_found = "gst_verification" in checks_run qty_found = "grn_match" in checks_run domain_found = "email_domain_verification" in checks_run price_found = "price_check" in checks_run # Diagnosis d = ((0.12 if bank_found else 0) + (0.12 if gst_found else 0) + (0.10 if qty_found else 0) + (0.10 if domain_found else 0) + (0.06 if price_found else 0)) # Investigation — reward phone, penalise email i = 0.0 for q in ep.queries: if q.target == "supplier" and q.channel not in ("email", "mail"): i += 0.10 elif q.target == "supplier" and q.channel in ("email", "mail"): i -= 0.15 if "legal" in {q.target for q in ep.queries}: i += 0.06 if "security" in {q.target for q in ep.queries}: i += 0.06 # Decision signals = sum([bank_found, gst_found, qty_found, domain_found]) dec = 0.0 if ep.decision == "reject": dec = 0.08 + 0.03 * signals elif ep.decision == "approve": dec = -0.35 elif ep.decision == "partial_approve": dec = -0.15 elif ep.decision == "hold": dec = 0.06 # Routing routes = set(ep.routed_to) route = ((0.10 if "legal" in routes else 0) + (0.06 if "security" in routes else 0) + (0.04 if "finance" in routes else 0)) # Closure closure = 0.06 if (ep.closed and ep.decision == "reject") else 0.0 # Efficiency eff = max(0.0, 0.04 - 0.002 * max(0, ep.step_count - 12)) total = d + i + dec + route + closure + eff return { "score": round(max(0.0, min(1.0, total)), 4), "signals_found": sum([bank_found, gst_found, qty_found, domain_found, price_found]), "diagnosis_score": round(d, 4), "investigation_score": round(i, 4), "decision_score": round(dec, 4), "routing_score": round(route, 4), "closure_score": round(closure, 4), "efficiency_score": round(eff, 4), } # --------------------------------------------------------------------------- # Task Registry # --------------------------------------------------------------------------- TASK_REGISTRY: Dict[str, type] = { "task1_price_variance": PriceVarianceTask, "task2_duplicate_tax": DuplicateTaxErrorTask, "task3_compound_fraud": CompoundFraudTask, } ALL_TASKS = list(TASK_REGISTRY.keys()) def make_task(task_id: str) -> BaseTask: """Instantiate a task by its ID. Raises ValueError for unknown IDs.""" cls = TASK_REGISTRY.get(task_id) if cls is None: raise ValueError(f"Unknown task '{task_id}'. Available: {ALL_TASKS}") return cls()