Spaces:
Sleeping
Sleeping
| """ | |
| Task definitions for the Invoice Exception Handler environment. | |
| Each task defines a scenario with documents, simulator logic for every action | |
| type, and a grader that returns an overall score clamped to [0.0, 1.0] plus | |
| per-category sub-scores (the decision sub-score is negative when the decision | |
| is penalised). This is the biggest file in the project — it contains all the | |
| business logic the environment needs. | |
| """ | |
| from __future__ import annotations | |
| import time | |
| from typing import Any, Dict, List, Optional, Tuple | |
| from .models import ( | |
| ActionType, CheckResult, ExceptionFlag, GoodsReceiptNote, | |
| InspectionResult, Invoice, LineItem, PurchaseOrder, QueryResult, | |
| SupplierMaster, | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # EpisodeData — mutable state for one episode | |
| # --------------------------------------------------------------------------- | |
class EpisodeData:
    """Mutable record of one episode, read by the simulators and graders.

    The object only stores history; it performs no scoring of its own.
    """

    def __init__(self) -> None:
        # Evidence collected so far, in the order it was gathered.
        self.inspections: List[InspectionResult] = []
        self.checks: List[CheckResult] = []
        self.queries: List[QueryResult] = []
        self.rules_applied: List[str] = []
        # Outcome of the case (unset until the agent decides).
        self.decision: Optional[str] = None
        self.decision_reason: Optional[str] = None
        self.routed_to: List[str] = []
        # Closure bookkeeping.
        self.closed: bool = False
        self.close_summary: Optional[str] = None
        # Running counters.
        self.step_count: int = 0
        self.cumulative_reward: float = 0.0

    def has_inspected(self, doc: str, field: str) -> bool:
        """Return True if an inspection of ``field`` in ``doc`` is recorded."""
        for entry in self.inspections:
            if entry.document == doc and entry.field == field:
                return True
        return False

    def has_checked(self, name: str) -> bool:
        """Return True if a check with exactly this name is recorded."""
        for entry in self.checks:
            if entry.check_name == name:
                return True
        return False

    def has_queried(self, target: str) -> bool:
        """Return True if a query to exactly this target is recorded."""
        for entry in self.queries:
            if entry.target == target:
                return True
        return False
| # --------------------------------------------------------------------------- | |
| # BaseTask — abstract interface | |
| # --------------------------------------------------------------------------- | |
class BaseTask:
    """Interface every task implements.

    Document getters, simulators and ``grade`` raise ``NotImplementedError``
    here; the three catalogue methods default to empty lists.
    """

    task_id: str = "base"
    max_steps: int = 20
    difficulty: str = "easy"

    # --- Scenario documents -------------------------------------------------

    def get_purchase_order(self) -> PurchaseOrder:
        """Return the scenario's purchase order."""
        raise NotImplementedError

    def get_invoice(self) -> Invoice:
        """Return the scenario's invoice."""
        raise NotImplementedError

    def get_grn(self) -> GoodsReceiptNote:
        """Return the scenario's goods receipt note."""
        raise NotImplementedError

    def get_supplier_master(self) -> SupplierMaster:
        """Return the scenario's supplier master record."""
        raise NotImplementedError

    def get_exception_flag(self) -> ExceptionFlag:
        """Return the exception flag that opened the case."""
        raise NotImplementedError

    # --- Evidence-gathering simulators (result, step reward) ----------------

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Simulate inspecting ``field`` of ``document``."""
        raise NotImplementedError

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Simulate comparing ``field`` between ``doc_a`` and ``doc_b``."""
        raise NotImplementedError

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Simulate running the named validation check."""
        raise NotImplementedError

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Simulate asking the supplier ``question`` over ``channel``."""
        raise NotImplementedError

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Simulate asking an internal ``department`` a ``question``."""
        raise NotImplementedError

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Simulate applying a business rule; returns (detail, step reward)."""
        raise NotImplementedError

    # --- Resolution simulators (step reward only) ---------------------------

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Return the step reward for ``decision`` given the episode so far."""
        raise NotImplementedError

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Return the step reward for routing the case to ``team``."""
        raise NotImplementedError

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Return the step reward for closing the case."""
        raise NotImplementedError

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Return the final score breakdown for the episode."""
        raise NotImplementedError

    # --- Catalogues exposed to the agent (empty by default) -----------------

    def available_checks(self) -> List[str]:
        """Names of validation checks the task offers."""
        return []

    def available_rules(self) -> List[str]:
        """Identifiers of business rules the task offers."""
        return []

    def knowledge_base(self) -> List[str]:
        """Policy snippets the task offers."""
        return []
| # --------------------------------------------------------------------------- | |
| # Task 1 — Price Variance Exception (Easy) | |
| # --------------------------------------------------------------------------- | |
class PriceVarianceTask(BaseTask):
    """
    Task 1 (easy): office stationery invoice arrives 3.08% above the PO.

    Company tolerance is +/-2% auto-approval. Supplier had verbal approval
    from procurement for the price increase but the PO was never updated.

    Optimal path: check tolerance -> cross-check prices -> verify GRN ->
    query supplier -> query procurement -> apply exception rule -> approve ->
    route to procurement for PO amendment -> close.

    Identifier matching (check names, rule ids, decisions, teams) is
    case-insensitive in every simulator and in the grader, so an action that
    earns a step reward is also recognised when the episode is graded.
    """

    task_id = "task1_price_variance"
    max_steps = 18
    difficulty = "easy"

    @staticmethod
    def _norm(value: Optional[str]) -> str:
        """Lower-case an identifier; ``None`` becomes the empty string."""
        return (value or "").lower()

    # --- Scenario documents ---

    def get_purchase_order(self) -> PurchaseOrder:
        """Return the PO: three stationery lines totalling ₹50,000."""
        return PurchaseOrder(
            po_number="PO-2024-1041",
            vendor_name="OfficeNeed Supplies",
            po_date="2024-02-15",
            line_items=[
                LineItem(description="A4 Paper", quantity=100, unit_price=220.0, total=22000.0, tax_rate=18.0),
                LineItem(description="Ballpoint Pens", quantity=20, unit_price=450.0, total=9000.0, tax_rate=18.0),
                LineItem(description="Staplers", quantity=10, unit_price=1900.0, total=19000.0, tax_rate=18.0),
            ],
            total_amount=50000.0,
            payment_terms="Net-30",
        )

    def get_invoice(self) -> Invoice:
        """Return the invoice: same quantities, two lines priced above the PO."""
        return Invoice(
            invoice_number="INV-ON-8821",
            supplier_name="OfficeNeed Supplies",
            invoice_date="2024-03-05",
            due_date="2024-04-04",
            po_reference="PO-2024-1041",
            line_items=[
                LineItem(description="A4 Paper", quantity=100, unit_price=231.0, total=23100.0, tax_rate=18.0),
                LineItem(description="Ballpoint Pens", quantity=20, unit_price=472.0, total=9440.0, tax_rate=18.0),
                LineItem(description="Staplers", quantity=10, unit_price=1900.0, total=19000.0, tax_rate=18.0),
            ],
            subtotal=51540.0,
            tax_amount=9277.20,
            tax_rate=18.0,
            total_amount=60817.20,
            bank_account="9876543210",
            bank_name="HDFC Bank",
            ifsc_code="HDFC0001234",
            supplier_gstin="29AABCO1234F1Z5",
            supplier_email="accounts@officeneed.com",
        )

    def get_grn(self) -> GoodsReceiptNote:
        """Return the GRN: every line fully received, nothing pending or rejected."""
        return GoodsReceiptNote(
            grn_number="GRN-2024-0892",
            po_reference="PO-2024-1041",
            receipt_date="2024-03-01",
            items_received=[
                {"description": "A4 Paper", "quantity_received": 100, "quantity_pending": 0, "quantity_rejected": 0},
                {"description": "Ballpoint Pens", "quantity_received": 20, "quantity_pending": 0, "quantity_rejected": 0},
                {"description": "Staplers", "quantity_received": 10, "quantity_pending": 0, "quantity_rejected": 0},
            ],
            receiving_officer="Ramesh Kumar",
            notes="All items received in good condition.",
        )

    def get_supplier_master(self) -> SupplierMaster:
        """Return the supplier master; bank and GSTIN match the invoice."""
        return SupplierMaster(
            supplier_id="SUP-0441",
            supplier_name="OfficeNeed Supplies",
            registered_address="45 MG Road, Bengaluru 560001",
            gstin="29AABCO1234F1Z5",
            bank_account="9876543210",
            bank_name="HDFC Bank",
            ifsc_code="HDFC0001234",
            contact_email="sales@officeneed.com",
            contact_phone="+91-80-4567-8901",
            registered_domain="officeneed.com",
            pan_number="AABCO1234F",
            status="active",
        )

    def get_exception_flag(self) -> ExceptionFlag:
        """Return the PRICE_MISMATCH flag that put the invoice on hold."""
        return ExceptionFlag(
            flag_code="PRICE_MISMATCH",
            flag_description=(
                "Invoice total ₹51,540 exceeds PO ₹50,000 by ₹1,540 (3.08%). "
                "Above auto-approval threshold."
            ),
            auto_hold=True,
            flagged_date="2024-03-06",
            severity="medium",
        )

    # --- Catalogues ---

    def available_checks(self) -> List[str]:
        """Names accepted by ``simulate_run_check``."""
        return ["tolerance_rule", "grn_match", "duplicate_detection",
                "bank_account_verification", "gst_verification", "po_match"]

    def available_rules(self) -> List[str]:
        """Identifiers accepted by ``simulate_apply_rule``."""
        return ["tolerance_2pct_auto_approve", "tolerance_exception_approval",
                "rejection_with_reason", "partial_approval"]

    def knowledge_base(self) -> List[str]:
        """Policy snippets relevant to this scenario."""
        return [
            "POL-001: Price variance ≤±2% may be auto-approved. Above 2% requires exception approval.",
            "POL-002: Exception approval requires confirmation from originating department.",
            "POL-003: Any approved invoice with a price change must be followed by a PO amendment request.",
            "POL-004: Bank account on invoice must match supplier master.",
        ]

    # --- Simulators ---

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Return meaningful values for key fields, small reward for others."""
        key_fields = {
            ("invoice", "line_items"): ("A4 Paper @₹231 (+5%), Pens @₹472 (+4.9%), Staplers @₹1900 (unchanged)", 0.10),
            ("invoice", "total_amount"): ("₹51,540 (subtotal) + ₹9,277.20 (GST 18%) = ₹60,817.20", 0.08),
            ("po", "line_items"): ("A4 Paper @₹220, Pens @₹450, Staplers @₹1900. Total: ₹50,000", 0.06),
            ("grn", "items_received"): ("All 3 items fully received. No pending, no rejected.", 0.05),
            ("invoice", "bank_account"): ("9876543210 — HDFC Bank, IFSC HDFC0001234", 0.02),
            ("invoice", "supplier_gstin"): ("29AABCO1234F1Z5", 0.02),
        }
        key = (document.lower(), field.lower())
        value, reward = key_fields.get(key, (f"{document}.{field} — no anomaly detected", 0.01))
        result = InspectionResult(document=document, field=field, value=value, note="")
        return result, reward

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Cross-check a field between two documents.

        The recorded check name keeps the caller's spelling; the grader
        lower-cases names when it reads them back.
        """
        checks = {
            ("unit_price", "invoice", "po"): (False, "MISMATCH: A4 Paper ₹231 vs ₹220 (+5.0%), Pens ₹472 vs ₹450 (+4.9%). Staplers match.", 0.12),
            ("total_amount", "invoice", "po"): (False, "Invoice subtotal ₹51,540 vs PO ₹50,000. Variance: +₹1,540 (+3.08%).", 0.10),
            ("bank_account", "invoice", "supplier_master"): (True, "Bank account 9876543210 matches supplier master.", 0.03),
            ("gstin", "invoice", "supplier_master"): (True, "GSTIN 29AABCO1234F1Z5 matches supplier master.", 0.02),
            ("quantity", "invoice", "grn"): (True, "All quantities match: 100 reams, 20 boxes, 10 units.", 0.04),
        }
        key = (field.lower(), doc_a.lower(), doc_b.lower())
        passed, detail, reward = checks.get(key, (True, f"No mismatch found for {field} between {doc_a} and {doc_b}.", 0.01))
        result = CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail)
        return result, reward

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Run a named validation check (name matched case-insensitively)."""
        checks = {
            "tolerance_rule": (False, "Price variance 3.08% exceeds ±2% auto-approval threshold. Manual exception approval required.", 0.14),
            "grn_match": (True, "All items fully received. GRN matches invoice quantities.", 0.06),
            "duplicate_detection": (True, "No duplicate invoice found in payment history.", 0.02),
            "bank_account_verification": (True, "Bank account matches supplier master record.", 0.02),
            "gst_verification": (True, "GSTIN matches supplier master. GST calculation correct.", 0.02),
            "po_match": (False, "PO match FAILED on unit prices: 2 of 3 line items have price variance.", 0.08),
        }
        passed, detail, reward = checks.get(
            check_name.lower(),
            (True, f"Check '{check_name}' passed — no issues found.", 0.01),
        )
        # The name is recorded as given so existing history lookups still match.
        result = CheckResult(check_name=check_name, passed=passed, detail=detail)
        return result, reward

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Query the supplier — returns email explaining the price increase."""
        response = (
            "Dear Sir/Madam, due to a 12% increase in raw material costs effective January 2024, "
            "we revised prices for A4 Paper and Ballpoint Pens. This was communicated to Mr. Arjun Mehta "
            "in your Procurement team via email on Feb 20, 2024. He acknowledged and verbally approved "
            "the revised pricing. We can provide the email trail if needed. — OfficeNeed Supplies"
        )
        result = QueryResult(target="supplier", question=question, response=response, channel=channel)
        return result, 0.10

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Query an internal department; only procurement has the answer."""
        if department.lower() == "procurement":
            response = (
                "Hi, this is Arjun Mehta from Procurement. Yes, I received the price revision email "
                "from OfficeNeed on Feb 20. I verbally approved it as the increase was reasonable "
                "(raw material cost pass-through). I should have raised a PO amendment but it slipped. "
                "I'll raise the amendment today. Please go ahead and approve the invoice."
            )
            return QueryResult(target="procurement", question=question, response=response, channel="internal"), 0.12
        response = f"{department.title()} department: We don't have specific information about this invoice exception."
        return QueryResult(target=department.lower(), question=question, response=response, channel="internal"), 0.03

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Apply a business rule (id matched case-insensitively)."""
        rules = {
            "tolerance_2pct_auto_approve": ("BLOCKED: Cannot auto-approve. Price variance 3.08% exceeds ±2% threshold.", -0.05),
            "tolerance_exception_approval": ("APPLIED: Exception approval pathway activated. Requires department confirmation (obtained from procurement).", 0.10),
            "rejection_with_reason": ("APPLIED: Rejection rule activated. Invoice will be returned to supplier.", -0.08),
            "partial_approval": ("NOT APPLICABLE: All items received in full. Partial approval not warranted.", -0.05),
        }
        detail, reward = rules.get(rule_id.lower(), (f"Rule '{rule_id}' not found in policy database.", -0.03))
        return detail, reward

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Score the agent's decision based on evidence gathered.

        Approval is worth most when both the tolerance check and the
        procurement query are already in the episode history.
        """
        checks_run = {self._norm(c.check_name) for c in ep.checks}
        queries_to = {self._norm(q.target) for q in ep.queries}
        verdict = self._norm(decision)
        if verdict == "approve":
            if "tolerance_rule" in checks_run and "procurement" in queries_to:
                return 0.25
            elif "tolerance_rule" in checks_run:
                return 0.18
            else:
                return 0.05
        elif verdict == "reject":
            return -0.10
        elif verdict == "hold":
            return 0.08
        return 0.0

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Score routing decisions; unknown teams earn nothing."""
        routes = {"procurement": 0.12, "finance": 0.03, "legal": -0.05}
        return routes.get(team.lower(), 0.0)

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Score case closure.

        Full reward needs an approve decision, the tolerance check and a
        route to procurement; any other decision earns the smaller reward.
        """
        checks_run = {self._norm(c.check_name) for c in ep.checks}
        routed = {self._norm(t) for t in ep.routed_to}
        if self._norm(ep.decision) == "approve" and "tolerance_rule" in checks_run and "procurement" in routed:
            return 0.12
        elif ep.decision is not None:
            return 0.06
        return 0.0

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Final grader producing sub-scores.

        Recorded identifiers are lower-cased before comparison so that the
        grader agrees with the case-insensitive simulators. The component
        maxima sum to 1.06, so the overall ``score`` is clamped to [0, 1];
        ``decision_score`` is negative for a reject decision.
        """
        check_names = [self._norm(c.check_name) for c in ep.checks]
        checks_run = set(check_names)
        queries_to = {self._norm(q.target) for q in ep.queries}
        rules_applied = {self._norm(r) for r in ep.rules_applied}
        routed = {self._norm(t) for t in ep.routed_to}
        verdict = self._norm(ep.decision)
        # Diagnosis
        d = 0.0
        if any("unit_price" in name or "total" in name for name in check_names):
            d += 0.12
        if "tolerance_rule" in checks_run:
            d += 0.14
        if "grn_match" in checks_run:
            d += 0.06
        # Investigation
        i = 0.0
        if "supplier" in queries_to:
            i += 0.10
        if "procurement" in queries_to:
            i += 0.12
        if "tolerance_exception_approval" in rules_applied:
            i += 0.08
        # Decision
        dec = 0.0
        if verdict == "approve":
            dec += 0.18
        elif verdict == "hold":
            dec += 0.06
        elif verdict == "reject":
            dec -= 0.10
        # Routing
        route = 0.12 if "procurement" in routed else 0.0
        # Closure
        closure = 0.08 if ep.closed else 0.0
        # Efficiency: full bonus up to 9 steps, then 0.004 lost per extra step.
        eff = max(0.0, 0.06 - 0.004 * max(0, ep.step_count - 9))
        total = d + i + dec + route + closure + eff
        return {
            "score": round(max(0.0, min(1.0, total)), 4),
            "diagnosis_score": round(d, 4),
            "investigation_score": round(i, 4),
            "decision_score": round(dec, 4),
            "routing_score": round(route, 4),
            "closure_score": round(closure, 4),
            "efficiency_score": round(eff, 4),
        }
| # --------------------------------------------------------------------------- | |
| # Task 2 — Duplicate Invoice with Hidden Tax Error (Medium) | |
| # --------------------------------------------------------------------------- | |
class DuplicateTaxErrorTask(BaseTask):
    """
    Task 2 (medium): duplicate invoice that also carries a tax correction.

    Logistics supplier submits INV-2024-891 which is a duplicate of already-paid
    INV-2024-819 (digit transposition). The original invoice applied 15% GST
    (wrong), correct rate is 18%, so the company underpaid ₹3,240 in tax. The
    new invoice has the correct rate. It's both a duplicate AND a legitimate
    correction: only the ₹3,240 differential should be paid.

    Identifier matching (check names, rule ids, decisions, teams) is
    case-insensitive in every simulator and in the grader.
    """

    task_id = "task2_duplicate_tax"
    max_steps = 20
    difficulty = "medium"

    @staticmethod
    def _norm(value: Optional[str]) -> str:
        """Lower-case an identifier; ``None`` becomes the empty string."""
        return (value or "").lower()

    # --- Scenario documents ---

    def get_purchase_order(self) -> PurchaseOrder:
        """Return the PO: transport and warehousing totalling ₹1,08,000."""
        return PurchaseOrder(
            po_number="PO-2024-0778",
            vendor_name="FastMove Logistics",
            po_date="2024-01-25",
            line_items=[
                LineItem(description="Mumbai-Pune Transport", quantity=20, unit_price=4500.0, total=90000.0, tax_rate=18.0),
                LineItem(description="Warehousing charges Feb 2024", quantity=1, unit_price=18000.0, total=18000.0, tax_rate=18.0),
            ],
            total_amount=108000.0,
            payment_terms="Net-15",
        )

    def get_invoice(self) -> Invoice:
        """Return the resubmitted invoice carrying the correct 18% GST."""
        return Invoice(
            invoice_number="INV-2024-891",
            supplier_name="FastMove Logistics",
            invoice_date="2024-03-12",
            due_date="2024-03-27",
            po_reference="PO-2024-0778",
            line_items=[
                LineItem(description="Mumbai-Pune Transport", quantity=20, unit_price=4500.0, total=90000.0, tax_rate=18.0),
                LineItem(description="Warehousing charges Feb 2024", quantity=1, unit_price=18000.0, total=18000.0, tax_rate=18.0),
            ],
            subtotal=108000.0,
            tax_amount=19440.0,
            tax_rate=18.0,
            total_amount=127440.0,
            bank_account="1122334455",
            bank_name="ICICI Bank",
            ifsc_code="ICIC0005678",
            supplier_gstin="27AABCF5678G1Z3",
            supplier_email="billing@fastmove.in",
        )

    def get_grn(self) -> GoodsReceiptNote:
        """Return the GRN: both services confirmed in full."""
        return GoodsReceiptNote(
            grn_number="GRN-2024-0740",
            po_reference="PO-2024-0778",
            receipt_date="2024-02-28",
            items_received=[
                {"description": "Mumbai-Pune Transport", "quantity_received": 20, "quantity_pending": 0, "quantity_rejected": 0, "service_confirmed": True},
                {"description": "Warehousing charges Feb 2024", "quantity_received": 1, "quantity_pending": 0, "quantity_rejected": 0, "service_confirmed": True},
            ],
            receiving_officer="Priya Sharma",
            notes="All transport trips completed. Warehousing service confirmed for February.",
        )

    def get_supplier_master(self) -> SupplierMaster:
        """Return the supplier master; bank and GSTIN match the invoice."""
        return SupplierMaster(
            supplier_id="SUP-0229",
            supplier_name="FastMove Logistics",
            registered_address="12 Logistics Park, Navi Mumbai 400710",
            gstin="27AABCF5678G1Z3",
            bank_account="1122334455",
            bank_name="ICICI Bank",
            ifsc_code="ICIC0005678",
            contact_email="accounts@fastmove.in",
            contact_phone="+91-22-3456-7890",
            registered_domain="fastmove.in",
            pan_number="AABCF5678G",
            status="active",
        )

    def get_exception_flag(self) -> ExceptionFlag:
        """Return the POSSIBLE_DUPLICATE flag that put the invoice on hold."""
        return ExceptionFlag(
            flag_code="POSSIBLE_DUPLICATE",
            flag_description="Invoice INV-2024-891 closely matches previously processed invoice INV-2024-819. Possible duplicate submission.",
            auto_hold=True,
            flagged_date="2024-03-13",
            severity="high",
        )

    # --- Catalogues ---

    def available_checks(self) -> List[str]:
        """Names accepted by ``simulate_run_check``."""
        return ["duplicate_detection", "tax_calculation_verify", "grn_match",
                "bank_account_verification", "gst_verification", "po_match"]

    def available_rules(self) -> List[str]:
        """Identifiers accepted by ``simulate_apply_rule``."""
        return ["partial_approval", "credit_note_request", "full_rejection",
                "duplicate_block", "tax_correction"]

    def knowledge_base(self) -> List[str]:
        """Policy snippets relevant to this scenario."""
        return [
            "POL-005: Duplicate invoices must be rejected unless they represent a legitimate correction.",
            "POL-006: Tax calculation errors on paid invoices require a credit note and correction entry.",
            "POL-007: Partial approval may be used when only a portion of the invoice amount is valid.",
            "POL-008: Any tax correction must be documented with the original invoice reference.",
        ]

    # --- Simulators ---

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Return annotated values for key invoice fields, small reward otherwise."""
        key_fields = {
            ("invoice", "invoice_number"): ("INV-2024-891 — note digit transposition vs INV-2024-819 (891 vs 819)", 0.10),
            ("invoice", "tax_amount"): ("₹19,440 (18% GST on ₹1,08,000) — this is the CORRECT rate", 0.08),
            ("invoice", "total_amount"): ("₹1,27,440 (subtotal ₹1,08,000 + 18% GST ₹19,440)", 0.05),
            ("invoice", "line_items"): ("Transport 20×₹4,500 = ₹90,000 + Warehousing ₹18,000 = ₹1,08,000", 0.04),
        }
        key = (document.lower(), field.lower())
        value, reward = key_fields.get(key, (f"{document}.{field} — no anomaly detected", 0.01))
        return InspectionResult(document=document, field=field, value=value, note=""), reward

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Cross-check a field between two documents (payment history included)."""
        checks = {
            ("invoice_number", "invoice", "payment_history"): (False, "MATCH FOUND: INV-2024-819 paid 12 days ago for ₹1,24,200. Digit transposition: 891 vs 819.", 0.15),
            ("tax_amount", "invoice", "payment_history"): (False, "TAX DISCREPANCY: Original INV-2024-819 had 15% GST (₹16,200). Current INV-2024-891 has 18% GST (₹19,440). Delta: ₹3,240.", 0.14),
            ("total_amount", "invoice", "po"): (True, "Invoice subtotal ₹1,08,000 matches PO total ₹1,08,000.", 0.03),
            ("bank_account", "invoice", "supplier_master"): (True, "Bank account matches supplier master.", 0.02),
        }
        key = (field.lower(), doc_a.lower(), doc_b.lower())
        passed, detail, reward = checks.get(key, (True, f"No mismatch for {field}.", 0.01))
        return CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail), reward

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Run a named validation check (name matched case-insensitively)."""
        checks = {
            "duplicate_detection": (False, "DUPLICATE FOUND: INV-2024-891 matches INV-2024-819 (paid 12 days ago, ₹1,24,200). Invoice numbers differ by digit transposition (891 vs 819).", 0.18),
            # 15% was charged where 18% was due, so less tax was paid than owed.
            "tax_calculation_verify": (False, "TAX ERROR on ORIGINAL: INV-2024-819 applied 15% GST (₹16,200) instead of correct 18% (₹19,440). Company underpaid ₹3,240 in tax on already-paid invoice.", 0.16),
            "grn_match": (True, "Services fully confirmed. GRN matches invoice.", 0.04),
            "bank_account_verification": (True, "Bank account matches supplier master.", 0.02),
            "gst_verification": (True, "GSTIN matches supplier master.", 0.02),
            "po_match": (True, "PO amounts and line items match current invoice.", 0.03),
        }
        passed, detail, reward = checks.get(check_name.lower(), (True, f"Check '{check_name}' passed.", 0.01))
        # The name is recorded as given so existing history lookups still match.
        return CheckResult(check_name=check_name, passed=passed, detail=detail), reward

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Query the supplier — they confirm the resubmission is a tax correction."""
        response = (
            "We are aware that INV-2024-819 was submitted with incorrect 15% GST. The correct rate "
            "is 18%. INV-2024-891 is a corrected resubmission. We request partial approval for the "
            "₹3,240 tax differential only, not the full invoice amount. We will issue a credit note "
            "for the remaining amount."
        )
        return QueryResult(target="supplier", question=question, response=response, channel=channel), 0.10

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Query an internal department; only finance has the payment details."""
        if department.lower() == "finance":
            response = (
                "Confirmed: INV-2024-819 was paid on March 1 for ₹1,24,200 (₹1,08,000 + 15% GST of "
                "₹16,200). The correct GST rate for logistics services is 18%. We underpaid — the "
                "correct total should have been ₹1,27,440. The tax differential is ₹3,240. This "
                "can be corrected via partial approval of the new invoice for ₹3,240 only."
            )
            return QueryResult(target="finance", question=question, response=response, channel="internal"), 0.12
        response = f"{department.title()}: No specific information available."
        return QueryResult(target=department.lower(), question=question, response=response, channel="internal"), 0.03

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Apply a business rule (id matched case-insensitively)."""
        rules = {
            "partial_approval": ("APPLIED: Partial approval for ₹3,240 (tax correction delta). Main invoice amount blocked as duplicate.", 0.12),
            "credit_note_request": ("APPLIED: Credit note requested from supplier for balance amount. Reference: INV-2024-819 tax correction.", 0.10),
            "full_rejection": ("APPLIED: Full rejection. Invoice returned to supplier.", -0.05),
            "duplicate_block": ("APPLIED: Duplicate block activated. Full payment prevented.", 0.04),
            "tax_correction": ("APPLIED: Tax correction entry created referencing original INV-2024-819.", 0.08),
        }
        detail, reward = rules.get(rule_id.lower(), (f"Rule '{rule_id}' not found.", -0.03))
        return detail, reward

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Score the decision; partial approval backed by both checks is best."""
        checks_run = {self._norm(c.check_name) for c in ep.checks}
        dup_found = "duplicate_detection" in checks_run
        tax_found = "tax_calculation_verify" in checks_run
        verdict = self._norm(decision)
        if verdict == "partial_approve":
            if dup_found and tax_found:
                return 0.28
            elif dup_found:
                return 0.14
            return 0.06
        elif verdict == "reject":
            if dup_found:
                return 0.08
            return 0.02
        elif verdict == "approve":
            # Paying the full duplicate is the worst outcome.
            return -0.15
        elif verdict == "hold":
            return 0.06
        return 0.0

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Score routing decisions; unknown teams earn nothing."""
        routes = {"finance": 0.08, "procurement": 0.03, "legal": 0.02}
        return routes.get(team.lower(), 0.0)

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Score case closure.

        The full reward is paid only while ``ep.closed`` is still ``False``.
        NOTE(review): this presumes the environment sets ``ep.closed`` after
        calling this method — confirm against the caller.
        """
        if self._norm(ep.decision) == "partial_approve" and ep.closed is False:
            return 0.06
        elif ep.decision is not None:
            return 0.03
        return 0.0

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Final grader producing sub-scores.

        Recorded identifiers are lower-cased before comparison so that the
        grader agrees with the case-insensitive simulators. The overall
        ``score`` is clamped to [0, 1]; ``decision_score`` is negative for a
        full approve decision.
        """
        checks_run = {self._norm(c.check_name) for c in ep.checks}
        queries_to = {self._norm(q.target) for q in ep.queries}
        rules_applied = {self._norm(r) for r in ep.rules_applied}
        routed = {self._norm(t) for t in ep.routed_to}
        verdict = self._norm(ep.decision)
        # Diagnosis (max 0.30)
        d = 0.0
        if "duplicate_detection" in checks_run:
            d += 0.16
        if "tax_calculation_verify" in checks_run:
            d += 0.14
        # Investigation (max 0.32)
        i = 0.0
        if "finance" in queries_to:
            i += 0.12
        if "supplier" in queries_to:
            i += 0.10
        if "partial_approval" in rules_applied:
            i += 0.06
        if "credit_note_request" in rules_applied:
            i += 0.04
        # Decision (max 0.20)
        dec = 0.0
        if verdict == "partial_approve":
            dec = 0.20
        elif verdict == "reject":
            dec = 0.05
        elif verdict == "approve":
            dec = -0.15
        elif verdict == "hold":
            dec = 0.04
        # Routing (max 0.08)
        route = 0.08 if "finance" in routed else 0.0
        # Closure (max 0.06)
        closure = 0.06 if ep.closed else 0.0
        # Efficiency: full bonus up to 10 steps, then 0.003 lost per extra step.
        eff = max(0.0, 0.04 - 0.003 * max(0, ep.step_count - 10))
        total = d + i + dec + route + closure + eff
        return {
            "score": round(max(0.0, min(1.0, total)), 4),
            "diagnosis_score": round(d, 4),
            "investigation_score": round(i, 4),
            "decision_score": round(dec, 4),
            "routing_score": round(route, 4),
            "closure_score": round(closure, 4),
            "efficiency_score": round(eff, 4),
        }
| # --------------------------------------------------------------------------- | |
| # Task 3 — Compound Fraud Signals (Hard) | |
| # --------------------------------------------------------------------------- | |
| class CompoundFraudTask(BaseTask): | |
| """ | |
| IT supplier submits ₹8,47,500 invoice for 15 laptops. System flags a bank | |
| account change. But there are FOUR simultaneous fraud signals: bank BEC, | |
| GSTIN mismatch, quantity mismatch (13 vs 15), and price inflation (8.65%). | |
| Critical trap: querying supplier via email contacts the fraudster (-0.15). | |
| Must use phone to reach real supplier (+0.15). | |
| """ | |
| task_id = "task3_compound_fraud" | |
| max_steps = 25 | |
| difficulty = "hard" | |
def get_purchase_order(self) -> PurchaseOrder:
    """Return the PO: 15 business laptops at ₹52,000 each, Net-30."""
    laptops = LineItem(
        description="Business Laptop (14-inch, i7, 16GB)",
        quantity=15,
        unit_price=52000.0,
        total=780000.0,
        tax_rate=18.0,
    )
    order = PurchaseOrder(
        po_number="PO-2024-0955",
        vendor_name="TechCore Solutions",
        po_date="2024-03-08",
        line_items=[laptops],
        total_amount=780000.0,
        payment_terms="Net-30",
    )
    return order
def get_invoice(self) -> Invoice:
    """Return the suspect invoice: 15 laptops at ₹56,500 with new bank details."""
    laptops = LineItem(
        description="Business Laptop (14-inch, i7, 16GB)",
        quantity=15,
        unit_price=56500.0,
        total=847500.0,
        tax_rate=18.0,
    )
    bill = Invoice(
        invoice_number="INV-TC-2024-0312",
        supplier_name="TechCore Solutions",
        invoice_date="2024-03-10",
        due_date="2024-04-09",
        po_reference="PO-2024-0955",
        line_items=[laptops],
        subtotal=847500.0,
        tax_amount=152550.0,
        tax_rate=18.0,
        total_amount=1000050.0,
        # Payment and identity details as printed on the invoice.
        bank_account="5566778899",
        bank_name="Yes Bank",
        ifsc_code="YESB0000999",
        supplier_gstin="07AABCT9999X1Z8",
        supplier_email="accounts@techcore-solutions.com",
    )
    return bill
def get_grn(self) -> GoodsReceiptNote:
    """Return the GRN: 13 laptops received, 2 still pending."""
    laptop_receipt = {
        "description": "Business Laptop (14-inch, i7, 16GB)",
        "quantity_received": 13,
        "quantity_pending": 2,
        "quantity_rejected": 0,
    }
    note = GoodsReceiptNote(
        grn_number="GRN-2024-0901",
        po_reference="PO-2024-0955",
        receipt_date="2024-03-15",
        items_received=[laptop_receipt],
        receiving_officer="Vikram Singh",
        notes="13 of 15 laptops received. 2 units still in transit.",
    )
    return note
def get_supplier_master(self) -> SupplierMaster:
    """Return the registered supplier record (SBI account, .in domain)."""
    record = SupplierMaster(
        supplier_id="SUP-0187",
        supplier_name="TechCore Solutions",
        registered_address="88 Tech Park, Sector 62, Noida 201301",
        # Tax and banking identity on file.
        gstin="07AABCT1234Y1Z5",
        bank_account="1234567890",
        bank_name="State Bank of India",
        ifsc_code="SBIN0001234",
        # Registered contact channels.
        contact_email="sales@techcore-solutions.in",
        contact_phone="+91-120-456-7890",
        registered_domain="techcore-solutions.in",
        pan_number="AABCT1234Y",
        status="active",
    )
    return record
def get_exception_flag(self) -> ExceptionFlag:
    """Return the critical BANK_ACCOUNT_CHANGE flag that opened the case."""
    description = (
        "Invoice bank account (5566778899, Yes Bank) does not match supplier master "
        "(1234567890, SBI). Bank account change request received from "
        "accounts@techcore-solutions.com."
    )
    flag = ExceptionFlag(
        flag_code="BANK_ACCOUNT_CHANGE",
        flag_description=description,
        auto_hold=True,
        flagged_date="2024-03-16",
        severity="critical",
    )
    return flag
def available_checks(self) -> List[str]:
    """Return the names of the validation checks offered by this task."""
    check_names = (
        "bank_account_verification",
        "gst_verification",
        "grn_match",
        "email_domain_verification",
        "invoice_date_validation",
        "quantity_check",
        "price_check",
        "duplicate_detection",
        "po_match",
    )
    # Hand back a fresh list on every call so callers may mutate it freely.
    return list(check_names)
def available_rules(self) -> List[str]:
    """Return the IDs of the business rules offered by this task."""
    rule_ids = (
        "fraud_hold",
        "rejection_with_reason",
        "bank_change_verification",
        "escalate_to_security",
    )
    # Hand back a fresh list on every call so callers may mutate it freely.
    return list(rule_ids)
def knowledge_base(self) -> List[str]:
    """Return the policy statements (POL-004, POL-009 to POL-013) for this task."""
    policies = (
        "POL-004: Bank account on invoice must match supplier master.",
        "POL-009: Bank account change must be verified via registered phone number — NEVER via email.",
        "POL-010: GSTIN on invoice must match supplier master. Mismatch is a fraud indicator.",
        "POL-011: Invoice quantities must not exceed GRN quantities.",
        "POL-012: Any suspected fraud must be escalated to Legal and Security teams.",
        "POL-013: Do not process payment while fraud investigation is pending.",
    )
    # Hand back a fresh list on every call so callers may mutate it freely.
    return list(policies)
def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
    """Simulate inspecting one field of one document.

    The (document, field) pair is matched case-insensitively against a table
    of noteworthy fields. A hit returns the prepared finding and its reward;
    anything else returns a generic "value noted" finding worth 0.01.

    Returns:
        A tuple of the inspection result (echoing ``document`` and ``field``
        exactly as given) and the step reward.
    """
    findings = {
        ("invoice", "bank_account"): (
            "5566778899 (Yes Bank) — DOES NOT MATCH supplier master (1234567890, SBI)",
            0.12,
        ),
        ("invoice", "supplier_gstin"): (
            "07AABCT9999X1Z8 — DOES NOT MATCH supplier master (07AABCT1234Y1Z5)",
            0.10,
        ),
        ("invoice", "supplier_email"): (
            "accounts@techcore-solutions.com — domain is .com, registered domain is .in",
            0.08,
        ),
        ("grn", "items_received"): (
            "13 of 15 laptops received. 2 pending delivery.",
            0.08,
        ),
        ("invoice", "line_items"): (
            "15 laptops @ ₹56,500 = ₹8,47,500. PO price was ₹52,000/unit.",
            0.06,
        ),
        ("invoice", "invoice_date"): (
            "2024-03-10 (Sunday) — unusual for B2B invoicing",
            0.04,
        ),
        ("invoice", "total_amount"): (
            "₹10,00,050 (₹8,47,500 + 18% GST ₹1,52,550)",
            0.03,
        ),
    }
    lookup = (document.lower(), field.lower())
    if lookup in findings:
        observed, step_reward = findings[lookup]
    else:
        # Unlisted fields still yield a small reward for looking.
        observed, step_reward = f"{document}.{field} — value noted", 0.01
    result = InspectionResult(document=document, field=field, value=observed, note="")
    return result, step_reward
def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
    """Simulate comparing one field between two documents.

    The field and document names are matched case-insensitively. The
    comparison is symmetric: ``(invoice, grn)`` and ``(grn, invoice)`` give
    the same outcome, so a known mismatch is never reported as "No mismatch"
    merely because the documents were named in the other order.

    Args:
        field: Field to compare (e.g. ``"bank_account"``, ``"gstin"``).
        doc_a: First document name.
        doc_b: Second document name.

    Returns:
        A tuple of the check result and the step reward. The result's
        ``check_name`` is built from the arguments exactly as given.
        Unknown combinations pass with a reward of 0.01.
    """
    known_mismatches = {
        ("bank_account", "invoice", "supplier_master"): (
            False,
            "MISMATCH: Invoice has 5566778899 (Yes Bank). Supplier master has 1234567890 (SBI). Change request from lookalike domain.",
            0.14,
        ),
        ("gstin", "invoice", "supplier_master"): (
            False,
            "MISMATCH: Invoice GSTIN 07AABCT9999X1Z8 belongs to 'TechCore Trading Pvt Ltd' (different entity). Supplier master: 07AABCT1234Y1Z5.",
            0.14,
        ),
        ("quantity", "invoice", "grn"): (
            False,
            "MISMATCH: Invoice claims 15 units. GRN shows only 13 received, 2 pending.",
            0.10,
        ),
        ("unit_price", "invoice", "po"): (
            False,
            "MISMATCH: Invoice ₹56,500/unit vs PO ₹52,000/unit. Variance: +8.65%. No approved revision.",
            0.08,
        ),
    }
    field_key = field.lower()
    first, second = doc_a.lower(), doc_b.lower()
    # Try the documents in the given order first, then swapped.
    outcome = None
    for candidate in ((field_key, first, second), (field_key, second, first)):
        outcome = known_mismatches.get(candidate)
        if outcome is not None:
            break
    if outcome is None:
        outcome = (True, f"No mismatch for {field}.", 0.01)
    passed, detail, reward = outcome
    result = CheckResult(
        check_name=f"cross_{field}_{doc_a}_{doc_b}",
        passed=passed,
        detail=detail,
    )
    return result, reward
def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
    """Simulate running a named validation check.

    ``check_name`` is matched exactly (no case folding). A name missing from
    the table is reported as passed with a reward of 0.01.

    Returns:
        A tuple of the check result and the step reward.
    """
    outcomes = {
        "bank_account_verification": (
            False,
            "FAILED: Bank account mismatch. Change request from techcore-solutions.com (lookalike of registered .in domain). Suspected BEC attack.",
            0.18,
        ),
        "gst_verification": (
            False,
            "FAILED: GSTIN 07AABCT9999X1Z8 registered to 'TechCore Trading Pvt Ltd' in Delhi — a DIFFERENT entity. Supplier master shows 07AABCT1234Y1Z5 for 'TechCore Solutions'.",
            0.18,
        ),
        "grn_match": (
            False,
            "FAILED: Invoice claims 15 laptops. GRN confirms only 13 received. 2 units still in transit.",
            0.14,
        ),
        "email_domain_verification": (
            False,
            "FAILED: Invoice email domain techcore-solutions.com does not match registered domain techcore-solutions.in. Lookalike domain — possible BEC.",
            0.16,
        ),
        "invoice_date_validation": (
            False,
            "WARNING: Invoice dated 2024-03-10 is a Sunday. Unusual for B2B invoicing.",
            0.08,
        ),
        "quantity_check": (
            False,
            "FAILED: Invoiced quantity (15) exceeds received quantity (13). 2 units undelivered.",
            0.12,
        ),
        "price_check": (
            False,
            "FAILED: Unit price ₹56,500 is 8.65% above PO price ₹52,000. No price revision approved.",
            0.10,
        ),
        "duplicate_detection": (
            True,
            "No duplicate invoice found.",
            0.02,
        ),
        "po_match": (
            False,
            "FAILED: Multiple mismatches — GSTIN, quantity, and unit price all differ from PO/supplier master.",
            0.08,
        ),
    }
    if check_name in outcomes:
        ok, message, step_reward = outcomes[check_name]
    else:
        ok, message, step_reward = True, f"Check '{check_name}' passed.", 0.01
    result = CheckResult(check_name=check_name, passed=ok, detail=message)
    return result, step_reward
def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
    """Simulate contacting the supplier; the channel decides who answers.

    A channel of "email" or "mail" (case-insensitive) gets a reply that
    insists the new bank account is correct, with a reward of -0.15. Any
    other channel gets a reply denying the bank change, with a reward of 0.15.

    Returns:
        A tuple of the query result (target "supplier", ``channel`` echoed
        as given) and the step reward.
    """
    via_email = channel.lower() in ("email", "mail")
    if not via_email:
        reply = (
            "This is Rajesh from TechCore Solutions. We have NOT sent any bank account change "
            "request. Our bank account is still 1234567890 with SBI. We also have not sent any "
            "invoice from techcore-solutions.com — our domain is techcore-solutions.in. This "
            "looks like a fraud attempt. Please do NOT process payment to the new account. "
            "We will file a complaint with our legal team."
        )
        step_reward = 0.15
    else:
        reply = (
            "Dear Customer, thank you for reaching out. We recently changed our banking details "
            "due to a corporate restructuring. The new account (5566778899, Yes Bank) is correct. "
            "Please process the payment at your earliest convenience. We can provide updated "
            "KYC documents if needed. — TechCore Solutions Finance Team"
        )
        step_reward = -0.15
    result = QueryResult(target="supplier", question=question, response=reply, channel=channel)
    return result, step_reward
def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
    """Simulate asking an internal department a question.

    The department name is matched case-insensitively. Security, legal,
    finance and procurement each have a prepared reply and reward; any
    other department gets a generic reply worth 0.02.

    Returns:
        A tuple of the query result (target is the lower-cased department,
        channel "internal") and the step reward.
    """
    replies = {
        "security": (
            "Security Team: We will initiate a BEC investigation. The lookalike domain "
            "techcore-solutions.com was registered 3 days before the invoice date. This is "
            "a classic Business Email Compromise pattern. Do NOT process any payment. "
            "We are preserving email headers for forensic analysis.",
            0.10,
        ),
        "legal": (
            "Legal Team: Based on the fraud indicators you've documented, we recommend: "
            "1) Immediate payment block, 2) Formal complaint to cybercrime authorities, "
            "3) Supplier audit of TechCore Solutions, 4) Review of all recent invoices "
            "from this supplier.",
            0.08,
        ),
        "finance": (
            "Finance: Payment has been blocked pending investigation. No funds released.",
            0.04,
        ),
        "procurement": (
            "Procurement: PO-2024-0955 was raised on March 8. Standard 2-day processing for IT equipment.",
            0.03,
        ),
    }
    dept_key = department.lower()
    if dept_key in replies:
        reply, step_reward = replies[dept_key]
    else:
        reply = f"{department.title()}: No specific information available."
        step_reward = 0.02
    result = QueryResult(target=dept_key, question=question, response=reply, channel="internal")
    return result, step_reward
def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
    """Simulate applying a business rule.

    ``rule_id`` is matched exactly. A known rule returns its confirmation
    message and reward; an unknown rule returns a "not applicable" message
    with a penalty of -0.03.

    Returns:
        A tuple of the outcome message and the step reward.
    """
    outcomes = {
        "fraud_hold": (
            "APPLIED: Fraud hold activated. All payments to this supplier frozen pending investigation.",
            0.10,
        ),
        "rejection_with_reason": (
            "APPLIED: Invoice rejected with documented fraud signals.",
            0.06,
        ),
        "bank_change_verification": (
            "APPLIED: Bank change verification protocol activated. Phone verification required per POL-009.",
            0.08,
        ),
        "escalate_to_security": (
            "APPLIED: Case escalated to Information Security team for BEC investigation.",
            0.08,
        ),
    }
    if rule_id in outcomes:
        message, step_reward = outcomes[rule_id]
        return message, step_reward
    return f"Rule '{rule_id}' not applicable.", -0.03
def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
    """Return the step reward for a final decision on the invoice.

    The reward for "reject" and "hold" grows with the number of core checks
    (bank account, GST, GRN match, email domain) already present in
    ``ep.checks``. "approve" and "partial_approve" are penalised. Any other
    decision scores 0.0. ``reason`` is not used.
    """
    core_checks = {
        "bank_account_verification",
        "gst_verification",
        "grn_match",
        "email_domain_verification",
    }
    completed = {check.check_name for check in ep.checks}
    signals = len(core_checks & completed)

    if decision == "approve":
        return -0.40
    if decision == "partial_approve":
        return -0.20
    if decision == "reject":
        return 0.10 + 0.05 * signals
    if decision == "hold":
        return 0.08 + 0.03 * signals
    return 0.0
def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
    """Return the step reward for routing the case to ``team``.

    The team name is matched case-insensitively; unlisted teams score 0.0.
    ``notes`` and ``ep`` are not used.
    """
    reward_by_team = {
        "legal": 0.14,
        "security": 0.12,
        "finance": 0.08,
        "procurement": 0.06,
    }
    destination = team.lower()
    if destination in reward_by_team:
        return reward_by_team[destination]
    return 0.0
def simulate_close(self, summary: str, ep: EpisodeData) -> float:
    """Return the step reward for closing the case.

    Closing earns 0.06 only when ``ep.closed`` is exactly ``False`` and the
    recorded decision is "reject"; otherwise 0.0. ``summary`` is not used.
    """
    not_yet_closed = ep.closed is False
    rejected = ep.decision == "reject"
    return 0.06 if (not_yet_closed and rejected) else 0.0
def grade(self, ep: EpisodeData) -> Dict[str, float]:
    """Grade an episode and return the total score with its sub-scores.

    Sub-scores:
        * diagnosis: which key checks appear in ``ep.checks``.
        * investigation: +0.10 per non-email supplier query, -0.15 per
          email/mail supplier query, +0.06 each for querying legal and
          security.
        * decision: reject is rewarded (more with more signals found),
          hold gets a flat reward, approve and partial_approve are penalised.
        * routing: rewards for routing to legal, security and finance.
        * closure: rewarded only for a closed episode that was rejected.
        * efficiency: 0.04, reduced by 0.002 per step beyond 12, floored at 0.

    Channel and team names are compared case-insensitively, matching how the
    simulator methods treat them, so "Email" is penalised and "Legal" is
    credited in the same way as their lower-case forms.

    Returns:
        A dict with ``score`` (total clamped to [0.0, 1.0]),
        ``signals_found`` and the individual sub-scores, rounded to 4 places.
    """
    checks_run = {c.check_name for c in ep.checks}
    bank_found = "bank_account_verification" in checks_run
    gst_found = "gst_verification" in checks_run
    qty_found = "grn_match" in checks_run
    domain_found = "email_domain_verification" in checks_run
    price_found = "price_check" in checks_run

    # Diagnosis
    d = ((0.12 if bank_found else 0) + (0.12 if gst_found else 0)
         + (0.10 if qty_found else 0) + (0.10 if domain_found else 0)
         + (0.06 if price_found else 0))

    # Investigation — reward phone, penalise email
    i = 0.0
    for q in ep.queries:
        if q.target != "supplier":
            continue
        # Normalise case so the grader agrees with the simulator's
        # case-insensitive channel handling.
        channel = (q.channel or "").lower()
        if channel in ("email", "mail"):
            i -= 0.15
        else:
            i += 0.10
    targets = {q.target for q in ep.queries}
    if "legal" in targets:
        i += 0.06
    if "security" in targets:
        i += 0.06

    # Decision
    signals = sum([bank_found, gst_found, qty_found, domain_found])
    dec = 0.0
    if ep.decision == "reject":
        dec = 0.08 + 0.03 * signals
    elif ep.decision == "approve":
        dec = -0.35
    elif ep.decision == "partial_approve":
        dec = -0.15
    elif ep.decision == "hold":
        dec = 0.06

    # Routing — team names are case-insensitive, as in the routing simulator
    routes = {team.lower() for team in ep.routed_to}
    route = ((0.10 if "legal" in routes else 0)
             + (0.06 if "security" in routes else 0)
             + (0.04 if "finance" in routes else 0))

    # Closure
    closure = 0.06 if (ep.closed and ep.decision == "reject") else 0.0

    # Efficiency
    eff = max(0.0, 0.04 - 0.002 * max(0, ep.step_count - 12))

    total = d + i + dec + route + closure + eff
    return {
        "score": round(max(0.0, min(1.0, total)), 4),
        "signals_found": sum([bank_found, gst_found, qty_found, domain_found, price_found]),
        "diagnosis_score": round(d, 4),
        "investigation_score": round(i, 4),
        "decision_score": round(dec, 4),
        "routing_score": round(route, 4),
        "closure_score": round(closure, 4),
        "efficiency_score": round(eff, 4),
    }
| # --------------------------------------------------------------------------- | |
| # Task Registry | |
| # --------------------------------------------------------------------------- | |
# Maps each public task ID to the class that implements it.
TASK_REGISTRY: Dict[str, type] = dict(
    task1_price_variance=PriceVarianceTask,
    task2_duplicate_tax=DuplicateTaxErrorTask,
    task3_compound_fraud=CompoundFraudTask,
)

# Snapshot of the registered task IDs, in registration order.
ALL_TASKS = [*TASK_REGISTRY]
def make_task(task_id: str) -> BaseTask:
    """Create a new task instance for ``task_id``.

    Raises:
        ValueError: If ``task_id`` is not a key of ``TASK_REGISTRY``; the
            message lists the available IDs.
    """
    task_cls = TASK_REGISTRY.get(task_id)
    if task_cls is not None:
        return task_cls()
    raise ValueError(f"Unknown task '{task_id}'. Available: {ALL_TASKS}")