# YUS200619's picture
# feat: complete invoice exception handler v1.0.0
# 562f58d
"""
Task definitions for the Invoice Exception Handler environment.
Each task defines a scenario with documents, simulator logic for every action
type, and a grader that produces an overall score clamped to [0.0, 1.0] plus
per-category sub-scores (the decision sub-score is negative when the decision
is penalised). This is the largest file in the project — it contains all the
business logic the environment needs.
"""
from __future__ import annotations
import time
from typing import Any, Dict, List, Optional, Tuple
from .models import (
ActionType, CheckResult, ExceptionFlag, GoodsReceiptNote,
InspectionResult, Invoice, LineItem, PurchaseOrder, QueryResult,
SupplierMaster,
)
# ---------------------------------------------------------------------------
# EpisodeData — mutable state for one episode
# ---------------------------------------------------------------------------
class EpisodeData:
    """Mutable record of what has happened so far in a single episode.

    Holds the results of every inspection, check and query, the rules that
    were applied, the decision/routing/closure outcome, and the running
    step and reward counters. The ``has_*`` helpers answer whether a given
    action target already appears in that history.
    """

    def __init__(self) -> None:
        # Evidence collected so far.
        self.inspections: List[InspectionResult] = []
        self.checks: List[CheckResult] = []
        self.queries: List[QueryResult] = []
        self.rules_applied: List[str] = []
        # Outcome of the case; stays None / empty / False until set.
        self.decision: Optional[str] = None
        self.decision_reason: Optional[str] = None
        self.routed_to: List[str] = []
        self.closed: bool = False
        self.close_summary: Optional[str] = None
        # Running counters.
        self.step_count: int = 0
        self.cumulative_reward: float = 0.0

    def has_inspected(self, doc: str, field: str) -> bool:
        """Return True if ``field`` of document ``doc`` was already inspected."""
        for entry in self.inspections:
            if entry.document == doc and entry.field == field:
                return True
        return False

    def has_checked(self, name: str) -> bool:
        """Return True if a check result named ``name`` is already recorded."""
        for entry in self.checks:
            if entry.check_name == name:
                return True
        return False

    def has_queried(self, target: str) -> bool:
        """Return True if a query addressed to ``target`` is already recorded."""
        for entry in self.queries:
            if entry.target == target:
                return True
        return False
# ---------------------------------------------------------------------------
# BaseTask — abstract interface
# ---------------------------------------------------------------------------
class BaseTask:
    """Common interface that every concrete task class overrides.

    Document getters, simulators and the grader all raise
    ``NotImplementedError`` here; the three catalogue properties default to
    empty lists.
    """

    task_id: str = "base"
    max_steps: int = 20
    difficulty: str = "easy"

    # --- Scenario documents ---

    def get_purchase_order(self) -> PurchaseOrder:
        """Return the scenario's purchase order."""
        raise NotImplementedError

    def get_invoice(self) -> Invoice:
        """Return the scenario's invoice."""
        raise NotImplementedError

    def get_grn(self) -> GoodsReceiptNote:
        """Return the scenario's goods receipt note."""
        raise NotImplementedError

    def get_supplier_master(self) -> SupplierMaster:
        """Return the scenario's supplier master record."""
        raise NotImplementedError

    def get_exception_flag(self) -> ExceptionFlag:
        """Return the exception flag raised for the scenario."""
        raise NotImplementedError

    # --- Action simulators (each yields a result and/or a step reward) ---

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Simulate inspecting ``field`` of ``document``."""
        raise NotImplementedError

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Simulate comparing ``field`` between ``doc_a`` and ``doc_b``."""
        raise NotImplementedError

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Simulate running the named validation check."""
        raise NotImplementedError

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Simulate asking the supplier ``question`` over ``channel``."""
        raise NotImplementedError

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Simulate asking an internal ``department`` a ``question``."""
        raise NotImplementedError

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Simulate applying the business rule ``rule_id``."""
        raise NotImplementedError

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Return the reward for ``decision`` given the episode history."""
        raise NotImplementedError

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Return the reward for routing the case to ``team``."""
        raise NotImplementedError

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Return the reward for closing the case."""
        raise NotImplementedError

    # --- Final grading ---

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Return the final score breakdown for the episode."""
        raise NotImplementedError

    # --- Catalogues (empty unless a subclass overrides them) ---

    @property
    def available_checks(self) -> List[str]:
        """Names of the validation checks offered by the task."""
        return list()

    @property
    def available_rules(self) -> List[str]:
        """Identifiers of the business rules offered by the task."""
        return list()

    @property
    def knowledge_base(self) -> List[str]:
        """Policy snippets offered by the task."""
        return list()
# ---------------------------------------------------------------------------
# Task 1 — Price Variance Exception (Easy)
# ---------------------------------------------------------------------------
class PriceVarianceTask(BaseTask):
    """
    Easy scenario: an office-stationery invoice lands 3.08% above its PO.

    Auto-approval only covers variances within +/-2%. Procurement verbally
    agreed to the supplier's price increase, but the PO was never amended.

    Best action sequence: check tolerance -> cross-check prices -> verify
    GRN -> query supplier -> query procurement -> apply exception rule ->
    approve -> route to procurement for PO amendment -> close.
    """

    task_id = "task1_price_variance"
    max_steps = 18
    difficulty = "easy"

    # --- Scenario documents ---

    def get_purchase_order(self) -> PurchaseOrder:
        """Build the purchase order carrying the originally agreed prices."""
        ordered = [
            ("A4 Paper", 100, 220.0, 22000.0),
            ("Ballpoint Pens", 20, 450.0, 9000.0),
            ("Staplers", 10, 1900.0, 19000.0),
        ]
        po_lines = [
            LineItem(description=name, quantity=qty, unit_price=price, total=amount, tax_rate=18.0)
            for name, qty, price, amount in ordered
        ]
        return PurchaseOrder(
            po_number="PO-2024-1041",
            vendor_name="OfficeNeed Supplies",
            po_date="2024-02-15",
            line_items=po_lines,
            total_amount=50000.0,
            payment_terms="Net-30",
        )

    def get_invoice(self) -> Invoice:
        """Build the supplier invoice carrying the revised (higher) prices."""
        billed = [
            ("A4 Paper", 100, 231.0, 23100.0),
            ("Ballpoint Pens", 20, 472.0, 9440.0),
            ("Staplers", 10, 1900.0, 19000.0),
        ]
        invoice_lines = [
            LineItem(description=name, quantity=qty, unit_price=price, total=amount, tax_rate=18.0)
            for name, qty, price, amount in billed
        ]
        return Invoice(
            invoice_number="INV-ON-8821",
            supplier_name="OfficeNeed Supplies",
            invoice_date="2024-03-05",
            due_date="2024-04-04",
            po_reference="PO-2024-1041",
            line_items=invoice_lines,
            subtotal=51540.0,
            tax_amount=9277.20,
            tax_rate=18.0,
            total_amount=60817.20,
            bank_account="9876543210",
            bank_name="HDFC Bank",
            ifsc_code="HDFC0001234",
            supplier_gstin="29AABCO1234F1Z5",
            supplier_email="accounts@officeneed.com",
        )

    def get_grn(self) -> GoodsReceiptNote:
        """Build the goods receipt note; every line is fully received."""
        delivered = [("A4 Paper", 100), ("Ballpoint Pens", 20), ("Staplers", 10)]
        received_rows = [
            {"description": name, "quantity_received": qty, "quantity_pending": 0, "quantity_rejected": 0}
            for name, qty in delivered
        ]
        return GoodsReceiptNote(
            grn_number="GRN-2024-0892",
            po_reference="PO-2024-1041",
            receipt_date="2024-03-01",
            items_received=received_rows,
            receiving_officer="Ramesh Kumar",
            notes="All items received in good condition.",
        )

    def get_supplier_master(self) -> SupplierMaster:
        """Build the supplier master record (bank and GSTIN match the invoice)."""
        return SupplierMaster(
            supplier_id="SUP-0441",
            supplier_name="OfficeNeed Supplies",
            registered_address="45 MG Road, Bengaluru 560001",
            gstin="29AABCO1234F1Z5",
            bank_account="9876543210",
            bank_name="HDFC Bank",
            ifsc_code="HDFC0001234",
            contact_email="sales@officeneed.com",
            contact_phone="+91-80-4567-8901",
            registered_domain="officeneed.com",
            pan_number="AABCO1234F",
            status="active",
        )

    def get_exception_flag(self) -> ExceptionFlag:
        """Build the PRICE_MISMATCH flag that put the invoice on hold."""
        description = (
            "Invoice total ₹51,540 exceeds PO ₹50,000 by ₹1,540 (3.08%). "
            "Above auto-approval threshold."
        )
        return ExceptionFlag(
            flag_code="PRICE_MISMATCH",
            flag_description=description,
            auto_hold=True,
            flagged_date="2024-03-06",
            severity="medium",
        )

    # --- Catalogues ---

    @property
    def available_checks(self) -> List[str]:
        """Names of the validation checks this task offers."""
        return [
            "tolerance_rule",
            "grn_match",
            "duplicate_detection",
            "bank_account_verification",
            "gst_verification",
            "po_match",
        ]

    @property
    def available_rules(self) -> List[str]:
        """Identifiers of the business rules this task offers."""
        return [
            "tolerance_2pct_auto_approve",
            "tolerance_exception_approval",
            "rejection_with_reason",
            "partial_approval",
        ]

    @property
    def knowledge_base(self) -> List[str]:
        """Policy snippets relevant to this scenario."""
        policies = []
        policies.append("POL-001: Price variance ≤±2% may be auto-approved. Above 2% requires exception approval.")
        policies.append("POL-002: Exception approval requires confirmation from originating department.")
        policies.append("POL-003: Any approved invoice with a price change must be followed by a PO amendment request.")
        policies.append("POL-004: Bank account on invoice must match supplier master.")
        return policies

    # --- Simulators ---

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Inspect one field; known (document, field) pairs carry a finding and a larger reward."""
        findings = {
            ("invoice", "line_items"): ("A4 Paper @₹231 (+5%), Pens @₹472 (+4.9%), Staplers @₹1900 (unchanged)", 0.10),
            ("invoice", "total_amount"): ("₹51,540 (subtotal) + ₹9,277.20 (GST 18%) = ₹60,817.20", 0.08),
            ("po", "line_items"): ("A4 Paper @₹220, Pens @₹450, Staplers @₹1900. Total: ₹50,000", 0.06),
            ("grn", "items_received"): ("All 3 items fully received. No pending, no rejected.", 0.05),
            ("invoice", "bank_account"): ("9876543210 — HDFC Bank, IFSC HDFC0001234", 0.02),
            ("invoice", "supplier_gstin"): ("29AABCO1234F1Z5", 0.02),
        }
        # Lookup is case-insensitive; the result echoes the caller's spelling.
        lookup = (document.lower(), field.lower())
        fallback = (f"{document}.{field} — no anomaly detected", 0.01)
        observed, points = findings.get(lookup, fallback)
        return InspectionResult(document=document, field=field, value=observed, note=""), points

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Compare one field across two documents; unknown combinations report no mismatch."""
        comparisons = {
            ("unit_price", "invoice", "po"): (False, "MISMATCH: A4 Paper ₹231 vs ₹220 (+5.0%), Pens ₹472 vs ₹450 (+4.9%). Staplers match.", 0.12),
            ("total_amount", "invoice", "po"): (False, "Invoice subtotal ₹51,540 vs PO ₹50,000. Variance: +₹1,540 (+3.08%).", 0.10),
            ("bank_account", "invoice", "supplier_master"): (True, "Bank account 9876543210 matches supplier master.", 0.03),
            ("gstin", "invoice", "supplier_master"): (True, "GSTIN 29AABCO1234F1Z5 matches supplier master.", 0.02),
            ("quantity", "invoice", "grn"): (True, "All quantities match: 100 reams, 20 boxes, 10 units.", 0.04),
        }
        lookup = (field.lower(), doc_a.lower(), doc_b.lower())
        fallback = (True, f"No mismatch found for {field} between {doc_a} and {doc_b}.", 0.01)
        ok, explanation, points = comparisons.get(lookup, fallback)
        # The recorded check name is built from the arguments exactly as given.
        label = f"cross_{field}_{doc_a}_{doc_b}"
        return CheckResult(check_name=label, passed=ok, detail=explanation), points

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Run a named validation check; names not listed here report a pass with a minimal reward."""
        outcomes = {
            "tolerance_rule": (False, "Price variance 3.08% exceeds ±2% auto-approval threshold. Manual exception approval required.", 0.14),
            "grn_match": (True, "All items fully received. GRN matches invoice quantities.", 0.06),
            "duplicate_detection": (True, "No duplicate invoice found in payment history.", 0.02),
            "bank_account_verification": (True, "Bank account matches supplier master record.", 0.02),
            "gst_verification": (True, "GSTIN matches supplier master. GST calculation correct.", 0.02),
            "po_match": (False, "PO match FAILED on unit prices: 2 of 3 line items have price variance.", 0.08),
        }
        fallback = (True, f"Check '{check_name}' passed — no issues found.", 0.01)
        ok, explanation, points = outcomes.get(check_name, fallback)
        return CheckResult(check_name=check_name, passed=ok, detail=explanation), points

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Ask the supplier; the reply explains the price increase regardless of question or channel."""
        reply = (
            "Dear Sir/Madam, due to a 12% increase in raw material costs effective January 2024, "
            "we revised prices for A4 Paper and Ballpoint Pens. This was communicated to Mr. Arjun Mehta "
            "in your Procurement team via email on Feb 20, 2024. He acknowledged and verbally approved "
            "the revised pricing. We can provide the email trail if needed. — OfficeNeed Supplies"
        )
        answer = QueryResult(target="supplier", question=question, response=reply, channel=channel)
        return answer, 0.10

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Ask an internal department; only procurement has a substantive answer."""
        if department.lower() != "procurement":
            generic = f"{department.title()} department: We don't have specific information about this invoice exception."
            return QueryResult(target=department.lower(), question=question, response=generic, channel="internal"), 0.03
        confirmation = (
            "Hi, this is Arjun Mehta from Procurement. Yes, I received the price revision email "
            "from OfficeNeed on Feb 20. I verbally approved it as the increase was reasonable "
            "(raw material cost pass-through). I should have raised a PO amendment but it slipped. "
            "I'll raise the amendment today. Please go ahead and approve the invoice."
        )
        return QueryResult(target="procurement", question=question, response=confirmation, channel="internal"), 0.12

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Apply a business rule and return its outcome text with the step reward."""
        outcomes = {
            "tolerance_2pct_auto_approve": ("BLOCKED: Cannot auto-approve. Price variance 3.08% exceeds ±2% threshold.", -0.05),
            "tolerance_exception_approval": ("APPLIED: Exception approval pathway activated. Requires department confirmation (obtained from procurement).", 0.10),
            "rejection_with_reason": ("APPLIED: Rejection rule activated. Invoice will be returned to supplier.", -0.08),
            "partial_approval": ("NOT APPLICABLE: All items received in full. Partial approval not warranted.", -0.05),
        }
        unknown = (f"Rule '{rule_id}' not found in policy database.", -0.03)
        return outcomes.get(rule_id, unknown)

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Reward the decision; approval pays more the more supporting evidence was gathered."""
        executed = {c.check_name for c in ep.checks}
        contacted = {q.target for q in ep.queries}
        if decision == "reject":
            return -0.10
        if decision == "hold":
            return 0.08
        if decision != "approve":
            return 0.0
        # Approval: tolerance check is the baseline evidence, procurement confirmation the bonus.
        if "tolerance_rule" not in executed:
            return 0.05
        return 0.25 if "procurement" in contacted else 0.18

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Reward routing; procurement is the intended destination."""
        destination = team.lower()
        if destination == "procurement":
            return 0.12
        if destination == "finance":
            return 0.03
        if destination == "legal":
            return -0.05
        return 0.0

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Reward closure; full credit needs approval, the tolerance check and routing to procurement."""
        executed = {c.check_name for c in ep.checks}
        fully_resolved = (
            ep.decision == "approve"
            and "tolerance_rule" in executed
            and "procurement" in set(ep.routed_to)
        )
        if fully_resolved:
            return 0.12
        if ep.decision is None:
            return 0.0
        return 0.06

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Produce the final score (clamped to [0, 1]) and the per-category sub-scores."""
        executed = {c.check_name for c in ep.checks}
        contacted = {q.target for q in ep.queries}

        # Diagnosis: did the agent establish the price gap and its consequences?
        price_compared = False
        for c in ep.checks:
            if "unit_price" in c.check_name or "total" in c.check_name:
                price_compared = True
                break
        diagnosis = 0.0
        if price_compared:
            diagnosis += 0.12
        if "tolerance_rule" in executed:
            diagnosis += 0.14
        if "grn_match" in executed:
            diagnosis += 0.06

        # Investigation: who was asked, and was the exception pathway used?
        investigation = 0.0
        if "supplier" in contacted:
            investigation += 0.10
        if "procurement" in contacted:
            investigation += 0.12
        if "tolerance_exception_approval" in ep.rules_applied:
            investigation += 0.08

        # Decision: approval is correct, hold is tolerated, rejection is penalised.
        decision_points = 0.0
        if ep.decision == "approve":
            decision_points = 0.18
        elif ep.decision == "hold":
            decision_points = 0.06
        elif ep.decision == "reject":
            decision_points = -0.10

        routing = 0.0
        if "procurement" in ep.routed_to:
            routing = 0.12

        closure = 0.0
        if ep.closed:
            closure = 0.08

        # Efficiency bonus shrinks for every step beyond the ninth.
        efficiency = max(0.0, 0.06 - 0.004 * max(0, ep.step_count - 9))

        overall = diagnosis + investigation + decision_points + routing + closure + efficiency
        report = {"score": round(max(0.0, min(1.0, overall)), 4)}
        breakdown = (
            ("diagnosis_score", diagnosis),
            ("investigation_score", investigation),
            ("decision_score", decision_points),
            ("routing_score", routing),
            ("closure_score", closure),
            ("efficiency_score", efficiency),
        )
        for label, points in breakdown:
            report[label] = round(points, 4)
        return report
# ---------------------------------------------------------------------------
# Task 2 — Duplicate Invoice with Hidden Tax Error (Medium)
# ---------------------------------------------------------------------------
class DuplicateTaxErrorTask(BaseTask):
    """
    Logistics supplier submits INV-2024-891 which is a duplicate of already-paid
    INV-2024-819 (digit transposition). The original invoice applied 15% GST
    (wrong), correct rate is 18%. Company underpaid ₹3,240 in tax (paid
    ₹1,24,200 against a correct total of ₹1,27,440). The new invoice has the
    correct rate. It's both a duplicate AND a legitimate correction.
    """

    task_id = "task2_duplicate_tax"
    max_steps = 20
    difficulty = "medium"

    # --- Scenario documents ---

    def get_purchase_order(self) -> PurchaseOrder:
        """Build the purchase order for transport and warehousing services."""
        return PurchaseOrder(
            po_number="PO-2024-0778",
            vendor_name="FastMove Logistics",
            po_date="2024-01-25",
            line_items=[
                LineItem(description="Mumbai-Pune Transport", quantity=20, unit_price=4500.0, total=90000.0, tax_rate=18.0),
                LineItem(description="Warehousing charges Feb 2024", quantity=1, unit_price=18000.0, total=18000.0, tax_rate=18.0),
            ],
            total_amount=108000.0,
            payment_terms="Net-15",
        )

    def get_invoice(self) -> Invoice:
        """Build the resubmitted invoice (INV-2024-891) carrying the correct 18% GST."""
        return Invoice(
            invoice_number="INV-2024-891",
            supplier_name="FastMove Logistics",
            invoice_date="2024-03-12",
            due_date="2024-03-27",
            po_reference="PO-2024-0778",
            line_items=[
                LineItem(description="Mumbai-Pune Transport", quantity=20, unit_price=4500.0, total=90000.0, tax_rate=18.0),
                LineItem(description="Warehousing charges Feb 2024", quantity=1, unit_price=18000.0, total=18000.0, tax_rate=18.0),
            ],
            subtotal=108000.0,
            tax_amount=19440.0,
            tax_rate=18.0,
            total_amount=127440.0,
            bank_account="1122334455",
            bank_name="ICICI Bank",
            ifsc_code="ICIC0005678",
            supplier_gstin="27AABCF5678G1Z3",
            supplier_email="billing@fastmove.in",
        )

    def get_grn(self) -> GoodsReceiptNote:
        """Build the goods receipt note confirming both services in full."""
        return GoodsReceiptNote(
            grn_number="GRN-2024-0740",
            po_reference="PO-2024-0778",
            receipt_date="2024-02-28",
            items_received=[
                {"description": "Mumbai-Pune Transport", "quantity_received": 20, "quantity_pending": 0, "quantity_rejected": 0, "service_confirmed": True},
                {"description": "Warehousing charges Feb 2024", "quantity_received": 1, "quantity_pending": 0, "quantity_rejected": 0, "service_confirmed": True},
            ],
            receiving_officer="Priya Sharma",
            notes="All transport trips completed. Warehousing service confirmed for February.",
        )

    def get_supplier_master(self) -> SupplierMaster:
        """Build the supplier master record (bank and GSTIN match the invoice)."""
        return SupplierMaster(
            supplier_id="SUP-0229",
            supplier_name="FastMove Logistics",
            registered_address="12 Logistics Park, Navi Mumbai 400710",
            gstin="27AABCF5678G1Z3",
            bank_account="1122334455",
            bank_name="ICICI Bank",
            ifsc_code="ICIC0005678",
            contact_email="accounts@fastmove.in",
            contact_phone="+91-22-3456-7890",
            registered_domain="fastmove.in",
            pan_number="AABCF5678G",
            status="active",
        )

    def get_exception_flag(self) -> ExceptionFlag:
        """Build the POSSIBLE_DUPLICATE flag that put the invoice on hold."""
        return ExceptionFlag(
            flag_code="POSSIBLE_DUPLICATE",
            flag_description="Invoice INV-2024-891 closely matches previously processed invoice INV-2024-819. Possible duplicate submission.",
            auto_hold=True,
            flagged_date="2024-03-13",
            severity="high",
        )

    # --- Catalogues ---

    @property
    def available_checks(self) -> List[str]:
        """Names of the validation checks this task offers."""
        return ["duplicate_detection", "tax_calculation_verify", "grn_match",
                "bank_account_verification", "gst_verification", "po_match"]

    @property
    def available_rules(self) -> List[str]:
        """Identifiers of the business rules this task offers."""
        return ["partial_approval", "credit_note_request", "full_rejection",
                "duplicate_block", "tax_correction"]

    @property
    def knowledge_base(self) -> List[str]:
        """Policy snippets relevant to this scenario."""
        return [
            "POL-005: Duplicate invoices must be rejected unless they represent a legitimate correction.",
            "POL-006: Tax calculation errors on paid invoices require a credit note and correction entry.",
            "POL-007: Partial approval may be used when only a portion of the invoice amount is valid.",
            "POL-008: Any tax correction must be documented with the original invoice reference.",
        ]

    # --- Simulators ---

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Inspect one field; known (document, field) pairs carry a finding and a larger reward."""
        key_fields = {
            ("invoice", "invoice_number"): ("INV-2024-891 — note digit transposition vs INV-2024-819 (891 vs 819)", 0.10),
            ("invoice", "tax_amount"): ("₹19,440 (18% GST on ₹1,08,000) — this is the CORRECT rate", 0.08),
            ("invoice", "total_amount"): ("₹1,27,440 (subtotal ₹1,08,000 + 18% GST ₹19,440)", 0.05),
            ("invoice", "line_items"): ("Transport 20×₹4,500 = ₹90,000 + Warehousing ₹18,000 = ₹1,08,000", 0.04),
        }
        # Lookup is case-insensitive; the result echoes the caller's spelling.
        key = (document.lower(), field.lower())
        value, reward = key_fields.get(key, (f"{document}.{field} — no anomaly detected", 0.01))
        return InspectionResult(document=document, field=field, value=value, note=""), reward

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Compare one field across two documents; unknown combinations report no mismatch."""
        checks = {
            ("invoice_number", "invoice", "payment_history"): (False, "MATCH FOUND: INV-2024-819 paid 12 days ago for ₹1,24,200. Digit transposition: 891 vs 819.", 0.15),
            ("tax_amount", "invoice", "payment_history"): (False, "TAX DISCREPANCY: Original INV-2024-819 had 15% GST (₹16,200). Current INV-2024-891 has 18% GST (₹19,440). Delta: ₹3,240.", 0.14),
            ("total_amount", "invoice", "po"): (True, "Invoice subtotal ₹1,08,000 matches PO total ₹1,08,000.", 0.03),
            ("bank_account", "invoice", "supplier_master"): (True, "Bank account matches supplier master.", 0.02),
        }
        key = (field.lower(), doc_a.lower(), doc_b.lower())
        passed, detail, reward = checks.get(key, (True, f"No mismatch for {field}.", 0.01))
        # The recorded check name is built from the arguments exactly as given.
        return CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail), reward

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Run a named validation check; names not listed here report a pass with a minimal reward."""
        checks = {
            "duplicate_detection": (False, "DUPLICATE FOUND: INV-2024-891 matches INV-2024-819 (paid 12 days ago, ₹1,24,200). Invoice numbers differ by digit transposition (891 vs 819).", 0.18),
            # Paid ₹1,24,200 (15% GST) against a correct ₹1,27,440 (18% GST): a shortfall, i.e. an underpayment.
            "tax_calculation_verify": (False, "TAX ERROR on ORIGINAL: INV-2024-819 applied 15% GST (₹16,200) instead of correct 18% (₹19,440). Company underpaid ₹3,240 in tax on already-paid invoice.", 0.16),
            "grn_match": (True, "Services fully confirmed. GRN matches invoice.", 0.04),
            "bank_account_verification": (True, "Bank account matches supplier master.", 0.02),
            "gst_verification": (True, "GSTIN matches supplier master.", 0.02),
            "po_match": (True, "PO amounts and line items match current invoice.", 0.03),
        }
        passed, detail, reward = checks.get(check_name, (True, f"Check '{check_name}' passed.", 0.01))
        return CheckResult(check_name=check_name, passed=passed, detail=detail), reward

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Ask the supplier; the reply explains the resubmission regardless of question or channel."""
        response = (
            "We are aware that INV-2024-819 was submitted with incorrect 15% GST. The correct rate "
            "is 18%. INV-2024-891 is a corrected resubmission. We request partial approval for the "
            "₹3,240 tax differential only, not the full invoice amount. We will issue a credit note "
            "for the remaining amount."
        )
        return QueryResult(target="supplier", question=question, response=response, channel=channel), 0.10

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Ask an internal department; only finance has a substantive answer."""
        if department.lower() == "finance":
            # The payment fell short of the correct total, so finance reports an underpayment.
            response = (
                "Confirmed: INV-2024-819 was paid on March 1 for ₹1,24,200 (₹1,08,000 + 15% GST of "
                "₹16,200). The correct GST rate for logistics services is 18%. We underpaid — the "
                "correct total should have been ₹1,27,440. The tax differential is ₹3,240. This "
                "can be corrected via partial approval of the new invoice for ₹3,240 only."
            )
            return QueryResult(target="finance", question=question, response=response, channel="internal"), 0.12
        response = f"{department.title()}: No specific information available."
        return QueryResult(target=department.lower(), question=question, response=response, channel="internal"), 0.03

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Apply a business rule and return its outcome text with the step reward."""
        rules = {
            "partial_approval": ("APPLIED: Partial approval for ₹3,240 (tax correction delta). Main invoice amount blocked as duplicate.", 0.12),
            "credit_note_request": ("APPLIED: Credit note requested from supplier for balance amount. Reference: INV-2024-819 tax correction.", 0.10),
            "full_rejection": ("APPLIED: Full rejection. Invoice returned to supplier.", -0.05),
            "duplicate_block": ("APPLIED: Duplicate block activated. Full payment prevented.", 0.04),
            "tax_correction": ("APPLIED: Tax correction entry created referencing original INV-2024-819.", 0.08),
        }
        detail, reward = rules.get(rule_id, (f"Rule '{rule_id}' not found.", -0.03))
        return detail, reward

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Reward the decision; partial approval pays most when both root causes were checked."""
        checks_run = {c.check_name for c in ep.checks}
        dup_found = "duplicate_detection" in checks_run
        tax_found = "tax_calculation_verify" in checks_run
        if decision == "partial_approve":
            if dup_found and tax_found:
                return 0.28
            elif dup_found:
                return 0.14
            return 0.06
        elif decision == "reject":
            if dup_found:
                return 0.08
            return 0.02
        elif decision == "approve":
            # Paying the full duplicate is the worst outcome.
            return -0.15
        elif decision == "hold":
            return 0.06
        return 0.0

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Reward routing; finance is the intended destination."""
        routes = {"finance": 0.08, "procurement": 0.03, "legal": 0.02}
        return routes.get(team.lower(), 0.0)

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Reward closure; the higher reward needs a partial-approve decision on a not-yet-closed episode."""
        # NOTE(review): this relies on ep.closed still being False when called — confirm the
        # environment sets ep.closed only after simulate_close returns.
        if ep.decision == "partial_approve" and ep.closed is False:
            return 0.06
        elif ep.decision is not None:
            return 0.03
        return 0.0

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Produce the final score (clamped to [0, 1]) and the per-category sub-scores."""
        checks_run = {c.check_name for c in ep.checks}
        queries_to = {q.target for q in ep.queries}
        # Diagnosis (max 0.30)
        d = 0.0
        if "duplicate_detection" in checks_run:
            d += 0.16
        if "tax_calculation_verify" in checks_run:
            d += 0.14
        # Investigation (max 0.32)
        i = 0.0
        if "finance" in queries_to:
            i += 0.12
        if "supplier" in queries_to:
            i += 0.10
        if "partial_approval" in ep.rules_applied:
            i += 0.06
        if "credit_note_request" in ep.rules_applied:
            i += 0.04
        # Decision (max 0.20; full approval is a penalty)
        dec = 0.0
        if ep.decision == "partial_approve":
            dec = 0.20
        elif ep.decision == "reject":
            dec = 0.05
        elif ep.decision == "approve":
            dec = -0.15
        elif ep.decision == "hold":
            dec = 0.04
        # Routing (max 0.08)
        route = 0.08 if "finance" in ep.routed_to else 0.0
        # Closure (max 0.06)
        closure = 0.06 if ep.closed else 0.0
        # Efficiency (max 0.04; shrinks for every step beyond the tenth)
        eff = max(0.0, 0.04 - 0.003 * max(0, ep.step_count - 10))
        total = d + i + dec + route + closure + eff
        return {
            "score": round(max(0.0, min(1.0, total)), 4),
            "diagnosis_score": round(d, 4),
            "investigation_score": round(i, 4),
            "decision_score": round(dec, 4),
            "routing_score": round(route, 4),
            "closure_score": round(closure, 4),
            "efficiency_score": round(eff, 4),
        }
# ---------------------------------------------------------------------------
# Task 3 — Compound Fraud Signals (Hard)
# ---------------------------------------------------------------------------
class CompoundFraudTask(BaseTask):
"""
IT supplier submits ₹8,47,500 invoice for 15 laptops. System flags a bank
account change. But there are FOUR simultaneous fraud signals: bank BEC,
GSTIN mismatch, quantity mismatch (13 vs 15), and price inflation (8.65%).
Critical trap: querying supplier via email contacts the fraudster (-0.15).
Must use phone to reach real supplier (+0.15).
"""
task_id = "task3_compound_fraud"
max_steps = 25
difficulty = "hard"
def get_purchase_order(self) -> PurchaseOrder:
return PurchaseOrder(
po_number="PO-2024-0955",
vendor_name="TechCore Solutions",
po_date="2024-03-08",
line_items=[
LineItem(description="Business Laptop (14-inch, i7, 16GB)", quantity=15, unit_price=52000.0, total=780000.0, tax_rate=18.0),
],
total_amount=780000.0,
payment_terms="Net-30",
)
def get_invoice(self) -> Invoice:
return Invoice(
invoice_number="INV-TC-2024-0312",
supplier_name="TechCore Solutions",
invoice_date="2024-03-10",
due_date="2024-04-09",
po_reference="PO-2024-0955",
line_items=[
LineItem(description="Business Laptop (14-inch, i7, 16GB)", quantity=15, unit_price=56500.0, total=847500.0, tax_rate=18.0),
],
subtotal=847500.0,
tax_amount=152550.0,
tax_rate=18.0,
total_amount=1000050.0,
bank_account="5566778899",
bank_name="Yes Bank",
ifsc_code="YESB0000999",
supplier_gstin="07AABCT9999X1Z8",
supplier_email="accounts@techcore-solutions.com",
)
def get_grn(self) -> GoodsReceiptNote:
return GoodsReceiptNote(
grn_number="GRN-2024-0901",
po_reference="PO-2024-0955",
receipt_date="2024-03-15",
items_received=[
{"description": "Business Laptop (14-inch, i7, 16GB)", "quantity_received": 13, "quantity_pending": 2, "quantity_rejected": 0},
],
receiving_officer="Vikram Singh",
notes="13 of 15 laptops received. 2 units still in transit.",
)
def get_supplier_master(self) -> SupplierMaster:
return SupplierMaster(
supplier_id="SUP-0187",
supplier_name="TechCore Solutions",
registered_address="88 Tech Park, Sector 62, Noida 201301",
gstin="07AABCT1234Y1Z5",
bank_account="1234567890",
bank_name="State Bank of India",
ifsc_code="SBIN0001234",
contact_email="sales@techcore-solutions.in",
contact_phone="+91-120-456-7890",
registered_domain="techcore-solutions.in",
pan_number="AABCT1234Y",
status="active",
)
def get_exception_flag(self) -> ExceptionFlag:
return ExceptionFlag(
flag_code="BANK_ACCOUNT_CHANGE",
flag_description=(
"Invoice bank account (5566778899, Yes Bank) does not match supplier master "
"(1234567890, SBI). Bank account change request received from "
"accounts@techcore-solutions.com."
),
auto_hold=True,
flagged_date="2024-03-16",
severity="critical",
)
@property
def available_checks(self) -> List[str]:
return ["bank_account_verification", "gst_verification", "grn_match",
"email_domain_verification", "invoice_date_validation",
"quantity_check", "price_check", "duplicate_detection", "po_match"]
@property
def available_rules(self) -> List[str]:
return ["fraud_hold", "rejection_with_reason", "bank_change_verification",
"escalate_to_security"]
@property
def knowledge_base(self) -> List[str]:
return [
"POL-004: Bank account on invoice must match supplier master.",
"POL-009: Bank account change must be verified via registered phone number — NEVER via email.",
"POL-010: GSTIN on invoice must match supplier master. Mismatch is a fraud indicator.",
"POL-011: Invoice quantities must not exceed GRN quantities.",
"POL-012: Any suspected fraud must be escalated to Legal and Security teams.",
"POL-013: Do not process payment while fraud investigation is pending.",
]
def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
key_fields = {
("invoice", "bank_account"): ("5566778899 (Yes Bank) — DOES NOT MATCH supplier master (1234567890, SBI)", 0.12),
("invoice", "supplier_gstin"): ("07AABCT9999X1Z8 — DOES NOT MATCH supplier master (07AABCT1234Y1Z5)", 0.10),
("invoice", "supplier_email"): ("accounts@techcore-solutions.com — domain is .com, registered domain is .in", 0.08),
("grn", "items_received"): ("13 of 15 laptops received. 2 pending delivery.", 0.08),
("invoice", "line_items"): ("15 laptops @ ₹56,500 = ₹8,47,500. PO price was ₹52,000/unit.", 0.06),
("invoice", "invoice_date"): ("2024-03-10 (Sunday) — unusual for B2B invoicing", 0.04),
("invoice", "total_amount"): ("₹10,00,050 (₹8,47,500 + 18% GST ₹1,52,550)", 0.03),
}
key = (document.lower(), field.lower())
value, reward = key_fields.get(key, (f"{document}.{field} — value noted", 0.01))
return InspectionResult(document=document, field=field, value=value, note=""), reward
def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
checks = {
("bank_account", "invoice", "supplier_master"): (False, "MISMATCH: Invoice has 5566778899 (Yes Bank). Supplier master has 1234567890 (SBI). Change request from lookalike domain.", 0.14),
("gstin", "invoice", "supplier_master"): (False, "MISMATCH: Invoice GSTIN 07AABCT9999X1Z8 belongs to 'TechCore Trading Pvt Ltd' (different entity). Supplier master: 07AABCT1234Y1Z5.", 0.14),
("quantity", "invoice", "grn"): (False, "MISMATCH: Invoice claims 15 units. GRN shows only 13 received, 2 pending.", 0.10),
("unit_price", "invoice", "po"): (False, "MISMATCH: Invoice ₹56,500/unit vs PO ₹52,000/unit. Variance: +8.65%. No approved revision.", 0.08),
}
key = (field.lower(), doc_a.lower(), doc_b.lower())
passed, detail, reward = checks.get(key, (True, f"No mismatch for {field}.", 0.01))
return CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail), reward
def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
    """Run a named validation check against the scenario.

    Args:
        check_name: Identifier of the check; matched exactly (case-sensitive).

    Returns:
        A pair of (check result, step reward). Names not in the catalogue
        are reported as passed with reward 0.01.
    """
    # check name -> (passed, detail shown to the agent, reward)
    catalogue = {
        "bank_account_verification": (
            False,
            "FAILED: Bank account mismatch. Change request from techcore-solutions.com (lookalike of registered .in domain). Suspected BEC attack.",
            0.18,
        ),
        "gst_verification": (
            False,
            "FAILED: GSTIN 07AABCT9999X1Z8 registered to 'TechCore Trading Pvt Ltd' in Delhi — a DIFFERENT entity. Supplier master shows 07AABCT1234Y1Z5 for 'TechCore Solutions'.",
            0.18,
        ),
        "grn_match": (
            False,
            "FAILED: Invoice claims 15 laptops. GRN confirms only 13 received. 2 units still in transit.",
            0.14,
        ),
        "email_domain_verification": (
            False,
            "FAILED: Invoice email domain techcore-solutions.com does not match registered domain techcore-solutions.in. Lookalike domain — possible BEC.",
            0.16,
        ),
        "invoice_date_validation": (
            False,
            "WARNING: Invoice dated 2024-03-10 is a Sunday. Unusual for B2B invoicing.",
            0.08,
        ),
        "quantity_check": (
            False,
            "FAILED: Invoiced quantity (15) exceeds received quantity (13). 2 units undelivered.",
            0.12,
        ),
        "price_check": (
            False,
            "FAILED: Unit price ₹56,500 is 8.65% above PO price ₹52,000. No price revision approved.",
            0.10,
        ),
        "duplicate_detection": (True, "No duplicate invoice found.", 0.02),
        "po_match": (
            False,
            "FAILED: Multiple mismatches — GSTIN, quantity, and unit price all differ from PO/supplier master.",
            0.08,
        ),
    }
    outcome = catalogue.get(check_name)
    if outcome is None:
        # Unrecognised checks pass trivially and earn only the minimum reward.
        outcome = (True, f"Check '{check_name}' passed.", 0.01)
    passed, detail, reward = outcome
    return CheckResult(check_name=check_name, passed=passed, detail=detail), reward
def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
    """Ask the supplier a question over the given channel.

    This is the scenario's trap: the "email"/"mail" channel (matched
    case-insensitively) returns a reply confirming the changed bank account
    and costs -0.15, while any other channel returns a reply denying the
    change and earns +0.15.

    Args:
        question: The question text; recorded on the result unchanged.
        channel: Contact channel; recorded on the result unchanged.

    Returns:
        A pair of (query result with target "supplier", step reward).
    """
    via_email = channel.lower() in {"email", "mail"}
    if via_email:
        reply = (
            "Dear Customer, thank you for reaching out. We recently changed our banking details "
            "due to a corporate restructuring. The new account (5566778899, Yes Bank) is correct. "
            "Please process the payment at your earliest convenience. We can provide updated "
            "KYC documents if needed. — TechCore Solutions Finance Team"
        )
        reward = -0.15
    else:
        reply = (
            "This is Rajesh from TechCore Solutions. We have NOT sent any bank account change "
            "request. Our bank account is still 1234567890 with SBI. We also have not sent any "
            "invoice from techcore-solutions.com — our domain is techcore-solutions.in. This "
            "looks like a fraud attempt. Please do NOT process payment to the new account. "
            "We will file a complaint with our legal team."
        )
        reward = 0.15
    result = QueryResult(target="supplier", question=question, response=reply, channel=channel)
    return result, reward
def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
    """Ask an internal department a question.

    Args:
        department: Department name; matched case-insensitively.
        question: The question text; recorded on the result unchanged.

    Returns:
        A pair of (query result, step reward). The result's target is the
        lowercased department name and its channel is always "internal".
        Departments without a scripted answer get a generic reply and 0.02.
    """
    # lowercased department -> (scripted reply, reward)
    scripted = {
        "security": (
            "Security Team: We will initiate a BEC investigation. The lookalike domain "
            "techcore-solutions.com was registered 3 days before the invoice date. This is "
            "a classic Business Email Compromise pattern. Do NOT process any payment. "
            "We are preserving email headers for forensic analysis.",
            0.10,
        ),
        "legal": (
            "Legal Team: Based on the fraud indicators you've documented, we recommend: "
            "1) Immediate payment block, 2) Formal complaint to cybercrime authorities, "
            "3) Supplier audit of TechCore Solutions, 4) Review of all recent invoices "
            "from this supplier.",
            0.08,
        ),
        "finance": (
            "Finance: Payment has been blocked pending investigation. No funds released.",
            0.04,
        ),
        "procurement": (
            "Procurement: PO-2024-0955 was raised on March 8. Standard 2-day processing for IT equipment.",
            0.03,
        ),
    }
    dept = department.lower()
    if dept in scripted:
        reply, reward = scripted[dept]
    else:
        # The generic reply title-cases the caller's original spelling.
        reply = f"{department.title()}: No specific information available."
        reward = 0.02
    result = QueryResult(target=dept, question=question, response=reply, channel="internal")
    return result, reward
def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
    """Apply a business rule to the case.

    Args:
        rule_id: Identifier of the rule; matched exactly (case-sensitive).

    Returns:
        A pair of (outcome message, step reward). Rules outside the table
        yield a "not applicable" message and a -0.03 penalty.
    """
    # rule id -> (outcome message, reward)
    outcomes = {
        "fraud_hold": (
            "APPLIED: Fraud hold activated. All payments to this supplier frozen pending investigation.",
            0.10,
        ),
        "rejection_with_reason": (
            "APPLIED: Invoice rejected with documented fraud signals.",
            0.06,
        ),
        "bank_change_verification": (
            "APPLIED: Bank change verification protocol activated. Phone verification required per POL-009.",
            0.08,
        ),
        "escalate_to_security": (
            "APPLIED: Case escalated to Information Security team for BEC investigation.",
            0.08,
        ),
    }
    if rule_id in outcomes:
        return outcomes[rule_id]
    return f"Rule '{rule_id}' not applicable.", -0.03
def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
    """Score the agent's final decision on the invoice.

    Args:
        decision: One of "reject", "approve", "partial_approve" or "hold";
            compared exactly. Any other value scores 0.0.
        reason: Free-text justification; not used in the scoring.
        ep: Episode history; only the names of the checks already run are read.

    Returns:
        The step reward. "reject" and "hold" grow with the number of the
        four core fraud checks already run; "approve" and "partial_approve"
        are fixed penalties.
    """
    core_fraud_checks = (
        "bank_account_verification",
        "gst_verification",
        "grn_match",
        "email_domain_verification",
    )
    executed = {c.check_name for c in ep.checks}
    # How many of the four core fraud checks were run before deciding.
    signals = len(executed.intersection(core_fraud_checks))

    if decision == "approve":
        return -0.40
    if decision == "partial_approve":
        return -0.20
    if decision == "reject":
        return 0.10 + 0.05 * signals
    if decision == "hold":
        return 0.08 + 0.03 * signals
    return 0.0
def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
    """Score routing the case to a team.

    Args:
        team: Destination team; matched case-insensitively.
        notes: Hand-off notes; not used in the scoring.
        ep: Episode history; not used in the scoring.

    Returns:
        The step reward for the destination, or 0.0 for an unlisted team.
    """
    payout_by_team = dict(legal=0.14, security=0.12, finance=0.08, procurement=0.06)
    destination = team.lower()
    if destination in payout_by_team:
        return payout_by_team[destination]
    return 0.0
def simulate_close(self, summary: str, ep: EpisodeData) -> float:
    """Score closing the case.

    Args:
        summary: Closing summary text; not used in the scoring.
        ep: Episode history; its `closed` flag and `decision` are read.

    Returns:
        0.06 when the episode's `closed` flag is exactly False and the
        recorded decision is "reject"; otherwise 0.0.
    """
    # Identity check on purpose: only a literal False counts as "not yet closed".
    first_close_after_reject = ep.closed is False and ep.decision == "reject"
    return 0.06 if first_close_after_reject else 0.0
def grade(self, ep: EpisodeData) -> Dict[str, float]:
    """Compute the final score and sub-scores for a finished episode.

    The total is the sum of six components, clamped to [0.0, 1.0]:
    diagnosis (which checks were run), investigation (who was queried and
    how), decision, routing, closure and efficiency.

    Args:
        ep: Episode history. Reads `checks`, `queries`, `decision`,
            `routed_to`, `closed` and `step_count`.

    Returns:
        A dict with the clamped total under "score", the number of fraud
        checks run under "signals_found", and each component rounded to
        four decimals. Component values are reported unclamped, so
        "investigation_score" and "decision_score" can be negative.
    """
    checks_run = {c.check_name for c in ep.checks}
    bank_found = "bank_account_verification" in checks_run
    gst_found = "gst_verification" in checks_run
    qty_found = "grn_match" in checks_run
    domain_found = "email_domain_verification" in checks_run
    price_found = "price_check" in checks_run

    # Diagnosis: credit for each fraud-relevant check that was run.
    d = ((0.12 if bank_found else 0) + (0.12 if gst_found else 0)
         + (0.10 if qty_found else 0) + (0.10 if domain_found else 0)
         + (0.06 if price_found else 0))

    # Investigation: reward contacting the supplier by any non-email
    # channel, penalise email. The channel is lowercased before comparing
    # because simulate_query_supplier classifies it case-insensitively but
    # stores the caller's raw spelling; without this, "Email" would be
    # penalised at step time yet graded as a phone call here.
    email_channels = ("email", "mail")
    i = 0.0
    for q in ep.queries:
        if q.target != "supplier":
            continue
        channel = (q.channel or "").lower()
        if channel in email_channels:
            i -= 0.15
        else:
            i += 0.10
    queried_targets = {q.target for q in ep.queries}
    if "legal" in queried_targets:
        i += 0.06
    if "security" in queried_targets:
        i += 0.06

    # Decision: only the four core fraud checks count (price_check excluded).
    signals = sum([bank_found, gst_found, qty_found, domain_found])
    dec = 0.0
    if ep.decision == "reject":
        dec = 0.08 + 0.03 * signals
    elif ep.decision == "approve":
        dec = -0.35
    elif ep.decision == "partial_approve":
        dec = -0.15
    elif ep.decision == "hold":
        dec = 0.06

    # Routing: each listed team counts once, however often it was routed to.
    routes = set(ep.routed_to)
    route = ((0.10 if "legal" in routes else 0)
             + (0.06 if "security" in routes else 0)
             + (0.04 if "finance" in routes else 0))

    # Closure: only a closed case with a reject decision earns credit.
    closure = 0.06 if (ep.closed and ep.decision == "reject") else 0.0

    # Efficiency: full 0.04 up to 12 steps, then 0.002 less per extra step.
    eff = max(0.0, 0.04 - 0.002 * max(0, ep.step_count - 12))

    total = d + i + dec + route + closure + eff
    return {
        "score": round(max(0.0, min(1.0, total)), 4),
        "signals_found": sum([bank_found, gst_found, qty_found, domain_found, price_found]),
        "diagnosis_score": round(d, 4),
        "investigation_score": round(i, 4),
        "decision_score": round(dec, 4),
        "routing_score": round(route, 4),
        "closure_score": round(closure, 4),
        "efficiency_score": round(eff, 4),
    }
# ---------------------------------------------------------------------------
# Task Registry
# ---------------------------------------------------------------------------
# Maps each public task ID to the class that implements it.
TASK_REGISTRY: Dict[str, type] = dict(
    task1_price_variance=PriceVarianceTask,
    task2_duplicate_tax=DuplicateTaxErrorTask,
    task3_compound_fraud=CompoundFraudTask,
)

# Every registered task ID, in registration order.
ALL_TASKS = list(TASK_REGISTRY)
def make_task(task_id: str) -> BaseTask:
    """Build a fresh task instance for the given task ID.

    Args:
        task_id: A key of TASK_REGISTRY.

    Returns:
        A new instance of the registered task class, constructed with no
        arguments.

    Raises:
        ValueError: If `task_id` is not registered; the message lists the
            available IDs.
    """
    if task_id not in TASK_REGISTRY:
        raise ValueError(f"Unknown task '{task_id}'. Available: {ALL_TASKS}")
    task_cls = TASK_REGISTRY[task_id]
    return task_cls()