"""
Task definitions for the Invoice Exception Handler environment.
Each task defines a scenario with documents, simulator logic for every action
type, and a grader that produces sub-scores in [0.0, 1.0]. This is the biggest
file in the project — it contains all the business logic the environment needs.
"""
from __future__ import annotations
import time
from typing import Any, Dict, List, Optional, Tuple
from .models import (
ActionType, CheckResult, ExceptionFlag, GoodsReceiptNote,
InspectionResult, Invoice, LineItem, PurchaseOrder, QueryResult,
SupplierMaster,
)
# ---------------------------------------------------------------------------
# EpisodeData — mutable state for one episode
# ---------------------------------------------------------------------------
class EpisodeData:
    """Mutable per-episode record used for grading and state building.

    Every action the agent takes is logged on one of these attributes; the
    ``has_*`` helpers answer "was this already done?" questions against
    that log.
    """

    def __init__(self) -> None:
        # Evidence collected by the agent.
        self.inspections: List[InspectionResult] = []
        self.checks: List[CheckResult] = []
        self.queries: List[QueryResult] = []
        self.rules_applied: List[str] = []
        # Outcome of the case; None / empty / False until the agent acts.
        self.decision: Optional[str] = None
        self.decision_reason: Optional[str] = None
        self.routed_to: List[str] = []
        self.closed: bool = False
        self.close_summary: Optional[str] = None
        # Running counters.
        self.step_count: int = 0
        self.cumulative_reward: float = 0.0

    def has_inspected(self, doc: str, field: str) -> bool:
        """Return True when ``field`` of document ``doc`` is already logged."""
        for seen in self.inspections:
            if seen.document == doc and seen.field == field:
                return True
        return False

    def has_checked(self, name: str) -> bool:
        """Return True when a check called ``name`` is already logged."""
        for done in self.checks:
            if done.check_name == name:
                return True
        return False

    def has_queried(self, target: str) -> bool:
        """Return True when a query addressed to ``target`` is already logged."""
        for asked in self.queries:
            if asked.target == target:
                return True
        return False
# ---------------------------------------------------------------------------
# BaseTask — abstract interface
# ---------------------------------------------------------------------------
class BaseTask:
    """Interface every task scenario implements.

    The document getters, simulators and grader all raise
    ``NotImplementedError`` here; the three catalogue properties default to
    empty lists.
    """

    task_id: str = "base"
    max_steps: int = 20
    difficulty: str = "easy"

    # --- Scenario documents ---

    def get_purchase_order(self) -> PurchaseOrder:
        """Return the scenario's purchase order."""
        raise NotImplementedError

    def get_invoice(self) -> Invoice:
        """Return the scenario's invoice."""
        raise NotImplementedError

    def get_grn(self) -> GoodsReceiptNote:
        """Return the scenario's goods receipt note."""
        raise NotImplementedError

    def get_supplier_master(self) -> SupplierMaster:
        """Return the scenario's supplier master record."""
        raise NotImplementedError

    def get_exception_flag(self) -> ExceptionFlag:
        """Return the exception flag raised on the invoice."""
        raise NotImplementedError

    # --- Simulators: one per action type ---

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Inspect one field of one document; return (result, reward)."""
        raise NotImplementedError

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Compare ``field`` across two documents; return (result, reward)."""
        raise NotImplementedError

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Run a named validation check; return (result, reward)."""
        raise NotImplementedError

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Ask the supplier a question over ``channel``; return (result, reward)."""
        raise NotImplementedError

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Ask an internal department a question; return (result, reward)."""
        raise NotImplementedError

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Apply a business rule; return (outcome text, reward)."""
        raise NotImplementedError

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Return the reward for a decision given the episode so far."""
        raise NotImplementedError

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Return the reward for routing the case to ``team``."""
        raise NotImplementedError

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Return the reward for closing the case."""
        raise NotImplementedError

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Return the final score and sub-scores for a finished episode."""
        raise NotImplementedError

    # --- Catalogues (empty unless a subclass overrides them) ---

    @property
    def available_checks(self) -> List[str]:
        """Names of the validation checks offered by this task."""
        return []

    @property
    def available_rules(self) -> List[str]:
        """Identifiers of the business rules offered by this task."""
        return []

    @property
    def knowledge_base(self) -> List[str]:
        """Policy snippets offered by this task."""
        return []
# ---------------------------------------------------------------------------
# Task 1 — Price Variance Exception (Easy)
# ---------------------------------------------------------------------------
class PriceVarianceTask(BaseTask):
    """
    Office stationery invoice arrives 3.08% above the PO.

    Company tolerance is +/-2% auto-approval. Supplier had verbal approval
    from procurement for the price increase but the PO was never updated.

    Optimal path: check tolerance -> cross-check prices -> verify GRN ->
    query supplier -> query procurement -> apply exception rule -> approve ->
    route to procurement for PO amendment -> close.
    """

    task_id = "task1_price_variance"
    max_steps = 18
    difficulty = "easy"

    # --- Scenario documents ---

    def get_purchase_order(self) -> PurchaseOrder:
        """Return the PO: three stationery lines totalling ₹50,000."""
        return PurchaseOrder(
            po_number="PO-2024-1041",
            vendor_name="OfficeNeed Supplies",
            po_date="2024-02-15",
            line_items=[
                LineItem(description="A4 Paper", quantity=100, unit_price=220.0, total=22000.0, tax_rate=18.0),
                LineItem(description="Ballpoint Pens", quantity=20, unit_price=450.0, total=9000.0, tax_rate=18.0),
                LineItem(description="Staplers", quantity=10, unit_price=1900.0, total=19000.0, tax_rate=18.0),
            ],
            total_amount=50000.0,
            payment_terms="Net-30",
        )

    def get_invoice(self) -> Invoice:
        """Return the invoice: same quantities, two lines priced above the PO."""
        return Invoice(
            invoice_number="INV-ON-8821",
            supplier_name="OfficeNeed Supplies",
            invoice_date="2024-03-05",
            due_date="2024-04-04",
            po_reference="PO-2024-1041",
            line_items=[
                LineItem(description="A4 Paper", quantity=100, unit_price=231.0, total=23100.0, tax_rate=18.0),
                LineItem(description="Ballpoint Pens", quantity=20, unit_price=472.0, total=9440.0, tax_rate=18.0),
                LineItem(description="Staplers", quantity=10, unit_price=1900.0, total=19000.0, tax_rate=18.0),
            ],
            subtotal=51540.0,
            tax_amount=9277.20,
            tax_rate=18.0,
            total_amount=60817.20,
            bank_account="9876543210",
            bank_name="HDFC Bank",
            ifsc_code="HDFC0001234",
            supplier_gstin="29AABCO1234F1Z5",
            supplier_email="accounts@officeneed.com",
        )

    def get_grn(self) -> GoodsReceiptNote:
        """Return the GRN: every line fully received, nothing pending or rejected."""
        return GoodsReceiptNote(
            grn_number="GRN-2024-0892",
            po_reference="PO-2024-1041",
            receipt_date="2024-03-01",
            items_received=[
                {"description": "A4 Paper", "quantity_received": 100, "quantity_pending": 0, "quantity_rejected": 0},
                {"description": "Ballpoint Pens", "quantity_received": 20, "quantity_pending": 0, "quantity_rejected": 0},
                {"description": "Staplers", "quantity_received": 10, "quantity_pending": 0, "quantity_rejected": 0},
            ],
            receiving_officer="Ramesh Kumar",
            notes="All items received in good condition.",
        )

    def get_supplier_master(self) -> SupplierMaster:
        """Return the supplier master; bank and GSTIN equal the invoice's."""
        return SupplierMaster(
            supplier_id="SUP-0441",
            supplier_name="OfficeNeed Supplies",
            registered_address="45 MG Road, Bengaluru 560001",
            gstin="29AABCO1234F1Z5",
            bank_account="9876543210",
            bank_name="HDFC Bank",
            ifsc_code="HDFC0001234",
            contact_email="sales@officeneed.com",
            contact_phone="+91-80-4567-8901",
            registered_domain="officeneed.com",
            pan_number="AABCO1234F",
            status="active",
        )

    def get_exception_flag(self) -> ExceptionFlag:
        """Return the PRICE_MISMATCH flag that put the invoice on hold."""
        return ExceptionFlag(
            flag_code="PRICE_MISMATCH",
            flag_description=(
                "Invoice total ₹51,540 exceeds PO ₹50,000 by ₹1,540 (3.08%). "
                "Above auto-approval threshold."
            ),
            auto_hold=True,
            flagged_date="2024-03-06",
            severity="medium",
        )

    # --- Catalogues ---

    @property
    def available_checks(self) -> List[str]:
        """Names accepted by ``simulate_run_check`` with a scripted result."""
        return ["tolerance_rule", "grn_match", "duplicate_detection",
                "bank_account_verification", "gst_verification", "po_match"]

    @property
    def available_rules(self) -> List[str]:
        """Rule ids accepted by ``simulate_apply_rule`` with a scripted result."""
        return ["tolerance_2pct_auto_approve", "tolerance_exception_approval",
                "rejection_with_reason", "partial_approval"]

    @property
    def knowledge_base(self) -> List[str]:
        """Policy snippets relevant to this scenario."""
        return [
            "POL-001: Price variance ≤±2% may be auto-approved. Above 2% requires exception approval.",
            "POL-002: Exception approval requires confirmation from originating department.",
            "POL-003: Any approved invoice with a price change must be followed by a PO amendment request.",
            "POL-004: Bank account on invoice must match supplier master.",
        ]

    # --- Simulators ---

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Return meaningful values for key fields, small reward for others.

        Lookup is case-insensitive on both ``document`` and ``field``; any
        unscripted pair yields a generic "no anomaly" value worth 0.01.
        """
        key_fields = {
            ("invoice", "line_items"): ("A4 Paper @₹231 (+5%), Pens @₹472 (+4.9%), Staplers @₹1900 (unchanged)", 0.10),
            ("invoice", "total_amount"): ("₹51,540 (subtotal) + ₹9,277.20 (GST 18%) = ₹60,817.20", 0.08),
            ("po", "line_items"): ("A4 Paper @₹220, Pens @₹450, Staplers @₹1900. Total: ₹50,000", 0.06),
            ("grn", "items_received"): ("All 3 items fully received. No pending, no rejected.", 0.05),
            ("invoice", "bank_account"): ("9876543210 — HDFC Bank, IFSC HDFC0001234", 0.02),
            ("invoice", "supplier_gstin"): ("29AABCO1234F1Z5", 0.02),
        }
        key = (document.lower(), field.lower())
        value, reward = key_fields.get(key, (f"{document}.{field} — no anomaly detected", 0.01))
        result = InspectionResult(document=document, field=field, value=value, note="")
        return result, reward

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Cross-check a field between two documents.

        The comparison is symmetric: ``(invoice, po)`` and ``(po, invoice)``
        resolve to the same scripted outcome, so swapping the two documents
        can no longer report "No mismatch found" for a field that differs.
        Lookup is case-insensitive; unscripted combinations pass with a 0.01
        reward. The returned ``check_name`` keeps the caller's argument order.
        """
        checks = {
            ("unit_price", "invoice", "po"): (False, "MISMATCH: A4 Paper ₹231 vs ₹220 (+5.0%), Pens ₹472 vs ₹450 (+4.9%). Staplers match.", 0.12),
            ("total_amount", "invoice", "po"): (False, "Invoice subtotal ₹51,540 vs PO ₹50,000. Variance: +₹1,540 (+3.08%).", 0.10),
            ("bank_account", "invoice", "supplier_master"): (True, "Bank account 9876543210 matches supplier master.", 0.03),
            ("gstin", "invoice", "supplier_master"): (True, "GSTIN 29AABCO1234F1Z5 matches supplier master.", 0.02),
            ("quantity", "invoice", "grn"): (True, "All quantities match: 100 reams, 20 boxes, 10 units.", 0.04),
        }
        name, first, second = field.lower(), doc_a.lower(), doc_b.lower()
        outcome = checks.get((name, first, second))
        if outcome is None:
            # Same comparison with the documents named in the opposite order.
            outcome = checks.get((name, second, first))
        if outcome is None:
            outcome = (True, f"No mismatch found for {field} between {doc_a} and {doc_b}.", 0.01)
        passed, detail, reward = outcome
        result = CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail)
        return result, reward

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Run a named validation check.

        Names outside the scripted table pass with a 0.01 reward.
        """
        checks = {
            "tolerance_rule": (False, "Price variance 3.08% exceeds ±2% auto-approval threshold. Manual exception approval required.", 0.14),
            "grn_match": (True, "All items fully received. GRN matches invoice quantities.", 0.06),
            "duplicate_detection": (True, "No duplicate invoice found in payment history.", 0.02),
            "bank_account_verification": (True, "Bank account matches supplier master record.", 0.02),
            "gst_verification": (True, "GSTIN matches supplier master. GST calculation correct.", 0.02),
            "po_match": (False, "PO match FAILED on unit prices: 2 of 3 line items have price variance.", 0.08),
        }
        passed, detail, reward = checks.get(check_name, (True, f"Check '{check_name}' passed — no issues found.", 0.01))
        result = CheckResult(check_name=check_name, passed=passed, detail=detail)
        return result, reward

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Query the supplier — returns email explaining the price increase.

        The reply and the 0.10 reward do not depend on ``question`` or
        ``channel``.
        """
        response = (
            "Dear Sir/Madam, due to a 12% increase in raw material costs effective January 2024, "
            "we revised prices for A4 Paper and Ballpoint Pens. This was communicated to Mr. Arjun Mehta "
            "in your Procurement team via email on Feb 20, 2024. He acknowledged and verbally approved "
            "the revised pricing. We can provide the email trail if needed. — OfficeNeed Supplies"
        )
        result = QueryResult(target="supplier", question=question, response=response, channel=channel)
        return result, 0.10

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Query an internal department.

        Only procurement (matched case-insensitively) has a scripted answer,
        worth 0.12; every other department returns a generic reply worth 0.03.
        """
        if department.lower() == "procurement":
            response = (
                "Hi, this is Arjun Mehta from Procurement. Yes, I received the price revision email "
                "from OfficeNeed on Feb 20. I verbally approved it as the increase was reasonable "
                "(raw material cost pass-through). I should have raised a PO amendment but it slipped. "
                "I'll raise the amendment today. Please go ahead and approve the invoice."
            )
            return QueryResult(target="procurement", question=question, response=response, channel="internal"), 0.12
        response = f"{department.title()} department: We don't have specific information about this invoice exception."
        return QueryResult(target=department.lower(), question=question, response=response, channel="internal"), 0.03

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Apply a business rule.

        Returns the outcome text and reward; unknown rule ids cost 0.03.
        """
        rules = {
            "tolerance_2pct_auto_approve": ("BLOCKED: Cannot auto-approve. Price variance 3.08% exceeds ±2% threshold.", -0.05),
            "tolerance_exception_approval": ("APPLIED: Exception approval pathway activated. Requires department confirmation (obtained from procurement).", 0.10),
            "rejection_with_reason": ("APPLIED: Rejection rule activated. Invoice will be returned to supplier.", -0.08),
            "partial_approval": ("NOT APPLICABLE: All items received in full. Partial approval not warranted.", -0.05),
        }
        detail, reward = rules.get(rule_id, (f"Rule '{rule_id}' not found in policy database.", -0.03))
        return detail, reward

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Score the agent's decision based on evidence gathered.

        Approval is worth more once the tolerance check has been run, and
        most when procurement has also been queried. ``reason`` is not scored.
        """
        checks_run = {c.check_name for c in ep.checks}
        queries_to = {q.target for q in ep.queries}
        if decision == "approve":
            if "tolerance_rule" not in checks_run:
                return 0.05
            return 0.25 if "procurement" in queries_to else 0.18
        if decision == "reject":
            return -0.10
        if decision == "hold":
            return 0.08
        return 0.0

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Score routing decisions; teams without a scripted score earn 0.0."""
        routes = {"procurement": 0.12, "finance": 0.03, "legal": -0.05}
        return routes.get(team.lower(), 0.0)

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Score case closure.

        Full reward needs an approve decision, the tolerance check on record
        and a route to procurement; any other recorded decision earns half.
        """
        checks_run = {c.check_name for c in ep.checks}
        if ep.decision == "approve" and "tolerance_rule" in checks_run and "procurement" in ep.routed_to:
            return 0.12
        if ep.decision is not None:
            return 0.06
        return 0.0

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Final grader producing sub-scores.

        ``score`` is the sum of the sub-scores clamped to [0.0, 1.0]; the
        individual sub-scores are reported unclamped, so ``decision_score``
        is negative after a reject.
        """
        checks_run = {c.check_name for c in ep.checks}
        queries_to = {q.target for q in ep.queries}

        # Diagnosis
        d = 0.0
        if any("unit_price" in c.check_name or "total" in c.check_name for c in ep.checks):
            d += 0.12
        if "tolerance_rule" in checks_run:
            d += 0.14
        if "grn_match" in checks_run:
            d += 0.06

        # Investigation
        i = 0.0
        if "supplier" in queries_to:
            i += 0.10
        if "procurement" in queries_to:
            i += 0.12
        if "tolerance_exception_approval" in ep.rules_applied:
            i += 0.08

        # Decision
        dec = 0.0
        if ep.decision == "approve":
            dec += 0.18
        elif ep.decision == "hold":
            dec += 0.06
        elif ep.decision == "reject":
            dec -= 0.10

        # Routing
        route = 0.12 if "procurement" in ep.routed_to else 0.0

        # Closure
        closure = 0.08 if ep.closed else 0.0

        # Efficiency: full 0.06 up to 9 steps, then 0.004 less per extra step.
        eff = max(0.0, 0.06 - 0.004 * max(0, ep.step_count - 9))

        total = d + i + dec + route + closure + eff
        return {
            "score": round(max(0.0, min(1.0, total)), 4),
            "diagnosis_score": round(d, 4),
            "investigation_score": round(i, 4),
            "decision_score": round(dec, 4),
            "routing_score": round(route, 4),
            "closure_score": round(closure, 4),
            "efficiency_score": round(eff, 4),
        }
# ---------------------------------------------------------------------------
# Task 2 — Duplicate Invoice with Hidden Tax Error (Medium)
# ---------------------------------------------------------------------------
class DuplicateTaxErrorTask(BaseTask):
    """
    Logistics supplier submits INV-2024-891 which is a duplicate of already-paid
    INV-2024-819 (digit transposition). The original invoice applied 15% GST
    (wrong), correct rate is 18%. Company underpaid ₹3,240 (paid ₹1,24,200
    against a correct total of ₹1,27,440). The new invoice has the correct
    rate. It's both a duplicate AND a legitimate correction.
    """

    task_id = "task2_duplicate_tax"
    max_steps = 20
    difficulty = "medium"

    # --- Scenario documents ---

    def get_purchase_order(self) -> PurchaseOrder:
        """Return the PO: transport plus warehousing, ₹1,08,000 before tax."""
        return PurchaseOrder(
            po_number="PO-2024-0778",
            vendor_name="FastMove Logistics",
            po_date="2024-01-25",
            line_items=[
                LineItem(description="Mumbai-Pune Transport", quantity=20, unit_price=4500.0, total=90000.0, tax_rate=18.0),
                LineItem(description="Warehousing charges Feb 2024", quantity=1, unit_price=18000.0, total=18000.0, tax_rate=18.0),
            ],
            total_amount=108000.0,
            payment_terms="Net-15",
        )

    def get_invoice(self) -> Invoice:
        """Return the resubmitted invoice INV-2024-891 carrying 18% GST."""
        return Invoice(
            invoice_number="INV-2024-891",
            supplier_name="FastMove Logistics",
            invoice_date="2024-03-12",
            due_date="2024-03-27",
            po_reference="PO-2024-0778",
            line_items=[
                LineItem(description="Mumbai-Pune Transport", quantity=20, unit_price=4500.0, total=90000.0, tax_rate=18.0),
                LineItem(description="Warehousing charges Feb 2024", quantity=1, unit_price=18000.0, total=18000.0, tax_rate=18.0),
            ],
            subtotal=108000.0,
            tax_amount=19440.0,
            tax_rate=18.0,
            total_amount=127440.0,
            bank_account="1122334455",
            bank_name="ICICI Bank",
            ifsc_code="ICIC0005678",
            supplier_gstin="27AABCF5678G1Z3",
            supplier_email="billing@fastmove.in",
        )

    def get_grn(self) -> GoodsReceiptNote:
        """Return the GRN confirming both services were delivered in full."""
        return GoodsReceiptNote(
            grn_number="GRN-2024-0740",
            po_reference="PO-2024-0778",
            receipt_date="2024-02-28",
            items_received=[
                {"description": "Mumbai-Pune Transport", "quantity_received": 20, "quantity_pending": 0, "quantity_rejected": 0, "service_confirmed": True},
                {"description": "Warehousing charges Feb 2024", "quantity_received": 1, "quantity_pending": 0, "quantity_rejected": 0, "service_confirmed": True},
            ],
            receiving_officer="Priya Sharma",
            notes="All transport trips completed. Warehousing service confirmed for February.",
        )

    def get_supplier_master(self) -> SupplierMaster:
        """Return the supplier master; bank and GSTIN equal the invoice's."""
        return SupplierMaster(
            supplier_id="SUP-0229",
            supplier_name="FastMove Logistics",
            registered_address="12 Logistics Park, Navi Mumbai 400710",
            gstin="27AABCF5678G1Z3",
            bank_account="1122334455",
            bank_name="ICICI Bank",
            ifsc_code="ICIC0005678",
            contact_email="accounts@fastmove.in",
            contact_phone="+91-22-3456-7890",
            registered_domain="fastmove.in",
            pan_number="AABCF5678G",
            status="active",
        )

    def get_exception_flag(self) -> ExceptionFlag:
        """Return the POSSIBLE_DUPLICATE flag that put the invoice on hold."""
        return ExceptionFlag(
            flag_code="POSSIBLE_DUPLICATE",
            flag_description="Invoice INV-2024-891 closely matches previously processed invoice INV-2024-819. Possible duplicate submission.",
            auto_hold=True,
            flagged_date="2024-03-13",
            severity="high",
        )

    # --- Catalogues ---

    @property
    def available_checks(self) -> List[str]:
        """Names accepted by ``simulate_run_check`` with a scripted result."""
        return ["duplicate_detection", "tax_calculation_verify", "grn_match",
                "bank_account_verification", "gst_verification", "po_match"]

    @property
    def available_rules(self) -> List[str]:
        """Rule ids accepted by ``simulate_apply_rule`` with a scripted result."""
        return ["partial_approval", "credit_note_request", "full_rejection",
                "duplicate_block", "tax_correction"]

    @property
    def knowledge_base(self) -> List[str]:
        """Policy snippets relevant to this scenario."""
        return [
            "POL-005: Duplicate invoices must be rejected unless they represent a legitimate correction.",
            "POL-006: Tax calculation errors on paid invoices require a credit note and correction entry.",
            "POL-007: Partial approval may be used when only a portion of the invoice amount is valid.",
            "POL-008: Any tax correction must be documented with the original invoice reference.",
        ]

    # --- Simulators ---

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Inspect one document field (case-insensitive lookup).

        Unscripted pairs yield a generic "no anomaly" value worth 0.01.
        """
        key_fields = {
            ("invoice", "invoice_number"): ("INV-2024-891 — note digit transposition vs INV-2024-819 (891 vs 819)", 0.10),
            ("invoice", "tax_amount"): ("₹19,440 (18% GST on ₹1,08,000) — this is the CORRECT rate", 0.08),
            ("invoice", "total_amount"): ("₹1,27,440 (subtotal ₹1,08,000 + 18% GST ₹19,440)", 0.05),
            ("invoice", "line_items"): ("Transport 20×₹4,500 = ₹90,000 + Warehousing ₹18,000 = ₹1,08,000", 0.04),
        }
        key = (document.lower(), field.lower())
        value, reward = key_fields.get(key, (f"{document}.{field} — no anomaly detected", 0.01))
        return InspectionResult(document=document, field=field, value=value, note=""), reward

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Cross-check a field between two documents.

        The comparison is symmetric: naming the two documents in either order
        resolves to the same scripted outcome, so a reversed call can no
        longer report "No mismatch" for the duplicate or the tax discrepancy.
        Lookup is case-insensitive; unscripted combinations pass with a 0.01
        reward. The returned ``check_name`` keeps the caller's argument order.
        """
        checks = {
            ("invoice_number", "invoice", "payment_history"): (False, "MATCH FOUND: INV-2024-819 paid 12 days ago for ₹1,24,200. Digit transposition: 891 vs 819.", 0.15),
            ("tax_amount", "invoice", "payment_history"): (False, "TAX DISCREPANCY: Original INV-2024-819 had 15% GST (₹16,200). Current INV-2024-891 has 18% GST (₹19,440). Delta: ₹3,240.", 0.14),
            ("total_amount", "invoice", "po"): (True, "Invoice subtotal ₹1,08,000 matches PO total ₹1,08,000.", 0.03),
            ("bank_account", "invoice", "supplier_master"): (True, "Bank account matches supplier master.", 0.02),
        }
        name, first, second = field.lower(), doc_a.lower(), doc_b.lower()
        outcome = checks.get((name, first, second))
        if outcome is None:
            # Same comparison with the documents named in the opposite order.
            outcome = checks.get((name, second, first))
        if outcome is None:
            outcome = (True, f"No mismatch for {field}.", 0.01)
        passed, detail, reward = outcome
        return CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail), reward

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Run a named validation check.

        Names outside the scripted table pass with a 0.01 reward.
        """
        checks = {
            "duplicate_detection": (False, "DUPLICATE FOUND: INV-2024-891 matches INV-2024-819 (paid 12 days ago, ₹1,24,200). Invoice numbers differ by digit transposition (891 vs 819).", 0.18),
            # 15% was charged where 18% was due, so the paid amount fell short by ₹3,240.
            "tax_calculation_verify": (False, "TAX ERROR on ORIGINAL: INV-2024-819 applied 15% GST (₹16,200) instead of correct 18% (₹19,440). Company underpaid ₹3,240 in tax on already-paid invoice.", 0.16),
            "grn_match": (True, "Services fully confirmed. GRN matches invoice.", 0.04),
            "bank_account_verification": (True, "Bank account matches supplier master.", 0.02),
            "gst_verification": (True, "GSTIN matches supplier master.", 0.02),
            "po_match": (True, "PO amounts and line items match current invoice.", 0.03),
        }
        passed, detail, reward = checks.get(check_name, (True, f"Check '{check_name}' passed.", 0.01))
        return CheckResult(check_name=check_name, passed=passed, detail=detail), reward

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Query the supplier; the reply explains the corrected resubmission.

        The reply and the 0.10 reward do not depend on ``question`` or
        ``channel``.
        """
        response = (
            "We are aware that INV-2024-819 was submitted with incorrect 15% GST. The correct rate "
            "is 18%. INV-2024-891 is a corrected resubmission. We request partial approval for the "
            "₹3,240 tax differential only, not the full invoice amount. We will issue a credit note "
            "for the remaining amount."
        )
        return QueryResult(target="supplier", question=question, response=response, channel=channel), 0.10

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Query an internal department.

        Only finance (matched case-insensitively) has a scripted answer,
        worth 0.12; every other department returns a generic reply worth 0.03.
        """
        if department.lower() == "finance":
            response = (
                "Confirmed: INV-2024-819 was paid on March 1 for ₹1,24,200 (₹1,08,000 + 15% GST of "
                "₹16,200). The correct GST rate for logistics services is 18%. We underpaid — the "
                "correct total should have been ₹1,27,440. The tax differential is ₹3,240. This "
                "can be corrected via partial approval of the new invoice for ₹3,240 only."
            )
            return QueryResult(target="finance", question=question, response=response, channel="internal"), 0.12
        response = f"{department.title()}: No specific information available."
        return QueryResult(target=department.lower(), question=question, response=response, channel="internal"), 0.03

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Apply a business rule.

        Returns the outcome text and reward; unknown rule ids cost 0.03.
        """
        rules = {
            "partial_approval": ("APPLIED: Partial approval for ₹3,240 (tax correction delta). Main invoice amount blocked as duplicate.", 0.12),
            "credit_note_request": ("APPLIED: Credit note requested from supplier for balance amount. Reference: INV-2024-819 tax correction.", 0.10),
            "full_rejection": ("APPLIED: Full rejection. Invoice returned to supplier.", -0.05),
            "duplicate_block": ("APPLIED: Duplicate block activated. Full payment prevented.", 0.04),
            "tax_correction": ("APPLIED: Tax correction entry created referencing original INV-2024-819.", 0.08),
        }
        detail, reward = rules.get(rule_id, (f"Rule '{rule_id}' not found.", -0.03))
        return detail, reward

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Score the agent's decision based on the checks already run.

        Partial approval scores highest once both the duplicate and the tax
        error have been found; a plain approve is penalised. ``reason`` is
        not scored.
        """
        checks_run = {c.check_name for c in ep.checks}
        dup_found = "duplicate_detection" in checks_run
        tax_found = "tax_calculation_verify" in checks_run
        if decision == "partial_approve":
            if dup_found and tax_found:
                return 0.28
            if dup_found:
                return 0.14
            return 0.06
        if decision == "reject":
            return 0.08 if dup_found else 0.02
        if decision == "approve":
            return -0.15
        if decision == "hold":
            return 0.06
        return 0.0

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Score routing decisions; teams without a scripted score earn 0.0."""
        routes = {"finance": 0.08, "procurement": 0.03, "legal": 0.02}
        return routes.get(team.lower(), 0.0)

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Score case closure.

        NOTE(review): the top reward requires ``ep.closed`` to still be
        False, so this presumably runs before the caller marks the episode
        closed — confirm against the environment.
        """
        if ep.decision == "partial_approve" and not ep.closed:
            return 0.06
        if ep.decision is not None:
            return 0.03
        return 0.0

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Final grader producing sub-scores.

        ``score`` is the sum of the sub-scores clamped to [0.0, 1.0]; the
        individual sub-scores are reported unclamped, so ``decision_score``
        is negative after a plain approve.
        """
        checks_run = {c.check_name for c in ep.checks}
        queries_to = {q.target for q in ep.queries}

        # Diagnosis (max 0.30)
        d = 0.0
        if "duplicate_detection" in checks_run:
            d += 0.16
        if "tax_calculation_verify" in checks_run:
            d += 0.14

        # Investigation (max 0.32)
        i = 0.0
        if "finance" in queries_to:
            i += 0.12
        if "supplier" in queries_to:
            i += 0.10
        if "partial_approval" in ep.rules_applied:
            i += 0.06
        if "credit_note_request" in ep.rules_applied:
            i += 0.04

        # Decision (max 0.20)
        dec = 0.0
        if ep.decision == "partial_approve":
            dec = 0.20
        elif ep.decision == "reject":
            dec = 0.05
        elif ep.decision == "approve":
            dec = -0.15
        elif ep.decision == "hold":
            dec = 0.04

        # Routing (max 0.08)
        route = 0.08 if "finance" in ep.routed_to else 0.0

        # Closure (max 0.06)
        closure = 0.06 if ep.closed else 0.0

        # Efficiency: full 0.04 up to 10 steps, then 0.003 less per extra step.
        eff = max(0.0, 0.04 - 0.003 * max(0, ep.step_count - 10))

        total = d + i + dec + route + closure + eff
        return {
            "score": round(max(0.0, min(1.0, total)), 4),
            "diagnosis_score": round(d, 4),
            "investigation_score": round(i, 4),
            "decision_score": round(dec, 4),
            "routing_score": round(route, 4),
            "closure_score": round(closure, 4),
            "efficiency_score": round(eff, 4),
        }
# ---------------------------------------------------------------------------
# Task 3 — Compound Fraud Signals (Hard)
# ---------------------------------------------------------------------------
class CompoundFraudTask(BaseTask):
"""
IT supplier submits ₹8,47,500 invoice for 15 laptops. System flags a bank
account change. But there are FOUR simultaneous fraud signals: bank BEC,
GSTIN mismatch, quantity mismatch (13 vs 15), and price inflation (8.65%).
Critical trap: querying supplier via email contacts the fraudster (-0.15).
Must use phone to reach real supplier (+0.15).
"""
task_id = "task3_compound_fraud"
max_steps = 25
difficulty = "hard"
def get_purchase_order(self) -> PurchaseOrder:
    """Return the PO: 15 laptops at ₹52,000 each, ₹7,80,000 in total."""
    laptops = LineItem(
        description="Business Laptop (14-inch, i7, 16GB)",
        quantity=15,
        unit_price=52000.0,
        total=780000.0,
        tax_rate=18.0,
    )
    purchase_order = PurchaseOrder(
        po_number="PO-2024-0955",
        vendor_name="TechCore Solutions",
        po_date="2024-03-08",
        line_items=[laptops],
        total_amount=780000.0,
        payment_terms="Net-30",
    )
    return purchase_order
def get_invoice(self) -> Invoice:
    """Return the invoice: 15 laptops billed at ₹56,500 each plus 18% GST."""
    billed_laptops = LineItem(
        description="Business Laptop (14-inch, i7, 16GB)",
        quantity=15,
        unit_price=56500.0,
        total=847500.0,
        tax_rate=18.0,
    )
    invoice = Invoice(
        invoice_number="INV-TC-2024-0312",
        supplier_name="TechCore Solutions",
        invoice_date="2024-03-10",
        due_date="2024-04-09",
        po_reference="PO-2024-0955",
        line_items=[billed_laptops],
        subtotal=847500.0,
        tax_amount=152550.0,
        tax_rate=18.0,
        total_amount=1000050.0,
        bank_account="5566778899",
        bank_name="Yes Bank",
        ifsc_code="YESB0000999",
        supplier_gstin="07AABCT9999X1Z8",
        supplier_email="accounts@techcore-solutions.com",
    )
    return invoice
def get_grn(self) -> GoodsReceiptNote:
    """Return the GRN: 13 laptops received, 2 pending, none rejected."""
    laptop_receipt = {
        "description": "Business Laptop (14-inch, i7, 16GB)",
        "quantity_received": 13,
        "quantity_pending": 2,
        "quantity_rejected": 0,
    }
    goods_receipt = GoodsReceiptNote(
        grn_number="GRN-2024-0901",
        po_reference="PO-2024-0955",
        receipt_date="2024-03-15",
        items_received=[laptop_receipt],
        receiving_officer="Vikram Singh",
        notes="13 of 15 laptops received. 2 units still in transit.",
    )
    return goods_receipt
def get_supplier_master(self) -> SupplierMaster:
    """Return the supplier master record held for TechCore Solutions."""
    master_record = SupplierMaster(
        supplier_id="SUP-0187",
        supplier_name="TechCore Solutions",
        registered_address="88 Tech Park, Sector 62, Noida 201301",
        gstin="07AABCT1234Y1Z5",
        bank_account="1234567890",
        bank_name="State Bank of India",
        ifsc_code="SBIN0001234",
        contact_email="sales@techcore-solutions.in",
        contact_phone="+91-120-456-7890",
        registered_domain="techcore-solutions.in",
        pan_number="AABCT1234Y",
        status="active",
    )
    return master_record
def get_exception_flag(self) -> ExceptionFlag:
    """Return the exception flag fixture: a critical, auto-held bank account change."""
    description = (
        "Invoice bank account (5566778899, Yes Bank) does not match supplier master "
        "(1234567890, SBI). Bank account change request received from "
        "accounts@techcore-solutions.com."
    )
    return ExceptionFlag(
        flag_code="BANK_ACCOUNT_CHANGE",
        flag_description=description,
        auto_hold=True,
        flagged_date="2024-03-16",
        severity="critical",
    )
@property
def available_checks(self) -> List[str]:
    """Names of the checks offered in this scenario, as a fresh list."""
    names = (
        "bank_account_verification",
        "gst_verification",
        "grn_match",
        "email_domain_verification",
        "invoice_date_validation",
        "quantity_check",
        "price_check",
        "duplicate_detection",
        "po_match",
    )
    return list(names)
@property
def available_rules(self) -> List[str]:
    """Identifiers of the rules offered in this scenario, as a fresh list."""
    rule_ids = (
        "fraud_hold",
        "rejection_with_reason",
        "bank_change_verification",
        "escalate_to_security",
    )
    return list(rule_ids)
@property
def knowledge_base(self) -> List[str]:
    """Policy statements available in this scenario, one ``"POL-xxx: text"`` string each."""
    policies = (
        ("POL-004", "Bank account on invoice must match supplier master."),
        ("POL-009", "Bank account change must be verified via registered phone number — NEVER via email."),
        ("POL-010", "GSTIN on invoice must match supplier master. Mismatch is a fraud indicator."),
        ("POL-011", "Invoice quantities must not exceed GRN quantities."),
        ("POL-012", "Any suspected fraud must be escalated to Legal and Security teams."),
        ("POL-013", "Do not process payment while fraud investigation is pending."),
    )
    return [f"{code}: {text}" for code, text in policies]
def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
    """Return the scripted observation and reward for inspecting one field.

    Document and field names are matched case-insensitively. A field with
    no scripted entry yields a generic "value noted" observation and a
    reward of 0.01.
    """
    # Scripted observations, grouped by document and then by field.
    scripted = {
        "invoice": {
            "bank_account": ("5566778899 (Yes Bank) — DOES NOT MATCH supplier master (1234567890, SBI)", 0.12),
            "supplier_gstin": ("07AABCT9999X1Z8 — DOES NOT MATCH supplier master (07AABCT1234Y1Z5)", 0.10),
            "supplier_email": ("accounts@techcore-solutions.com — domain is .com, registered domain is .in", 0.08),
            "line_items": ("15 laptops @ ₹56,500 = ₹8,47,500. PO price was ₹52,000/unit.", 0.06),
            "invoice_date": ("2024-03-10 (Sunday) — unusual for B2B invoicing", 0.04),
            "total_amount": ("₹10,00,050 (₹8,47,500 + 18% GST ₹1,52,550)", 0.03),
        },
        "grn": {
            "items_received": ("13 of 15 laptops received. 2 pending delivery.", 0.08),
        },
    }
    fallback = (f"{document}.{field} — value noted", 0.01)
    observed, reward = scripted.get(document.lower(), {}).get(field.lower(), fallback)
    # The result echoes the caller's original spelling of document and field.
    result = InspectionResult(document=document, field=field, value=observed, note="")
    return result, reward
def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
    """Return the scripted outcome of comparing one field across two documents.

    Matching is case-insensitive and does not depend on the order in which
    the two documents are named: comparing ``invoice`` with
    ``supplier_master`` gives the same outcome as comparing
    ``supplier_master`` with ``invoice``. Without this, naming the documents
    in the "wrong" order reported a scripted mismatch as "No mismatch".

    Args:
        field: Name of the field to compare.
        doc_a: First document name.
        doc_b: Second document name.

    Returns:
        A ``(CheckResult, reward)`` pair. Comparisons with no scripted entry
        pass with a reward of 0.01. The result's ``check_name`` is built from
        the arguments exactly as the caller supplied them.
    """
    checks = {
        ("bank_account", "invoice", "supplier_master"): (False, "MISMATCH: Invoice has 5566778899 (Yes Bank). Supplier master has 1234567890 (SBI). Change request from lookalike domain.", 0.14),
        ("gstin", "invoice", "supplier_master"): (False, "MISMATCH: Invoice GSTIN 07AABCT9999X1Z8 belongs to 'TechCore Trading Pvt Ltd' (different entity). Supplier master: 07AABCT1234Y1Z5.", 0.14),
        ("quantity", "invoice", "grn"): (False, "MISMATCH: Invoice claims 15 units. GRN shows only 13 received, 2 pending.", 0.10),
        ("unit_price", "invoice", "po"): (False, "MISMATCH: Invoice ₹56,500/unit vs PO ₹52,000/unit. Variance: +8.65%. No approved revision.", 0.08),
    }
    name, first, second = field.lower(), doc_a.lower(), doc_b.lower()
    outcome = checks.get((name, first, second))
    if outcome is None:
        # A cross-check is symmetric, so retry with the documents swapped.
        outcome = checks.get((name, second, first))
    if outcome is None:
        outcome = (True, f"No mismatch for {field}.", 0.01)
    passed, detail, reward = outcome
    return CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail), reward
def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
    """Return the scripted result and reward for running a named check.

    The name is matched exactly. A name with no scripted entry is reported
    as passed with a reward of 0.01.
    """
    outcomes = {
        "bank_account_verification": (False, "FAILED: Bank account mismatch. Change request from techcore-solutions.com (lookalike of registered .in domain). Suspected BEC attack.", 0.18),
        "gst_verification": (False, "FAILED: GSTIN 07AABCT9999X1Z8 registered to 'TechCore Trading Pvt Ltd' in Delhi — a DIFFERENT entity. Supplier master shows 07AABCT1234Y1Z5 for 'TechCore Solutions'.", 0.18),
        "grn_match": (False, "FAILED: Invoice claims 15 laptops. GRN confirms only 13 received. 2 units still in transit.", 0.14),
        "email_domain_verification": (False, "FAILED: Invoice email domain techcore-solutions.com does not match registered domain techcore-solutions.in. Lookalike domain — possible BEC.", 0.16),
        "invoice_date_validation": (False, "WARNING: Invoice dated 2024-03-10 is a Sunday. Unusual for B2B invoicing.", 0.08),
        "quantity_check": (False, "FAILED: Invoiced quantity (15) exceeds received quantity (13). 2 units undelivered.", 0.12),
        "price_check": (False, "FAILED: Unit price ₹56,500 is 8.65% above PO price ₹52,000. No price revision approved.", 0.10),
        "duplicate_detection": (True, "No duplicate invoice found.", 0.02),
        "po_match": (False, "FAILED: Multiple mismatches — GSTIN, quantity, and unit price all differ from PO/supplier master.", 0.08),
    }
    if check_name in outcomes:
        ok, message, reward = outcomes[check_name]
    else:
        ok, message, reward = True, f"Check '{check_name}' passed.", 0.01
    result = CheckResult(check_name=check_name, passed=ok, detail=message)
    return result, reward
def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
    """Return the scripted supplier reply and reward for a query on ``channel``.

    This is the scenario's trap: an "email" or "mail" channel (matched
    case-insensitively) returns the fraudster's reply with a reward of
    -0.15. Every other channel returns the genuine supplier's reply with a
    reward of 0.15.
    """
    via_email = channel.lower() in ("email", "mail")
    if via_email:
        reward = -0.15
        reply = (
            "Dear Customer, thank you for reaching out. We recently changed our banking details "
            "due to a corporate restructuring. The new account (5566778899, Yes Bank) is correct. "
            "Please process the payment at your earliest convenience. We can provide updated "
            "KYC documents if needed. — TechCore Solutions Finance Team"
        )
    else:
        reward = 0.15
        reply = (
            "This is Rajesh from TechCore Solutions. We have NOT sent any bank account change "
            "request. Our bank account is still 1234567890 with SBI. We also have not sent any "
            "invoice from techcore-solutions.com — our domain is techcore-solutions.in. This "
            "looks like a fraud attempt. Please do NOT process payment to the new account. "
            "We will file a complaint with our legal team."
        )
    # The result records the channel exactly as the caller spelled it.
    result = QueryResult(target="supplier", question=question, response=reply, channel=channel)
    return result, reward
def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
    """Return the scripted reply and reward for a query to an internal department.

    The department name is matched case-insensitively against security,
    legal, finance and procurement. Any other department gets a generic
    "no specific information" reply with a reward of 0.02. The result's
    target is always the lower-cased department name.
    """
    scripted = {
        "security": (
            "Security Team: We will initiate a BEC investigation. The lookalike domain "
            "techcore-solutions.com was registered 3 days before the invoice date. This is "
            "a classic Business Email Compromise pattern. Do NOT process any payment. "
            "We are preserving email headers for forensic analysis.",
            0.10,
        ),
        "legal": (
            "Legal Team: Based on the fraud indicators you've documented, we recommend: "
            "1) Immediate payment block, 2) Formal complaint to cybercrime authorities, "
            "3) Supplier audit of TechCore Solutions, 4) Review of all recent invoices "
            "from this supplier.",
            0.08,
        ),
        "finance": (
            "Finance: Payment has been blocked pending investigation. No funds released.",
            0.04,
        ),
        "procurement": (
            "Procurement: PO-2024-0955 was raised on March 8. Standard 2-day processing for IT equipment.",
            0.03,
        ),
    }
    target = department.lower()
    if target in scripted:
        reply, reward = scripted[target]
    else:
        reply, reward = f"{department.title()}: No specific information available.", 0.02
    result = QueryResult(target=target, question=question, response=reply, channel="internal")
    return result, reward
def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
    """Return the scripted outcome message and reward for applying a rule.

    A rule ID with no scripted entry yields a "not applicable" message and
    a penalty of -0.03.
    """
    outcomes = {
        "fraud_hold": ("APPLIED: Fraud hold activated. All payments to this supplier frozen pending investigation.", 0.10),
        "rejection_with_reason": ("APPLIED: Invoice rejected with documented fraud signals.", 0.06),
        "bank_change_verification": ("APPLIED: Bank change verification protocol activated. Phone verification required per POL-009.", 0.08),
        "escalate_to_security": ("APPLIED: Case escalated to Information Security team for BEC investigation.", 0.08),
    }
    try:
        message, reward = outcomes[rule_id]
    except KeyError:
        message, reward = f"Rule '{rule_id}' not applicable.", -0.03
    return message, reward
def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
    """Return the step reward for a final decision on the invoice.

    "reject" and "hold" earn a base reward plus a bonus for each of the
    four core checks (bank account, GST, GRN match, e-mail domain) already
    recorded in ``ep.checks``. "approve" and "partial_approve" are
    penalised. Any other decision is worth 0.0. ``reason`` is not used.
    """
    executed = {check.check_name for check in ep.checks}
    core_checks = (
        "bank_account_verification",
        "gst_verification",
        "grn_match",
        "email_domain_verification",
    )
    signals = sum(1 for name in core_checks if name in executed)

    if decision == "reject":
        return 0.10 + 0.05 * signals
    if decision == "approve":
        return -0.40
    if decision == "partial_approve":
        return -0.20
    if decision == "hold":
        return 0.08 + 0.03 * signals
    return 0.0
def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
    """Return the step reward for routing the case to ``team``.

    The team name is matched case-insensitively. Teams without a scripted
    reward are worth 0.0. ``notes`` and ``ep`` are not used.
    """
    destination = team.lower()
    scripted_rewards = (
        ("legal", 0.14),
        ("security", 0.12),
        ("finance", 0.08),
        ("procurement", 0.06),
    )
    for name, reward in scripted_rewards:
        if name == destination:
            return reward
    return 0.0
def simulate_close(self, summary: str, ep: EpisodeData) -> float:
    """Return the step reward for closing the case.

    The reward is 0.06 only when ``ep.closed`` is exactly ``False`` and the
    recorded decision is "reject". Otherwise it is 0.0. ``summary`` is not
    used.
    """
    # Identity check on purpose: only a literal False counts as "not yet closed".
    if ep.closed is not False:
        return 0.0
    return 0.06 if ep.decision == "reject" else 0.0
def grade(self, ep: EpisodeData) -> Dict[str, float]:
    """Compute the final score breakdown for a finished episode.

    The total is the sum of six components, clamped to ``[0.0, 1.0]``:

    * diagnosis - which of the five key checks appear in ``ep.checks``;
    * investigation - supplier queries (+0.10 each for non-e-mail channels,
      -0.15 each for "email"/"mail") plus one-off bonuses for querying
      legal and security;
    * decision - reward for "reject"/"hold", penalty for
      "approve"/"partial_approve";
    * routing - bonuses for routing to legal, security and finance;
    * closure - bonus when the case is closed after a "reject";
    * efficiency - 0.04, reduced by 0.002 per step beyond 12, floored at 0.

    Supplier-query channels and routed team names are compared
    case-insensitively. This matches ``simulate_query_supplier`` and
    ``simulate_route_to``, which lower-case their inputs. Otherwise an
    "Email" query would be penalised during the episode yet rewarded here
    as a phone call.

    Args:
        ep: Episode record. Reads ``checks`` (items with ``check_name``),
            ``queries`` (items with ``target`` and ``channel``),
            ``decision``, ``routed_to``, ``closed`` and ``step_count``.

    Returns:
        A dict with the clamped ``score``, the integer ``signals_found``
        and the six rounded component scores.
    """
    checks_run = {c.check_name for c in ep.checks}
    bank_found = "bank_account_verification" in checks_run
    gst_found = "gst_verification" in checks_run
    qty_found = "grn_match" in checks_run
    domain_found = "email_domain_verification" in checks_run
    price_found = "price_check" in checks_run

    # Diagnosis
    d = ((0.12 if bank_found else 0) + (0.12 if gst_found else 0)
         + (0.10 if qty_found else 0) + (0.10 if domain_found else 0)
         + (0.06 if price_found else 0))

    # Investigation — reward phone, penalise email
    # NOTE(review): the phone bonus accumulates once per supplier query, with
    # no cap; confirm that repeated queries are meant to stack.
    i = 0.0
    for q in ep.queries:
        if q.target != "supplier":
            continue
        # Normalise case so "Email"/"MAIL" are treated like "email"/"mail".
        channel = str(q.channel).lower()
        if channel in ("email", "mail"):
            i -= 0.15
        else:
            i += 0.10
    targets = {q.target for q in ep.queries}
    if "legal" in targets:
        i += 0.06
    if "security" in targets:
        i += 0.06

    # Decision
    signals = sum([bank_found, gst_found, qty_found, domain_found])
    dec = 0.0
    if ep.decision == "reject":
        dec = 0.08 + 0.03 * signals
    elif ep.decision == "approve":
        dec = -0.35
    elif ep.decision == "partial_approve":
        dec = -0.15
    elif ep.decision == "hold":
        dec = 0.06

    # Routing — team names compared case-insensitively.
    routes = {str(team).lower() for team in ep.routed_to}
    route = ((0.10 if "legal" in routes else 0)
             + (0.06 if "security" in routes else 0)
             + (0.04 if "finance" in routes else 0))

    # Closure
    closure = 0.06 if (ep.closed and ep.decision == "reject") else 0.0

    # Efficiency
    eff = max(0.0, 0.04 - 0.002 * max(0, ep.step_count - 12))

    total = d + i + dec + route + closure + eff
    return {
        "score": round(max(0.0, min(1.0, total)), 4),
        "signals_found": sum([bank_found, gst_found, qty_found, domain_found, price_found]),
        "diagnosis_score": round(d, 4),
        "investigation_score": round(i, 4),
        "decision_score": round(dec, 4),
        "routing_score": round(route, 4),
        "closure_score": round(closure, 4),
        "efficiency_score": round(eff, 4),
    }
# ---------------------------------------------------------------------------
# Task Registry
# ---------------------------------------------------------------------------
# Maps each task ID to the class that implements it.
TASK_REGISTRY: Dict[str, type] = {
    "task1_price_variance": PriceVarianceTask,
    "task2_duplicate_tax": DuplicateTaxErrorTask,
    "task3_compound_fraud": CompoundFraudTask,
}

# Registered task IDs, in registration order.
ALL_TASKS = list(TASK_REGISTRY)
def make_task(task_id: str) -> BaseTask:
    """Build a new task instance for ``task_id``.

    Raises:
        ValueError: If ``task_id`` is not a key of ``TASK_REGISTRY``. The
            message lists the available IDs.
    """
    task_cls = TASK_REGISTRY.get(task_id)
    if task_cls is not None:
        return task_cls()
    raise ValueError(f"Unknown task '{task_id}'. Available: {ALL_TASKS}")
|