Spaces:

YUS200619
/

invoice-exception-handler

Sleeping

File size: 47,433 Bytes

562f58d

"""
Task definitions for the Invoice Exception Handler environment.

Each task defines a scenario with documents, simulator logic for every action
type, and a grader that produces per-category sub-scores plus an overall score
clamped to [0.0, 1.0] (the decision sub-score can be negative when the wrong
decision is made). This is the biggest file in the project — it contains all
the business logic the environment needs.
"""
from __future__ import annotations

import time
from typing import Any, Dict, List, Optional, Tuple

from .models import (
    ActionType, CheckResult, ExceptionFlag, GoodsReceiptNote,
    InspectionResult, Invoice, LineItem, PurchaseOrder, QueryResult,
    SupplierMaster,
)


# ---------------------------------------------------------------------------
# EpisodeData — mutable state for one episode
# ---------------------------------------------------------------------------

class EpisodeData:
    """Mutable record of everything that has happened in a single episode.

    Simulators and graders read this history to score decisions and to build
    the final score breakdown.
    """

    def __init__(self) -> None:
        # Evidence collected so far, in the order it was gathered.
        self.inspections: List[InspectionResult] = []
        self.checks: List[CheckResult] = []
        self.queries: List[QueryResult] = []
        self.rules_applied: List[str] = []

        # Outcome of the case; starts out undecided, unrouted and open.
        self.decision: Optional[str] = None
        self.decision_reason: Optional[str] = None
        self.routed_to: List[str] = []
        self.closed: bool = False
        self.close_summary: Optional[str] = None

        # Running counters.
        self.step_count: int = 0
        self.cumulative_reward: float = 0.0

    def has_inspected(self, doc: str, field: str) -> bool:
        """Return True if *field* of document *doc* was already inspected."""
        for entry in self.inspections:
            if entry.document == doc and entry.field == field:
                return True
        return False

    def has_checked(self, name: str) -> bool:
        """Return True if a check called *name* is already in the history."""
        for entry in self.checks:
            if entry.check_name == name:
                return True
        return False

    def has_queried(self, target: str) -> bool:
        """Return True if *target* (a person or department) was already queried."""
        for entry in self.queries:
            if entry.target == target:
                return True
        return False


# ---------------------------------------------------------------------------
# BaseTask — abstract interface
# ---------------------------------------------------------------------------

class BaseTask:
    """Interface shared by every task scenario.

    Concrete tasks override the document getters, the ``simulate_*`` hooks
    and ``grade``. Every hook here raises ``NotImplementedError``; the three
    catalogue properties default to empty lists.
    """

    task_id: str = "base"
    max_steps: int = 20
    difficulty: str = "easy"

    # --- Scenario documents ---

    def get_purchase_order(self) -> PurchaseOrder:
        """Return the purchase order for this scenario."""
        raise NotImplementedError()

    def get_invoice(self) -> Invoice:
        """Return the invoice under review."""
        raise NotImplementedError()

    def get_grn(self) -> GoodsReceiptNote:
        """Return the goods receipt note for this scenario."""
        raise NotImplementedError()

    def get_supplier_master(self) -> SupplierMaster:
        """Return the supplier master record."""
        raise NotImplementedError()

    def get_exception_flag(self) -> ExceptionFlag:
        """Return the exception flag raised on the invoice."""
        raise NotImplementedError()

    # --- Action simulators ---

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Inspect one field of one document; return (result, reward)."""
        raise NotImplementedError()

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Compare a field across two documents; return (result, reward)."""
        raise NotImplementedError()

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Run a named validation check; return (result, reward)."""
        raise NotImplementedError()

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Ask the supplier a question over a channel; return (result, reward)."""
        raise NotImplementedError()

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Ask an internal department a question; return (result, reward)."""
        raise NotImplementedError()

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Apply a business rule; return (detail text, reward)."""
        raise NotImplementedError()

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Return the reward for a decision given the episode history."""
        raise NotImplementedError()

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Return the reward for routing the case to a team."""
        raise NotImplementedError()

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Return the reward for closing the case."""
        raise NotImplementedError()

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Return the final score breakdown for an episode."""
        raise NotImplementedError()

    # --- Catalogues (empty unless a task overrides them) ---

    @property
    def available_checks(self) -> List[str]:
        """Names of the validation checks this task offers."""
        return list()

    @property
    def available_rules(self) -> List[str]:
        """Identifiers of the business rules this task offers."""
        return list()

    @property
    def knowledge_base(self) -> List[str]:
        """Policy snippets attached to this task."""
        return list()


# ---------------------------------------------------------------------------
# Task 1 — Price Variance Exception (Easy)
# ---------------------------------------------------------------------------

class PriceVarianceTask(BaseTask):
    """
    Office stationery invoice arrives 3.08% above the PO.
    Company tolerance is +/-2% auto-approval. Supplier had verbal approval
    from procurement for the price increase but the PO was never updated.

    Optimal path: check tolerance -> cross-check prices -> verify GRN ->
    query supplier -> query procurement -> apply exception rule -> approve ->
    route to procurement for PO amendment -> close.
    """

    task_id = "task1_price_variance"
    max_steps = 18
    difficulty = "easy"

    def get_purchase_order(self) -> PurchaseOrder:
        """Return the PO: three stationery lines totalling ₹50,000."""
        return PurchaseOrder(
            po_number="PO-2024-1041",
            vendor_name="OfficeNeed Supplies",
            po_date="2024-02-15",
            line_items=[
                LineItem(description="A4 Paper", quantity=100, unit_price=220.0, total=22000.0, tax_rate=18.0),
                LineItem(description="Ballpoint Pens", quantity=20, unit_price=450.0, total=9000.0, tax_rate=18.0),
                LineItem(description="Staplers", quantity=10, unit_price=1900.0, total=19000.0, tax_rate=18.0),
            ],
            total_amount=50000.0,
            payment_terms="Net-30",
        )

    def get_invoice(self) -> Invoice:
        """Return the invoice: same quantities as the PO, two lines repriced upward."""
        return Invoice(
            invoice_number="INV-ON-8821",
            supplier_name="OfficeNeed Supplies",
            invoice_date="2024-03-05",
            due_date="2024-04-04",
            po_reference="PO-2024-1041",
            line_items=[
                LineItem(description="A4 Paper", quantity=100, unit_price=231.0, total=23100.0, tax_rate=18.0),
                LineItem(description="Ballpoint Pens", quantity=20, unit_price=472.0, total=9440.0, tax_rate=18.0),
                LineItem(description="Staplers", quantity=10, unit_price=1900.0, total=19000.0, tax_rate=18.0),
            ],
            subtotal=51540.0,
            tax_amount=9277.20,
            tax_rate=18.0,
            total_amount=60817.20,
            bank_account="9876543210",
            bank_name="HDFC Bank",
            ifsc_code="HDFC0001234",
            supplier_gstin="29AABCO1234F1Z5",
            supplier_email="accounts@officeneed.com",
        )

    def get_grn(self) -> GoodsReceiptNote:
        """Return the GRN: every line fully received, nothing pending or rejected."""
        return GoodsReceiptNote(
            grn_number="GRN-2024-0892",
            po_reference="PO-2024-1041",
            receipt_date="2024-03-01",
            items_received=[
                {"description": "A4 Paper", "quantity_received": 100, "quantity_pending": 0, "quantity_rejected": 0},
                {"description": "Ballpoint Pens", "quantity_received": 20, "quantity_pending": 0, "quantity_rejected": 0},
                {"description": "Staplers", "quantity_received": 10, "quantity_pending": 0, "quantity_rejected": 0},
            ],
            receiving_officer="Ramesh Kumar",
            notes="All items received in good condition.",
        )

    def get_supplier_master(self) -> SupplierMaster:
        """Return the supplier master; bank details and GSTIN match the invoice."""
        return SupplierMaster(
            supplier_id="SUP-0441",
            supplier_name="OfficeNeed Supplies",
            registered_address="45 MG Road, Bengaluru 560001",
            gstin="29AABCO1234F1Z5",
            bank_account="9876543210",
            bank_name="HDFC Bank",
            ifsc_code="HDFC0001234",
            contact_email="sales@officeneed.com",
            contact_phone="+91-80-4567-8901",
            registered_domain="officeneed.com",
            pan_number="AABCO1234F",
            status="active",
        )

    def get_exception_flag(self) -> ExceptionFlag:
        """Return the PRICE_MISMATCH flag that put the invoice on hold."""
        return ExceptionFlag(
            flag_code="PRICE_MISMATCH",
            flag_description=(
                "Invoice total ₹51,540 exceeds PO ₹50,000 by ₹1,540 (3.08%). "
                "Above auto-approval threshold."
            ),
            auto_hold=True,
            flagged_date="2024-03-06",
            severity="medium",
        )

    @property
    def available_checks(self) -> List[str]:
        """Names accepted by ``simulate_run_check`` for this task."""
        return ["tolerance_rule", "grn_match", "duplicate_detection",
                "bank_account_verification", "gst_verification", "po_match"]

    @property
    def available_rules(self) -> List[str]:
        """Rule identifiers accepted by ``simulate_apply_rule`` for this task."""
        return ["tolerance_2pct_auto_approve", "tolerance_exception_approval",
                "rejection_with_reason", "partial_approval"]

    @property
    def knowledge_base(self) -> List[str]:
        """Policy snippets relevant to price-variance exceptions."""
        return [
            "POL-001: Price variance ≤±2% may be auto-approved. Above 2% requires exception approval.",
            "POL-002: Exception approval requires confirmation from originating department.",
            "POL-003: Any approved invoice with a price change must be followed by a PO amendment request.",
            "POL-004: Bank account on invoice must match supplier master.",
        ]

    # --- Simulators ---

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Inspect one field of one document.

        The (document, field) pair is matched case-insensitively. Key fields
        return a descriptive value and a larger reward; any other pair returns
        a generic "no anomaly" value with a reward of 0.01.
        """
        key_fields = {
            ("invoice", "line_items"): ("A4 Paper @₹231 (+5%), Pens @₹472 (+4.9%), Staplers @₹1900 (unchanged)", 0.10),
            ("invoice", "total_amount"): ("₹51,540 (subtotal) + ₹9,277.20 (GST 18%) = ₹60,817.20", 0.08),
            ("po", "line_items"): ("A4 Paper @₹220, Pens @₹450, Staplers @₹1900. Total: ₹50,000", 0.06),
            ("grn", "items_received"): ("All 3 items fully received. No pending, no rejected.", 0.05),
            ("invoice", "bank_account"): ("9876543210 — HDFC Bank, IFSC HDFC0001234", 0.02),
            ("invoice", "supplier_gstin"): ("29AABCO1234F1Z5", 0.02),
        }
        key = (document.lower(), field.lower())
        value, reward = key_fields.get(key, (f"{document}.{field} — no anomaly detected", 0.01))
        result = InspectionResult(document=document, field=field, value=value, note="")
        return result, reward

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Cross-check a field between two documents.

        The lookup is case-insensitive and does not depend on the order in
        which the two documents are named: comparing "po" against "invoice"
        gives the same outcome as "invoice" against "po". Unknown
        combinations report no mismatch with a reward of 0.01. The returned
        check name is built from the arguments exactly as they were passed.
        """
        checks = {
            ("unit_price", "invoice", "po"): (False, "MISMATCH: A4 Paper ₹231 vs ₹220 (+5.0%), Pens ₹472 vs ₹450 (+4.9%). Staplers match.", 0.12),
            ("total_amount", "invoice", "po"): (False, "Invoice subtotal ₹51,540 vs PO ₹50,000. Variance: +₹1,540 (+3.08%).", 0.10),
            ("bank_account", "invoice", "supplier_master"): (True, "Bank account 9876543210 matches supplier master.", 0.03),
            ("gstin", "invoice", "supplier_master"): (True, "GSTIN 29AABCO1234F1Z5 matches supplier master.", 0.02),
            ("quantity", "invoice", "grn"): (True, "All quantities match: 100 reams, 20 boxes, 10 units.", 0.04),
        }
        fallback = (True, f"No mismatch found for {field} between {doc_a} and {doc_b}.", 0.01)
        key = (field.lower(), doc_a.lower(), doc_b.lower())
        outcome = checks.get(key)
        if outcome is None:
            # A comparison is symmetric: retry with the documents swapped so a
            # reversed argument order cannot hide a real mismatch.
            outcome = checks.get((key[0], key[2], key[1]), fallback)
        passed, detail, reward = outcome
        result = CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail)
        return result, reward

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Run a named validation check.

        Names are matched exactly as given. An unrecognised name is reported
        as passed with a reward of 0.01.
        """
        checks = {
            "tolerance_rule": (False, "Price variance 3.08% exceeds ±2% auto-approval threshold. Manual exception approval required.", 0.14),
            "grn_match": (True, "All items fully received. GRN matches invoice quantities.", 0.06),
            "duplicate_detection": (True, "No duplicate invoice found in payment history.", 0.02),
            "bank_account_verification": (True, "Bank account matches supplier master record.", 0.02),
            "gst_verification": (True, "GSTIN matches supplier master. GST calculation correct.", 0.02),
            "po_match": (False, "PO match FAILED on unit prices: 2 of 3 line items have price variance.", 0.08),
        }
        passed, detail, reward = checks.get(check_name, (True, f"Check '{check_name}' passed — no issues found.", 0.01))
        result = CheckResult(check_name=check_name, passed=passed, detail=detail)
        return result, reward

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Query the supplier — returns email explaining the price increase.

        The response and the 0.10 reward are the same for every question and
        channel.
        """
        response = (
            "Dear Sir/Madam, due to a 12% increase in raw material costs effective January 2024, "
            "we revised prices for A4 Paper and Ballpoint Pens. This was communicated to Mr. Arjun Mehta "
            "in your Procurement team via email on Feb 20, 2024. He acknowledged and verbally approved "
            "the revised pricing. We can provide the email trail if needed. — OfficeNeed Supplies"
        )
        result = QueryResult(target="supplier", question=question, response=response, channel=channel)
        return result, 0.10

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Query an internal department.

        Only procurement (matched case-insensitively) has relevant
        information (reward 0.12); every other department returns a generic
        reply with a reward of 0.03.
        """
        if department.lower() == "procurement":
            response = (
                "Hi, this is Arjun Mehta from Procurement. Yes, I received the price revision email "
                "from OfficeNeed on Feb 20. I verbally approved it as the increase was reasonable "
                "(raw material cost pass-through). I should have raised a PO amendment but it slipped. "
                "I'll raise the amendment today. Please go ahead and approve the invoice."
            )
            return QueryResult(target="procurement", question=question, response=response, channel="internal"), 0.12
        response = f"{department.title()} department: We don't have specific information about this invoice exception."
        return QueryResult(target=department.lower(), question=question, response=response, channel="internal"), 0.03

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Apply a business rule and return (detail text, reward).

        Only the exception-approval rule earns a positive reward; the other
        known rules and any unknown rule id are penalised.
        """
        rules = {
            "tolerance_2pct_auto_approve": ("BLOCKED: Cannot auto-approve. Price variance 3.08% exceeds ±2% threshold.", -0.05),
            "tolerance_exception_approval": ("APPLIED: Exception approval pathway activated. Requires department confirmation (obtained from procurement).", 0.10),
            "rejection_with_reason": ("APPLIED: Rejection rule activated. Invoice will be returned to supplier.", -0.08),
            "partial_approval": ("NOT APPLICABLE: All items received in full. Partial approval not warranted.", -0.05),
        }
        detail, reward = rules.get(rule_id, (f"Rule '{rule_id}' not found in policy database.", -0.03))
        return detail, reward

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Score the agent's decision based on evidence gathered.

        Approval earns the most when the tolerance check was run and
        procurement was queried first; rejection is penalised; hold earns a
        small reward; any other decision earns nothing. ``reason`` does not
        affect the score.
        """
        checks_run = {c.check_name for c in ep.checks}
        queries_to = {q.target for q in ep.queries}

        if decision == "approve":
            if "tolerance_rule" in checks_run and "procurement" in queries_to:
                return 0.25
            elif "tolerance_rule" in checks_run:
                return 0.18
            else:
                return 0.05
        elif decision == "reject":
            return -0.10
        elif decision == "hold":
            return 0.08
        return 0.0

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Score routing decisions; the team name is matched case-insensitively."""
        routes = {"procurement": 0.12, "finance": 0.03, "legal": -0.05}
        return routes.get(team.lower(), 0.0)

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Score case closure.

        Full reward needs an approve decision, a tolerance check on record
        and routing to procurement; any other recorded decision earns half;
        closing with no decision earns nothing.
        """
        checks_run = {c.check_name for c in ep.checks}
        if ep.decision == "approve" and "tolerance_rule" in checks_run and "procurement" in ep.routed_to:
            return 0.12
        elif ep.decision is not None:
            return 0.06
        return 0.0

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Final grader producing sub-scores.

        Returns the overall ``score`` (sum of the sub-scores, clamped to
        [0.0, 1.0]) together with the diagnosis, investigation, decision,
        routing, closure and efficiency sub-scores, each rounded to four
        decimals. The decision sub-score is negative for a rejection.
        """
        checks_run = {c.check_name for c in ep.checks}
        queries_to = {q.target for q in ep.queries}
        # Cross-check names embed the field exactly as the agent typed it, and
        # the simulator matches fields case-insensitively, so compare the same way.
        lowered_names = [c.check_name.lower() for c in ep.checks]

        # Diagnosis
        d = 0.0
        if any("unit_price" in name or "total" in name for name in lowered_names):
            d += 0.12
        if "tolerance_rule" in checks_run:
            d += 0.14
        if "grn_match" in checks_run:
            d += 0.06

        # Investigation
        i = 0.0
        if "supplier" in queries_to:
            i += 0.10
        if "procurement" in queries_to:
            i += 0.12
        if "tolerance_exception_approval" in ep.rules_applied:
            i += 0.08

        # Decision
        dec = 0.0
        if ep.decision == "approve":
            dec += 0.18
        elif ep.decision == "hold":
            dec += 0.06
        elif ep.decision == "reject":
            dec -= 0.10

        # Routing
        route = 0.12 if "procurement" in ep.routed_to else 0.0

        # Closure
        closure = 0.08 if ep.closed else 0.0

        # Efficiency: full 0.06 up to 9 steps, then 0.004 less per extra step.
        eff = max(0.0, 0.06 - 0.004 * max(0, ep.step_count - 9))

        total = d + i + dec + route + closure + eff
        return {
            "score": round(max(0.0, min(1.0, total)), 4),
            "diagnosis_score": round(d, 4),
            "investigation_score": round(i, 4),
            "decision_score": round(dec, 4),
            "routing_score": round(route, 4),
            "closure_score": round(closure, 4),
            "efficiency_score": round(eff, 4),
        }


# ---------------------------------------------------------------------------
# Task 2 — Duplicate Invoice with Hidden Tax Error (Medium)
# ---------------------------------------------------------------------------

class DuplicateTaxErrorTask(BaseTask):
    """
    Logistics supplier submits INV-2024-891 which is a duplicate of already-paid
    INV-2024-819 (digit transposition). The original invoice applied 15% GST
    (wrong), correct rate is 18%. Company underpaid ₹3,240. The new invoice has
    the correct rate. It's both a duplicate AND a legitimate correction.
    """

    task_id = "task2_duplicate_tax"
    max_steps = 20
    difficulty = "medium"

    def get_purchase_order(self) -> PurchaseOrder:
        """Return the PO: transport plus warehousing, totalling ₹1,08,000."""
        return PurchaseOrder(
            po_number="PO-2024-0778",
            vendor_name="FastMove Logistics",
            po_date="2024-01-25",
            line_items=[
                LineItem(description="Mumbai-Pune Transport", quantity=20, unit_price=4500.0, total=90000.0, tax_rate=18.0),
                LineItem(description="Warehousing charges Feb 2024", quantity=1, unit_price=18000.0, total=18000.0, tax_rate=18.0),
            ],
            total_amount=108000.0,
            payment_terms="Net-15",
        )

    def get_invoice(self) -> Invoice:
        """Return the resubmitted invoice INV-2024-891, taxed at 18% GST."""
        return Invoice(
            invoice_number="INV-2024-891",
            supplier_name="FastMove Logistics",
            invoice_date="2024-03-12",
            due_date="2024-03-27",
            po_reference="PO-2024-0778",
            line_items=[
                LineItem(description="Mumbai-Pune Transport", quantity=20, unit_price=4500.0, total=90000.0, tax_rate=18.0),
                LineItem(description="Warehousing charges Feb 2024", quantity=1, unit_price=18000.0, total=18000.0, tax_rate=18.0),
            ],
            subtotal=108000.0,
            tax_amount=19440.0,
            tax_rate=18.0,
            total_amount=127440.0,
            bank_account="1122334455",
            bank_name="ICICI Bank",
            ifsc_code="ICIC0005678",
            supplier_gstin="27AABCF5678G1Z3",
            supplier_email="billing@fastmove.in",
        )

    def get_grn(self) -> GoodsReceiptNote:
        """Return the GRN: both services confirmed in full."""
        return GoodsReceiptNote(
            grn_number="GRN-2024-0740",
            po_reference="PO-2024-0778",
            receipt_date="2024-02-28",
            items_received=[
                {"description": "Mumbai-Pune Transport", "quantity_received": 20, "quantity_pending": 0, "quantity_rejected": 0, "service_confirmed": True},
                {"description": "Warehousing charges Feb 2024", "quantity_received": 1, "quantity_pending": 0, "quantity_rejected": 0, "service_confirmed": True},
            ],
            receiving_officer="Priya Sharma",
            notes="All transport trips completed. Warehousing service confirmed for February.",
        )

    def get_supplier_master(self) -> SupplierMaster:
        """Return the supplier master; bank details and GSTIN match the invoice."""
        return SupplierMaster(
            supplier_id="SUP-0229",
            supplier_name="FastMove Logistics",
            registered_address="12 Logistics Park, Navi Mumbai 400710",
            gstin="27AABCF5678G1Z3",
            bank_account="1122334455",
            bank_name="ICICI Bank",
            ifsc_code="ICIC0005678",
            contact_email="accounts@fastmove.in",
            contact_phone="+91-22-3456-7890",
            registered_domain="fastmove.in",
            pan_number="AABCF5678G",
            status="active",
        )

    def get_exception_flag(self) -> ExceptionFlag:
        """Return the POSSIBLE_DUPLICATE flag that put the invoice on hold."""
        return ExceptionFlag(
            flag_code="POSSIBLE_DUPLICATE",
            flag_description="Invoice INV-2024-891 closely matches previously processed invoice INV-2024-819. Possible duplicate submission.",
            auto_hold=True,
            flagged_date="2024-03-13",
            severity="high",
        )

    @property
    def available_checks(self) -> List[str]:
        """Names accepted by ``simulate_run_check`` for this task."""
        return ["duplicate_detection", "tax_calculation_verify", "grn_match",
                "bank_account_verification", "gst_verification", "po_match"]

    @property
    def available_rules(self) -> List[str]:
        """Rule identifiers accepted by ``simulate_apply_rule`` for this task."""
        return ["partial_approval", "credit_note_request", "full_rejection",
                "duplicate_block", "tax_correction"]

    @property
    def knowledge_base(self) -> List[str]:
        """Policy snippets relevant to duplicates and tax corrections."""
        return [
            "POL-005: Duplicate invoices must be rejected unless they represent a legitimate correction.",
            "POL-006: Tax calculation errors on paid invoices require a credit note and correction entry.",
            "POL-007: Partial approval may be used when only a portion of the invoice amount is valid.",
            "POL-008: Any tax correction must be documented with the original invoice reference.",
        ]

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Inspect one field of one document.

        The (document, field) pair is matched case-insensitively. Key invoice
        fields return a descriptive value and a larger reward; any other pair
        returns a generic "no anomaly" value with a reward of 0.01.
        """
        key_fields = {
            ("invoice", "invoice_number"): ("INV-2024-891 — note digit transposition vs INV-2024-819 (891 vs 819)", 0.10),
            ("invoice", "tax_amount"): ("₹19,440 (18% GST on ₹1,08,000) — this is the CORRECT rate", 0.08),
            ("invoice", "total_amount"): ("₹1,27,440 (subtotal ₹1,08,000 + 18% GST ₹19,440)", 0.05),
            ("invoice", "line_items"): ("Transport 20×₹4,500 = ₹90,000 + Warehousing ₹18,000 = ₹1,08,000", 0.04),
        }
        key = (document.lower(), field.lower())
        value, reward = key_fields.get(key, (f"{document}.{field} — no anomaly detected", 0.01))
        return InspectionResult(document=document, field=field, value=value, note=""), reward

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Cross-check a field between two documents.

        The lookup is case-insensitive and does not depend on the order in
        which the two documents are named. Unknown combinations report no
        mismatch with a reward of 0.01. The returned check name is built from
        the arguments exactly as they were passed.
        """
        checks = {
            ("invoice_number", "invoice", "payment_history"): (False, "MATCH FOUND: INV-2024-819 paid 12 days ago for ₹1,24,200. Digit transposition: 891 vs 819.", 0.15),
            ("tax_amount", "invoice", "payment_history"): (False, "TAX DISCREPANCY: Original INV-2024-819 had 15% GST (₹16,200). Current INV-2024-891 has 18% GST (₹19,440). Delta: ₹3,240.", 0.14),
            ("total_amount", "invoice", "po"): (True, "Invoice subtotal ₹1,08,000 matches PO total ₹1,08,000.", 0.03),
            ("bank_account", "invoice", "supplier_master"): (True, "Bank account matches supplier master.", 0.02),
        }
        fallback = (True, f"No mismatch for {field}.", 0.01)
        key = (field.lower(), doc_a.lower(), doc_b.lower())
        outcome = checks.get(key)
        if outcome is None:
            # A comparison is symmetric: retry with the documents swapped so a
            # reversed argument order cannot hide a real discrepancy.
            outcome = checks.get((key[0], key[2], key[1]), fallback)
        passed, detail, reward = outcome
        return CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail), reward

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Run a named validation check.

        Names are matched exactly as given. An unrecognised name is reported
        as passed with a reward of 0.01.
        """
        checks = {
            "duplicate_detection": (False, "DUPLICATE FOUND: INV-2024-891 matches INV-2024-819 (paid 12 days ago, ₹1,24,200). Invoice numbers differ by digit transposition (891 vs 819).", 0.18),
            # 15% GST was charged where 18% was due, so the amount already paid
            # is ₹3,240 short of the correct total.
            "tax_calculation_verify": (False, "TAX ERROR on ORIGINAL: INV-2024-819 applied 15% GST (₹16,200) instead of correct 18% (₹19,440). Company underpaid ₹3,240 in tax on already-paid invoice.", 0.16),
            "grn_match": (True, "Services fully confirmed. GRN matches invoice.", 0.04),
            "bank_account_verification": (True, "Bank account matches supplier master.", 0.02),
            "gst_verification": (True, "GSTIN matches supplier master.", 0.02),
            "po_match": (True, "PO amounts and line items match current invoice.", 0.03),
        }
        passed, detail, reward = checks.get(check_name, (True, f"Check '{check_name}' passed.", 0.01))
        return CheckResult(check_name=check_name, passed=passed, detail=detail), reward

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Query the supplier — returns their explanation of the resubmission.

        The response and the 0.10 reward are the same for every question and
        channel.
        """
        response = (
            "We are aware that INV-2024-819 was submitted with incorrect 15% GST. The correct rate "
            "is 18%. INV-2024-891 is a corrected resubmission. We request partial approval for the "
            "₹3,240 tax differential only, not the full invoice amount. We will issue a credit note "
            "for the remaining amount."
        )
        return QueryResult(target="supplier", question=question, response=response, channel=channel), 0.10

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Query an internal department.

        Only finance (matched case-insensitively) has relevant information
        (reward 0.12); every other department returns a generic reply with a
        reward of 0.03.
        """
        if department.lower() == "finance":
            # Paid ₹1,24,200 against a correct total of ₹1,27,440: an underpayment.
            response = (
                "Confirmed: INV-2024-819 was paid on March 1 for ₹1,24,200 (₹1,08,000 + 15% GST of "
                "₹16,200). The correct GST rate for logistics services is 18%. We underpaid — the "
                "correct total should have been ₹1,27,440. The tax differential is ₹3,240. This "
                "can be corrected via partial approval of the new invoice for ₹3,240 only."
            )
            return QueryResult(target="finance", question=question, response=response, channel="internal"), 0.12
        response = f"{department.title()}: No specific information available."
        return QueryResult(target=department.lower(), question=question, response=response, channel="internal"), 0.03

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Apply a business rule and return (detail text, reward).

        Full rejection and unknown rule ids are penalised; the other rules
        earn positive rewards.
        """
        rules = {
            "partial_approval": ("APPLIED: Partial approval for ₹3,240 (tax correction delta). Main invoice amount blocked as duplicate.", 0.12),
            "credit_note_request": ("APPLIED: Credit note requested from supplier for balance amount. Reference: INV-2024-819 tax correction.", 0.10),
            "full_rejection": ("APPLIED: Full rejection. Invoice returned to supplier.", -0.05),
            "duplicate_block": ("APPLIED: Duplicate block activated. Full payment prevented.", 0.04),
            "tax_correction": ("APPLIED: Tax correction entry created referencing original INV-2024-819.", 0.08),
        }
        detail, reward = rules.get(rule_id, (f"Rule '{rule_id}' not found.", -0.03))
        return detail, reward

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Score the agent's decision based on evidence gathered.

        Partial approval earns the most when both the duplicate and the tax
        checks were run first; full approval is penalised; reject and hold
        earn small rewards; any other decision earns nothing. ``reason`` does
        not affect the score.
        """
        checks_run = {c.check_name for c in ep.checks}
        dup_found = "duplicate_detection" in checks_run
        tax_found = "tax_calculation_verify" in checks_run

        if decision == "partial_approve":
            if dup_found and tax_found:
                return 0.28
            elif dup_found:
                return 0.14
            return 0.06
        elif decision == "reject":
            if dup_found:
                return 0.08
            return 0.02
        elif decision == "approve":
            return -0.15
        elif decision == "hold":
            return 0.06
        return 0.0

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Score routing decisions; the team name is matched case-insensitively."""
        routes = {"finance": 0.08, "procurement": 0.03, "legal": 0.02}
        return routes.get(team.lower(), 0.0)

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Score case closure.

        A partial-approve decision on a case not yet marked closed earns the
        full reward; any other recorded decision earns half; closing with no
        decision earns nothing.
        """
        # NOTE(review): the full reward is only reachable if the caller sets
        # ep.closed after this method returns — confirm against the
        # environment's close handling.
        if ep.decision == "partial_approve" and ep.closed is False:
            return 0.06
        elif ep.decision is not None:
            return 0.03
        return 0.0

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Final grader producing sub-scores.

        Returns the overall ``score`` (sum of the sub-scores, clamped to
        [0.0, 1.0]) together with the diagnosis, investigation, decision,
        routing, closure and efficiency sub-scores, each rounded to four
        decimals. The decision sub-score is negative for a full approval.
        """
        checks_run = {c.check_name for c in ep.checks}
        queries_to = {q.target for q in ep.queries}

        # Diagnosis (max 0.30)
        d = 0.0
        if "duplicate_detection" in checks_run:
            d += 0.16
        if "tax_calculation_verify" in checks_run:
            d += 0.14

        # Investigation (max 0.32)
        i = 0.0
        if "finance" in queries_to:
            i += 0.12
        if "supplier" in queries_to:
            i += 0.10
        if "partial_approval" in ep.rules_applied:
            i += 0.06
        if "credit_note_request" in ep.rules_applied:
            i += 0.04

        # Decision (max 0.20)
        dec = 0.0
        if ep.decision == "partial_approve":
            dec = 0.20
        elif ep.decision == "reject":
            dec = 0.05
        elif ep.decision == "approve":
            dec = -0.15
        elif ep.decision == "hold":
            dec = 0.04

        # Routing (max 0.08)
        route = 0.08 if "finance" in ep.routed_to else 0.0

        # Closure (max 0.06)
        closure = 0.06 if ep.closed else 0.0

        # Efficiency: full 0.04 up to 10 steps, then 0.003 less per extra step.
        eff = max(0.0, 0.04 - 0.003 * max(0, ep.step_count - 10))

        total = d + i + dec + route + closure + eff
        return {
            "score": round(max(0.0, min(1.0, total)), 4),
            "diagnosis_score": round(d, 4),
            "investigation_score": round(i, 4),
            "decision_score": round(dec, 4),
            "routing_score": round(route, 4),
            "closure_score": round(closure, 4),
            "efficiency_score": round(eff, 4),
        }


# ---------------------------------------------------------------------------
# Task 3 — Compound Fraud Signals (Hard)
# ---------------------------------------------------------------------------

class CompoundFraudTask(BaseTask):
    """
    IT supplier submits ₹8,47,500 invoice for 15 laptops. System flags a bank
    account change. But there are FOUR simultaneous fraud signals: bank BEC,
    GSTIN mismatch, quantity mismatch (13 vs 15), and price inflation (8.65%).

    Critical trap: querying supplier via email contacts the fraudster (-0.15).
    Must use phone to reach real supplier (+0.15).
    """

    task_id = "task3_compound_fraud"
    max_steps = 25
    difficulty = "hard"

    # Lower-cased channel names that count as "email". Shared by the supplier
    # simulator and the grader so both classify a query the same way.
    _EMAIL_CHANNELS = ("email", "mail")

    def get_purchase_order(self) -> PurchaseOrder:
        """Return the purchase order: 15 laptops at ₹52,000 per unit."""
        return PurchaseOrder(
            po_number="PO-2024-0955",
            vendor_name="TechCore Solutions",
            po_date="2024-03-08",
            line_items=[
                LineItem(description="Business Laptop (14-inch, i7, 16GB)", quantity=15, unit_price=52000.0, total=780000.0, tax_rate=18.0),
            ],
            total_amount=780000.0,
            payment_terms="Net-30",
        )

    def get_invoice(self) -> Invoice:
        """Return the suspicious invoice.

        Its bank details, GSTIN and email domain differ from the supplier
        master, and its unit price (₹56,500) differs from the purchase order.
        """
        return Invoice(
            invoice_number="INV-TC-2024-0312",
            supplier_name="TechCore Solutions",
            invoice_date="2024-03-10",
            due_date="2024-04-09",
            po_reference="PO-2024-0955",
            line_items=[
                LineItem(description="Business Laptop (14-inch, i7, 16GB)", quantity=15, unit_price=56500.0, total=847500.0, tax_rate=18.0),
            ],
            subtotal=847500.0,
            tax_amount=152550.0,
            tax_rate=18.0,
            total_amount=1000050.0,
            bank_account="5566778899",
            bank_name="Yes Bank",
            ifsc_code="YESB0000999",
            supplier_gstin="07AABCT9999X1Z8",
            supplier_email="accounts@techcore-solutions.com",
        )

    def get_grn(self) -> GoodsReceiptNote:
        """Return the goods receipt note: 13 units received, 2 pending."""
        return GoodsReceiptNote(
            grn_number="GRN-2024-0901",
            po_reference="PO-2024-0955",
            receipt_date="2024-03-15",
            items_received=[
                {"description": "Business Laptop (14-inch, i7, 16GB)", "quantity_received": 13, "quantity_pending": 2, "quantity_rejected": 0},
            ],
            receiving_officer="Vikram Singh",
            notes="13 of 15 laptops received. 2 units still in transit.",
        )

    def get_supplier_master(self) -> SupplierMaster:
        """Return the registered supplier record the invoice is compared against."""
        return SupplierMaster(
            supplier_id="SUP-0187",
            supplier_name="TechCore Solutions",
            registered_address="88 Tech Park, Sector 62, Noida 201301",
            gstin="07AABCT1234Y1Z5",
            bank_account="1234567890",
            bank_name="State Bank of India",
            ifsc_code="SBIN0001234",
            contact_email="sales@techcore-solutions.in",
            contact_phone="+91-120-456-7890",
            registered_domain="techcore-solutions.in",
            pan_number="AABCT1234Y",
            status="active",
        )

    def get_exception_flag(self) -> ExceptionFlag:
        """Return the system flag that opens the episode (bank account change)."""
        return ExceptionFlag(
            flag_code="BANK_ACCOUNT_CHANGE",
            flag_description=(
                "Invoice bank account (5566778899, Yes Bank) does not match supplier master "
                "(1234567890, SBI). Bank account change request received from "
                "accounts@techcore-solutions.com."
            ),
            auto_hold=True,
            flagged_date="2024-03-16",
            severity="critical",
        )

    @property
    def available_checks(self) -> List[str]:
        """Names of the checks that ``simulate_run_check`` has specific results for."""
        return ["bank_account_verification", "gst_verification", "grn_match",
                "email_domain_verification", "invoice_date_validation",
                "quantity_check", "price_check", "duplicate_detection", "po_match"]

    @property
    def available_rules(self) -> List[str]:
        """Rule IDs that ``simulate_apply_rule`` rewards; others are penalised."""
        return ["fraud_hold", "rejection_with_reason", "bank_change_verification",
                "escalate_to_security"]

    @property
    def knowledge_base(self) -> List[str]:
        """Policy excerpts relevant to this scenario."""
        return [
            "POL-004: Bank account on invoice must match supplier master.",
            "POL-009: Bank account change must be verified via registered phone number — NEVER via email.",
            "POL-010: GSTIN on invoice must match supplier master. Mismatch is a fraud indicator.",
            "POL-011: Invoice quantities must not exceed GRN quantities.",
            "POL-012: Any suspected fraud must be escalated to Legal and Security teams.",
            "POL-013: Do not process payment while fraud investigation is pending.",
        ]

    def simulate_inspect(self, document: str, field: str) -> Tuple[InspectionResult, float]:
        """Simulate looking at one field of one document.

        The ``(document, field)`` lookup is case-insensitive. Known pairs
        return a scenario-specific observation and reward; any other pair
        returns a generic note with a 0.01 reward.

        Returns:
            ``(InspectionResult, reward)``.
        """
        key_fields = {
            ("invoice", "bank_account"): ("5566778899 (Yes Bank) — DOES NOT MATCH supplier master (1234567890, SBI)", 0.12),
            ("invoice", "supplier_gstin"): ("07AABCT9999X1Z8 — DOES NOT MATCH supplier master (07AABCT1234Y1Z5)", 0.10),
            ("invoice", "supplier_email"): ("accounts@techcore-solutions.com — domain is .com, registered domain is .in", 0.08),
            ("grn", "items_received"): ("13 of 15 laptops received. 2 pending delivery.", 0.08),
            ("invoice", "line_items"): ("15 laptops @ ₹56,500 = ₹8,47,500. PO price was ₹52,000/unit.", 0.06),
            ("invoice", "invoice_date"): ("2024-03-10 (Sunday) — unusual for B2B invoicing", 0.04),
            ("invoice", "total_amount"): ("₹10,00,050 (₹8,47,500 + 18% GST ₹1,52,550)", 0.03),
        }
        key = (document.lower(), field.lower())
        value, reward = key_fields.get(key, (f"{document}.{field} — value noted", 0.01))
        return InspectionResult(document=document, field=field, value=value, note=""), reward

    def simulate_cross_check(self, field: str, doc_a: str, doc_b: str) -> Tuple[CheckResult, float]:
        """Simulate comparing one field between two documents.

        The lookup is case-insensitive. Unknown combinations report a pass
        with a 0.01 reward.

        Returns:
            ``(CheckResult, reward)``; the check name is built from the
            arguments exactly as passed in.
        """
        checks = {
            ("bank_account", "invoice", "supplier_master"): (False, "MISMATCH: Invoice has 5566778899 (Yes Bank). Supplier master has 1234567890 (SBI). Change request from lookalike domain.", 0.14),
            ("gstin", "invoice", "supplier_master"): (False, "MISMATCH: Invoice GSTIN 07AABCT9999X1Z8 belongs to 'TechCore Trading Pvt Ltd' (different entity). Supplier master: 07AABCT1234Y1Z5.", 0.14),
            ("quantity", "invoice", "grn"): (False, "MISMATCH: Invoice claims 15 units. GRN shows only 13 received, 2 pending.", 0.10),
            ("unit_price", "invoice", "po"): (False, "MISMATCH: Invoice ₹56,500/unit vs PO ₹52,000/unit. Variance: +8.65%. No approved revision.", 0.08),
        }
        # NOTE(review): the lookup is order-sensitive, so (doc_a="grn",
        # doc_b="invoice") falls through to the "No mismatch" default.
        # Confirm this is intended before relaxing it.
        key = (field.lower(), doc_a.lower(), doc_b.lower())
        passed, detail, reward = checks.get(key, (True, f"No mismatch for {field}.", 0.01))
        return CheckResult(check_name=f"cross_{field}_{doc_a}_{doc_b}", passed=passed, detail=detail), reward

    def simulate_run_check(self, check_name: str) -> Tuple[CheckResult, float]:
        """Simulate running a named validation check.

        ``check_name`` is matched exactly (no case folding). Unknown names
        report a pass with a 0.01 reward.

        Returns:
            ``(CheckResult, reward)``.
        """
        checks = {
            "bank_account_verification": (False, "FAILED: Bank account mismatch. Change request from techcore-solutions.com (lookalike of registered .in domain). Suspected BEC attack.", 0.18),
            "gst_verification": (False, "FAILED: GSTIN 07AABCT9999X1Z8 registered to 'TechCore Trading Pvt Ltd' in Delhi — a DIFFERENT entity. Supplier master shows 07AABCT1234Y1Z5 for 'TechCore Solutions'.", 0.18),
            "grn_match": (False, "FAILED: Invoice claims 15 laptops. GRN confirms only 13 received. 2 units still in transit.", 0.14),
            "email_domain_verification": (False, "FAILED: Invoice email domain techcore-solutions.com does not match registered domain techcore-solutions.in. Lookalike domain — possible BEC.", 0.16),
            "invoice_date_validation": (False, "WARNING: Invoice dated 2024-03-10 is a Sunday. Unusual for B2B invoicing.", 0.08),
            "quantity_check": (False, "FAILED: Invoiced quantity (15) exceeds received quantity (13). 2 units undelivered.", 0.12),
            "price_check": (False, "FAILED: Unit price ₹56,500 is 8.65% above PO price ₹52,000. No price revision approved.", 0.10),
            "duplicate_detection": (True, "No duplicate invoice found.", 0.02),
            "po_match": (False, "FAILED: Multiple mismatches — GSTIN, quantity, and unit price all differ from PO/supplier master.", 0.08),
        }
        passed, detail, reward = checks.get(check_name, (True, f"Check '{check_name}' passed.", 0.01))
        return CheckResult(check_name=check_name, passed=passed, detail=detail), reward

    def simulate_query_supplier(self, question: str, channel: str) -> Tuple[QueryResult, float]:
        """Critical trap: email contacts the fraudster, phone reaches real supplier.

        ``channel`` is compared case-insensitively. "email"/"mail" returns the
        fraudster's reply with a -0.15 reward; every other channel returns the
        genuine supplier's reply with +0.15.

        Returns:
            ``(QueryResult, reward)``; the result stores ``channel`` as given.
        """
        if channel.lower() in self._EMAIL_CHANNELS:
            response = (
                "Dear Customer, thank you for reaching out. We recently changed our banking details "
                "due to a corporate restructuring. The new account (5566778899, Yes Bank) is correct. "
                "Please process the payment at your earliest convenience. We can provide updated "
                "KYC documents if needed. — TechCore Solutions Finance Team"
            )
            return QueryResult(target="supplier", question=question, response=response, channel=channel), -0.15

        response = (
            "This is Rajesh from TechCore Solutions. We have NOT sent any bank account change "
            "request. Our bank account is still 1234567890 with SBI. We also have not sent any "
            "invoice from techcore-solutions.com — our domain is techcore-solutions.in. This "
            "looks like a fraud attempt. Please do NOT process payment to the new account. "
            "We will file a complaint with our legal team."
        )
        return QueryResult(target="supplier", question=question, response=response, channel=channel), 0.15

    def simulate_query_internal(self, department: str, question: str) -> Tuple[QueryResult, float]:
        """Simulate asking an internal department a question.

        ``department`` is matched case-insensitively. Security, legal, finance
        and procurement have scripted replies; any other department gets a
        generic reply and a 0.02 reward.

        Returns:
            ``(QueryResult, reward)`` with the lower-cased department as target.
        """
        dept = department.lower()
        if dept == "security":
            response = (
                "Security Team: We will initiate a BEC investigation. The lookalike domain "
                "techcore-solutions.com was registered 3 days before the invoice date. This is "
                "a classic Business Email Compromise pattern. Do NOT process any payment. "
                "We are preserving email headers for forensic analysis."
            )
            return QueryResult(target="security", question=question, response=response, channel="internal"), 0.10
        elif dept == "legal":
            response = (
                "Legal Team: Based on the fraud indicators you've documented, we recommend: "
                "1) Immediate payment block, 2) Formal complaint to cybercrime authorities, "
                "3) Supplier audit of TechCore Solutions, 4) Review of all recent invoices "
                "from this supplier."
            )
            return QueryResult(target="legal", question=question, response=response, channel="internal"), 0.08
        elif dept == "finance":
            response = "Finance: Payment has been blocked pending investigation. No funds released."
            return QueryResult(target="finance", question=question, response=response, channel="internal"), 0.04
        elif dept == "procurement":
            response = "Procurement: PO-2024-0955 was raised on March 8. Standard 2-day processing for IT equipment."
            return QueryResult(target="procurement", question=question, response=response, channel="internal"), 0.03
        response = f"{department.title()}: No specific information available."
        return QueryResult(target=dept, question=question, response=response, channel="internal"), 0.02

    def simulate_apply_rule(self, rule_id: str) -> Tuple[str, float]:
        """Simulate applying a business rule.

        Returns:
            ``(detail, reward)``. Rule IDs outside the table below yield a
            "not applicable" message and a -0.03 reward.
        """
        rules = {
            "fraud_hold": ("APPLIED: Fraud hold activated. All payments to this supplier frozen pending investigation.", 0.10),
            "rejection_with_reason": ("APPLIED: Invoice rejected with documented fraud signals.", 0.06),
            "bank_change_verification": ("APPLIED: Bank change verification protocol activated. Phone verification required per POL-009.", 0.08),
            "escalate_to_security": ("APPLIED: Case escalated to Information Security team for BEC investigation.", 0.08),
        }
        detail, reward = rules.get(rule_id, (f"Rule '{rule_id}' not applicable.", -0.03))
        return detail, reward

    def simulate_make_decision(self, decision: str, reason: str, ep: EpisodeData) -> float:
        """Return the step reward for a final decision.

        "reject" and "hold" earn a base reward plus a bonus for each of the
        four core fraud checks already run in ``ep``. "approve" and
        "partial_approve" are penalised; any other decision scores 0.0.
        ``reason`` is not used by this task.
        """
        checks_run = {c.check_name for c in ep.checks}
        signals = sum([
            "bank_account_verification" in checks_run,
            "gst_verification" in checks_run,
            "grn_match" in checks_run,
            "email_domain_verification" in checks_run,
        ])

        if decision == "reject":
            return 0.10 + 0.05 * signals
        elif decision == "approve":
            return -0.40
        elif decision == "partial_approve":
            return -0.20
        elif decision == "hold":
            return 0.08 + 0.03 * signals
        return 0.0

    def simulate_route_to(self, team: str, notes: str, ep: EpisodeData) -> float:
        """Return the step reward for routing the case to ``team``.

        The team name is matched case-insensitively; unknown teams score 0.0.
        ``notes`` and ``ep`` are not used by this task.
        """
        routes = {"legal": 0.14, "security": 0.12, "finance": 0.08, "procurement": 0.06}
        return routes.get(team.lower(), 0.0)

    def simulate_close(self, summary: str, ep: EpisodeData) -> float:
        """Return the step reward for closing the case.

        Pays 0.06 only when the episode is not yet closed and the recorded
        decision is "reject"; otherwise 0.0. ``summary`` is not used.
        """
        if not ep.closed and ep.decision == "reject":
            return 0.06
        return 0.0

    def grade(self, ep: EpisodeData) -> Dict[str, float]:
        """Score a finished episode and break the result into sub-scores.

        Supplier queries are classified by channel without regard to case,
        the same way ``simulate_query_supplier`` classifies them, so an
        "Email" query is penalised here just as it was during the episode.

        Returns:
            A dict with the clamped overall ``score`` (0.0 to 1.0), the
            integer ``signals_found`` count, and each component score rounded
            to four decimal places.
        """
        checks_run = {c.check_name for c in ep.checks}

        bank_found = "bank_account_verification" in checks_run
        gst_found = "gst_verification" in checks_run
        qty_found = "grn_match" in checks_run
        domain_found = "email_domain_verification" in checks_run
        price_found = "price_check" in checks_run

        # Diagnosis
        d = ((0.12 if bank_found else 0) + (0.12 if gst_found else 0)
             + (0.10 if qty_found else 0) + (0.10 if domain_found else 0)
             + (0.06 if price_found else 0))

        # Investigation — reward phone, penalise email
        i = 0.0
        for q in ep.queries:
            if q.target != "supplier":
                continue
            # The stored channel keeps the caller's original casing, so fold
            # it before comparing; non-string channels are left untouched.
            channel = q.channel.lower() if isinstance(q.channel, str) else q.channel
            if channel in self._EMAIL_CHANNELS:
                i -= 0.15
            else:
                i += 0.10
        queried_targets = {q.target for q in ep.queries}
        if "legal" in queried_targets:
            i += 0.06
        if "security" in queried_targets:
            i += 0.06

        # Decision
        signals = sum([bank_found, gst_found, qty_found, domain_found])
        dec = 0.0
        if ep.decision == "reject":
            dec = 0.08 + 0.03 * signals
        elif ep.decision == "approve":
            dec = -0.35
        elif ep.decision == "partial_approve":
            dec = -0.15
        elif ep.decision == "hold":
            dec = 0.06

        # Routing
        routes = set(ep.routed_to)
        route = ((0.10 if "legal" in routes else 0)
                 + (0.06 if "security" in routes else 0)
                 + (0.04 if "finance" in routes else 0))

        # Closure — only counts when the case was closed after a rejection
        closure = 0.06 if (ep.closed and ep.decision == "reject") else 0.0

        # Efficiency — full 0.04 up to 12 steps, minus 0.002 per extra step
        eff = max(0.0, 0.04 - 0.002 * max(0, ep.step_count - 12))

        total = d + i + dec + route + closure + eff
        return {
            "score": round(max(0.0, min(1.0, total)), 4),
            "signals_found": sum([bank_found, gst_found, qty_found, domain_found, price_found]),
            "diagnosis_score": round(d, 4),
            "investigation_score": round(i, 4),
            "decision_score": round(dec, 4),
            "routing_score": round(route, 4),
            "closure_score": round(closure, 4),
            "efficiency_score": round(eff, 4),
        }


# ---------------------------------------------------------------------------
# Task Registry
# ---------------------------------------------------------------------------

# Maps each public task ID to the task class that implements it.
TASK_REGISTRY: Dict[str, type] = {
    "task1_price_variance": PriceVarianceTask,
    "task2_duplicate_tax": DuplicateTaxErrorTask,
    "task3_compound_fraud": CompoundFraudTask,
}

# Task IDs in registration order (iterating a dict yields its keys).
ALL_TASKS = list(TASK_REGISTRY)


def make_task(task_id: str) -> BaseTask:
    """Build a fresh task instance for *task_id*.

    Raises:
        ValueError: if *task_id* is not a key of ``TASK_REGISTRY``; the
            message lists the available task IDs.
    """
    task_cls = TASK_REGISTRY.get(task_id)
    if task_cls is not None:
        return task_cls()
    raise ValueError(f"Unknown task '{task_id}'. Available: {ALL_TASKS}")