Spaces:

YUS200619
/

invoice-exception-handler

Sleeping

App Files Files Community

YUS200619 committed 30 days ago

Commit

6ed2433

1 Parent(s): d8c9b01

fix: correct field names in build_prompt to match Pydantic models

Browse files

Files changed (1) hide show

inference.py +95 -137

inference.py CHANGED Viewed

@@ -11,6 +11,7 @@ Usage:
     export HF_TOKEN="your-token"
     python inference.py
 """
 from __future__ import annotations
 import json
@@ -20,152 +21,115 @@ import sys
 from openai import OpenAI
-from env import InvoiceExceptionEnv, Action, ALL_TASKS
 # ---------------------------------------------------------------------------
-# Configuration from environment variables
 # ---------------------------------------------------------------------------
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
-MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
-HF_TOKEN = os.getenv("HF_TOKEN")
 # ---------------------------------------------------------------------------
-# System prompt — tells the LLM how to act
 # ---------------------------------------------------------------------------
 SYSTEM_PROMPT = """You are an expert Accounts Payable (AP) analyst handling flagged invoice exceptions.
-⚠️ CRITICAL RULE: If there is ANY suspicion of bank account fraud, BEC attack, or
-supplier impersonation, you MUST contact the supplier via PHONE (channel="phone"),
-NEVER via email. Emailing a potentially compromised account will contact the fraudster
-and incur a severe penalty.
-You have access to a document packet: Purchase Order (PO), Invoice, Goods Receipt Note
-(GRN), Supplier Master, and an Exception Flag explaining why this invoice was flagged.
-The actual document values are provided in each prompt — use them to reason.
-You must investigate the root cause, apply business rules, make a decision, and close the case.
-**Your action space** (respond with exactly ONE JSON action per turn):
-1. inspect_field: {"type": "inspect_field", "params": {"document": "invoice|po|grn|supplier_master", "field": "field_name"}}
-2. cross_check: {"type": "cross_check", "params": {"field": "field_name", "doc_a": "doc1", "doc_b": "doc2"}}
-3. run_check: {"type": "run_check", "params": {"check_name": "check_name"}}
-4. query_supplier: {"type": "query_supplier", "params": {"question": "your question", "channel": "phone|email"}}
-5. query_internal: {"type": "query_internal", "params": {"department": "dept_name", "question": "your question"}}
-6. apply_rule: {"type": "apply_rule", "params": {"rule_id": "rule_id"}}
-7. make_decision: {"type": "make_decision", "params": {"decision": "approve|reject|hold|partial_approve", "reason": "explanation"}}
-8. route_to: {"type": "route_to", "params": {"team": "team_name", "notes": "routing notes"}}
-9. close_case: {"type": "close_case", "params": {"summary": "audit trail summary"}}
-**Rules:**
-- Always investigate before making a decision
-- Never approve without running checks first
-- Compare document values carefully — look for mismatches between PO, Invoice, GRN, and Supplier Master
-- If bank account or email domain looks suspicious, use phone channel for supplier queries
-- Respond with ONLY a JSON object, no extra text
-"""
 # ---------------------------------------------------------------------------
-# Prompt builder
 # ---------------------------------------------------------------------------
 def build_prompt(obs, step: int, max_steps: int, history: list) -> str:
-    """Build the user prompt from the current observation state, including document data."""
-    # Build GRN summary safely from the dict-based items_received
-    grn_items = obs.grn.items_received
-    grn_received = sum(item.get("quantity_received", 0) for item in grn_items)
-    grn_pending = sum(item.get("quantity_pending", 0) for item in grn_items)
-    grn_details = "; ".join(
-        f"{item.get('description', 'item')}: {item.get('quantity_received', '?')} received, {item.get('quantity_pending', 0)} pending"
-        for item in grn_items
-    )
     lines = [
         f"Step {step} of {max_steps}.",
-        f"",
         f"EXCEPTION FLAG: {obs.exception_flag.flag_code}",
         f"{obs.exception_flag.flag_description}",
-        f"",
-        f"=== DOCUMENT SUMMARY ===",
-        f"PO #{obs.purchase_order.po_number} | Total: INR {obs.purchase_order.total_amount:,.2f} | Terms: {obs.purchase_order.payment_terms}",
-        f"PO Line Items:",
     ]
-    for item in obs.purchase_order.line_items:
-        lines.append(f"  - {item.description}: qty={item.quantity}, unit_price=INR {item.unit_price:,.2f}, total=INR {item.total:,.2f}")
-    lines.extend([
-        f"",
-        f"Invoice #{obs.invoice.invoice_number} | Date: {obs.invoice.invoice_date} | Total: INR {obs.invoice.total_amount:,.2f}",
-        f"Invoice Subtotal: INR {obs.invoice.subtotal:,.2f} | Tax ({obs.invoice.tax_rate}%): INR {obs.invoice.tax_amount:,.2f}",
-        f"Invoice Bank Account: {obs.invoice.bank_account} ({obs.invoice.bank_name})",
-        f"Invoice GSTIN: {obs.invoice.supplier_gstin}",
-        f"Invoice Email: {obs.invoice.supplier_email}",
-        f"Invoice Line Items:",
-    ])
-    for item in obs.invoice.line_items:
-        lines.append(f"  - {item.description}: qty={item.quantity}, unit_price=INR {item.unit_price:,.2f}, total=INR {item.total:,.2f}")
-    lines.extend([
-        f"",
-        f"GRN #{obs.grn.grn_number} | Date: {obs.grn.receipt_date}",
-        f"GRN Items: {grn_details}",
-        f"GRN Total received: {grn_received}, pending: {grn_pending}",
-        f"",
-        f"Supplier Master: {obs.supplier_master.supplier_name} ({obs.supplier_master.supplier_id})",
-        f"Supplier Bank Account: {obs.supplier_master.bank_account} ({obs.supplier_master.bank_name})",
-        f"Supplier GSTIN: {obs.supplier_master.gstin}",
-        f"Supplier Email Domain: {obs.supplier_master.registered_domain}",
-        f"Supplier Phone: {obs.supplier_master.contact_phone}",
-        f"",
-        f"=== AVAILABLE ACTIONS ===",
-        f"Available checks: {', '.join(obs.available_checks)}",
-        f"Available rules: {', '.join(obs.available_rules)}",
-        f"",
-        f"Knowledge base:",
-    ])
     for entry in obs.knowledge_base:
         lines.append(f"  - {entry}")
     lines.append("")
-    lines.append(f"Cumulative reward so far: {obs.cumulative_reward:.2f}")
-    lines.append(f"Case status: {obs.case_status}")
     if obs.checks_run:
-        lines.append(f"Checks already run:")
-        for c in obs.checks_run:
-            lines.append(f"  - {c.check_name}: {'PASSED' if c.passed else 'FAILED'} — {c.detail[:100]}")
     if obs.queries:
-        lines.append(f"Queries made:")
-        for q in obs.queries:
-            lines.append(f"  - {q.target} (via {q.channel}): {q.response[:100]}...")
     if obs.inspections:
-        lines.append(f"Fields inspected:")
-        for i in obs.inspections:
-            lines.append(f"  - {i.document}.{i.field}: {str(i.value)[:100]}")
     if obs.rules_applied:
-        lines.append(f"Rules applied: {', '.join(obs.rules_applied)}")
     if obs.decision:
-        lines.append(f"Decision made: {obs.decision}")
     if obs.routed_to:
-        lines.append(f"Routed to: {', '.join(obs.routed_to)}")
     if history:
         lines.append("")
-        lines.append("Recent history:")
         for h in history[-5:]:
             lines.append(f"  {h}")
     lines.append("")
-    lines.append("What is your next action? Respond with a single JSON object.")
     return "\n".join(lines)
 # ---------------------------------------------------------------------------
 # LLM caller
 # ---------------------------------------------------------------------------
@@ -177,7 +141,7 @@ def call_llm(client: OpenAI, user_prompt: str) -> str:
             model=MODEL_NAME,
             messages=[
                 {"role": "system", "content": SYSTEM_PROMPT},
-                {"role": "user", "content": user_prompt},
             ],
             temperature=0.1,
             max_tokens=256,
@@ -187,30 +151,28 @@ def call_llm(client: OpenAI, user_prompt: str) -> str:
         print(f"LLM call failed: {e}", file=sys.stderr)
         return '{"type": "run_check", "params": {"check_name": "po_match"}}'
 # ---------------------------------------------------------------------------
 # Action parser
 # ---------------------------------------------------------------------------
 def parse_action(raw_text: str) -> dict:
     """
-    Parse the model's response into an action dict.
-    Handles markdown code fences, extra whitespace, and minor formatting errors.
-    Falls back to run_check(po_match) if parsing fails.
     """
     text = raw_text.strip()
-    # Remove ```json or ``` fences if present
     if text.startswith("```"):
-        lines = text.split("\n")
-        text = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:])
     try:
         return json.loads(text.strip())
     except json.JSONDecodeError:
         pass
-    # Try to find JSON within the text
     match = re.search(r'\{.*\}', text, re.DOTALL)
     if match:
         try:
@@ -218,42 +180,38 @@ def parse_action(raw_text: str) -> dict:
         except json.JSONDecodeError:
             pass
-    # Safe fallback
     return {"type": "run_check", "params": {"check_name": "po_match"}}
 # ---------------------------------------------------------------------------
-# Task runner
 # ---------------------------------------------------------------------------
 def run_task(client: OpenAI, env: InvoiceExceptionEnv, task_id: str) -> tuple:
-    """Run one task episode and return (steps_taken, score, rewards)."""
-    rewards = []
     print(f"[START] task={task_id} env=invoice-exception-handler model={MODEL_NAME}", flush=True)
     obs = env.reset(task_id)
-    max_steps = env._task.max_steps  # read from the task itself
-    history = []
     for step in range(1, max_steps + 1):
-        # Build prompt from observation
         user_prompt = build_prompt(obs, step, max_steps, history)
-        # Call LLM
-        raw = call_llm(client, user_prompt)
         action_dict = parse_action(raw)
-        # Execute
         try:
             result = env.step(action_dict)
             reward = result.reward
-            done = result.done
-            error = None
-        except Exception as e:
             reward = 0.0
-            done = False
-            error = str(e)
             result = None
         rewards.append(reward)
@@ -268,14 +226,14 @@ def run_task(client: OpenAI, env: InvoiceExceptionEnv, task_id: str) -> tuple:
         history.append(f"Step {step}: {action_str} -> reward {reward:+.2f}")
-        if result:
             obs = result.observation
         if done:
             break
-    score = env.grade()["score"]
-    success = score >= 0.5
     steps_taken = min(step, max_steps)
     rewards_str = ",".join(f"{r:.2f}" for r in rewards)
@@ -287,24 +245,24 @@ def run_task(client: OpenAI, env: InvoiceExceptionEnv, task_id: str) -> tuple:
     return steps_taken, score, rewards
 # ---------------------------------------------------------------------------
-# Main
 # ---------------------------------------------------------------------------
 def main() -> None:
-    """Run inference on all tasks."""
     client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
-    env = InvoiceExceptionEnv(seed=42)
-    all_scores = []
     for task_id in ALL_TASKS:
         _, score, _ = run_task(client, env, task_id)
         all_scores.append(score)
     avg = sum(all_scores) / len(all_scores) if all_scores else 0.0
-    print(f"\nAverage score: {avg:.3f}", flush=True)
 if __name__ == "__main__":
-    main()

     export HF_TOKEN="your-token"
     python inference.py
 """
 from __future__ import annotations
 import json
 from openai import OpenAI
+from env import InvoiceExceptionEnv, ALL_TASKS
 # ---------------------------------------------------------------------------
+# Configuration — read from environment variables exactly as the spec requires
 # ---------------------------------------------------------------------------
 API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
+MODEL_NAME   = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
+HF_TOKEN     = os.getenv("HF_TOKEN")   # no default — spec requirement
 # ---------------------------------------------------------------------------
+# System prompt
 # ---------------------------------------------------------------------------
 SYSTEM_PROMPT = """You are an expert Accounts Payable (AP) analyst handling flagged invoice exceptions.
+You receive a full document packet: Purchase Order (PO), Invoice, Goods Receipt Note (GRN),
+Supplier Master record, and an Exception Flag explaining why the invoice was flagged.
+Your job: investigate the root cause, apply business rules, make a decision, and close the case.
+CRITICAL RULE: If there is ANY suspicion of bank account fraud or BEC attack, contact the
+supplier via PHONE only — never via email. Emailing may reach the fraudster.
+Your action space — respond with exactly ONE JSON object per turn:
+1. {"type": "inspect_field",  "params": {"document": "invoice|po|grn|supplier_master", "field": "field_name"}}
+2. {"type": "cross_check",    "params": {"field": "field_name", "doc_a": "doc1", "doc_b": "doc2"}}
+3. {"type": "run_check",      "params": {"check_name": "check_name"}}
+4. {"type": "query_supplier", "params": {"question": "your question", "channel": "phone|email"}}
+5. {"type": "query_internal", "params": {"department": "dept_name", "question": "your question"}}
+6. {"type": "apply_rule",     "params": {"rule_id": "rule_id"}}
+7. {"type": "make_decision",  "params": {"decision": "approve|reject|hold|partial_approve", "reason": "explanation"}}
+8. {"type": "route_to",       "params": {"team": "team_name", "notes": "routing notes"}}
+9. {"type": "close_case",     "params": {"summary": "audit trail summary"}}
+Rules:
+- Always run checks BEFORE making a decision
+- Never approve without verifying the root cause
+- Use phone (not email) if fraud is suspected
+- Respond with ONLY a JSON object, no explanation, no markdown fences
+"""
 # ---------------------------------------------------------------------------
+# Prompt builder — shows the LLM the actual document data
 # ---------------------------------------------------------------------------
 def build_prompt(obs, step: int, max_steps: int, history: list) -> str:
+    """Build the user prompt from the current observation state."""
+    po  = obs.purchase_order
+    inv = obs.invoice
+    grn = obs.grn
+    sm  = obs.supplier_master
     lines = [
         f"Step {step} of {max_steps}.",
+        "",
         f"EXCEPTION FLAG: {obs.exception_flag.flag_code}",
         f"{obs.exception_flag.flag_description}",
+        "",
+        "=== DOCUMENT DATA ===",
+        f"PO #{po.po_number} | Supplier: {po.vendor_name} | Total: {po.total_amount} | Terms: {po.payment_terms}",
+        f"PO lines: {[(i.description[:30], 'qty='+str(i.quantity), 'unit='+str(i.unit_price)) for i in po.line_items]}",
+        "",
+        f"Invoice #{inv.invoice_number} | Date: {inv.invoice_date} | Subtotal: {inv.subtotal} | Tax: {inv.tax_amount} | Total: {inv.total_amount}",
+        f"Invoice GSTIN: {inv.supplier_gstin} | Bank: {inv.bank_account} {inv.ifsc_code}",
+        f"Invoice lines: {[(i.description[:30], 'qty='+str(i.quantity), 'unit='+str(i.unit_price)) for i in inv.line_items]}",
+        "",
+        f"GRN: received={sum(i.get('quantity_received', 0) for i in grn.items_received)} units | pending={sum(i.get('quantity_pending', 0) for i in grn.items_received)} units",
+        "",
+        f"Supplier Master: GSTIN={sm.gstin} | Bank={sm.bank_account} {sm.ifsc_code} | Domain={sm.registered_domain}",
+        "",
+        "=== AVAILABLE ACTIONS ===",
+        f"Checks you can run: {', '.join(obs.available_checks)}",
+        f"Rules you can apply: {', '.join(obs.available_rules)}",
+        "",
+        "Knowledge base (company policies):",
     ]
     for entry in obs.knowledge_base:
         lines.append(f"  - {entry}")
     lines.append("")
+    lines.append(f"Cumulative reward: {obs.cumulative_reward:.2f} | Status: {obs.case_status}")
     if obs.checks_run:
+        lines.append(f"Checks already run: {', '.join(c.check_name for c in obs.checks_run)}")
     if obs.queries:
+        lines.append(f"Queries already made: {', '.join(q.target for q in obs.queries)}")
     if obs.inspections:
+        lines.append(f"Fields already inspected: {', '.join(f'{i.document}.{i.field}' for i in obs.inspections)}")
     if obs.rules_applied:
+        lines.append(f"Rules already applied: {', '.join(obs.rules_applied)}")
     if obs.decision:
+        lines.append(f"Decision already made: {obs.decision}")
     if obs.routed_to:
+        lines.append(f"Already routed to: {', '.join(obs.routed_to)}")
     if history:
         lines.append("")
+        lines.append("Recent steps:")
         for h in history[-5:]:
             lines.append(f"  {h}")
     lines.append("")
+    lines.append("What is your next action? Respond with a single JSON object only.")
     return "\n".join(lines)
 # ---------------------------------------------------------------------------
 # LLM caller
 # ---------------------------------------------------------------------------
             model=MODEL_NAME,
             messages=[
                 {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "user",   "content": user_prompt},
             ],
             temperature=0.1,
             max_tokens=256,
         print(f"LLM call failed: {e}", file=sys.stderr)
         return '{"type": "run_check", "params": {"check_name": "po_match"}}'
 # ---------------------------------------------------------------------------
 # Action parser
 # ---------------------------------------------------------------------------
 def parse_action(raw_text: str) -> dict:
     """
+    Parse the model response into an action dict.
+    Strips markdown fences, handles whitespace, falls back on parse failure.
     """
     text = raw_text.strip()
+    # Strip ```json ... ``` or ``` ... ``` fences
     if text.startswith("```"):
+        parts = text.split("\n")
+        text = "\n".join(parts[1:-1] if parts[-1].strip() == "```" else parts[1:])
     try:
         return json.loads(text.strip())
     except json.JSONDecodeError:
         pass
+    # Try to find JSON anywhere in the text
     match = re.search(r'\{.*\}', text, re.DOTALL)
     if match:
         try:
         except json.JSONDecodeError:
             pass
+    # Safe fallback — never crash
     return {"type": "run_check", "params": {"check_name": "po_match"}}
 # ---------------------------------------------------------------------------
+# Task runner — one full episode
 # ---------------------------------------------------------------------------
 def run_task(client: OpenAI, env: InvoiceExceptionEnv, task_id: str) -> tuple:
+    """Run one task episode. Returns (steps_taken, score, rewards)."""
+    rewards: list[float] = []
     print(f"[START] task={task_id} env=invoice-exception-handler model={MODEL_NAME}", flush=True)
     obs = env.reset(task_id)
+    max_steps = env._task.max_steps   # reads the correct limit per task: 18 / 20 / 25
+    history: list[str] = []
     for step in range(1, max_steps + 1):
         user_prompt = build_prompt(obs, step, max_steps, history)
+        raw        = call_llm(client, user_prompt)
         action_dict = parse_action(raw)
         try:
             result = env.step(action_dict)
             reward = result.reward
+            done   = result.done
+            error  = None
+        except Exception as exc:
             reward = 0.0
+            done   = False
+            error  = str(exc)
             result = None
         rewards.append(reward)
         history.append(f"Step {step}: {action_str} -> reward {reward:+.2f}")
+        if result is not None:
             obs = result.observation
         if done:
             break
+    score       = env.grade()["score"]
+    success     = score >= 0.5
     steps_taken = min(step, max_steps)
     rewards_str = ",".join(f"{r:.2f}" for r in rewards)
     return steps_taken, score, rewards
 # ---------------------------------------------------------------------------
+# Main — run all three tasks in sequence
 # ---------------------------------------------------------------------------
 def main() -> None:
+    """Entry point — runs inference on all tasks and prints average score."""
     client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
+    env    = InvoiceExceptionEnv(seed=42)
+    all_scores: list[float] = []
     for task_id in ALL_TASKS:
         _, score, _ = run_task(client, env, task_id)
         all_scores.append(score)
     avg = sum(all_scores) / len(all_scores) if all_scores else 0.0
+    print(f"\nAverage score across all tasks: {avg:.3f}", flush=True)
 if __name__ == "__main__":
+    main()