Spaces:

prashantmatlani
/

csa01

Sleeping

App Files Files Community

prashantmatlani committed 25 days ago

Commit

0894e25

1 Parent(s): 8e156dc

Implemented agent self-learning and self-correction without explicit training

Browse files

Files changed (6) hide show

agent_llm.py +65 -91
app/dataset.py +201 -88
app/env.py +110 -148
graders.py:Zone.Identifier +0 -0
inference.py +2 -1
tasks.py:Zone.Identifier +0 -0

agent_llm.py CHANGED Viewed

@@ -15,6 +15,7 @@ import json
 import time
 #from groq import Groq
 #from openai import OpenAI
 from app.env import CustomerSupportEnv
@@ -25,7 +26,7 @@ from app.env import CustomerSupportEnv
 #client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 # =========================
-# OPTIONAL IMPORTS (SAFE)
 # =========================
 try:
     from openai import OpenAI
@@ -45,61 +46,47 @@ def get_llm_client():
     if OpenAI is None:
         return None
-    api_key = os.getenv("API_KEY") or os.getenv("GROQ_API_KEY")
-    if not api_key:
-        return None  # 🔥 critical
-    try:
-        return OpenAI(
-            base_url=os.getenv(
-                "API_BASE_URL",
-                "https://router.huggingface.co/v1"
-            ),
-            api_key=api_key
-        )
-    except Exception:
         return None
 client = get_llm_client()
 # =========================
-# PROMPT (STRICT + MINIMAL)
 # =========================
-def build_prompt(obs, valid_actions):
     return f"""
-You are a decision agent for customer support.
-Return ONLY JSON.
-INPUT:
-Customer message: {obs["customer_message"]}
-Known info: {obs["known_info"]}
-Required fields: {obs.get("required", [])}
-RULES:
-1. First classify (billing / technical / delivery)
-2. Then collect ALL required fields
-3. Then resolve
-4. NEVER resolve early
-5. DO NOT ask for fields already known
-VALID ACTION TYPES:
-- classify
-- ask_info
-- resolve
-FORMAT:
-{{
-  "action": {{
-    "type": "...",
-    "category": "...",
-    "priority": "...",
-    "field": "..."
-  }}
-}}
-"""
 # =========================
@@ -107,20 +94,20 @@ FORMAT:
 # =========================
 def call_llm(prompt):
     if client is None:
-        return None  # 🔥 triggers fallback
     try:
         completion = client.chat.completions.create(
             model=os.getenv("MODEL_NAME", "unknown-model"),
             messages=[{"role": "user", "content": prompt}],
-            temperature=0.2,
             response_format={"type": "json_object"}
         )
         return completion.choices[0].message.content.strip()
     except Exception:
-        return None  # 🔥 triggers fallback
 # =========================
@@ -144,22 +131,21 @@ def parse_output(text):
 # =========================
-# FALLBACK (CRITICAL)
 # =========================
-def fallback_policy(obs):
-    msg = obs["customer_message"].lower()
     known = obs.get("known_info", {})
     required = obs.get("required", [])
-    # classify once
-    if "category" not in known:
-        if "refund" in msg or "charged" in msg:
-            return {"type": "classify", "category": "billing", "priority": "high"}
-        if "delivery" in msg or "order" in msg:
-            return {"type": "classify", "category": "delivery", "priority": "high"}
-        return {"type": "classify", "category": "technical", "priority": "medium"}
-    # ask missing (🔥 critical)
     missing = [f for f in required if f not in known]
     if missing:
         return {"type": "ask_info", "field": missing[0]}
@@ -188,46 +174,34 @@ def is_valid_action(action, valid_actions):
     return True
 # =========================
-# ACTION SELECTOR
 # =========================
 def get_action(obs, valid_actions):
-    #known = obs.get("known_info", {})
-    # HARD GUARD: prevent re-classification
-    #if "category" in known:
-    #    valid_actions = [a for a in valid_actions if a["type"] != "classify"]
-    known = obs.get("known_info", {})
-    required = obs.get("required", [])
-    missing = [f for f in required if f not in known]
-    # HARD OVERRIDE (prevents LLM mistakes)
-    if "category" in known:
-        if missing:
-            return {"type": "ask_info", "field": missing[0]}
-        else:
-            return {"type": "resolve"}
-    prompt = build_prompt(obs, valid_actions)
-    for _ in range(2):  # retry loop
-        try:
-            output = call_llm(prompt)
-            action = parse_output(output)
-            if is_valid_action(action, valid_actions):
                 return action
-        except Exception:
-            time.sleep(0.5)
-    # fallback if LLM fails
-    return fallback_policy(obs)
 # =========================

 import time
 #from groq import Groq
 #from openai import OpenAI
+import random
 from app.env import CustomerSupportEnv
 #client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 # =========================
+# PURPOSE: Safe OpenAI client init
 # =========================
 try:
     from openai import OpenAI
     if OpenAI is None:
         return None
+    key = os.getenv("API_KEY") or os.getenv("GROQ_API_KEY")
+    if not key:
         return None
+    return OpenAI(
+        base_url=os.getenv("API_BASE_URL", "https://router.huggingface.co/v1"),
+        api_key=key
+    )
 client = get_llm_client()
 # =========================
+# PURPOSE: Prompt - Strict + Minimal - encourages uncertainty-aware reasoning
 # =========================
+def build_prompt(obs):
     return f"""
+    You are a customer support agent.
+    Customer message:
+    {obs.get("customer_message")}
+    Known info:
+    {obs.get("known_info")}
+    Required fields:
+    {obs.get("required")}
+    Your goal is to resolve the ticket efficiently.
+    Think carefully:
+    - You may revise earlier decisions
+    - Do not commit too early
+    - Ask missing info if unsure
+    - The message may be ambiguous
+    - Do not assume category prematurely
+    - Ask only necessary questions
+    - Avoid redundant actions
+    Return JSON:
+    {{"action": {{...}}}}
+    """
 # =========================
 # =========================
 def call_llm(prompt):
     if client is None:
+        return None  # triggers fallback
     try:
         completion = client.chat.completions.create(
             model=os.getenv("MODEL_NAME", "unknown-model"),
             messages=[{"role": "user", "content": prompt}],
+            temperature=0.3,
             response_format={"type": "json_object"}
         )
         return completion.choices[0].message.content.strip()
     except Exception:
+        return None  # triggers fallback
 # =========================
 # =========================
+# PURPOSE: Fallback is intentionally imperfect
 # =========================
+def fallback(obs):
     known = obs.get("known_info", {})
     required = obs.get("required", [])
+    # allow reclassification even if already classified
+    if "category" not in known or random.random() < 0.3:
+        return {
+            "type": "classify",
+            "category": "technical",
+            "priority": "medium"
+        }
     missing = [f for f in required if f not in known]
     if missing:
         return {"type": "ask_info", "field": missing[0]}
     return True
 # =========================
+# PURPOSE: Hybrid control (LLM + adaptive fallback)
 # =========================
 def get_action(obs, valid_actions):
+    prompt = build_prompt(obs)
+    if client:
+        try:
+            resp = client.chat.completions.create(
+                model=os.getenv("MODEL_NAME"),
+                messages=[{"role": "user", "content": prompt}],
+                temperature=0.4,
+                response_format={"type": "json_object"}
+            )
+            text = resp.choices[0].message.content
+            parsed = json.loads(text)
+            action = parsed.get("action")
+            if action and "type" in action:
                 return action
+        except:
+            pass
+    return fallback(obs)
 # =========================

app/dataset.py CHANGED Viewed

@@ -2,113 +2,226 @@
 # app/dataset.py
 TICKETS = [
-    # Billing Issues
-    {
-        "ticket_id": "T1",
-        "customer_message": "I was charged twice for my order #1234. Please refund.",
-        "category": "billing",
-        "priority": "high",
-        "required_info": ["order_id"]
-    },
-    {
-        "ticket_id": "T2",
-        "customer_message": "I want to cancel my subscription and get a refund.",
-        "category": "billing",
-        "priority": "medium",
-        "required_info": ["account_email"]
-    },
-    {
-        "ticket_id": "T3",
-        "customer_message": "Why was I billed after cancelling my plan?",
-        "category": "billing",
-        "priority": "high",
-        "required_info": ["account_email"]
-    },
     {
-        "ticket_id": "T20",
-        "customer_message": "I was charged twice and want a refund.",
-        "category": "billing",
-        "priority": "high",
-        "required_info": ["order_id", "account_email"]
-    },
-    # Technical Issues
-    {
-        "ticket_id": "T4",
-        "customer_message": "I can't log into my account. It says invalid credentials.",
-        "category": "technical",
-        "priority": "high",
-        "required_info": ["account_email"]
-    },
-    {
-        "ticket_id": "T5",
-        "customer_message": "The app crashes every time I upload a file.",
-        "category": "technical",
-        "priority": "medium",
-        "required_info": ["device_type"]
-    },
-    {
-        "ticket_id": "T6",
-        "customer_message": "Page not loading on checkout.",
-        "category": "technical",
-        "priority": "high",
-        "required_info": ["browser"]
     },
     {
-        "ticket_id": "T21",
-        "customer_message": "App crashes when I try to checkout.",
-        "category": "technical",
-        "priority": "high",
-        "required_info": ["device_type", "browser"]
     },
     {
-        "ticket_id": "T12",
-        "customer_message": "App is very slow lately.",
-        "category": "technical",
-        "priority": "low",
-        "required_info": ["device_type"]
     },
-    # Account Issues
     {
-        "ticket_id": "T7",
-        "customer_message": "I forgot my password and can't reset it.",
-        "category": "account",
-        "priority": "medium",
-        "required_info": ["account_email"]
     },
     {
-        "ticket_id": "T8",
-        "customer_message": "My account got locked for no reason.",
-        "category": "account",
-        "priority": "high",
-        "required_info": ["account_email"]
     },
     {
-        "ticket_id": "T9",
-        "customer_message": "How do I change my registered email address?",
-        "category": "account",
-        "priority": "low",
-        "required_info": ["account_email"]
     },
-    # Edge Cases
     {
-        "ticket_id": "T10",
-        "customer_message": "Something is wrong with my account.",
-        "category": "other",
-        "priority": "medium",
-        "required_info": ["account_email"]
     },
     {
-        "ticket_id": "T11",
-        "customer_message": "I didn't receive my order but it shows delivered.",
-        "category": "other",
-        "priority": "high",
-        "required_info": ["order_id"]
     }
-    ]

 # app/dataset.py
+"""
+PURPOSE: Production-grade multi-intent dataset
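+- Introduces ambiguity: each ticket has several message variants and may fit more than one category (possible_categories)
+- Separates perceived intent (possible_categories) from true intent (ground_truth)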
+- Supports stochastic + difficulty-aware environments
+"""
 TICKETS = [
+    # =========================
+    # 1. BILLING vs DELIVERY
+    # =========================
     {
+        "ticket_id": "T001",
+        "variants": [
+            "I was charged but didn’t receive my order",
+            "Payment went through but nothing arrived",
+            "Got billed but package is missing"
+        ],
+        "noise": [
+            "pls check asap",
+            "this is urgent",
+            ""
+        ],
+        # AGENT CONFUSION SPACE
+        "possible_categories": ["billing", "delivery"],
+        "ground_truth": {
+            "category": "delivery",
+            "priority": "high",
+            "required_info": ["order_id", "account_email"]
+        }
     },
+    # =========================
+    # 2. TECH vs ACCOUNT
+    # =========================
     {
+        "ticket_id": "T002",
+        "variants": [
+            "I can’t log into my account",
+            "Login keeps failing with error",
+            "Account not accessible"
+        ],
+        "noise": [
+            "tried multiple times",
+            "not sure what's wrong",
+            ""
+        ],
+        "possible_categories": ["technical", "account"],
+        "ground_truth": {
+            "category": "account",
+            "priority": "medium",
+            "required_info": ["account_email", "device_type"]
+        }
     },
+    # =========================
+    # 3. BILLING vs TECH
+    # =========================
     {
+        "ticket_id": "T003",
+        "variants": [
+            "I got charged twice for the same order",
+            "Duplicate charge happened",
+            "Payment processed twice"
+        ],
+        "noise": [
+            "this is frustrating",
+            "",
+        ],
+        "possible_categories": ["billing", "technical"],
+        "ground_truth": {
+            "category": "billing",
+            "priority": "high",
+            "required_info": ["order_id", "account_email"]
+        }
     },
+    # =========================
+    # 4. DELIVERY (CLEAR)
+    # =========================
     {
+        "ticket_id": "T004",
+        "variants": [
+            "My order hasn’t arrived yet",
+            "Delivery is delayed",
+            "Still waiting for package"
+        ],
+        "noise": [
+            "been 5 days",
+            "",
+        ],
+        "possible_categories": ["delivery"],
+        "ground_truth": {
+            "category": "delivery",
+            "priority": "medium",
+            "required_info": ["order_id"]
+        }
     },
+    # =========================
+    # 5. TECH (AMBIGUOUS UI ISSUE)
+    # =========================
     {
+        "ticket_id": "T005",
+        "variants": [
+            "App crashes when I open it",
+            "Screen goes blank after launch",
+            "Something is wrong with the app"
+        ],
+        "noise": [
+            "happens randomly",
+            "",
+        ],
+        "possible_categories": ["technical"],
+        "ground_truth": {
+            "category": "technical",
+            "priority": "high",
+            "required_info": ["device_type", "browser"]
+        }
     },
+    # =========================
+    # 6. ACCOUNT vs BILLING
+    # =========================
     {
+        "ticket_id": "T006",
+        "variants": [
+            "My subscription is active but I can’t use features",
+            "Paid but features locked",
+            "Account says active but not working"
+        ],
+        "noise": [
+            "pls fix",
+            "",
+        ],
+        "possible_categories": ["account", "billing"],
+        "ground_truth": {
+            "category": "account",
+            "priority": "high",
+            "required_info": ["account_email"]
+        }
     },
+    # =========================
+    # 7. HARD: MULTI-LAYER ISSUE
+    # =========================
     {
+        "ticket_id": "T007",
+        "variants": [
+            "Order delayed and I was charged twice",
+            "Late delivery and duplicate payment issue",
+            "Package not here and billing looks wrong"
+        ],
+        "noise": [
+            "very frustrating",
+            "please resolve quickly",
+            ""
+        ],
+        "possible_categories": ["billing", "delivery"],
+        "ground_truth": {
+            "category": "billing",  # root cause focus
+            "priority": "high",
+            "required_info": ["order_id", "account_email"]
+        }
     },
+    # =========================
+    # 8. HARD: VAGUE + NOISY
+    # =========================
     {
+        "ticket_id": "T008",
+        "variants": [
+            "Something is wrong with my account",
+            "Not working properly",
+            "Issue with my profile"
+        ],
+        "noise": [
+            "not sure what exactly",
+            "pls help",
+            ""
+        ],
+        "possible_categories": ["technical", "account"],
+        "ground_truth": {
+            "category": "technical",
+            "priority": "medium",
+            "required_info": ["device_type"]
+        }
     }
+]

app/env.py CHANGED Viewed

@@ -6,10 +6,15 @@ from app.models import Observation, Action, Reward
 from app.dataset import TICKETS
 import random
 from graders import grade_easy, grade_medium, grade_hard
-from tasks import TASKS
 import sys
 DIFFICULTY_CONFIG = {
     "easy": {
         "max_steps": 8,
@@ -28,9 +33,9 @@ DIFFICULTY_CONFIG = {
     }
 }
-# --- TASKS ---
-#AVAILABLE_TASKS = TASKS
 AVAILABLE_TASKS = [
     {
         "id": "easy-info-collection",
@@ -74,37 +79,33 @@ class CustomerSupportEnv:
             },
         ]
-    # INTERNAL STATE REPRESENTATION
     def _get_observation(self):
-        total_required = len(self.ticket.get("required_info", []))
-        collected_required = sum(
-            1 for f in self.ticket.get("required_info", [])
-            if f in self.state_data["collected_info"]
-        )
-        info_progress = collected_required / max(1, total_required)
         return {
-        "ticket_id": self.ticket["ticket_id"],
-        "customer_message": self.ticket["customer_message"],
-        "history": [],
-        "known_info": self.state_data["collected_info"],
-        "required": self.ticket.get("required_info", []),  # FULL requirement space (agent uses this)
-        #"remaining_required": self.state_data["required_info"],   # OPTIONAL (env/debug/analysis); agent_llm shouldn't use this directly - it should infer from known_info + customer_message
-        "missing_required": [
-            f for f in self.ticket.get("required_info", [])
-            if f not in self.state_data["collected_info"]
-        ],
-        #"info_progress": len(self.state_data["collected_info"]) / 3,
-        "info_progress": info_progress,
-        "status": self.state_data["status"],
-        "step_count": self.state_data["steps_taken"],
-        "remaining_steps": self.max_steps - self.state_data["steps_taken"],
-        "difficulty": self.difficulty # difficulty awareness
         }
     def __init__(self, difficulty="medium", seed=None):
         self.difficulty = difficulty
@@ -117,82 +118,86 @@ class CustomerSupportEnv:
         self.max_steps = self.config["max_steps"]
         self.last_action = None
         # METRICS TRACKING
         self.episode_stats = []
     def list_tasks(self):
         return self.tasks
     def reset(self):
         self.last_action = None
-        self.current_episode_reward = 0.0
         self.current_steps = 0
         self.success = False
-        # 🎯 Controlled ticket sampling
         self.ticket = random.choice(TICKETS)
-        # 🎯 Inject stochasticity (controlled)
-        noisy_message = self._inject_noise(self.ticket["customer_message"])
         self.state_data = {
             "ticket_id": self.ticket["ticket_id"],
-            "customer_message": noisy_message,
-            "history": [],
             "status": "open",
-            "priority": None,
             "category": None,
-            "required_info": self._mask_required_info(self.ticket["required_info"]),
             "collected_info": {},
             "steps_taken": 0,
-            "max_steps": self.max_steps,
-            "ground_truth": self.ticket
         }
         return self._get_observation()
     def step(self, action: dict):
-         # SAFETY: ensure environment initialized
         if self.state_data is None:
-            print("step() called before reset — auto-resetting", flush=True)
             self.reset()
-        reward = 0.0
         done = False
         info = {}
-        #info = {
-        #"final_score": self._compute_final_score() if done else None
-        #}
         collected = self.state_data["collected_info"]
-        required = self.state_data["required_info"]
-        gt = self.ticket
         # -----------------------
-        # STEP PENALTY
         # -----------------------
-        reward -= 0.05
-        action_type = action.get("type")
-        # -----------------------
-        # REPEAT PENALTY
-        # -----------------------
-        if self.last_action == action:
-            reward -= 0.2
-        # -----------------------
-        # CLASSIFY
-        # -----------------------
-        if action_type == "classify":
-            collected["category"] = gt["category"]
-            collected["priority"] = gt["priority"]
-            reward += 0.2
         # -----------------------
         # ASK INFO
@@ -202,13 +207,10 @@ class CustomerSupportEnv:
             field = action.get("field")
             if field not in collected:
-                collected[field] = "sample_value"
-                reward += 0.3
-                if field in required:
-                    required.remove(field)
             else:
-                reward -= 0.3
         # -----------------------
         # RESOLVE
@@ -216,57 +218,26 @@ class CustomerSupportEnv:
         elif action_type == "resolve":
             done = True
-            final_score = 0.0
-            # classification
-            if collected.get("category") == gt.get("category"):
-                final_score += 0.3
-            if collected.get("priority") == gt.get("priority"):
-                final_score += 0.2
-            # required info
-            required_fields = gt.get("required_info", [])
-            if all(f in collected for f in required_fields):
-                final_score += 0.3
-                self.success = True
-            else:
-                reward -= 0.5
-            # resolve bonus
-            final_score += 0.2
-            reward += final_score
-            # efficiency bonus
-            optimal_steps = len(required_fields) + 1
-            if self.state_data["steps_taken"] <= optimal_steps:
                 reward += 0.3
-            # episode stats
-            collected_required = sum(1 for f in required_fields if f in collected)
-            episode_data = {
-                "success": self.success,
-                "steps": self.state_data["steps_taken"],
-                "reward": reward,
-                "info_efficiency": collected_required / max(1, len(required_fields))
-            }
-            self.episode_stats.append(episode_data)
-            info = {
-                "final_score": final_score,
-                "task_success": self.success,
-                "collected_info": collected
-            }
-            self.last_action = action
-            return self._get_observation(), reward, done, info
-        # -----------------------
-        # INVALID
-        # -----------------------
         else:
             reward -= 0.3
@@ -274,35 +245,14 @@ class CustomerSupportEnv:
         # STEP UPDATE
         # -----------------------
         self.state_data["steps_taken"] += 1
-        self.current_steps += 1
-        # -----------------------
-        # MAX STEP TERMINATION
-        # -----------------------
-        if self.state_data["steps_taken"] >= self.state_data["max_steps"]:
             done = True
-            reward -= 2.0
-            # record failure episode
-            self.episode_stats.append({
-                "success": False,
-                "steps": self.state_data["steps_taken"],
-                "reward": reward,
-                "info_efficiency": 0
-            })
-            info = {
-                "final_score": 0.0,
-                "task_success": False
-            }
-        # -----------------------
-        # SAVE STATE
-        # -----------------------
-        self.last_action = action
-        self.current_episode_reward += reward
-        return self._get_observation(), reward, done, info
     def state(self) -> Dict:
         return self.state_data
@@ -326,21 +276,32 @@ class CustomerSupportEnv:
             "info_efficiency": round(info_eff, 3)
         }
     def _inject_noise(self, message):
         if random.random() < self.config["noise_prob"]:
-            noise_phrases = [
                 "pls help asap",
-                "this is urgent",
                 "not sure what's wrong",
-                "it’s been days"
-            ]
-            return message + " " + random.choice(noise_phrases)
         return message
     def _mask_required_info(self, required_fields):
         masked = []
@@ -350,4 +311,5 @@ class CustomerSupportEnv:
                 masked.append(field)
         # ensure at least 1 required field remains
-        return masked if masked else required_fields

 from app.dataset import TICKETS
 import random
 from graders import grade_easy, grade_medium, grade_hard
+#from tasks import TASKS
 import sys
+# =========================
+# PURPOSE: Controls difficulty-driven stochasticity
+# - noise_prob → message distortion
+# - missing_info_prob → partial observability
+# =========================
 DIFFICULTY_CONFIG = {
     "easy": {
         "max_steps": 8,
     }
 }
+# =========================
+# PURPOSE: Defines tasks exposed to validator
+# =========================
 AVAILABLE_TASKS = [
     {
         "id": "easy-info-collection",
             },
         ]
+    # =========================
+    # PURPOSE: Build observation exposed to agent
+    # =========================
     def _get_observation(self):
+        required = self.state_data["required_info"]
+        collected = self.state_data["collected_info"]
+        total = len(required)
+        collected_count = sum(1 for f in required if f in collected)
         return {
+            "ticket_id": self.ticket["ticket_id"],
+            "customer_message": self.state_data["customer_message"],
+            "known_info": collected,
+            "required": required,
+            "missing_required": [f for f in required if f not in collected],
+            "info_progress": collected_count / max(1, total),
+            "status": self.state_data["status"],
+            "step_count": self.state_data["steps_taken"],
+            "remaining_steps": self.max_steps - self.state_data["steps_taken"],
+            "difficulty": self.difficulty # difficulty awareness
         }
+    # =========================
+    # PURPOSE: Initialize environment with difficulty & randomness
+    # =========================
     def __init__(self, difficulty="medium", seed=None):
         self.difficulty = difficulty
         self.max_steps = self.config["max_steps"]
         self.last_action = None
+        # self-correction tracking
+        self.classification_history = []
         # METRICS TRACKING
         self.episode_stats = []
     def list_tasks(self):
         return self.tasks
     def reset(self):
         self.last_action = None
+        #self.current_episode_reward = 0.0
         self.current_steps = 0
         self.success = False
         self.ticket = random.choice(TICKETS)
+        gt = self.ticket["ground_truth"]
+        msg = random.choice(self.ticket["variants"])
+        msg = self._inject_noise(msg)
+        masked_required = self._mask_required_info(gt["required_info"])
         self.state_data = {
             "ticket_id": self.ticket["ticket_id"],
+            "customer_message": msg,
             "status": "open",
             "category": None,
+            "priority": None,
+            "required_info": masked_required,
             "collected_info": {},
             "steps_taken": 0,
+            "ground_truth": gt
         }
         return self._get_observation()
+    # =========================
+    # PURPOSE: Core transition function with self-correction logic
+    # =========================
     def step(self, action: dict):
         if self.state_data is None:
             self.reset()
+        reward = -0.05
         done = False
         info = {}
         collected = self.state_data["collected_info"]
+        gt = self.ticket["ground_truth"]
+        action_type = action.get("type") if isinstance(action, dict) else None
         # -----------------------
+        # CLASSIFY (SELF-CORRECTION ENABLED)
         # -----------------------
+        if action_type == "classify":
+            new_cat = action.get("category")
+            prev_cat = collected.get("category")
+            collected["category"] = new_cat
+            collected["priority"] = action.get("priority")
+            self.classification_history.append(new_cat)
+            # correct classification
+            if new_cat == gt["category"]:
+                reward += 0.3
+            # self-correction bonus
+            if prev_cat and prev_cat != gt["category"] and new_cat == gt["category"]:
+                reward += 0.5  # major reward
+            # flip-flop penalty
+            if len(self.classification_history) >= 3:
+                if len(set(self.classification_history[-3:])) > 2:
+                    reward -= 0.3
         # -----------------------
         # ASK INFO
             field = action.get("field")
             if field not in collected:
+                collected[field] = "value"
+                reward += 0.25
             else:
+                reward -= 0.2
         # -----------------------
         # RESOLVE
         elif action_type == "resolve":
             done = True
+            required = gt["required_info"]
+            all_info = all(f in collected for f in required)
+            correct_cat = collected.get("category") == gt["category"]
+            # 🔥 premature penalty
+            if not all_info:
+                reward -= 0.7
+            # scoring
+            if correct_cat:
                 reward += 0.3
+            if all_info:
+                reward += 0.3
+                self.success = True
+            reward += 0.2  # completion bonus
         else:
             reward -= 0.3
         # STEP UPDATE
         # -----------------------
         self.state_data["steps_taken"] += 1
+        if self.state_data["steps_taken"] >= self.max_steps:
             done = True
+            reward -= 1.5
+        return self._get_observation(), reward, done, {
+            "task_success": self.success
+        }
     def state(self) -> Dict:
         return self.state_data
             "info_efficiency": round(info_eff, 3)
         }
+    # =========================
+    # PURPOSE: Apply noise to simulate real-world messy input
+    # =========================
     def _inject_noise(self, message):
         if random.random() < self.config["noise_prob"]:
+            noise = random.choice([
                 "pls help asap",
                 "not sure what's wrong",
+                "this is urgent",
+                "been days"
+            ])
+            return message + " " + noise
         return message
+    # =========================
+    # PURPOSE: Mask required fields → partial observability
+    # =========================
+    def _mask_required_info(self, required_fields):
+        masked = [
+            f for f in required_fields
+            if random.random() > self.config["missing_info_prob"]
+        ]
+        return masked if masked else required_fields
+    """
     def _mask_required_info(self, required_fields):
         masked = []
                 masked.append(field)
         # ensure at least 1 required field remains
+        return masked if masked else required_fields
+    """

graders.py:Zone.Identifier ADDED Viewed

Binary file (25 Bytes). View file

inference.py CHANGED Viewed

@@ -54,7 +54,8 @@ def run_single_task(task):
     task_name = task["id"]
     task_type = task["difficulty"]
-    env = CustomerSupportEnv()
     obs = env.reset()
     step_count = 0

     task_name = task["id"]
     task_type = task["difficulty"]
+    #env = CustomerSupportEnv()
+    env = CustomerSupportEnv(difficulty=task["difficulty"])
     obs = env.reset()
     step_count = 0

tasks.py:Zone.Identifier ADDED Viewed

Binary file (25 Bytes). View file