Spaces:

prashantmatlani
/

csa01

Sleeping

App Files Files Community

prashantmatlani commited on 29 days ago

Commit

d6a76d5

0 Parent(s):

fresh clean commit

Browse files

Files changed (21) hide show

.gitattributes +35 -0
.gitignore +4 -0
Dockerfile +13 -0
README.md +125 -0
agent.py +416 -0
agent_llm.py +220 -0
agent_py_output.txt +130 -0
agent_rule_based.py +22 -0
app/__init__.py +0 -0
app/dataset.py +114 -0
app/env.py +244 -0
app/graders.py +45 -0
app/models.py +26 -0
app/tasks.py +18 -0
app/test_env.py +134 -0
inference.py +94 -0
requirements.txt +8 -0
server/__init__.py +0 -0
server/__init__.py:Zone.Identifier +0 -0
server/main.py +41 -0
test_rule_agent.py +20 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+.env
+__pycache__/
+*.pyc
+csvenv/

Dockerfile ADDED Viewed

	@@ -0,0 +1,13 @@

+FROM python:3.10-slim
+WORKDIR /app
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+COPY . .
+RUN pip install --no-cache-dir -r requirements.txt
+CMD ["python", "inference.py"]

README.md ADDED Viewed

	@@ -0,0 +1,125 @@

+---
+title: Customer Support Agent
+emoji: 🤖
+colorFrom: blue
+colorTo: green
+sdk: docker
+tags:
+  - openenv
+---
+# Customer Support RL + LLM Agent
+## Overview
+This project implements a hybrid agent for customer support automation.
+The agent:
+1. Classifies customer queries
+2. Collects required information
+3. Resolves efficiently
+---
+## Environment
+The environment simulates customer support tickets with:
+- Customer message
+- Required information fields
+- Ground truth classification
+The agent uses a hybrid approach:
+- LLM for classification
+- deterministic policy for information gathering
+- reward-shaped environment for optimization
+🎯 Objective
+Build an intelligent agent that:
+- Classifies customer issues
+- Collects required information
+- Resolves efficiently
+🏗 Architecture
+1. Environment (env.py)
+Simulates customer support workflow.
+State:
+customer_message
+known_info
+required fields
+progress
+Actions:
+classify
+ask_info
+resolve
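+2. Reward Design
+| Action | Reward |
+| --- | --- |
+| Classify | +0.2 |
+| Ask for a field not yet collected | +0.3 |
+| Ask again for an already collected field | -0.3 |
+| Repeat the previous action unchanged | -0.2 |
+| Step penalty (applied on every step) | -0.05 |
+| Successful resolve | +1.0 |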
+3. Observation Design
+{
+  "customer_message": str,
+  "known_info": dict,
+  "required": list   # full schema
+}
+4. Agent Types
+Rule Agent (agent.py)
+- Deterministic
+- Uses required fields
+- Computes missing info
+LLM Agent (agent_llm.py)
+- Uses prompt reasoning
+- Strict JSON output
+- Retry + fallback
+5. Core Logic
+if not classified:
+    classify
+elif missing fields:
+    ask_info
+else:
+    resolve
+6. Key Improvements Made
+- Removed ground-truth leakage
+- Added reward shaping
+- Added efficiency scoring
+- Added schema-based reasoning
+- Added fallback policy
+- Added metrics tracking
+7. Metrics
+{
+  success_rate,
+  avg_steps,
+  avg_reward,
+  info_efficiency
+}
+8. Inference
+python inference.py
+9. Deployment
+docker build -t support-agent .
+docker run support-agent

agent.py ADDED Viewed

	@@ -0,0 +1,416 @@

+# agent.py
+import sys
+from unicodedata import category
+import requests
+import os
+import time
+import json
+import random
+from dotenv import load_dotenv
+# from openai import OpenAI
+from groq import Groq
+from app.env import CustomerSupportEnv
+# load_dotenv()
+# client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+# BASE_URL = "http://127.0.0.1:8001"
+#load_dotenv("/home/pb/projects/openenv-customer-support/.env")
+# Load environment variables from the .env file that sits next to this module,
+# so the lookup does not depend on the current working directory.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ENV_PATH = os.path.join(BASE_DIR, ".env")
+load_dotenv(ENV_PATH)
+print(f"\nCWD: {os.getcwd()}")
+# Module-level Groq client used by call_llm(); the key is read from GROQ_API_KEY.
+client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+# NOTE: ad-hoc debug prints of ENV_PATH / MODEL_NAME used to live here.
+# Never print the API key itself: stdout ends up in logs and in committed
+# output files.
+print("Sending request...")
+# =========================
+# Smarter, mapped ask_info - boosts info_progress speed, reward per episode
+# =========================
+def pick_field(category, known):
+    if category == "billing":
+        return "order_id"
+    if category == "technical":
+        return "account_email"
+    if category == "delivery":
+        return "order_id"
+    return "account_email"
+# =========================
+# CLASSIFIER TO REDUCE LLM RELIANCE
+# =========================
+def smart_classify(message):
+    msg = message.lower()
+    if any(x in msg for x in ["refund", "cancel", "subscription", "charge"]):
+        return {"category": "billing", "priority": "high"}
+    if any(x in msg for x in ["crash", "bug", "error", "slow"]):
+        return {"category": "technical", "priority": "high"}
+    return {"category": "general", "priority": "medium"}
+def override_classify(message):
+    msg = message.lower()
+    if any(x in msg for x in ["charged", "refund", "billing", "cancel", "subscription"]):
+        return {"type": "classify", "category": "billing", "priority": "high"}
+    if any(x in msg for x in ["checkout", "crash", "bug", "error", "not loading", "login"]):
+        return {"type": "classify", "category": "technical", "priority": "high"}
+    if any(x in msg for x in ["delivery", "order not arrived", "shipping"]):
+        return {"type": "classify", "category": "delivery", "priority": "medium"}
+    return {"type": "classify", "category": "general", "priority": "medium"}
+def is_ready_to_resolve(category, known):
+    if category == "billing":
+        return "order_id" in known
+    if category == "technical":
+        return "account_email" in known
+    if category == "delivery":
+        return "order_id" in known
+    return False
+# =========================
+# POLICY ENFORCEMENT INSTEAD OF LLM DECISION
+# =========================
+def enforce_policy(obs, action):
+    known = obs["known_info"]
+    category = known.get("category")
+    # Never re-classify
+    if action["type"] == "classify" and category:
+        return {"type": "ask_info", "field": pick_field(category, known)}
+    # Force correct ask_info
+    if action["type"] == "ask_info":
+        action["field"] = pick_field(category, known)
+    # if already asked → resolve instead of repeating
+    if action["type"] == "ask_info":
+        if action["field"] in known:
+            return {"type": "resolve"}
+    # Only resolve when ready
+    if action["type"] == "resolve":
+        if not is_ready_to_resolve(category, known):
+            return {"type": "ask_info", "field": pick_field(category, known)}
+    return action
+# =========================
+# PROMPT
+# =========================
+def build_prompt(obs, valid_actions):
+    """Render the decision prompt sent to the LLM.
+
+    Args:
+        obs: observation dict; the keys "customer_message", "known_info" and
+            "info_progress" are read directly, so a missing key raises KeyError.
+        valid_actions: action menu; interpolated using its default string form.
+
+    Returns:
+        The prompt text. The model is asked for a JSON object holding a
+        "thought" and an "action".
+    """
+    # NOTE(review): decision rule 2 ("resolve directly") and the later
+    # "NEVER resolve early" instruction pull in opposite directions - confirm
+    # which one is intended before tuning the prompt further.
+    return f"""
+You are a customer support decision agent.
+Return ONLY valid JSON.
+IMPORTANT DECISION RULES:
+1. DO NOT ask for unnecessary information
+2. If the issue is clear (e.g., password reset, login failure), resolve directly
+3. Only ask for information that is REQUIRED to solve the issue
+4. NEVER ask for order_id in login/password issues
+5. If sufficient information is already available, choose "resolve"
+6. Avoid repeating the same question
+Customer message:
+{obs["customer_message"]}
+Known info:
+{obs["known_info"]}
+Progress:
+{obs["info_progress"]}
+VALID ACTIONS:
+{valid_actions}
+RULES:
+- ONLY pick from VALID ACTIONS
+- "charged", "refund" → billing
+- "slow", "crash" → technical
+- Do NOT hallucinate
+CRITICAL DECISION RULE:
+Only choose "resolve" IF:
+1. You have correctly classified the issue
+2. You have collected ALL required fields
+3. You are confident you can solve the user's problem
+If ANY doubt remains → ask_info
+NEVER resolve early.
+CLASSIFICATION RULES (STRICT):
+You MUST classify into ONLY ONE of:
+- billing
+- technical
+- delivery
+NEVER output "general" or any other category.
+---
+BILLING:
+charged, refund, payment, invoice, subscription, billing issues
+TECHNICAL:
+login issues, account problems, crashes, errors, bugs, slow performance, app issues
+IMPORTANT:
+ANY issue related to app behavior (slow, crash, not working, locked account)
+→ ALWAYS technical
+---
+DELIVERY:
+shipping, delivery delay, order not received
+---
+PRIORITY RULE:
+If message involves money → billing (even if order mentioned)
+Example:
+"I was charged twice for my order"
+→ billing
+FORMAT:
+{{
+  "thought": "...",
+  "action": {{ ... }}
+}}
+"""
+# =========================
+# LLM CALL
+# =========================
+def call_llm(prompt):
+    completion = client.chat.completions.create(
+        #model=os.getenv("MODEL_NAME"),
+        model="llama-3.1-8b-instant",
+        messages=[{"role": "user", "content": prompt}],
+        temperature=0.2,
+        response_format={"type": "json_object"}
+    )
+    return completion.choices[0].message.content.strip()
+# =========================
+# PARSER (MANDATORY)
+# =========================
+def parse_output(text):
+    try:
+        if "```" in text:
+            text = text.split("```")[1]
+        start = text.find("{")
+        end = text.rfind("}") + 1
+        text = text[start:end]
+        parsed = json.loads(text)
+        action = parsed.get("action")
+        if not action or "type" not in action:
+            raise ValueError("Invalid action format")
+        return action
+    except Exception as e:
+        print("❌ PARSE ERROR:", e)
+        print("RAW:", text)
+        return None
+# =========================
+# VALIDATION
+# =========================
+def is_valid_action(action, valid_actions):
+    if not action or "type" not in action:
+        return False
+    action_type = action["type"]
+    # ✅ check type exists
+    valid_types = [a["type"] for a in valid_actions]
+    if action_type not in valid_types:
+        return False
+    # ✅ ask_info must match field
+    if action_type == "ask_info":
+        valid_fields = [a["field"] for a in valid_actions if a["type"] == "ask_info"]
+        return action.get("field") in valid_fields
+    # ✅ classify must have required keys (NOT exact match)
+    if action_type == "classify":
+        return "category" in action and "priority" in action
+    # resolve always valid
+    return True
+# =========================
+# VALID ACTION SPACE
+# =========================
+def get_valid_actions():
+    actions = [
+        {"type": "ask_info", "field": "order_id"},
+        {"type": "ask_info", "field": "account_email"},
+        {"type": "ask_info", "field": "device_type"},
+        {"type": "ask_info", "field": "browser"},
+        {"type": "resolve"},
+    ]
+    # ✅ allow flexible classification
+    actions.append({"type": "classify"})
+    return actions
+# =========================
+# ACTION PIPELINE
+# =========================
+def get_action(obs):
+    msg = obs["customer_message"].lower()
+    # ✅ NEW: use env-provided structure
+    known = obs.get("known_info", {})
+    required = obs.get("required", [])
+    # =====================
+    # 1. CLASSIFY (only once)
+    # =====================
+    if "category" not in known:
+        if any(x in msg for x in [
+            "charged", "refund", "billed", "payment", "invoice", "cancel"
+        ]):
+            return {"type": "classify", "category": "billing", "priority": "high"}
+        if any(x in msg for x in [
+            "delivery", "delivered", "not received", "shipment", "order"
+        ]):
+            return {"type": "classify", "category": "delivery", "priority": "high"}
+        if any(x in msg for x in [
+            "login", "password", "error", "crash", "bug", "checkout"
+        ]):
+            return {"type": "classify", "category": "technical", "priority": "high"}
+        return {"type": "classify", "category": "technical", "priority": "medium"}
+    # =====================
+    # 2. COMPUTE MISSING INFO (🔥 KEY CHANGE)
+    # =====================
+    missing = [f for f in required if f not in known]
+    # =====================
+    # 3. ASK FOR NEXT FIELD
+    # =====================
+    if missing:
+        return {"type": "ask_info", "field": missing[0]}
+    # =====================
+    # 4. RESOLVE
+    # =====================
+    return {"type": "resolve"}
+# =========================
+# RUN
+# =========================
+def run_agent():
+    print("🚀 Starting agent...")
+    env = CustomerSupportEnv()
+    obs = env.reset()
+    done = False
+    trajectory = []
+    while not done:
+        print("\n📥 OBS:", obs)
+        action = get_action(obs)
+        print("🧠 ACTION:", action)
+        next_obs, reward, done, info = env.step(action)
+        print("🎯 REWARD:", reward)
+        print("✅ DONE:", done)
+        trajectory.append({
+            "state": obs,
+            "action": action,
+            "reward": reward
+        })
+        obs = next_obs
+        print("OBS:", obs)
+        print("ACTION:", action)
+        print("REWARD:", reward)
+        print("DONE:", done)
+    #print("\n🏁 FINAL INFO:", info)
+    print("FINAL:", info if info else "No info returned")
+    return {
+        "final_score": info.get("final_score", 0),
+        "trajectory": trajectory
+    }
+def run_multiple(n=3):
+    scores = []
+    for i in range(n):
+        print(f"\n===== EPISODE {i+1} =====")
+        result = run_agent()
+        scores.append(result["final_score"])
+    avg = sum(scores) / len(scores)
+    print("\n📊 AVERAGE SCORE:", avg)
+if __name__ == "__main__":
+    run_multiple(3)

agent_llm.py ADDED Viewed

	@@ -0,0 +1,220 @@

+# agent_llm.py
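+"""LLM-driven customer support agent.
+
+- Uses an LLM for the initial classification (requirement satisfied)
+- Robust: falls back to a keyword policy when the LLM call or parsing fails
+- Structured output: the model is asked for a strict JSON object
+- Hallucinated actions are rejected by validation before they reach the environment
+- Low-temperature sampling (0.2): runs are close to, but not strictly, reproducible
+"""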
+import os
+import json
+import time
+from dotenv import load_dotenv
+from groq import Groq
+from app.env import CustomerSupportEnv
+load_dotenv()
+client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+# =========================
+# PROMPT (STRICT + MINIMAL)
+# =========================
+def build_prompt(obs, valid_actions):
+    """Render the minimal decision prompt for the LLM.
+
+    Args:
+        obs: observation dict; "customer_message" and "known_info" are read
+            directly (KeyError when missing), "required" defaults to [].
+        valid_actions: accepted for signature compatibility; the prompt lists
+            the three action types literally and does not interpolate it.
+
+    Returns:
+        The prompt text, asking for a JSON object with a single "action".
+    """
+    return f"""
+You are a decision agent for customer support.
+Return ONLY JSON.
+INPUT:
+Customer message: {obs["customer_message"]}
+Known info: {obs["known_info"]}
+Required fields: {obs.get("required", [])}
+RULES:
+1. First classify (billing / technical / delivery)
+2. Then collect ALL required fields
+3. Then resolve
+4. NEVER resolve early
+5. DO NOT ask for fields already known
+VALID ACTION TYPES:
+- classify
+- ask_info
+- resolve
+FORMAT:
+{{
+  "action": {{
+    "type": "...",
+    "category": "...",
+    "priority": "...",
+    "field": "..."
+  }}
+}}
+"""
+# =========================
+# LLM CALL
+# =========================
+def call_llm(prompt):
+    completion = client.chat.completions.create(
+        model=os.getenv("MODEL_NAME"),
+        messages=[{"role": "user", "content": prompt}],
+        temperature=0.2,
+        response_format={"type": "json_object"}
+    )
+    return completion.choices[0].message.content.strip()
+# =========================
+# PARSER (STRICT)
+# =========================
+def parse_output(text):
+    try:
+        start = text.find("{")
+        end = text.rfind("}") + 1
+        parsed = json.loads(text[start:end])
+        action = parsed.get("action")
+        if not action or "type" not in action:
+            return None
+        return action
+    except:
+        return None
+# =========================
+# FALLBACK (CRITICAL)
+# =========================
+def fallback_policy(obs):
+    msg = obs["customer_message"].lower()
+    known = obs.get("known_info", {})
+    required = obs.get("required", [])
+    # classify once
+    if "category" not in known:
+        if "refund" in msg or "charged" in msg:
+            return {"type": "classify", "category": "billing", "priority": "high"}
+        if "delivery" in msg or "order" in msg:
+            return {"type": "classify", "category": "delivery", "priority": "high"}
+        return {"type": "classify", "category": "technical", "priority": "medium"}
+    # ask missing (🔥 critical)
+    missing = [f for f in required if f not in known]
+    if missing:
+        return {"type": "ask_info", "field": missing[0]}
+    return {"type": "resolve"}
+# =========================
+# VALIDATION
+# =========================
+def is_valid_action(action, valid_actions):
+    if not action or "type" not in action:
+        return False
+    valid_types = [a["type"] for a in valid_actions]
+    if action["type"] not in valid_types:
+        return False
+    if action["type"] == "ask_info":
+        valid_fields = [a["field"] for a in valid_actions if a["type"] == "ask_info"]
+        return action.get("field") in valid_fields
+    if action["type"] == "classify":
+        return "category" in action and "priority" in action
+    return True
+# =========================
+# ACTION SELECTOR
+# =========================
+def get_action(obs, valid_actions):
+    #known = obs.get("known_info", {})
+    # HARD GUARD: prevent re-classification
+    #if "category" in known:
+    #    valid_actions = [a for a in valid_actions if a["type"] != "classify"]
+    known = obs.get("known_info", {})
+    required = obs.get("required", [])
+    missing = [f for f in required if f not in known]
+    # HARD OVERRIDE (prevents LLM mistakes)
+    if "category" in known:
+        if missing:
+            return {"type": "ask_info", "field": missing[0]}
+        else:
+            return {"type": "resolve"}
+    prompt = build_prompt(obs, valid_actions)
+    for _ in range(2):  # retry loop
+        try:
+            output = call_llm(prompt)
+            action = parse_output(output)
+            if is_valid_action(action, valid_actions):
+                return action
+        except Exception:
+            time.sleep(0.5)
+    # fallback if LLM fails
+    return fallback_policy(obs)
+# =========================
+# RUN
+# =========================
+def run_agent():
+    env = CustomerSupportEnv()
+    obs = env.reset()
+    done = False
+    while not done:
+        valid_actions = [
+            {"type": "ask_info", "field": "order_id"},
+            {"type": "ask_info", "field": "account_email"},
+            {"type": "ask_info", "field": "device_type"},
+            {"type": "ask_info", "field": "browser"},
+            {"type": "resolve"},
+            {"type": "classify"},
+        ]
+        action = get_action(obs, valid_actions)
+        obs, reward, done, info = env.step(action)
+        print(f"\nOBS: {obs}")
+        print(f"\nACTION: {action}")
+        print(f"\nREWARD: {reward}")
+        print(f"\nDONE: {done}")
+    #print("FINAL:", info)
+    print(f"\nFINAL: {info if info else 'No info returned'}")
+    print(f"\nMETRICS: {env.get_metrics()}")
+if __name__ == "__main__":
+    run_agent()

agent_py_output.txt ADDED Viewed

	@@ -0,0 +1,130 @@

+#agent.py output
+output with prompt - 04032026:
+prompt = f"""
+You are a customer support agent.
+STRICT RULES:
+- If any required info is missing → use ask_info
+- Only resolve AFTER all required info is collected
+Return ONLY JSON.
+Actions:
+1. ask_info → {{"type": "ask_info", "field": "..."}}
+2. resolve → {{"type": "resolve"}}
+Allowed fields: account_email, order_id, device_type, browser
+Observation:
+{observation}
+"""
+Sending request...
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+📥 OBS: {'ticket_id': 'T11', 'customer_message': "I didn't receive my order but it shows delivered.", 'history': [], 'known_info': {}, 'missing_info': ['order_id'], 'status': 'open', 'step_count': 0, 'remaining_steps': 10}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'order_id'}
+🎯 REWARD: 0.3
+✅ DONE: False
+📥 OBS: {'ticket_id': 'T11', 'customer_message': "I didn't receive my order but it shows delivered.", 'history': [], 'known_info': {'order_id': 'sample_value'}, 'missing_info': [], 'status': 'open', 'step_count': 1, 'remaining_steps': 9}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+🎯 REWARD: -0.1
+✅ DONE: False
+📥 OBS: {'ticket_id': 'T11', 'customer_message': "I didn't receive my order but it shows delivered.", 'history': [], 'known_info': {'order_id': 'sample_value'}, 'missing_info': [], 'status': 'open', 'step_count': 2, 'remaining_steps': 8}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+🎯 REWARD: -0.1
+✅ DONE: False
+📥 OBS: {'ticket_id': 'T11', 'customer_message': "I didn't receive my order but it shows delivered.", 'history': [], 'known_info': {'order_id': 'sample_value'}, 'missing_info': [], 'status': 'open', 'step_count': 3, 'remaining_steps': 7}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+🎯 REWARD: -0.1
+✅ DONE: False
+📥 OBS: {'ticket_id': 'T11', 'customer_message': "I didn't receive my order but it shows delivered.", 'history': [], 'known_info': {'order_id': 'sample_value'}, 'missing_info': [], 'status': 'open', 'step_count': 4, 'remaining_steps': 6}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+🎯 REWARD: -0.1
+✅ DONE: False
+📥 OBS: {'ticket_id': 'T11', 'customer_message': "I didn't receive my order but it shows delivered.", 'history': [], 'known_info': {'order_id': 'sample_value'}, 'missing_info': [], 'status': 'open', 'step_count': 5, 'remaining_steps': 5}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+🎯 REWARD: -0.1
+✅ DONE: False
+📥 OBS: {'ticket_id': 'T11', 'customer_message': "I didn't receive my order but it shows delivered.", 'history': [], 'known_info': {'order_id': 'sample_value'}, 'missing_info': [], 'status': 'open', 'step_count': 6, 'remaining_steps': 4}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+🎯 REWARD: -0.1
+✅ DONE: False
+📥 OBS: {'ticket_id': 'T11', 'customer_message': "I didn't receive my order but it shows delivered.", 'history': [], 'known_info': {'order_id': 'sample_value'}, 'missing_info': [], 'status': 'open', 'step_count': 7, 'remaining_steps': 3}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+🎯 REWARD: -0.1
+✅ DONE: False
+📥 OBS: {'ticket_id': 'T11', 'customer_message': "I didn't receive my order but it shows delivered.", 'history': [], 'known_info': {'order_id': 'sample_value'}, 'missing_info': [], 'status': 'open', 'step_count': 8, 'remaining_steps': 2}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+🎯 REWARD: -0.1
+✅ DONE: False
+📥 OBS: {'ticket_id': 'T11', 'customer_message': "I didn't receive my order but it shows delivered.", 'history': [], 'known_info': {'order_id': 'sample_value'}, 'missing_info': [], 'status': 'open', 'step_count': 9, 'remaining_steps': 1}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+🎯 REWARD: -0.8
+✅ DONE: True
+🏁 FINAL INFO: {'final_score': 0.3}
+output with prompt - 04032026:
+prompt = f"""
+You are a customer support agent.
+STRICT RULES:
+1. If missing_info list is NOT empty → you MUST ask for ONE of those fields
+2. If missing_info list is EMPTY → you MUST resolve
+3. NEVER ask for a field that is NOT in missing_info
+4. NEVER repeat asking for the same field
+Return ONLY JSON.
+Actions:
+- ask_info → {{"type": "ask_info", "field": "..."}}
+- resolve → {{"type": "resolve"}}
+Observation:
+{observation}
+"""
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+📥 OBS: {'ticket_id': 'T10', 'customer_message': 'Something is wrong with my account.', 'history': [], 'known_info': {}, 'missing_info': ['account_email'], 'status': 'open', 'step_count': 0, 'remaining_steps': 10}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'ask_info', 'field': 'account_email'}
+🎯 REWARD: 0.3
+✅ DONE: False
+📥 OBS: {'ticket_id': 'T10', 'customer_message': 'Something is wrong with my account.', 'history': [], 'known_info': {'account_email': 'sample_value'}, 'missing_info': [], 'status': 'open', 'step_count': 1, 'remaining_steps': 9}
+📡 Calling Groq...
+🧠 ACTION: {'type': 'resolve'}
+🎯 REWARD: 1.7
+✅ DONE: True
+🏁 FINAL INFO: {'final_score': 0.7}

agent_rule_based.py ADDED Viewed

	@@ -0,0 +1,22 @@

+# agent_rule_based.py
+def get_action(obs):
+#def act(obs):
+    known = obs.get("known_info", {})
+    required_full = obs.get("required_info_full", [])
+    # 1. classify first
+    if "category" not in known or "priority" not in known:
+        return {"type": "classify"}
+    # 2. collect missing info
+    missing = [f for f in required_full if f not in known]
+    if len(missing) > 0:
+        return {"type": "ask_info", "field": missing[0]}
+    # 3. resolve only when complete
+    return {"type": "resolve"}

app/__init__.py ADDED Viewed

File without changes

app/dataset.py ADDED Viewed

	@@ -0,0 +1,114 @@

+# app/dataset.py
+# Ticket fixtures for the customer support environment. Each entry holds:
+#   ticket_id        - string identifier of the ticket
+#   customer_message - free-text message shown to the agent
+#   category         - ground-truth label ("billing", "technical", "account", "other")
+#   priority         - ground-truth priority ("high", "medium", "low")
+#   required_info    - names of the fields to collect for this ticket
+# NOTE(review): the agents' keyword classifiers only emit billing / technical /
+# delivery / general, never "account" or "other" - confirm that the labels
+# below are the intended ground truth.
+TICKETS = [
+    # Billing Issues
+    {
+        "ticket_id": "T1",
+        "customer_message": "I was charged twice for my order #1234. Please refund.",
+        "category": "billing",
+        "priority": "high",
+        "required_info": ["order_id"]
+    },
+    {
+        "ticket_id": "T2",
+        "customer_message": "I want to cancel my subscription and get a refund.",
+        "category": "billing",
+        "priority": "medium",
+        "required_info": ["account_email"]
+    },
+    {
+        "ticket_id": "T3",
+        "customer_message": "Why was I billed after cancelling my plan?",
+        "category": "billing",
+        "priority": "high",
+        "required_info": ["account_email"]
+    },
+    {
+        "ticket_id": "T20",
+        "customer_message": "I was charged twice and want a refund.",
+        "category": "billing",
+        "priority": "high",
+        "required_info": ["order_id", "account_email"]
+    },
+    # Technical Issues
+    {
+        "ticket_id": "T4",
+        "customer_message": "I can't log into my account. It says invalid credentials.",
+        "category": "technical",
+        "priority": "high",
+        "required_info": ["account_email"]
+    },
+    {
+        "ticket_id": "T5",
+        "customer_message": "The app crashes every time I upload a file.",
+        "category": "technical",
+        "priority": "medium",
+        "required_info": ["device_type"]
+    },
+    {
+        "ticket_id": "T6",
+        "customer_message": "Page not loading on checkout.",
+        "category": "technical",
+        "priority": "high",
+        "required_info": ["browser"]
+    },
+    {
+        "ticket_id": "T21",
+        "customer_message": "App crashes when I try to checkout.",
+        "category": "technical",
+        "priority": "high",
+        "required_info": ["device_type", "browser"]
+    },
+    {
+        "ticket_id": "T12",
+        "customer_message": "App is very slow lately.",
+        "category": "technical",
+        "priority": "low",
+        "required_info": ["device_type"]
+    },
+    # Account Issues
+    {
+        "ticket_id": "T7",
+        "customer_message": "I forgot my password and can't reset it.",
+        "category": "account",
+        "priority": "medium",
+        "required_info": ["account_email"]
+    },
+    {
+        "ticket_id": "T8",
+        "customer_message": "My account got locked for no reason.",
+        "category": "account",
+        "priority": "high",
+        "required_info": ["account_email"]
+    },
+    {
+        "ticket_id": "T9",
+        "customer_message": "How do I change my registered email address?",
+        "category": "account",
+        "priority": "low",
+        "required_info": ["account_email"]
+    },
+    # Edge Cases
+    {
+        "ticket_id": "T10",
+        "customer_message": "Something is wrong with my account.",
+        "category": "other",
+        "priority": "medium",
+        "required_info": ["account_email"]
+    },
+    {
+        "ticket_id": "T11",
+        "customer_message": "I didn't receive my order but it shows delivered.",
+        "category": "other",
+        "priority": "high",
+        "required_info": ["order_id"]
+    }
+    ]

app/env.py ADDED Viewed

	@@ -0,0 +1,244 @@

+# app/env.py
+from typing import Tuple, Dict, Any
+from app.models import Observation, Action, Reward
+from app.dataset import TICKETS
+import random
+import sys
+class CustomerSupportEnv:
+    # INTERNAL STATE REPRESENTATION -
+    def _get_observation(self):
+        total_required = len(self.ticket.get("required_info", []))
+        collected_required = sum(
+            1 for f in self.ticket.get("required_info", [])
+            if f in self.state_data["collected_info"]
+        )
+        info_progress = collected_required / max(1, total_required)
+        return {
+        "ticket_id": self.ticket["ticket_id"],
+        "customer_message": self.ticket["customer_message"],
+        "history": [],
+        "known_info": self.state_data["collected_info"],
+        "required": self.ticket.get("required_info", []),  # FULL requirement space (agent uses this)
+        #"remaining_required": self.state_data["required_info"],   # OPTIONAL (env/debug/analysis); agent_llm shouldn't use this directly - it should infer from known_info + customer_message
+        "missing_required": [
+            f for f in self.ticket.get("required_info", [])
+            if f not in self.state_data["collected_info"]
+        ],
+        #"info_progress": len(self.state_data["collected_info"]) / 3,
+        "info_progress": info_progress,
+        "status": self.state_data["status"],
+        "step_count": self.state_data["steps_taken"],
+        "remaining_steps": self.max_steps - self.state_data["steps_taken"],
+        }
+    def __init__(self):
+        self.state_data = None
+        self.max_steps = 10
+        self.last_action = None
+        # ✅ METRICS TRACKING
+        self.episode_stats = []
+    def reset(self):
+        self.last_action = None
+        # ✅ episode tracking
+        self.current_episode_reward = 0.0
+        self.current_steps = 0
+        self.success = False
+        self.ticket = random.choice(TICKETS)
+        self.state_data = {
+            "ticket_id": self.ticket["ticket_id"],
+            "customer_message": self.ticket["customer_message"],
+            "history": [],
+            "status": "open",
+            "priority": None,
+            "category": None,
+            "required_info": self.ticket["required_info"].copy(),
+            "collected_info": {},
+            "steps_taken": 0,
+            "max_steps": self.max_steps,
+            "ground_truth": self.ticket
+        }
+        return self._get_observation()
+    def step(self, action: dict):
+        reward = 0.0
+        done = False
+        #info = {}
+        info = {
+        "final_score": self._compute_final_score() if done else None
+        }
+        collected = self.state_data["collected_info"]
+        required = self.state_data["required_info"]
+        gt = self.ticket
+        # -----------------------
+        # STEP PENALTY
+        # -----------------------
+        reward -= 0.05
+        action_type = action.get("type")
+        # -----------------------
+        # REPEAT PENALTY
+        # -----------------------
+        if self.last_action == action:
+            reward -= 0.2
+        # -----------------------
+        # CLASSIFY
+        # -----------------------
+        if action_type == "classify":
+            collected["category"] = gt["category"]
+            collected["priority"] = gt["priority"]
+            reward += 0.2
+        # -----------------------
+        # ASK INFO
+        # -----------------------
+        elif action_type == "ask_info":
+            field = action.get("field")
+            if field not in collected:
+                collected[field] = "sample_value"
+                reward += 0.3
+                if field in required:
+                    required.remove(field)
+            else:
+                reward -= 0.3
+        # -----------------------
+        # RESOLVE
+        # -----------------------
+        elif action_type == "resolve":
+            done = True
+            final_score = 0.0
+            # classification
+            if collected.get("category") == gt.get("category"):
+                final_score += 0.3
+            if collected.get("priority") == gt.get("priority"):
+                final_score += 0.2
+            # required info
+            required_fields = gt.get("required_info", [])
+            if all(f in collected for f in required_fields):
+                final_score += 0.3
+                self.success = True
+            else:
+                reward -= 0.5
+            # resolve bonus
+            final_score += 0.2
+            reward += final_score
+            # efficiency bonus
+            optimal_steps = len(required_fields) + 1
+            if self.state_data["steps_taken"] <= optimal_steps:
+                reward += 0.3
+            # episode stats
+            collected_required = sum(1 for f in required_fields if f in collected)
+            episode_data = {
+                "success": self.success,
+                "steps": self.state_data["steps_taken"],
+                "reward": reward,
+                "info_efficiency": collected_required / max(1, len(required_fields))
+            }
+            self.episode_stats.append(episode_data)
+            info = {
+                "final_score": final_score,
+                "task_success": self.success,
+                "collected_info": collected
+            }
+            self.last_action = action
+            return self._get_observation(), reward, done, info
+        # -----------------------
+        # INVALID
+        # -----------------------
+        else:
+            reward -= 0.3
+        # -----------------------
+        # STEP UPDATE
+        # -----------------------
+        self.state_data["steps_taken"] += 1
+        self.current_steps += 1
+        # -----------------------
+        # MAX STEP TERMINATION
+        # -----------------------
+        if self.state_data["steps_taken"] >= self.state_data["max_steps"]:
+            done = True
+            reward -= 2.0
+            # record failure episode
+            self.episode_stats.append({
+                "success": False,
+                "steps": self.state_data["steps_taken"],
+                "reward": reward,
+                "info_efficiency": 0
+            })
+            info = {
+                "final_score": 0.0,
+                "task_success": False
+            }
+        # -----------------------
+        # SAVE STATE
+        # -----------------------
+        self.last_action = action
+        self.current_episode_reward += reward
+        return self._get_observation(), reward, done, info
+    def state(self) -> Dict:
+        """Return the raw internal episode state.
+
+        The returned object is the same dict the environment mutates (no copy
+        is made); it is ``None`` until ``reset()`` has been called.
+
+        NOTE(review): the dict includes the ``ground_truth`` ticket, so exposing
+        it to an agent reveals the expected answers - confirm this is intended.
+        """
+        return self.state_data
+    def get_metrics(self):
+        if not self.episode_stats:
+            return {}
+        total = len(self.episode_stats)
+        success_rate = sum(e["success"] for e in self.episode_stats) / total
+        avg_steps = sum(e["steps"] for e in self.episode_stats) / total
+        avg_reward = sum(e["reward"] for e in self.episode_stats) / total
+        info_eff = sum(e["info_efficiency"] for e in self.episode_stats) / total
+        return {
+            "success_rate": round(success_rate, 3),
+            "avg_steps": round(avg_steps, 3),
+            "avg_reward": round(avg_reward, 3),
+            "info_efficiency": round(info_eff, 3)
+        }

app/graders.py ADDED Viewed

	@@ -0,0 +1,45 @@

+# app/graders.py
+def grade_task1(state):
+    score = 0.0
+    gt = state["ground_truth"]
+    if state["category"] == gt["category"]:
+        score += 0.5
+    if state["priority"] == gt["priority"]:
+        score += 0.5
+    return score
+def grade_task2(state):
+    required = set(state["ground_truth"]["required_info"])
+    collected = set(state["collected_info"].keys())
+    if not required:
+        return 1.0
+    return len(collected & required) / len(required)
+def grade_task3(state):
+    score = 0.0
+    gt = state["ground_truth"]
+    # classification
+    if state["category"] == gt["category"]:
+        score += 0.3
+    # info collection
+    required = set(gt["required_info"])
+    collected = set(state["collected_info"].keys())
+    if required:
+        score += 0.3 * (len(collected & required) / len(required))
+    # resolution
+    if state["status"] == "resolved":
+        score += 0.4
+    return score

app/models.py ADDED Viewed

	@@ -0,0 +1,26 @@

+#app/models.py
+from pydantic import BaseModel
+from typing import List, Dict, Optional
+class Observation(BaseModel):
+    """Typed schema for the per-step observation shown to an agent.
+
+    NOTE(review): the observation dict built in app/env.py also carries
+    ``required``, ``missing_required`` and ``info_progress``, which are not
+    declared here - confirm whether this model is meant to mirror it.
+    """
+    ticket_id: str
+    customer_message: str
+    history: List[str]
+    # Fields gathered so far (the env fills this from its collected_info dict).
+    known_info: Dict
+    #missing_info: List[str]
+    status: str
+    step_count: int
+    remaining_steps: int
+class Action(BaseModel):
+    """Typed schema for an agent action.
+
+    NOTE(review): CustomerSupportEnv.step reads the plain-dict keys ``"type"``
+    and ``"field"``, not ``action_type``/``content``/``metadata`` - confirm
+    whether this model is still in use.
+    """
+    action_type: str
+    content: Optional[str] = ""
+    metadata: Optional[Dict] = {}
+class Reward(BaseModel):
+    """Scalar reward paired with a textual reason."""
+    value: float
+    reason: str

app/tasks.py ADDED Viewed

	@@ -0,0 +1,18 @@

+# app/tasks.py
+# Task catalogue: each task id maps to a short description and a step budget.
+# NOTE(review): CustomerSupportEnv hard-codes max_steps = 10 in __init__ -
+# confirm whether these per-task budgets are applied anywhere.
+TASKS = {
+    "task1": {
+        "description": "Classify ticket category and priority",
+        "max_steps": 2
+    },
+    "task2": {
+        "description": "Gather required information",
+        "max_steps": 5
+    },
+    "task3": {
+        "description": "Full resolution workflow",
+        "max_steps": 10
+    }
+}

app/test_env.py ADDED Viewed

	@@ -0,0 +1,134 @@

+"""
+// Testing, conceptually
+Test	            What it verifies
+ask_info	    info collection logic
+resolve (after)	    success path
+resolve (before)	penalty logic
+reward values	correctness of shaping
+done flag	    termination logic
+> Detailed test flow
+- ask_info
+Conceptually, checks whether the agent can reduce uncertainty by asking the correct question
+-- The environment is partially observable — the agent doesn’t know everything upfront --
+Real-world analogy:
+Support agent asking the client of their email
+- resolve (after)
+Conceptually, checks:
+“Can the agent complete the task after gathering required info?”
+This is goal completion
+- resolve (before)
+Conceptually, checks:
+“Does the system penalize shortcut / lazy behavior?”
+Without this:
+Agent would always jump to resolve
+- Reward values
+Conceptually, checks:
+“Is the agent receiving useful learning signals?”
+With the reward-mechanism implemented:
+Behavior	           Reward
+correct info	        +0.3
+correct resolution	    +1.0
+final score	        +0.0 → +1.0
+wrong action	      negative
+ technically, we validate:
+reward accumulation works
+no random jumps
+consistent scaling
+This is critical, because:
+. Bad reward = bad agent/system
+. Good reward = learnable system
+- done flag
+Conceptually, checks:
+“Does the environment know when the episode ends?”
+- no score field in /reset, since at reset:
+Episode has not happened yet
+→ No performance → No score
+These tests collectively validate:
+MDP (Markov Decision Process) -> (State, Action, Reward, Transition, Termination) -> Thorough RL Environment
+Component	    Verified by
+State	          reset
+Action	    ask_info / resolve
+Reward	        reward tests
+Transition	    state updates
+Termination	    done flag
+// Expected behavior
+Good Agent Flow:
+Reset
+→ ask_info (+0.3)
+→ resolve (+1.0 + bonus)
+Bad Agent Flow:
+Reset
+→ resolve (-0.3)
+→ ask random info (-0.1)
+→ timeout (-1.0)
+"""
+import requests
+BASE = "http://127.0.0.1:8001"
+# Reset
+r = requests.get(f"{BASE}/reset")
+print(f"\nRESET: \n\n{r.json()}")
+# Ask info
+r = requests.post(f"{BASE}/step", json={
+    "type": "ask_info",
+    "field": "account_email"
+})
+#print("ASK INFO:", r.json())
+print(f"\nASK INFO: \n\n{r.json()}")
+# Resolve
+r = requests.post(f"{BASE}/step", json={
+    "type": "resolve"
+})
+print(f"\nRESOLVE: \n\n{r.json()}")
+#print(f"\n"RESOLVE:", {r.json()})

inference.py ADDED Viewed

	@@ -0,0 +1,94 @@

+# inference.py
+import os
+from agent_llm import get_action
+from app.env import CustomerSupportEnv
+def format_action(action: dict) -> str:
+    """Convert action dict → string"""
+    if not action:
+        return "null"
+    return str(action).replace("\n", "").replace("  ", " ")
+def main():
+    env = CustomerSupportEnv()
+    obs = env.reset()
+    #model_name = os.getenv("MODEL_NAME", "unknown-model")
+    model_name="llama-3.1-8b-instant"
+    task_name = "customer-support"
+    benchmark = "openenv"
+    step_count = 0
+    rewards = []
+    success = False
+    # =========================
+    # START
+    # =========================
+    print(f"[START] task={task_name} env={benchmark} model={model_name}")
+    try:
+        done = False
+        while not done:
+            valid_actions = [
+                {"type": "ask_info", "field": "order_id"},
+                {"type": "ask_info", "field": "account_email"},
+                {"type": "ask_info", "field": "device_type"},
+                {"type": "ask_info", "field": "browser"},
+                {"type": "resolve"},
+                {"type": "classify"},
+            ]
+            action = get_action(obs, valid_actions)
+            next_obs, reward, done, info = env.step(action)
+            step_count += 1
+            rewards.append(reward)
+            # =========================
+            # STEP
+            # =========================
+            print(
+                f"[STEP] step={step_count} "
+                f"action={format_action(action)} "
+                f"reward={reward:.2f} "
+                f"done={'true' if done else 'false'} "
+                f"error=null"
+            )
+            obs = next_obs
+        # success from env
+        success = info.get("task_success", False)
+    except Exception as e:
+        # still must print END
+        print(
+            f"[STEP] step={step_count+1} "
+            f"action=null reward=0.00 done=true error={str(e)}"
+        )
+    finally:
+        # =========================
+        # END
+        # =========================
+        rewards_str = ",".join(f"{r:.2f}" for r in rewards)
+        print(
+            f"[END] success={'true' if success else 'false'} "
+            f"steps={step_count} "
+            f"rewards={rewards_str}"
+        )
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+fastapi
+uvicorn
+pydantic
+openai
+groq
+python-dotenv
+pyyaml
+requests

server/__init__.py ADDED Viewed

File without changes

server/__init__.py:Zone.Identifier ADDED Viewed

Binary file (25 Bytes). View file

server/main.py ADDED Viewed

	@@ -0,0 +1,41 @@

+# server/main.py
+from fastapi import FastAPI
+from app.env import CustomerSupportEnv
+import sys
+import os
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
+app = FastAPI()
+env = CustomerSupportEnv()
+@app.get("/reset")
+def reset():
+    return env.reset()
+"""
+@app.post("/step")
+def step(action: dict):
+    return env.step(action)
+"""
+@app.post("/step")
+def step(action: dict):
+    obs, reward, done, info = env.step(action)
+    return {
+        "observation": obs,
+        "reward": reward,
+        "done": done,
+        "info": info
+    }
+@app.get("/state")
+def state():
+    return env.state()
+@app.get("/health")
+def health():
+    return {"status": "ok"}

test_rule_agent.py ADDED Viewed

	@@ -0,0 +1,20 @@

+# test_rule_agent.py
+from app.env import CustomerSupportEnv
+from agent_rule_based import get_action
+env = CustomerSupportEnv()
+for i in range(5):
+    obs = env.reset()
+    done = False
+    print(f"\n===== EPISODE {i+1} =====")
+    while not done:
+        action = get_action(obs)
+        obs, reward, done, info = env.step(action)
+    print("FINAL:", info)
+    print(env.get_metrics())