Spaces:

prashantmatlani
/

csa01

Sleeping

prashantmatlani committed on 27 days ago

Commit

bf34481

1 Parent(s): bce7dd2

updated yaml, added graders to root, modified inference

Files changed (3) hide show

graders.py ADDED Viewed

+# graders.py
def get_info_efficiency(env):
    """Return the ``info_efficiency`` recorded for the most recent episode.

    Looks at the last entry of ``env.episode_stats`` and returns its
    ``"info_efficiency"`` value coerced to ``float``.

    Args:
        env: Any object. It may expose an ``episode_stats`` sequence whose
            entries support ``.get(key, default)`` (presumably dicts --
            confirm against the environment implementation).

    Returns:
        The stored efficiency as a ``float``, or ``0.0`` when ``env`` has no
        ``episode_stats``, the sequence is empty, the key is missing, or the
        stored value is ``None``, non-numeric, or NaN.
    """
    import math  # function-scope import keeps this helper self-contained

    stats = getattr(env, "episode_stats", None)
    if not stats:
        return 0.0

    raw = stats[-1].get("info_efficiency", 0)
    try:
        value = float(raw)
    except (TypeError, ValueError, OverflowError):
        # .get() only substitutes its default for a *missing* key, so an
        # explicit None (or other junk) stored under the key lands here.
        return 0.0

    # A NaN would pass through a max(0.0, min(1.0, score)) clamp as 1.0,
    # because every comparison against NaN is False; treat it as "no data".
    return 0.0 if math.isnan(value) else value
def grade_easy(env, trajectory=None, final_state=None):
    """Score the easy task purely from the number of trajectory steps.

    The score starts at 0.3 and grows by 0.1 per step, then is clamped to
    ``[0.0, 1.0]`` (so it reaches 1.0 from seven steps onward).

    Args:
        env: Unused; accepted so all graders share one signature.
        trajectory: Optional iterable of steps. Only the number of steps is
            used, so the steps may be of any type. ``None`` or an empty
            iterable counts as zero steps.
        final_state: Unused; accepted so all graders share one signature.

    Returns:
        A float in ``[0.0, 1.0]``.
    """
    # Count rather than len() so one-shot iterables are accepted too, and do
    # not touch the steps themselves: their contents never affect the score.
    step_count = sum(1 for _ in (trajectory or ()))
    score = 0.3 + 0.1 * step_count
    return max(0.0, min(1.0, score))
def grade_medium(env, trajectory=None, final_state=None):
    """Score the medium task as half of the latest info efficiency.

    Args:
        env: Object handed to ``get_info_efficiency`` to obtain the most
            recent episode's efficiency value.
        trajectory: Unused; accepted so all graders share one signature.
        final_state: Unused; accepted so all graders share one signature.

    Returns:
        ``0.5 * info_efficiency`` clamped to ``[0.0, 1.0]``.
    """
    weighted_efficiency = 0.5 * get_info_efficiency(env)
    # Cap at 1.0 first, then floor at 0.0.
    capped = min(1.0, weighted_efficiency)
    return max(0.0, capped)
def grade_hard(env, trajectory=None, final_state=None):
    """Score the hard task from success, info efficiency and step count.

    The score is the sum of three weighted terms, clamped to ``[0.0, 1.0]``:

    * 0.5 if the last ``env.episode_stats`` entry has a truthy ``"success"``;
    * 0.3 times the value returned by ``get_info_efficiency(env)``;
    * 0.2 times ``1 / (1 + steps)``, where ``steps`` is ``len(trajectory)``.

    Args:
        env: Object that may expose ``episode_stats``; its last entry is read
            for the ``"success"`` flag.
        trajectory: Optional sized collection of steps. ``None`` or an empty
            collection counts as zero steps.
        final_state: Unused; accepted so all graders share one signature.

    Returns:
        A float in ``[0.0, 1.0]``.
    """
    efficiency = get_info_efficiency(env)

    # NOTE(review): with no trajectory the step count is 0, which yields the
    # full 0.2 step bonus -- confirm callers always pass the trajectory.
    step_count = len(trajectory) if trajectory else 0

    episode_stats = getattr(env, "episode_stats", None)
    solved = episode_stats[-1].get("success", False) if episode_stats else False

    success_term = 0.5 * (1 if solved else 0)
    efficiency_term = 0.3 * efficiency
    brevity_term = 0.2 * (1 / (1 + step_count))

    total = success_term + efficiency_term + brevity_term
    return max(0.0, min(1.0, total))

inference.py CHANGED Viewed

@@ -5,7 +5,9 @@ import os
 import json
 from agent_llm import get_action
 from app.env import CustomerSupportEnv
 # =========================
 # TASK DEFINITIONS
@@ -16,7 +18,7 @@ TASKS = [
     {"name": "hard-efficient-resolution", "type": "hard"},
 ]
 # =========================
 # GRADERS (DETERMINISTIC)
 # =========================
@@ -45,7 +47,7 @@ def grade_hard(env, success, steps, rewards):
     )
     return max(0.01, min(0.99, score))
 def compute_score(task_type, env, success, steps, rewards):
@@ -137,7 +139,17 @@ def run_single_task(task):
     # =========================
     # SCORE USING TASK-SPECIFIC GRADER
     # =========================
-    score = compute_score(task_type, env, success, step_count, rewards)
     rewards_str = ",".join(f"{r:.2f}" for r in rewards)

 import json
 from agent_llm import get_action
 from app.env import CustomerSupportEnv
+from graders import grade_easy, grade_medium, grade_hard
+import sys
 # =========================
 # TASK DEFINITIONS
     {"name": "hard-efficient-resolution", "type": "hard"},
 ]
+"""
 # =========================
 # GRADERS (DETERMINISTIC)
 # =========================
     )
     return max(0.01, min(0.99, score))
+"""
 def compute_score(task_type, env, success, steps, rewards):
     # =========================
     # SCORE USING TASK-SPECIFIC GRADER
     # =========================
+    #score = compute_score(task_type, env, success, step_count, rewards)
+    if task_type == "easy":
+        score = grade_easy(env)
+    elif task_type == "medium":
+        score = grade_medium(env)
+    elif task_type == "hard":
+        score = grade_hard(env)
+    else:
+        score = 0.5
     rewards_str = ",".join(f"{r:.2f}" for r in rewards)

openenv.yaml CHANGED Viewed

@@ -29,6 +29,26 @@ reward_range:
   min: -1.0
   max: 2.0
 # =========================
 # ACTION SPACE
 # =========================
@@ -111,6 +131,4 @@ termination:
 tags:
   - customer-support
   - goal-oriented
-  - information-gathering
-difficulty: medium

   min: -1.0
   max: 2.0
+# =========================
+# TASKS + GRADERS
+# =========================
+tasks:
+  - id: easy-info-collection
+    description: Collect at least some relevant information
+    difficulty: easy
+    grader: graders.grade_easy
+  - id: medium-complete-info
+    description: Collect all required information efficiently
+    difficulty: medium
+    grader: graders.grade_medium
+  - id: hard-efficient-resolution
+    description: Fully resolve ticket with optimal steps
+    difficulty: hard
+    grader: graders.grade_hard
 # =========================
 # ACTION SPACE
 # =========================
 tags:
   - customer-support
   - goal-oriented
+  - information-gathering