prashantmatlani committed on
Commit
bf34481
·
1 Parent(s): bce7dd2

updated yaml, added graders to root, modified inference

Browse files
Files changed (3) hide show
  1. graders.py +37 -0
  2. inference.py +15 -3
  3. openenv.yaml +21 -3
graders.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # graders.py
3
+
4
def get_info_efficiency(env):
    """Return the ``info_efficiency`` recorded for the most recent episode.

    Args:
        env: Any object. If it has a non-empty ``episode_stats`` sequence,
            the last entry is read with ``.get("info_efficiency", 0)``.

    Returns:
        The stored value, or ``0`` when ``env`` has no ``episode_stats``
        attribute, the sequence is empty, the key is missing, or the stored
        value is ``None``.
    """
    episode_stats = getattr(env, "episode_stats", None)
    if not episode_stats:
        return 0

    value = episode_stats[-1].get("info_efficiency", 0)
    # A key stored with an explicit None would otherwise reach the graders'
    # arithmetic (e.g. ``0.5 * info_eff``) and raise TypeError there.
    return 0 if value is None else value
8
+
9
+
10
def grade_easy(env, trajectory=None, final_state=None):
    """Grade the easy task from the number of steps in ``trajectory``.

    The score is ``0.3 + 0.1 * <number of steps>``, clamped to ``[0.0, 1.0]``.

    Args:
        env: Accepted for signature parity with the other graders; not used.
        trajectory: Optional iterable of steps. ``None`` or an empty
            sequence counts as zero steps.
        final_state: Accepted for signature parity; not used.

    Returns:
        A float in ``[0.0, 1.0]``.
    """
    # Only the step count feeds the score, so count the steps directly
    # instead of reading a "reward" from each one; this keeps the grader
    # working for steps that are not dicts.
    step_count = sum(1 for _ in (trajectory or ()))
    score = 0.3 + 0.1 * step_count
    return max(0.0, min(1.0, score))
14
+
15
+
16
def grade_medium(env, trajectory=None, final_state=None):
    """Grade the medium task as half of the latest info efficiency.

    The value from ``get_info_efficiency(env)`` is multiplied by 0.5 and the
    result is clamped to ``[0.0, 1.0]``. ``trajectory`` and ``final_state``
    are accepted but not used.
    """
    halved = 0.5 * get_info_efficiency(env)
    capped = min(1.0, halved)
    return max(0.0, capped)
20
+
21
+
22
def grade_hard(env, trajectory=None, final_state=None):
    """Grade the hard task from success, info efficiency and step count.

    The score is a weighted sum, clamped to ``[0.0, 1.0]``:

    * 0.5 if the last ``env.episode_stats`` entry has a truthy ``"success"``;
    * 0.3 times the value from ``get_info_efficiency(env)``;
    * 0.2 times ``1 / (1 + steps)``, where ``steps`` is
      ``len(trajectory)`` (zero when ``trajectory`` is ``None`` or empty).

    ``final_state`` is accepted but not used.
    """
    efficiency = get_info_efficiency(env)
    step_count = len(trajectory or [])

    # Success defaults to False when env has no (or empty) episode_stats.
    solved = False
    if hasattr(env, "episode_stats") and env.episode_stats:
        solved = env.episode_stats[-1].get("success", False)

    success_term = 0.5 * (1 if solved else 0)
    efficiency_term = 0.3 * efficiency
    brevity_term = 0.2 * (1 / (1 + step_count))
    total = success_term + efficiency_term + brevity_term

    capped = min(1.0, total)
    return max(0.0, capped)
inference.py CHANGED
@@ -5,7 +5,9 @@ import os
5
  import json
6
  from agent_llm import get_action
7
  from app.env import CustomerSupportEnv
 
8
 
 
9
 
10
  # =========================
11
  # TASK DEFINITIONS
@@ -16,7 +18,7 @@ TASKS = [
16
  {"name": "hard-efficient-resolution", "type": "hard"},
17
  ]
18
 
19
-
20
  # =========================
21
  # GRADERS (DETERMINISTIC)
22
  # =========================
@@ -45,7 +47,7 @@ def grade_hard(env, success, steps, rewards):
45
  )
46
 
47
  return max(0.01, min(0.99, score))
48
-
49
 
50
  def compute_score(task_type, env, success, steps, rewards):
51
 
@@ -137,7 +139,17 @@ def run_single_task(task):
137
  # =========================
138
  # SCORE USING TASK-SPECIFIC GRADER
139
  # =========================
140
- score = compute_score(task_type, env, success, step_count, rewards)
 
 
 
 
 
 
 
 
 
 
141
 
142
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
143
 
 
5
  import json
6
  from agent_llm import get_action
7
  from app.env import CustomerSupportEnv
8
+ from graders import grade_easy, grade_medium, grade_hard
9
 
10
+ import sys
11
 
12
  # =========================
13
  # TASK DEFINITIONS
 
18
  {"name": "hard-efficient-resolution", "type": "hard"},
19
  ]
20
 
21
+ """
22
  # =========================
23
  # GRADERS (DETERMINISTIC)
24
  # =========================
 
47
  )
48
 
49
  return max(0.01, min(0.99, score))
50
+ """
51
 
52
  def compute_score(task_type, env, success, steps, rewards):
53
 
 
139
  # =========================
140
  # SCORE USING TASK-SPECIFIC GRADER
141
  # =========================
142
+ #score = compute_score(task_type, env, success, step_count, rewards)
143
+
144
+ if task_type == "easy":
145
+ score = grade_easy(env)
146
+ elif task_type == "medium":
147
+ score = grade_medium(env)
148
+ elif task_type == "hard":
149
+ score = grade_hard(env)
150
+ else:
151
+ score = 0.5
152
+
153
 
154
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
155
 
openenv.yaml CHANGED
@@ -29,6 +29,26 @@ reward_range:
29
  min: -1.0
30
  max: 2.0
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  # =========================
33
  # ACTION SPACE
34
  # =========================
@@ -111,6 +131,4 @@ termination:
111
  tags:
112
  - customer-support
113
  - goal-oriented
114
- - information-gathering
115
-
116
- difficulty: medium
 
29
  min: -1.0
30
  max: 2.0
31
 
32
+ # =========================
33
+ # TASKS + GRADERS
34
+ # =========================
35
+ tasks:
36
+ - id: easy-info-collection
37
+ description: Collect at least some relevant information
38
+ difficulty: easy
39
+ grader: graders.grade_easy
40
+
41
+ - id: medium-complete-info
42
+ description: Collect all required information efficiently
43
+ difficulty: medium
44
+ grader: graders.grade_medium
45
+
46
+ - id: hard-efficient-resolution
47
+ description: Fully resolve ticket with optimal steps
48
+ difficulty: hard
49
+ grader: graders.grade_hard
50
+
51
+
52
  # =========================
53
  # ACTION SPACE
54
  # =========================
 
131
  tags:
132
  - customer-support
133
  - goal-oriented
134
+ - information-gathering