prashantmatlani commited on
Commit
c4b20ec
·
1 Parent(s): dc44de4

created tasks.py, updated env, inference

Browse files
Files changed (4) hide show
  1. app/env.py +7 -1
  2. graders.py +24 -14
  3. inference.py +12 -1
  4. tasks.py +22 -0
app/env.py CHANGED
@@ -6,9 +6,12 @@ from app.models import Observation, Action, Reward
6
  from app.dataset import TICKETS
7
  import random
8
  from graders import grade_easy, grade_medium, grade_hard
 
9
 
10
  import sys
11
 
 
 
12
  class CustomerSupportEnv:
13
 
14
  # OBTAIN TASKS FROM GRADERS.PY
@@ -273,4 +276,7 @@ class CustomerSupportEnv:
273
  "avg_steps": round(avg_steps, 3),
274
  "avg_reward": round(avg_reward, 3),
275
  "info_efficiency": round(info_eff, 3)
276
- }
 
 
 
 
6
  from app.dataset import TICKETS
7
  import random
8
  from graders import grade_easy, grade_medium, grade_hard
9
+ from tasks import TASKS
10
 
11
  import sys
12
 
13
+ AVAILABLE_TASKS = TASKS
14
+
15
  class CustomerSupportEnv:
16
 
17
  # OBTAIN TASKS FROM GRADERS.PY
 
276
  "avg_steps": round(avg_steps, 3),
277
  "avg_reward": round(avg_reward, 3),
278
  "info_efficiency": round(info_eff, 3)
279
+ }
280
+
281
def get_tasks():
    """Return the registered task definitions.

    Simply exposes the module-level AVAILABLE_TASKS registry (imported
    from tasks.py as TASKS) through a callable accessor.

    # NOTE(review): the diff rendering loses indentation — this reads the
    # module global AVAILABLE_TASKS and takes no `self`, so it appears to be
    # module-level; confirm it was not meant to be a method of
    # CustomerSupportEnv.
    """
    return AVAILABLE_TASKS
graders.py CHANGED
@@ -7,31 +7,41 @@ def get_info_efficiency(env):
7
  return 0
8
 
9
 
10
- def grade_easy(env, trajectory=None, final_state=None):
11
- rewards = [step.get("reward", 0) for step in (trajectory or [])]
12
- score = 0.3 + 0.1 * len(rewards)
13
- return max(0.0, min(1.0, score))
14
 
 
 
 
15
 
16
- def grade_medium(env, trajectory=None, final_state=None):
 
 
 
17
  info_eff = get_info_efficiency(env)
18
  score = 0.5 * info_eff
19
- return max(0.0, min(1.0, score))
20
 
 
 
21
 
22
- def grade_hard(env, trajectory=None, final_state=None):
23
- info_eff = get_info_efficiency(env)
24
 
25
- success = False
26
- steps = len(trajectory or [])
27
 
28
- if hasattr(env, "episode_stats") and env.episode_stats:
29
- success = env.episode_stats[-1].get("success", False)
30
 
31
  score = (
32
  0.5 * (1 if success else 0) +
33
  0.3 * info_eff +
34
- 0.2 * (1 / (1 + steps))
35
  )
36
 
37
- return max(0.0, min(1.0, score))
 
 
 
 
 
 
 
 
 
7
  return 0
8
 
9
 
10
def grade_easy(env, success=None, steps=None, rewards=None):
    """Grade an "easy" task: credit grows with the number of reward signals.

    Args:
        env: the environment instance (unused here; kept so all graders
            share the same call signature).
        success: unused for the easy grade.
        steps: unused for the easy grade.
        rewards: list of per-step rewards collected during the episode;
            None or empty counts as zero rewards.

    Returns:
        float: 0.3 base plus 0.1 per reward entry, clamped to [0.01, 0.99].
    """
    # Guard against rewards=None as well as an empty list.
    score = 0.3 + 0.1 * (len(rewards) if rewards else 0)
    # Clamp away from exact 0 and 1 so downstream consumers never see a
    # degenerate score.
    return max(0.01, min(0.99, score))
18
+
19
+
20
def grade_medium(env, success=None, steps=None, rewards=None):
    """Grade a "medium" task purely from information-gathering efficiency.

    Args:
        env: the environment instance, inspected by get_info_efficiency.
        success: unused for the medium grade; kept for a uniform grader
            signature.
        steps: unused for the medium grade.
        rewards: unused for the medium grade.

    Returns:
        float: 0.5 * info_efficiency, clamped to [0.01, 0.99].
    """
    info_eff = get_info_efficiency(env)
    score = 0.5 * info_eff
    # Clamp away from exact 0 and 1 so downstream consumers never see a
    # degenerate score.
    return max(0.01, min(0.99, score))
 
28
 
 
 
29
 
30
def grade_hard(env, success=None, steps=None, rewards=None):
    """Grade a "hard" task: weighted mix of success, info efficiency, brevity.

    Args:
        env: the environment instance, inspected by get_info_efficiency.
        success: truthy when the episode resolved the ticket successfully.
        steps: number of steps taken; fewer steps earn more brevity credit.
        rewards: unused for the hard grade; kept for a uniform grader
            signature.

    Returns:
        float: 0.5*success + 0.3*info_efficiency + 0.2/(1 + steps),
        clamped to [0.01, 0.99].
    """
    info_eff = get_info_efficiency(env)

    # NOTE(review): `steps or 1` maps both None and steps == 0 to 1, so a
    # zero-step episode earns the same brevity credit as a one-step one —
    # confirm that is intended rather than using `1 if steps is None else steps`.
    score = (
        0.5 * (1 if success else 0) +
        0.3 * info_eff +
        0.2 * (1 / (1 + (steps or 1)))
    )

    # Clamp away from exact 0 and 1 so downstream consumers never see a
    # degenerate score.
    return max(0.01, min(0.99, score))
45
+
46
+
47
+
inference.py CHANGED
@@ -6,9 +6,11 @@ import json
6
  from agent_llm import get_action
7
  from app.env import CustomerSupportEnv
8
  from graders import grade_easy, grade_medium, grade_hard
 
9
 
10
  import sys
11
 
 
12
  # =========================
13
  # TASK DEFINITIONS
14
  # =========================
@@ -17,6 +19,7 @@ TASKS = [
17
  {"name": "medium-complete-info", "type": "medium"},
18
  {"name": "hard-efficient-resolution", "type": "hard"},
19
  ]
 
20
 
21
  """
22
  # =========================
@@ -141,6 +144,14 @@ def run_single_task(task):
141
  # =========================
142
  #score = compute_score(task_type, env, success, step_count, rewards)
143
 
 
 
 
 
 
 
 
 
144
  if task_type == "easy":
145
  score = grade_easy(env)
146
  elif task_type == "medium":
@@ -149,7 +160,7 @@ def run_single_task(task):
149
  score = grade_hard(env)
150
  else:
151
  score = 0.5
152
-
153
 
154
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
155
 
 
6
  from agent_llm import get_action
7
  from app.env import CustomerSupportEnv
8
  from graders import grade_easy, grade_medium, grade_hard
9
+ from tasks import TASKS
10
 
11
  import sys
12
 
13
+ """
14
  # =========================
15
  # TASK DEFINITIONS
16
  # =========================
 
19
  {"name": "medium-complete-info", "type": "medium"},
20
  {"name": "hard-efficient-resolution", "type": "hard"},
21
  ]
22
+ """
23
 
24
  """
25
  # =========================
 
144
  # =========================
145
  #score = compute_score(task_type, env, success, step_count, rewards)
146
 
147
+ grader = task.get("grader")
148
+
149
+ if grader:
150
+ score = grader(env, success, step_count, rewards)
151
+ else:
152
+ score = 0.5
153
+
154
+ """
155
  if task_type == "easy":
156
  score = grade_easy(env)
157
  elif task_type == "medium":
 
160
  score = grade_hard(env)
161
  else:
162
  score = 0.5
163
+ """
164
 
165
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
166
 
tasks.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# tasks.py
#
# Central registry of evaluation tasks. Each entry pairs a task name and
# difficulty tier with the grader callable that scores it.

from graders import grade_easy, grade_medium, grade_hard

# (name, type, grader) triples; expanded into the task records below so the
# registry stays easy to scan and extend.
_TASK_SPECS = (
    ("easy-info-collection", "easy", grade_easy),
    ("medium-complete-info", "medium", grade_medium),
    ("hard-efficient-resolution", "hard", grade_hard),
)

TASKS = [
    {"name": task_name, "type": task_type, "grader": grader_fn}
    for task_name, task_type, grader_fn in _TASK_SPECS
]