prashantmatlani committed on
Commit
bce7dd2
·
1 Parent(s): 0e2d0ce

modified task definitions, graders sourcing episode data

Browse files
Files changed (1) hide show
  1. inference.py +86 -41
inference.py CHANGED
@@ -2,18 +2,68 @@
2
  # inference.py
3
 
4
  import os
 
5
  from agent_llm import get_action
6
  from app.env import CustomerSupportEnv
7
 
8
 
9
- #"""
10
- #def format_action(action: dict) -> str:
11
- #"""Convert action dict → string"""
12
- # if not action:
13
- # return "null"
14
- # return str(action).replace("\n", "").replace(" ", " ")
15
- #"""
 
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  def format_action(action: dict) -> str:
18
  if not action:
19
  return "null"
@@ -29,23 +79,15 @@ def format_action(action: dict) -> str:
29
 
30
  return str(action)
31
 
32
- def compute_score(success, steps, rewards):
33
- """
34
- Continuous score in (0,1)
35
- """
36
- avg_reward = sum(rewards) / max(1, len(rewards))
37
 
38
- score = (
39
- 0.5 * (1.0 if success else 0.0) +
40
- 0.3 * (1 / (1 + steps)) +
41
- 0.2 * max(0, min(1, avg_reward))
42
- )
43
-
44
- # Clamp to (0,1) but not exact
45
- return max(0.01, min(0.99, score))
46
 
 
 
47
 
48
- def run_single_task(task_name):
49
  env = CustomerSupportEnv()
50
  obs = env.reset()
51
 
@@ -92,7 +134,10 @@ def run_single_task(task_name):
92
  f"action=null reward=0.00 done=true error={str(e)}"
93
  )
94
 
95
- score = compute_score(success, step_count, rewards)
 
 
 
96
 
97
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
98
 
@@ -104,11 +149,19 @@ def run_single_task(task_name):
104
  f"rewards={rewards_str}"
105
  )
106
 
107
- print(
108
- f"[GRADER] task={task_name} score={score:.2f}"
109
- )
110
-
111
-
 
 
 
 
 
 
 
 
112
  def main():
113
 
114
  model_name = os.getenv("MODEL_NAME", "unknown-model")
@@ -116,21 +169,13 @@ def main():
116
 
117
  print(f"[CONFIG] api_base_url={api_base_url}")
118
 
119
- task_name = "customer-support"
120
- benchmark = "openenv"
121
 
122
- print(f"[START] task={task_name} env={benchmark} model={model_name}")
123
-
124
- # =========================
125
- # RUN MULTIPLE TASKS (IMPORTANT)
126
- # =========================
127
- NUM_TASKS = 3
128
-
129
- for i in range(NUM_TASKS):
130
- #run_single_task(task_id=i + 1)
131
- task_name = f"customer-support-{i+1}"
132
- run_single_task(task_name)
133
 
134
 
135
  if __name__ == "__main__":
136
- main()
 
 
2
  # inference.py
3
 
4
  import os
5
+ import json
6
  from agent_llm import get_action
7
  from app.env import CustomerSupportEnv
8
 
9
 
10
+ # =========================
11
+ # TASK DEFINITIONS
12
+ # =========================
13
+ TASKS = [
14
+ {"name": "easy-info-collection", "type": "easy"},
15
+ {"name": "medium-complete-info", "type": "medium"},
16
+ {"name": "hard-efficient-resolution", "type": "hard"},
17
+ ]
18
 
19
+
20
+ # =========================
21
+ # GRADERS (DETERMINISTIC)
22
+ # =========================
23
def get_info_efficiency(env):
    """Return the ``info_efficiency`` value recorded for the latest episode.

    Looks at the last entry of ``env.episode_stats`` and reads its
    ``"info_efficiency"`` field.

    Args:
        env: Environment object. It is expected to expose an
            ``episode_stats`` sequence of dict-like entries.

    Returns:
        The recorded value, or ``0`` when ``env`` has no ``episode_stats``
        attribute, no episode has been recorded, the latest entry lacks
        the key, or the stored value is ``None``.
    """
    # A missing attribute is treated like "no episodes yet" so that grading
    # still yields a score instead of raising AttributeError.
    stats = getattr(env, "episode_stats", None)
    if not stats:
        return 0

    value = stats[-1].get("info_efficiency", 0)
    # The graders multiply this value; a stored None would raise TypeError.
    return 0 if value is None else value
27
+
28
def grade_easy(env, success, steps, rewards):
    """Grade the easy task from the number of recorded rewards.

    The raw score is a base of 0.3 plus 0.1 per entry in ``rewards``, then
    clamped to the range [0.01, 0.99]. ``env``, ``success`` and ``steps``
    are accepted but not used by this grader.
    """
    # Reward asking at least something: each recorded reward adds 0.1.
    raw = 0.3 + 0.1 * len(rewards)

    # Clamp so the score never reaches exactly 0 or 1.
    capped = min(0.99, raw)
    return max(0.01, capped)
32
+
33
def grade_medium(env, success, steps, rewards):
    """Grade the medium task as half of the latest info efficiency.

    The value returned by ``get_info_efficiency(env)`` is multiplied by
    0.5 and clamped to the range [0.01, 0.99]. ``success``, ``steps`` and
    ``rewards`` are accepted but not used by this grader.
    """
    half_efficiency = 0.5 * get_info_efficiency(env)

    # Clamp so the score never reaches exactly 0 or 1.
    capped = min(0.99, half_efficiency)
    return max(0.01, capped)
37
+
38
def grade_hard(env, success, steps, rewards):
    """Grade the hard task from success, info efficiency and step count.

    The raw score is a weighted sum:

    * 0.5 when ``success`` is truthy, otherwise 0;
    * 0.3 times the value from ``get_info_efficiency(env)``;
    * 0.2 times ``1 / (1 + steps)``, so fewer steps contribute more.

    The sum is clamped to the range [0.01, 0.99]. ``rewards`` is accepted
    but not used by this grader.
    """
    efficiency = get_info_efficiency(env)

    success_term = 0.5 * (1 if success else 0)
    efficiency_term = 0.3 * efficiency
    speed_term = 0.2 * (1 / (1 + steps))

    # Terms are added left to right, in the order listed in the docstring.
    total = success_term + efficiency_term + speed_term

    # Clamp so the score never reaches exactly 0 or 1.
    capped = min(0.99, total)
    return max(0.01, capped)
48
+
49
+
50
def compute_score(task_type, env, success, steps, rewards):
    """Dispatch to the grader that matches ``task_type``.

    ``"easy"``, ``"medium"`` and ``"hard"`` are routed to ``grade_easy``,
    ``grade_medium`` and ``grade_hard`` respectively, each called with
    ``(env, success, steps, rewards)``. Any other ``task_type`` yields the
    fixed fallback score 0.5.
    """
    graders = (
        ("easy", grade_easy),
        ("medium", grade_medium),
        ("hard", grade_hard),
    )

    for label, grader in graders:
        if task_type == label:
            return grader(env, success, steps, rewards)

    # Fallback for an unrecognised task type (should never hit).
    return 0.5
62
+
63
+
64
+ # =========================
65
+ # ACTION FORMATTER
66
+ # =========================
67
  def format_action(action: dict) -> str:
68
  if not action:
69
  return "null"
 
79
 
80
  return str(action)
81
 
 
 
 
 
 
82
 
83
+ # =========================
84
+ # RUN SINGLE TASK
85
+ # =========================
86
+ def run_single_task(task):
 
 
 
 
87
 
88
+ task_name = task["name"]
89
+ task_type = task["type"]
90
 
 
91
  env = CustomerSupportEnv()
92
  obs = env.reset()
93
 
 
134
  f"action=null reward=0.00 done=true error={str(e)}"
135
  )
136
 
137
+ # =========================
138
+ # SCORE USING TASK-SPECIFIC GRADER
139
+ # =========================
140
+ score = compute_score(task_type, env, success, step_count, rewards)
141
 
142
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
143
 
 
149
  f"rewards={rewards_str}"
150
  )
151
 
152
+ # =========================
153
+ # CRITICAL: JSON OUTPUT (GRADER SIGNAL)
154
+ # =========================
155
+ print(f"\n")
156
+ print(json.dumps({
157
+ "task": task_name,
158
+ "score": round(score, 4)
159
+ }))
160
+ print(f"\n")
161
+
162
+ # =========================
163
+ # MAIN
164
+ # =========================
165
  def main():
166
 
167
  model_name = os.getenv("MODEL_NAME", "unknown-model")
 
169
 
170
  print(f"[CONFIG] api_base_url={api_base_url}")
171
 
172
+ print(f"[START] task=customer-support env=openenv model={model_name}")
 
173
 
174
+ # RUN DISTINCT TASKS (NOT LOOP COPIES)
175
+ for task in TASKS:
176
+ run_single_task(task)
 
 
 
 
 
 
 
 
177
 
178
 
179
  if __name__ == "__main__":
180
+ main()
181
+