OGrohit committed on
Commit
52f75f0
·
1 Parent(s): 580afcf

Issue resolved regarding PHASE 2

Browse files
Files changed (1) hide show
  1. inference.py +9 -11
inference.py CHANGED
@@ -195,8 +195,6 @@ def _get_fallback_action(obs: dict, step: int, actions_taken: list) -> dict:
195
 
196
  def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
197
  """Run one complete episode for a task. Returns score + breakdown."""
198
- print(f"\n Running task: {task_id}...")
199
-
200
  # Reset
201
  try:
202
  resp = requests.post(
@@ -207,9 +205,11 @@ def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
207
  resp.raise_for_status()
208
  obs = resp.json()
209
  except Exception as e:
210
- print(f" ERROR: Reset failed: {e}")
211
  return {"score": 0.0, "error": str(e), "task_id": task_id}
212
 
 
 
213
  max_steps = MAX_STEPS_PER_TASK.get(task_id, 10)
214
  conversation_history = []
215
  actions_taken = []
@@ -238,10 +238,9 @@ def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
238
  conversation_history.append({"role": "assistant", "content": response_text})
239
  action = _parse_action(response_text)
240
  if action is None:
241
- print(f" Step {steps_taken}: parse failed, using fallback")
242
  action = _get_fallback_action(obs, steps_taken, actions_taken)
243
  except Exception as e:
244
- print(f" Step {steps_taken}: LLM error ({e}), using fallback")
245
  action = _get_fallback_action(obs, steps_taken, actions_taken)
246
 
247
  # Step environment
@@ -255,12 +254,10 @@ def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
255
  obs = step_resp.json()
256
  done = obs.get("done", False)
257
  reward = obs.get("reward", 0.0)
258
- feedback = obs.get("last_action_feedback", "")
259
  actions_taken.append(action)
260
- print(f" Step {steps_taken}: {action['action_type']}({action['value']}) "
261
- f"-> reward={reward:+.2f} | {feedback[:50]}")
262
  except Exception as e:
263
- print(f" Step {steps_taken}: environment error: {e}")
264
  break
265
 
266
  steps_taken += 1
@@ -274,11 +271,12 @@ def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
274
  score = grader_result.get("score", 0.0)
275
  breakdown = grader_result.get("breakdown", {})
276
  except Exception as e:
277
- print(f" ERROR: Grader failed: {e}")
278
  score = obs.get("cumulative_score", 0.0)
279
  breakdown = {}
280
 
281
- print(f" Score: {score:.4f} ({steps_taken} steps)")
 
282
  return {
283
  "task_id": task_id,
284
  "score": score,
 
195
 
196
  def run_task(client: OpenAI, task_id: str, seed: int = 42) -> dict:
197
  """Run one complete episode for a task. Returns score + breakdown."""
 
 
198
  # Reset
199
  try:
200
  resp = requests.post(
 
205
  resp.raise_for_status()
206
  obs = resp.json()
207
  except Exception as e:
208
+ print(f"[ERROR] reset task={task_id} error={e}", flush=True)
209
  return {"score": 0.0, "error": str(e), "task_id": task_id}
210
 
211
+ print(f"[START] task={task_id}", flush=True)
212
+
213
  max_steps = MAX_STEPS_PER_TASK.get(task_id, 10)
214
  conversation_history = []
215
  actions_taken = []
 
238
  conversation_history.append({"role": "assistant", "content": response_text})
239
  action = _parse_action(response_text)
240
  if action is None:
 
241
  action = _get_fallback_action(obs, steps_taken, actions_taken)
242
  except Exception as e:
243
+ print(f"[ERROR] step={steps_taken + 1} llm_error={e}", flush=True)
244
  action = _get_fallback_action(obs, steps_taken, actions_taken)
245
 
246
  # Step environment
 
254
  obs = step_resp.json()
255
  done = obs.get("done", False)
256
  reward = obs.get("reward", 0.0)
 
257
  actions_taken.append(action)
258
+ print(f"[STEP] step={steps_taken + 1} reward={reward:.4f}", flush=True)
 
259
  except Exception as e:
260
+ print(f"[ERROR] step={steps_taken + 1} env_error={e}", flush=True)
261
  break
262
 
263
  steps_taken += 1
 
271
  score = grader_result.get("score", 0.0)
272
  breakdown = grader_result.get("breakdown", {})
273
  except Exception as e:
274
+ print(f"[ERROR] grader task={task_id} error={e}", flush=True)
275
  score = obs.get("cumulative_score", 0.0)
276
  breakdown = {}
277
 
278
+ print(f"[INFO] Score: {score:.4f} ({steps_taken} steps)", flush=True)
279
+ print(f"[END] task={task_id} score={score:.4f} steps={steps_taken}", flush=True)
280
  return {
281
  "task_id": task_id,
282
  "score": score,