Timusgeorge committed on
Commit
c204411
·
verified ·
1 Parent(s): f095b05

Deploy inference.py with all fixes

Browse files
Files changed (1) hide show
  1. inference.py +6 -2
inference.py CHANGED
@@ -219,7 +219,9 @@ def run_heuristic_task(task_id: str, task_name: str, seed: int) -> float:
219
  score = obs.score_so_far
220
  print(f"[STEP] step={step} reward={obs.reward:.3f}", flush=True)
221
 
222
- print(f"[END] task={task_id} score={score:.2f} steps={step}", flush=True)
 
 
223
  return score
224
 
225
 
@@ -374,7 +376,9 @@ def run_react_task(llm, task_id: str, task_name: str, seed: int) -> float:
374
  score = obs.score_so_far
375
  print(f"[STEP] step={step} reward={obs.reward:.3f}", flush=True)
376
 
377
- print(f"[END] task={task_id} score={score:.2f} steps={step}", flush=True)
 
 
378
  return score
379
 
380
 
 
219
  score = obs.score_so_far
220
  print(f"[STEP] step={step} reward={obs.reward:.3f}", flush=True)
221
 
222
+ # CRITICAL: Clamp score strictly between 0 and 1 (exclusive)
223
+ score = min(0.99, max(0.01, score))
224
+ print(f"[END] task={task_id} score={score:.4f} steps={step}", flush=True)
225
  return score
226
 
227
 
 
376
  score = obs.score_so_far
377
  print(f"[STEP] step={step} reward={obs.reward:.3f}", flush=True)
378
 
379
+ # CRITICAL: Clamp score strictly between 0 and 1 (exclusive)
380
+ score = min(0.99, max(0.01, score))
381
+ print(f"[END] task={task_id} score={score:.4f} steps={step}", flush=True)
382
  return score
383
 
384