Spaces:
Sleeping
Sleeping
Deploy inference.py with all fixes
Browse files
- inference.py +6 -2
inference.py
CHANGED
|
@@ -219,7 +219,9 @@ def run_heuristic_task(task_id: str, task_name: str, seed: int) -> float:
|
|
| 219 |
score = obs.score_so_far
|
| 220 |
print(f"[STEP] step={step} reward={obs.reward:.3f}", flush=True)
|
| 221 |
|
| 222 |
-
|
|
|
|
|
|
|
| 223 |
return score
|
| 224 |
|
| 225 |
|
|
@@ -374,7 +376,9 @@ def run_react_task(llm, task_id: str, task_name: str, seed: int) -> float:
|
|
| 374 |
score = obs.score_so_far
|
| 375 |
print(f"[STEP] step={step} reward={obs.reward:.3f}", flush=True)
|
| 376 |
|
| 377 |
-
|
|
|
|
|
|
|
| 378 |
return score
|
| 379 |
|
| 380 |
|
|
|
|
| 219 |
score = obs.score_so_far
|
| 220 |
print(f"[STEP] step={step} reward={obs.reward:.3f}", flush=True)
|
| 221 |
|
| 222 |
+
# CRITICAL: Clamp score strictly between 0 and 1 (exclusive)
|
| 223 |
+
score = min(0.99, max(0.01, score))
|
| 224 |
+
print(f"[END] task={task_id} score={score:.4f} steps={step}", flush=True)
|
| 225 |
return score
|
| 226 |
|
| 227 |
|
|
|
|
| 376 |
score = obs.score_so_far
|
| 377 |
print(f"[STEP] step={step} reward={obs.reward:.3f}", flush=True)
|
| 378 |
|
| 379 |
+
# CRITICAL: Clamp score strictly between 0 and 1 (exclusive)
|
| 380 |
+
score = min(0.99, max(0.01, score))
|
| 381 |
+
print(f"[END] task={task_id} score={score:.4f} steps={step}", flush=True)
|
| 382 |
return score
|
| 383 |
|
| 384 |
|