Spaces:
Sleeping
Sleeping
Fix log_end: clamp score and use 4dp to avoid score=0.00 in [END] log
Browse files
The :.2f format would print 0.00 for any score < 0.005 (e.g. 0.001 after
the server-side clamp). If the validator parses the [END] log for the score
value, 0.00 is treated as exactly 0, which fails the strictly-(0,1) check.
Added a client-side max(0.001, min(0.999, score)) clamp and switched to :.4f to be safe.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- inference.py +4 -2
inference.py
CHANGED
|
@@ -99,10 +99,12 @@ def log_step(step: int, action: dict, reward: float, done: bool,
|
|
| 99 |
|
| 100 |
|
| 101 |
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
|
| 102 |
-
|
|
|
|
|
|
|
| 103 |
print(
|
| 104 |
f"[END] success={str(success).lower()} steps={steps} "
|
| 105 |
-
f"score={score:.2f} rewards={rewards_str}",
|
| 106 |
flush=True,
|
| 107 |
)
|
| 108 |
|
|
|
|
| 99 |
|
| 100 |
|
| 101 |
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
|
| 102 |
+
# Clamp score to (0.001, 0.999) strictly — validator rejects exact 0.0 or 1.0
|
| 103 |
+
score = max(0.001, min(0.999, score))
|
| 104 |
+
rewards_str = ",".join(f"{r:.4f}" for r in rewards)
|
| 105 |
print(
|
| 106 |
f"[END] success={str(success).lower()} steps={steps} "
|
| 107 |
+
f"score={score:.4f} rewards={rewards_str}",
|
| 108 |
flush=True,
|
| 109 |
)
|
| 110 |
|