Spaces:
Sleeping
Sleeping
Update inference.py
Browse files- inference.py +9 -1
inference.py
CHANGED
|
@@ -127,6 +127,9 @@ def _format_open_score(value: float) -> str:
|
|
| 127 |
clamped = max(LOG_SCORE_EPSILON, min(1.0 - LOG_SCORE_EPSILON, float(value)))
|
| 128 |
return f"{clamped:.2f}"
|
| 129 |
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
def log_step(step: int, action_str: str, reward: float, done: bool, error: str | None) -> None:
|
| 132 |
"""Emit mandatory STEP line."""
|
|
@@ -323,9 +326,14 @@ def run_episode(
|
|
| 323 |
if done:
|
| 324 |
break
|
| 325 |
|
| 326 |
-
|
|
|
|
|
|
|
|
|
|
| 327 |
success = avg_reward >= SUCCESS_SCORE_THRESHOLD
|
| 328 |
except Exception:
|
|
|
|
|
|
|
| 329 |
success = False
|
| 330 |
finally:
|
| 331 |
if env is not None:
|
|
|
|
| 127 |
clamped = max(LOG_SCORE_EPSILON, min(1.0 - LOG_SCORE_EPSILON, float(value)))
|
| 128 |
return f"{clamped:.2f}"
|
| 129 |
|
| 130 |
+
def _strict_task_score(raw_score: float) -> float:
    """Return task score in strict-open interval for evaluator compatibility.

    The value is converted with ``float()`` and clamped to the range
    ``[LOG_SCORE_EPSILON, 1.0 - LOG_SCORE_EPSILON]``, so the result never
    reaches exactly 0.0 or 1.0 (assuming ``0 < LOG_SCORE_EPSILON < 0.5`` --
    the constant is defined elsewhere in the module; confirm there).

    Args:
        raw_score: Unclamped score; anything ``float()`` accepts.

    Returns:
        The clamped score. NaN input yields the lower bound
        ``LOG_SCORE_EPSILON`` rather than the upper bound.
    """
    import math  # local import so this block does not rely on file-level imports

    score = float(raw_score)
    if math.isnan(score):
        # Every ordering comparison against NaN is false, so the plain
        # max(min(...)) clamp would return the *upper* bound and a NaN
        # score would look like a near-perfect result. Treat NaN as the
        # worst score instead.
        return LOG_SCORE_EPSILON
    return max(LOG_SCORE_EPSILON, min(1.0 - LOG_SCORE_EPSILON, score))
|
| 133 |
|
| 134 |
def log_step(step: int, action_str: str, reward: float, done: bool, error: str | None) -> None:
|
| 135 |
"""Emit mandatory STEP line."""
|
|
|
|
| 326 |
if done:
|
| 327 |
break
|
| 328 |
|
| 329 |
+
if not rewards:
|
| 330 |
+
rewards.append(LOG_SCORE_EPSILON)
|
| 331 |
+
|
| 332 |
+
avg_reward = _strict_task_score(sum(rewards) / len(rewards))
|
| 333 |
success = avg_reward >= SUCCESS_SCORE_THRESHOLD
|
| 334 |
except Exception:
|
| 335 |
+
if not rewards:
|
| 336 |
+
rewards.append(LOG_SCORE_EPSILON)
|
| 337 |
success = False
|
| 338 |
finally:
|
| 339 |
if env is not None:
|