Spaces:

Imaginephoenix
/

openenv1

Sleeping

Imaginephoenix committed on Apr 8

Commit

2d9543a

verified ·

1 Parent(s): 3ac18e2

Update inference.py

Files changed (1) hide show

inference.py CHANGED Viewed

@@ -127,6 +127,9 @@ def _format_open_score(value: float) -> str:
     clamped = max(LOG_SCORE_EPSILON, min(1.0 - LOG_SCORE_EPSILON, float(value)))
     return f"{clamped:.2f}"
 def log_step(step: int, action_str: str, reward: float, done: bool, error: str | None) -> None:
     """Emit mandatory STEP line."""
@@ -323,9 +326,14 @@ def run_episode(
             if done:
                 break
-        avg_reward = sum(rewards) / max(len(rewards), 1)
         success = avg_reward >= SUCCESS_SCORE_THRESHOLD
     except Exception:
         success = False
     finally:
         if env is not None:

     clamped = max(LOG_SCORE_EPSILON, min(1.0 - LOG_SCORE_EPSILON, float(value)))
     return f"{clamped:.2f}"
+def _strict_task_score(raw_score: float) -> float:  # same clamp as _format_open_score, but returns the float rather than a "%.2f" string
+    """Return task score in strict-open interval for evaluator compatibility."""
+    return max(LOG_SCORE_EPSILON, min(1.0 - LOG_SCORE_EPSILON, float(raw_score)))  # result lies in [LOG_SCORE_EPSILON, 1 - LOG_SCORE_EPSILON]; NOTE(review): a NaN input appears to come out as the upper bound — confirm that is intended
 def log_step(step: int, action_str: str, reward: float, done: bool, error: str | None) -> None:
     """Emit mandatory STEP line."""
             if done:
                 break
+        if not rewards:
+            rewards.append(LOG_SCORE_EPSILON)
+        avg_reward = _strict_task_score(sum(rewards) / len(rewards))
         success = avg_reward >= SUCCESS_SCORE_THRESHOLD
     except Exception:
+        if not rewards:
+            rewards.append(LOG_SCORE_EPSILON)
         success = False
     finally:
         if env is not None: