Imaginephoenix committed on
Commit
2d9543a
·
verified ·
1 Parent(s): 3ac18e2

Update inference.py

Browse files
Files changed (1) hide show
  1. inference.py +9 -1
inference.py CHANGED
@@ -127,6 +127,9 @@ def _format_open_score(value: float) -> str:
127
  clamped = max(LOG_SCORE_EPSILON, min(1.0 - LOG_SCORE_EPSILON, float(value)))
128
  return f"{clamped:.2f}"
129
 
 
 
 
130
 
131
  def log_step(step: int, action_str: str, reward: float, done: bool, error: str | None) -> None:
132
  """Emit mandatory STEP line."""
@@ -323,9 +326,14 @@ def run_episode(
323
  if done:
324
  break
325
 
326
- avg_reward = sum(rewards) / max(len(rewards), 1)
 
 
 
327
  success = avg_reward >= SUCCESS_SCORE_THRESHOLD
328
  except Exception:
 
 
329
  success = False
330
  finally:
331
  if env is not None:
 
127
  clamped = max(LOG_SCORE_EPSILON, min(1.0 - LOG_SCORE_EPSILON, float(value)))
128
  return f"{clamped:.2f}"
129
 
130
+ def _strict_task_score(raw_score: float) -> float:
131
+ """Return task score in strict-open interval for evaluator compatibility."""
132
+ return max(LOG_SCORE_EPSILON, min(1.0 - LOG_SCORE_EPSILON, float(raw_score)))
133
 
134
  def log_step(step: int, action_str: str, reward: float, done: bool, error: str | None) -> None:
135
  """Emit mandatory STEP line."""
 
326
  if done:
327
  break
328
 
329
+ if not rewards:
330
+ rewards.append(LOG_SCORE_EPSILON)
331
+
332
+ avg_reward = _strict_task_score(sum(rewards) / len(rewards))
333
  success = avg_reward >= SUCCESS_SCORE_THRESHOLD
334
  except Exception:
335
+ if not rewards:
336
+ rewards.append(LOG_SCORE_EPSILON)
337
  success = False
338
  finally:
339
  if env is not None: