yadnyeshkolte committed on
Commit
131e2d3
·
1 Parent(s): 92d9fa2

Fix: grade() returns strictly (0.001,0.999), never 0.0 or 1.0

Browse files
Files changed (1) hide show
  1. server/api_debug_env_environment.py +12 -6
server/api_debug_env_environment.py CHANGED
@@ -394,25 +394,31 @@ class ApiDebugEnvironment(Environment):
394
  """
395
  Grade the agent's performance on the current episode.
396
 
397
- Score = (issues_fixed / issues_total) * efficiency_bonus
398
  Efficiency bonus = 1.0 + (remaining_steps / max_steps * 0.3)
 
399
 
400
  Returns:
401
- Score between 0.0 and 1.0
402
  """
403
  if self._scenario is None:
404
- return 0.0
405
 
406
  total = len(self._scenario.issues)
407
  if total == 0:
408
- return 1.0
409
 
410
  fix_ratio = len(self._issues_fixed) / total
411
  remaining = max(0, self._scenario.max_steps - self._state.step_count)
412
  efficiency_bonus = 1.0 + (remaining / self._scenario.max_steps * 0.3)
413
 
414
- score = fix_ratio * efficiency_bonus
415
- return min(1.0, round(score, 4))
 
 
 
 
 
416
 
417
  def get_task_info(self) -> Dict[str, Any]:
418
  """Return information about the current task."""
 
394
  """
395
  Grade the agent's performance on the current episode.
396
 
397
+ Score = (issues_fixed / issues_total) * efficiency_bonus + exploration_bonus
398
  Efficiency bonus = 1.0 + (remaining_steps / max_steps * 0.3)
399
+ Exploration bonus = small credit for inspecting services (max 0.05)
400
 
401
  Returns:
402
+ Score strictly between 0 and 1: clamped to the closed range [0.001, 0.999]
403
  """
404
  if self._scenario is None:
405
+ return 0.001
406
 
407
  total = len(self._scenario.issues)
408
  if total == 0:
409
+ return 0.999
410
 
411
  fix_ratio = len(self._issues_fixed) / total
412
  remaining = max(0, self._scenario.max_steps - self._state.step_count)
413
  efficiency_bonus = 1.0 + (remaining / self._scenario.max_steps * 0.3)
414
 
415
+ # Small partial credit for exploration even if no fixes submitted
416
+ exploration_bonus = min(0.05, len(self._inspected_targets) * 0.005)
417
+
418
+ score = fix_ratio * efficiency_bonus + exploration_bonus
419
+
420
+ # Clamp to the closed range [0.001, 0.999] — never exactly 0 or 1
421
+ return max(0.001, min(0.999, round(score, 4)))
422
 
423
  def get_task_info(self) -> Dict[str, Any]:
424
  """Return information about the current task."""