Spaces:
Sleeping
Sleeping
Commit ·
131e2d3
1
Parent(s): 92d9fa2
Fix: grade() returns a score clamped to [0.001, 0.999] (inclusive), never 0.0 or 1.0
Browse files
server/api_debug_env_environment.py
CHANGED
|
@@ -394,25 +394,31 @@ class ApiDebugEnvironment(Environment):
|
|
| 394 |
"""
|
| 395 |
Grade the agent's performance on the current episode.
|
| 396 |
|
| 397 |
-
Score = (issues_fixed / issues_total) * efficiency_bonus
|
| 398 |
Efficiency bonus = 1.0 + (remaining_steps / max_steps * 0.3)
|
|
|
|
| 399 |
|
| 400 |
Returns:
|
| 401 |
-
Score between 0
|
| 402 |
"""
|
| 403 |
if self._scenario is None:
|
| 404 |
-
return 0.
|
| 405 |
|
| 406 |
total = len(self._scenario.issues)
|
| 407 |
if total == 0:
|
| 408 |
-
return
|
| 409 |
|
| 410 |
fix_ratio = len(self._issues_fixed) / total
|
| 411 |
remaining = max(0, self._scenario.max_steps - self._state.step_count)
|
| 412 |
efficiency_bonus = 1.0 + (remaining / self._scenario.max_steps * 0.3)
|
| 413 |
|
| 414 |
-
|
| 415 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
|
| 417 |
def get_task_info(self) -> Dict[str, Any]:
|
| 418 |
"""Return information about the current task."""
|
|
|
|
| 394 |
"""
|
| 395 |
Grade the agent's performance on the current episode.
|
| 396 |
|
| 397 |
+
Score = (issues_fixed / issues_total) * efficiency_bonus + exploration_bonus
|
| 398 |
Efficiency bonus = 1.0 + (remaining_steps / max_steps * 0.3)
|
| 399 |
+
Exploration bonus = small credit for inspecting services (max 0.05)
|
| 400 |
|
| 401 |
Returns:
|
| 402 |
+
Score strictly between 0 and 1: clamped to the closed range [0.001, 0.999]
|
| 403 |
"""
|
| 404 |
if self._scenario is None:
|
| 405 |
+
return 0.001
|
| 406 |
|
| 407 |
total = len(self._scenario.issues)
|
| 408 |
if total == 0:
|
| 409 |
+
return 0.999
|
| 410 |
|
| 411 |
fix_ratio = len(self._issues_fixed) / total
|
| 412 |
remaining = max(0, self._scenario.max_steps - self._state.step_count)
|
| 413 |
efficiency_bonus = 1.0 + (remaining / self._scenario.max_steps * 0.3)
|
| 414 |
|
| 415 |
+
# Small partial credit for exploration even if no fixes submitted
|
| 416 |
+
exploration_bonus = min(0.05, len(self._inspected_targets) * 0.005)
|
| 417 |
+
|
| 418 |
+
score = fix_ratio * efficiency_bonus + exploration_bonus
|
| 419 |
+
|
| 420 |
+
# Clamp to the closed range [0.001, 0.999] so the score is never exactly 0 or 1
|
| 421 |
+
return max(0.001, min(0.999, round(score, 4)))
|
| 422 |
|
| 423 |
def get_task_info(self) -> Dict[str, Any]:
|
| 424 |
"""Return information about the current task."""
|