Spaces:
Sleeping
Sleeping
Commit ·
c5b547b
1
Parent(s): 0aa6a46
Fix grader scores to be strictly within (0, 1) range
Browse files
Phase 2 validation requires scores strictly between 0 and 1 (not 0.0
or 1.0). Updated _clip in graders.py to clamp to [0.01, 0.99], fixed
env_core.py fallback, and updated inference.py _clamp01 accordingly.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
backend/src/polypharmacy_env/env_core.py
CHANGED
|
@@ -354,7 +354,7 @@ class PolypharmacyEnv(
|
|
| 354 |
self._total_drug_changes,
|
| 355 |
self._critical_stopped_without_sub,
|
| 356 |
)
|
| 357 |
-
return 0.0
|
| 358 |
|
| 359 |
def _get_severe_pairs(self) -> List[Tuple[str, str]]:
|
| 360 |
"""Return all severe DDI pairs present in the *initial* medication list."""
|
|
|
|
| 354 |
self._total_drug_changes,
|
| 355 |
self._critical_stopped_without_sub,
|
| 356 |
)
|
| 357 |
+
return 0.01 # strict (0, 1) range required
|
| 358 |
|
| 359 |
def _get_severe_pairs(self) -> List[Tuple[str, str]]:
|
| 360 |
"""Return all severe DDI pairs present in the *initial* medication list."""
|
backend/src/polypharmacy_env/graders.py
CHANGED
|
@@ -12,9 +12,13 @@ from .models import InterventionRecord
|
|
| 12 |
|
| 13 |
_EPS = 1e-8
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
def _clip(x: float) -> float:
|
| 17 |
-
return max(
|
| 18 |
|
| 19 |
|
| 20 |
# ── Easy: easy_screening ─────────────────────────────────────────────────────
|
|
|
|
| 12 |
|
| 13 |
_EPS = 1e-8
|
| 14 |
|
| 15 |
+
# Scores must be strictly in (0, 1) — never exactly 0.0 or 1.0
|
| 16 |
+
_SCORE_MIN = 0.01
|
| 17 |
+
_SCORE_MAX = 0.99
|
| 18 |
+
|
| 19 |
|
| 20 |
def _clip(x: float) -> float:
|
| 21 |
+
return max(_SCORE_MIN, min(x, _SCORE_MAX))
|
| 22 |
|
| 23 |
|
| 24 |
# ── Easy: easy_screening ─────────────────────────────────────────────────────
|
inference.py
CHANGED
|
@@ -68,7 +68,8 @@ def _fmt_reward(v: float) -> str:
|
|
| 68 |
|
| 69 |
|
| 70 |
def _clamp01(v: float) -> float:
|
| 71 |
-
|
|
|
|
| 72 |
|
| 73 |
|
| 74 |
def log_start(task: str) -> None:
|
|
@@ -188,7 +189,7 @@ def run_task(client: OpenAI, task_id: str) -> None:
|
|
| 188 |
rewards: List[float] = []
|
| 189 |
steps = 0
|
| 190 |
success = False
|
| 191 |
-
score = 0.0
|
| 192 |
log_start(task_id)
|
| 193 |
try:
|
| 194 |
reset_payload = _reset(task_id)
|
|
|
|
| 68 |
|
| 69 |
|
| 70 |
def _clamp01(v: float) -> float:
|
| 71 |
+
"""Clamp score to strict (0, 1) — never exactly 0.0 or 1.0."""
|
| 72 |
+
return max(0.01, min(0.99, float(v)))
|
| 73 |
|
| 74 |
|
| 75 |
def log_start(task: str) -> None:
|
|
|
|
| 189 |
rewards: List[float] = []
|
| 190 |
steps = 0
|
| 191 |
success = False
|
| 192 |
+
score = 0.01 # strict (0, 1) — never exactly 0.0
|
| 193 |
log_start(task_id)
|
| 194 |
try:
|
| 195 |
reset_payload = _reset(task_id)
|