Spaces:

TheJackBright
/

polypharmacy-env

Sleeping

TheJackBright Claude Opus 4.6 committed on 30 days ago

Commit

c5b547b

1 Parent(s): 0aa6a46

Fix grader scores to be strictly within (0, 1) range

Phase 2 validation requires scores strictly between 0 and 1 (never exactly
0.0 or 1.0). Updated _clip in graders.py to clamp to [0.01, 0.99], changed
the env_core.py fallback return from 0.0 to 0.01, and updated inference.py
so that _clamp01 clamps to [0.01, 0.99] and the default score starts at 0.01.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (3) hide show

backend/src/polypharmacy_env/env_core.py +1 -1
backend/src/polypharmacy_env/graders.py +5 -1
inference.py +3 -2

backend/src/polypharmacy_env/env_core.py CHANGED Viewed

@@ -354,7 +354,7 @@ class PolypharmacyEnv(
                 self._total_drug_changes,
                 self._critical_stopped_without_sub,
             )
-        return 0.0
     def _get_severe_pairs(self) -> List[Tuple[str, str]]:
         """Return all severe DDI pairs present in the *initial* medication list."""

                 self._total_drug_changes,
                 self._critical_stopped_without_sub,
             )
+        return 0.01  # strict (0, 1) range required
     def _get_severe_pairs(self) -> List[Tuple[str, str]]:
         """Return all severe DDI pairs present in the *initial* medication list."""

backend/src/polypharmacy_env/graders.py CHANGED Viewed

@@ -12,9 +12,13 @@ from .models import InterventionRecord
 _EPS = 1e-8
 def _clip(x: float) -> float:
-    return max(0.0, min(x, 1.0))
 # ── Easy: easy_screening ─────────────────────────────────────────────────────

 _EPS = 1e-8
+# Scores must be strictly in (0, 1) — never exactly 0.0 or 1.0
+_SCORE_MIN = 0.01
+_SCORE_MAX = 0.99
 def _clip(x: float) -> float:
+    return max(_SCORE_MIN, min(x, _SCORE_MAX))
 # ── Easy: easy_screening ─────────────────────────────────────────────────────

inference.py CHANGED Viewed

@@ -68,7 +68,8 @@ def _fmt_reward(v: float) -> str:
 def _clamp01(v: float) -> float:
-    return max(0.0, min(1.0, float(v)))
 def log_start(task: str) -> None:
@@ -188,7 +189,7 @@ def run_task(client: OpenAI, task_id: str) -> None:
     rewards: List[float] = []
     steps = 0
     success = False
-    score = 0.0
     log_start(task_id)
     try:
         reset_payload = _reset(task_id)

 def _clamp01(v: float) -> float:
+    """Clamp score to strict (0, 1) — never exactly 0.0 or 1.0."""
+    return max(0.01, min(0.99, float(v)))
 def log_start(task: str) -> None:
     rewards: List[float] = []
     steps = 0
     success = False
+    score = 0.01  # strict (0, 1) — never exactly 0.0
     log_start(task_id)
     try:
         reset_payload = _reset(task_id)