TheJackBright Claude Opus 4.6 committed on
Commit
6f37fb0
·
1 Parent(s): c5b547b

Tighten score bounds to (0.000001, 0.999999) for strict validation

Browse files

Validator requires scores strictly between 0 and 1 (exclusive).
Widened decimal precision in log output to 6 places to avoid
rounding 0.000001 to 0.000 which could be read as 0.0.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

backend/src/polypharmacy_env/env_core.py CHANGED
@@ -354,7 +354,7 @@ class PolypharmacyEnv(
354
  self._total_drug_changes,
355
  self._critical_stopped_without_sub,
356
  )
357
- return 0.01 # strict (0, 1) range required
358
 
359
  def _get_severe_pairs(self) -> List[Tuple[str, str]]:
360
  """Return all severe DDI pairs present in the *initial* medication list."""
 
354
  self._total_drug_changes,
355
  self._critical_stopped_without_sub,
356
  )
357
+ return 0.000001 # strict (0, 1) range required
358
 
359
  def _get_severe_pairs(self) -> List[Tuple[str, str]]:
360
  """Return all severe DDI pairs present in the *initial* medication list."""
backend/src/polypharmacy_env/graders.py CHANGED
@@ -13,8 +13,8 @@ from .models import InterventionRecord
13
  _EPS = 1e-8
14
 
15
  # Scores must be strictly in (0, 1) — never exactly 0.0 or 1.0
16
- _SCORE_MIN = 0.01
17
- _SCORE_MAX = 0.99
18
 
19
 
20
  def _clip(x: float) -> float:
 
13
  _EPS = 1e-8
14
 
15
  # Scores must be strictly in (0, 1) — never exactly 0.0 or 1.0
16
+ _SCORE_MIN = 0.000001
17
+ _SCORE_MAX = 0.999999
18
 
19
 
20
  def _clip(x: float) -> float:
inference.py CHANGED
@@ -69,7 +69,7 @@ def _fmt_reward(v: float) -> str:
69
 
70
  def _clamp01(v: float) -> float:
71
  """Clamp score to strict (0, 1) β€” never exactly 0.0 or 1.0."""
72
- return max(0.01, min(0.99, float(v)))
73
 
74
 
75
  def log_start(task: str) -> None:
@@ -88,7 +88,7 @@ def log_step(step: int, action_str: str, reward: float, done: bool, error: str |
88
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
89
  rewards_str = ",".join(_fmt_reward(r) for r in rewards)
90
  print(
91
- f"[END] success={_b(success)} steps={steps} score={_clamp01(score):.3f} rewards={rewards_str}",
92
  flush=True,
93
  )
94
 
@@ -189,7 +189,7 @@ def run_task(client: OpenAI, task_id: str) -> None:
189
  rewards: List[float] = []
190
  steps = 0
191
  success = False
192
- score = 0.01 # strict (0, 1) β€” never exactly 0.0
193
  log_start(task_id)
194
  try:
195
  reset_payload = _reset(task_id)
 
69
 
70
  def _clamp01(v: float) -> float:
71
  """Clamp score to strict (0, 1) β€” never exactly 0.0 or 1.0."""
72
+ return max(0.000001, min(0.999999, float(v)))
73
 
74
 
75
  def log_start(task: str) -> None:
 
88
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
89
  rewards_str = ",".join(_fmt_reward(r) for r in rewards)
90
  print(
91
+ f"[END] success={_b(success)} steps={steps} score={_clamp01(score):.6f} rewards={rewards_str}",
92
  flush=True,
93
  )
94
 
 
189
  rewards: List[float] = []
190
  steps = 0
191
  success = False
192
+ score = 0.000001 # strict (0, 1) β€” never exactly 0.0
193
  log_start(task_id)
194
  try:
195
  reset_payload = _reset(task_id)