Spaces:
Sleeping
Sleeping
Commit ·
6f37fb0
1
Parent(s): c5b547b
Tighten score bounds to (0.000001, 0.999999) for strict validation
Browse files
Validator requires scores strictly between 0 and 1 (exclusive).
Widened decimal precision in log output to 6 places to avoid
rounding 0.000001 to 0.000 which could be read as 0.0.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
backend/src/polypharmacy_env/env_core.py
CHANGED
|
@@ -354,7 +354,7 @@ class PolypharmacyEnv(
|
|
| 354 |
self._total_drug_changes,
|
| 355 |
self._critical_stopped_without_sub,
|
| 356 |
)
|
| 357 |
-
return 0.
|
| 358 |
|
| 359 |
def _get_severe_pairs(self) -> List[Tuple[str, str]]:
|
| 360 |
"""Return all severe DDI pairs present in the *initial* medication list."""
|
|
|
|
| 354 |
self._total_drug_changes,
|
| 355 |
self._critical_stopped_without_sub,
|
| 356 |
)
|
| 357 |
+
return 0.000001 # strict (0, 1) range required
|
| 358 |
|
| 359 |
def _get_severe_pairs(self) -> List[Tuple[str, str]]:
|
| 360 |
"""Return all severe DDI pairs present in the *initial* medication list."""
|
backend/src/polypharmacy_env/graders.py
CHANGED
|
@@ -13,8 +13,8 @@ from .models import InterventionRecord
|
|
| 13 |
_EPS = 1e-8
|
| 14 |
|
| 15 |
# Scores must be strictly in (0, 1) — never exactly 0.0 or 1.0
|
| 16 |
-
_SCORE_MIN = 0.
|
| 17 |
-
_SCORE_MAX = 0.
|
| 18 |
|
| 19 |
|
| 20 |
def _clip(x: float) -> float:
|
|
|
|
| 13 |
_EPS = 1e-8
|
| 14 |
|
| 15 |
# Scores must be strictly in (0, 1) — never exactly 0.0 or 1.0
|
| 16 |
+
_SCORE_MIN = 0.000001
|
| 17 |
+
_SCORE_MAX = 0.999999
|
| 18 |
|
| 19 |
|
| 20 |
def _clip(x: float) -> float:
|
inference.py
CHANGED
|
@@ -69,7 +69,7 @@ def _fmt_reward(v: float) -> str:
|
|
| 69 |
|
| 70 |
def _clamp01(v: float) -> float:
|
| 71 |
"""Clamp score to strict (0, 1) — never exactly 0.0 or 1.0."""
|
| 72 |
-
return max(0.
|
| 73 |
|
| 74 |
|
| 75 |
def log_start(task: str) -> None:
|
|
@@ -88,7 +88,7 @@ def log_step(step: int, action_str: str, reward: float, done: bool, error: str |
|
|
| 88 |
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
|
| 89 |
rewards_str = ",".join(_fmt_reward(r) for r in rewards)
|
| 90 |
print(
|
| 91 |
-
f"[END] success={_b(success)} steps={steps} score={_clamp01(score):.
|
| 92 |
flush=True,
|
| 93 |
)
|
| 94 |
|
|
@@ -189,7 +189,7 @@ def run_task(client: OpenAI, task_id: str) -> None:
|
|
| 189 |
rewards: List[float] = []
|
| 190 |
steps = 0
|
| 191 |
success = False
|
| 192 |
-
score = 0.
|
| 193 |
log_start(task_id)
|
| 194 |
try:
|
| 195 |
reset_payload = _reset(task_id)
|
|
|
|
| 69 |
|
| 70 |
def _clamp01(v: float) -> float:
|
| 71 |
"""Clamp score to strict (0, 1) — never exactly 0.0 or 1.0."""
|
| 72 |
+
return max(0.000001, min(0.999999, float(v)))
|
| 73 |
|
| 74 |
|
| 75 |
def log_start(task: str) -> None:
|
|
|
|
| 88 |
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
|
| 89 |
rewards_str = ",".join(_fmt_reward(r) for r in rewards)
|
| 90 |
print(
|
| 91 |
+
f"[END] success={_b(success)} steps={steps} score={_clamp01(score):.6f} rewards={rewards_str}",
|
| 92 |
flush=True,
|
| 93 |
)
|
| 94 |
|
|
|
|
| 189 |
rewards: List[float] = []
|
| 190 |
steps = 0
|
| 191 |
success = False
|
| 192 |
+
score = 0.000001 # strict (0, 1) — never exactly 0.0
|
| 193 |
log_start(task_id)
|
| 194 |
try:
|
| 195 |
reset_payload = _reset(task_id)
|