TheJackBright Claude Opus 4.6 committed on
Commit
c5b547b
·
1 Parent(s): 0aa6a46

Fix grader scores to be strictly within (0, 1) range

Browse files

Phase 2 validation requires scores strictly between 0 and 1 (not 0.0
or 1.0). Updated _clip in graders.py to clamp to [0.01, 0.99], fixed
env_core.py fallback, and updated inference.py _clamp01 accordingly.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

backend/src/polypharmacy_env/env_core.py CHANGED
@@ -354,7 +354,7 @@ class PolypharmacyEnv(
354
  self._total_drug_changes,
355
  self._critical_stopped_without_sub,
356
  )
357
- return 0.0
358
 
359
  def _get_severe_pairs(self) -> List[Tuple[str, str]]:
360
  """Return all severe DDI pairs present in the *initial* medication list."""
 
354
  self._total_drug_changes,
355
  self._critical_stopped_without_sub,
356
  )
357
+ return 0.01 # strict (0, 1) range required
358
 
359
  def _get_severe_pairs(self) -> List[Tuple[str, str]]:
360
  """Return all severe DDI pairs present in the *initial* medication list."""
backend/src/polypharmacy_env/graders.py CHANGED
@@ -12,9 +12,13 @@ from .models import InterventionRecord
12
 
13
  _EPS = 1e-8
14
 
 
 
 
 
15
 
16
  def _clip(x: float) -> float:
17
- return max(0.0, min(x, 1.0))
18
 
19
 
20
  # ── Easy: easy_screening ─────────────────────────────────────────────────────
 
12
 
13
  _EPS = 1e-8
14
 
15
+ # Scores must be strictly in (0, 1) — never exactly 0.0 or 1.0
16
+ _SCORE_MIN = 0.01
17
+ _SCORE_MAX = 0.99
18
+
19
 
20
  def _clip(x: float) -> float:
21
+ return max(_SCORE_MIN, min(x, _SCORE_MAX))
22
 
23
 
24
  # ── Easy: easy_screening ─────────────────────────────────────────────────────
inference.py CHANGED
@@ -68,7 +68,8 @@ def _fmt_reward(v: float) -> str:
68
 
69
 
70
  def _clamp01(v: float) -> float:
71
- return max(0.0, min(1.0, float(v)))
 
72
 
73
 
74
  def log_start(task: str) -> None:
@@ -188,7 +189,7 @@ def run_task(client: OpenAI, task_id: str) -> None:
188
  rewards: List[float] = []
189
  steps = 0
190
  success = False
191
- score = 0.0
192
  log_start(task_id)
193
  try:
194
  reset_payload = _reset(task_id)
 
68
 
69
 
70
  def _clamp01(v: float) -> float:
71
+ """Clamp score to strict (0, 1) — never exactly 0.0 or 1.0."""
72
+ return max(0.01, min(0.99, float(v)))
73
 
74
 
75
  def log_start(task: str) -> None:
 
189
  rewards: List[float] = []
190
  steps = 0
191
  success = False
192
+ score = 0.01 # strict (0, 1) — never exactly 0.0
193
  log_start(task_id)
194
  try:
195
  reset_payload = _reset(task_id)