TheJackBright Claude Opus 4.6 committed on
Commit
6f37fb0
·
1 Parent(s): c5b547b

Tighten score bounds to (0.000001, 0.999999) for strict validation

Browse files

Validator requires scores strictly between 0 and 1 (exclusive).
Widened decimal precision in log output to 6 places to avoid
rounding 0.000001 to 0.000 which could be read as 0.0.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

backend/src/polypharmacy_env/env_core.py CHANGED
@@ -354,7 +354,7 @@ class PolypharmacyEnv(
354
  self._total_drug_changes,
355
  self._critical_stopped_without_sub,
356
  )
357
- return 0.01 # strict (0, 1) range required
358
 
359
  def _get_severe_pairs(self) -> List[Tuple[str, str]]:
360
  """Return all severe DDI pairs present in the *initial* medication list."""
 
354
  self._total_drug_changes,
355
  self._critical_stopped_without_sub,
356
  )
357
+ return 0.000001 # strict (0, 1) range required
358
 
359
  def _get_severe_pairs(self) -> List[Tuple[str, str]]:
360
  """Return all severe DDI pairs present in the *initial* medication list."""
backend/src/polypharmacy_env/graders.py CHANGED
@@ -13,8 +13,8 @@ from .models import InterventionRecord
13
  _EPS = 1e-8
14
 
15
  # Scores must be strictly in (0, 1) — never exactly 0.0 or 1.0
16
- _SCORE_MIN = 0.01
17
- _SCORE_MAX = 0.99
18
 
19
 
20
  def _clip(x: float) -> float:
 
13
  _EPS = 1e-8
14
 
15
  # Scores must be strictly in (0, 1) — never exactly 0.0 or 1.0
16
+ _SCORE_MIN = 0.000001
17
+ _SCORE_MAX = 0.999999
18
 
19
 
20
  def _clip(x: float) -> float:
inference.py CHANGED
@@ -69,7 +69,7 @@ def _fmt_reward(v: float) -> str:
69
 
70
  def _clamp01(v: float) -> float:
71
  """Clamp score to strict (0, 1) β€” never exactly 0.0 or 1.0."""
72
- return max(0.01, min(0.99, float(v)))
73
 
74
 
75
  def log_start(task: str) -> None:
@@ -88,7 +88,7 @@ def log_step(step: int, action_str: str, reward: float, done: bool, error: str |
88
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
89
  rewards_str = ",".join(_fmt_reward(r) for r in rewards)
90
  print(
91
- f"[END] success={_b(success)} steps={steps} score={_clamp01(score):.3f} rewards={rewards_str}",
92
  flush=True,
93
  )
94
 
@@ -189,7 +189,7 @@ def run_task(client: OpenAI, task_id: str) -> None:
189
  rewards: List[float] = []
190
  steps = 0
191
  success = False
192
- score = 0.01 # strict (0, 1) β€” never exactly 0.0
193
  log_start(task_id)
194
  try:
195
  reset_payload = _reset(task_id)
 
69
 
70
  def _clamp01(v: float) -> float:
71
  """Clamp score to strict (0, 1) β€” never exactly 0.0 or 1.0."""
72
+ return max(0.000001, min(0.999999, float(v)))
73
 
74
 
75
  def log_start(task: str) -> None:
 
88
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
89
  rewards_str = ",".join(_fmt_reward(r) for r in rewards)
90
  print(
91
+ f"[END] success={_b(success)} steps={steps} score={_clamp01(score):.6f} rewards={rewards_str}",
92
  flush=True,
93
  )
94
 
 
189
  rewards: List[float] = []
190
  steps = 0
191
  success = False
192
+ score = 0.000001 # strict (0, 1) β€” never exactly 0.0
193
  log_start(task_id)
194
  try:
195
  reset_payload = _reset(task_id)