Charan Sai Mamidala commited on
Commit
dd054aa
·
1 Parent(s): fe47b4f

fix: use 0.001/0.999 bounds — 1e-6 formats as 0.0000 in :.4f stdout which validator reads as 0.0

Browse files
Files changed (2) hide show
  1. env/core.py +5 -5
  2. inference.py +8 -6
env/core.py CHANGED
@@ -287,7 +287,7 @@ class GDPRAuditorEnvironment:
287
  if self._ep is None:
288
  return (
289
  self._error_obs("Environment not reset"),
290
- RewModel(value=1e-6, reason="Environment not initialized", issues_found=0, total_issues=0),
291
  True,
292
  {"error": "Environment not reset. Call /reset first."},
293
  )
@@ -373,10 +373,10 @@ class GDPRAuditorEnvironment:
373
  return None
374
 
375
  def _calculate_reward(self) -> RewModel:
376
- # Scores must be strictly in (0, 1) — never exactly 0.0 or 1.0
377
- _EPSILON = 1e-6
378
- _MIN_SCORE = _EPSILON
379
- _MAX_SCORE = 1.0 - _EPSILON
380
 
381
  task = self._ep.task_config
382
  total_issues = len(task.hidden_issues)
 
287
  if self._ep is None:
288
  return (
289
  self._error_obs("Environment not reset"),
290
+ RewModel(value=0.001, reason="Environment not initialized", issues_found=0, total_issues=0),
291
  True,
292
  {"error": "Environment not reset. Call /reset first."},
293
  )
 
373
  return None
374
 
375
  def _calculate_reward(self) -> RewModel:
376
+ # Scores must be strictly in (0, 1) and visible in :.4f format
377
+ # 1e-6 formats as "0.0000" which the validator reads as 0.0 — use 0.001 minimum
378
+ _MIN_SCORE = 0.001
379
+ _MAX_SCORE = 0.999
380
 
381
  task = self._ep.task_config
382
  total_issues = len(task.hidden_issues)
inference.py CHANGED
@@ -219,7 +219,7 @@ def run_task(client: OpenAI, task_key: str, verbose: bool = False) -> dict:
219
  if not reward_data:
220
  print(f"[DEBUG] No reward in response: {result}", flush=True)
221
  if isinstance(reward_data, dict):
222
- reward = reward_data.get("value", 1e-6) # default to epsilon, not 0.0
223
  else:
224
  reward = float(reward_data)
225
  done = result.get("done", False)
@@ -228,7 +228,7 @@ def run_task(client: OpenAI, task_key: str, verbose: bool = False) -> dict:
228
 
229
  except Exception as exc:
230
  error_msg = str(exc).replace('\n', ' ').replace('\r', '')
231
- reward = 1e-6 # never exactly 0.0 — validator requires strictly (0, 1)
232
  done = True
233
  obs_data = {}
234
 
@@ -240,10 +240,12 @@ def run_task(client: OpenAI, task_key: str, verbose: bool = False) -> dict:
240
  if done:
241
  break
242
 
243
- _EPSILON = 1e-6
244
- score = max(rewards) if rewards else _EPSILON
245
- # Clamp to strictly open interval (0, 1) — validator rejects 0.0 and 1.0
246
- score = max(_EPSILON, min(1.0 - _EPSILON, score))
 
 
247
  success = score >= SUCCESS_SCORE_THRESHOLD
248
 
249
  except Exception as exc:
 
219
  if not reward_data:
220
  print(f"[DEBUG] No reward in response: {result}", flush=True)
221
  if isinstance(reward_data, dict):
222
+ reward = reward_data.get("value", 0.001) # default 0.001, never 0.0
223
  else:
224
  reward = float(reward_data)
225
  done = result.get("done", False)
 
228
 
229
  except Exception as exc:
230
  error_msg = str(exc).replace('\n', ' ').replace('\r', '')
231
+ reward = 0.001 # 1e-6 formats as "0.0000" — use 0.001 so :.4f gives "0.0010"
232
  done = True
233
  obs_data = {}
234
 
 
240
  if done:
241
  break
242
 
243
+ # 1e-6 formats as "0.0000" with :.4f — validator parses that as 0.0 and fails
244
+ # Use 0.001 min so it formats as "0.0010", and 0.999 max so it formats as "0.9990"
245
+ _MIN_SCORE = 0.001
246
+ _MAX_SCORE = 0.999
247
+ score = max(rewards) if rewards else _MIN_SCORE
248
+ score = max(_MIN_SCORE, min(_MAX_SCORE, score))
249
  success = score >= SUCCESS_SCORE_THRESHOLD
250
 
251
  except Exception as exc: