Draken1606 committed on
Commit
f69544d
·
1 Parent(s): 52c2c50

fix: enforce strict exclusive score bounds across inference and env

Browse files
inference.py CHANGED
@@ -42,7 +42,6 @@ def _load_dotenv() -> None:
42
 
43
 
44
  _load_dotenv()
45
- API_KEY = os.getenv('API_KEY')
46
  # Required environment variables
47
  HF_TOKEN = os.getenv('HF_TOKEN')
48
  API_BASE_URL = os.getenv('API_BASE_URL', 'https://api.openai.com/v1')
@@ -215,7 +214,9 @@ async def run_episode(url: str, difficulty: str = 'medium', use_llm: bool = Fals
215
  resp = json.loads(await ws.recv())
216
  payload = resp.get('data', {})
217
  obs = payload.get('observation', payload)
218
- reward = float(payload.get('reward', obs.get('last_reward', 0.0)) or obs.get('last_reward', 0.0))
 
 
219
  done = payload.get('done', obs.get('done', False))
220
  error = payload.get('error', None)
221
 
@@ -230,7 +231,7 @@ async def run_episode(url: str, difficulty: str = 'medium', use_llm: bool = Fals
230
  state_resp = json.loads(await ws.recv())
231
  state = state_resp.get('data', {})
232
  score = float(state.get('score', obs.get('score', 0.5)))
233
- score = min(max(score, 0), 1)
234
 
235
  success = score >= SUCCESS_SCORE_THRESHOLD
236
 
 
42
 
43
 
44
  _load_dotenv()
 
45
  # Required environment variables
46
  HF_TOKEN = os.getenv('HF_TOKEN')
47
  API_BASE_URL = os.getenv('API_BASE_URL', 'https://api.openai.com/v1')
 
214
  resp = json.loads(await ws.recv())
215
  payload = resp.get('data', {})
216
  obs = payload.get('observation', payload)
217
+ raw_reward = float(payload.get('reward', obs.get('last_reward', 0.0)) or obs.get('last_reward', 0.0))
218
+ # Normalize step reward to strictly (0, 1) as required by the grader
219
+ reward = min(max(raw_reward, 0.01), 0.99)
220
  done = payload.get('done', obs.get('done', False))
221
  error = payload.get('error', None)
222
 
 
231
  state_resp = json.loads(await ws.recv())
232
  state = state_resp.get('data', {})
233
  score = float(state.get('score', obs.get('score', 0.5)))
234
+ score = min(max(score, 0.01), 0.99)
235
 
236
  success = score >= SUCCESS_SCORE_THRESHOLD
237
 
models.py CHANGED
@@ -36,5 +36,5 @@ class ContainerObservation(Observation):
36
  max_height: int = Field(0)
37
  difficulty: str = Field("medium")
38
  last_reward: float = Field(0.0)
39
- score: float = Field(0.5, description="Normalized score (0.0, 1.0)")
40
  done: bool = Field(False)
 
36
  max_height: int = Field(0)
37
  difficulty: str = Field("medium")
38
  last_reward: float = Field(0.0)
39
+ score: float = Field(0.5, description="Normalized score strictly in (0.0, 1.0)")
40
  done: bool = Field(False)
server/environment.py CHANGED
@@ -229,12 +229,14 @@ class ContainerYardEnvironment(Environment):
229
  )
230
 
231
  def score(self) -> float:
232
- """Normalized score in (0.0, 1.0). Based on actual retrievals attempted."""
233
  n_retrieved = self.retrieval_pointer # only count retrievals that actually happened
234
  worst_case = n_retrieved * (self.max_height - 1)
235
  if worst_case == 0:
236
- return 0.99
237
- score = max(0.01, min(1.0 - self.rehandle_count / worst_case, 0.99))
 
 
238
  return round(score, 4)
239
 
240
  def get_state(self) -> dict[str, Any]:
 
229
  )
230
 
231
def score(self) -> float:
    """Normalized score strictly in (0.0, 1.0). Based on actual retrievals attempted."""
    attempted = self.retrieval_pointer  # only count retrievals that actually happened
    ceiling = attempted * (self.max_height - 1)  # worst-case rehandles for those retrievals
    if not ceiling:
        # No retrievals yet — report a neutral score.
        return 0.5
    fraction = 1.0 - self.rehandle_count / ceiling
    # Grader requires the score to stay strictly inside (0, 1), so pin to [0.01, 0.99].
    bounded = min(0.99, max(0.01, fraction))
    return round(bounded, 4)
241
 
242
  def get_state(self) -> dict[str, Any]:
tests/test_openenv_env.py CHANGED
@@ -54,7 +54,8 @@ def test_score_in_range():
54
  )
55
  obs = as_dict(env.step(ContainerAction(stack_index=chosen)))
56
  done = obs["done"]
57
- assert 0.0 <= env.score() <= 1.0
 
58
 
59
 
60
  @pytest.mark.parametrize("difficulty", ["easy", "medium", "hard"])
 
54
  )
55
  obs = as_dict(env.step(ContainerAction(stack_index=chosen)))
56
  done = obs["done"]
57
+ # Score must be strictly between 0 and 1 (grader requirement)
58
+ assert 0.0 < env.score() < 1.0
59
 
60
 
61
  @pytest.mark.parametrize("difficulty", ["easy", "medium", "hard"])