Draken1606 committed on
Commit
f69544d
·
1 Parent(s): 52c2c50

fix: enforce strict exclusive score bounds across inference and env

Browse files
inference.py CHANGED
@@ -42,7 +42,6 @@ def _load_dotenv() -> None:
42
 
43
 
44
  _load_dotenv()
45
- API_KEY = os.getenv('API_KEY')
46
  # Required environment variables
47
  HF_TOKEN = os.getenv('HF_TOKEN')
48
  API_BASE_URL = os.getenv('API_BASE_URL', 'https://api.openai.com/v1')
@@ -215,7 +214,9 @@ async def run_episode(url: str, difficulty: str = 'medium', use_llm: bool = Fals
215
  resp = json.loads(await ws.recv())
216
  payload = resp.get('data', {})
217
  obs = payload.get('observation', payload)
218
- reward = float(payload.get('reward', obs.get('last_reward', 0.0)) or obs.get('last_reward', 0.0))
 
 
219
  done = payload.get('done', obs.get('done', False))
220
  error = payload.get('error', None)
221
 
@@ -230,7 +231,7 @@ async def run_episode(url: str, difficulty: str = 'medium', use_llm: bool = Fals
230
  state_resp = json.loads(await ws.recv())
231
  state = state_resp.get('data', {})
232
  score = float(state.get('score', obs.get('score', 0.5)))
233
- score = min(max(score, 0), 1)
234
 
235
  success = score >= SUCCESS_SCORE_THRESHOLD
236
 
 
42
 
43
 
44
  _load_dotenv()
 
45
  # Required environment variables
46
  HF_TOKEN = os.getenv('HF_TOKEN')
47
  API_BASE_URL = os.getenv('API_BASE_URL', 'https://api.openai.com/v1')
 
214
  resp = json.loads(await ws.recv())
215
  payload = resp.get('data', {})
216
  obs = payload.get('observation', payload)
217
+ raw_reward = float(payload.get('reward', obs.get('last_reward', 0.0)) or obs.get('last_reward', 0.0))
218
+ # Normalize step reward to strictly (0, 1) as required by the grader
219
+ reward = min(max(raw_reward, 0.01), 0.99)
220
  done = payload.get('done', obs.get('done', False))
221
  error = payload.get('error', None)
222
 
 
231
  state_resp = json.loads(await ws.recv())
232
  state = state_resp.get('data', {})
233
  score = float(state.get('score', obs.get('score', 0.5)))
234
+ score = min(max(score, 0.01), 0.99)
235
 
236
  success = score >= SUCCESS_SCORE_THRESHOLD
237
 
models.py CHANGED
@@ -36,5 +36,5 @@ class ContainerObservation(Observation):
36
  max_height: int = Field(0)
37
  difficulty: str = Field("medium")
38
  last_reward: float = Field(0.0)
39
- score: float = Field(0.5, description="Normalized score (0.0, 1.0)")
40
  done: bool = Field(False)
 
36
  max_height: int = Field(0)
37
  difficulty: str = Field("medium")
38
  last_reward: float = Field(0.0)
39
+ score: float = Field(0.5, description="Normalized score strictly in (0.0, 1.0)")
40
  done: bool = Field(False)
server/environment.py CHANGED
@@ -229,12 +229,14 @@ class ContainerYardEnvironment(Environment):
229
  )
230
 
231
  def score(self) -> float:
232
- """Normalized score in (0.0, 1.0). Based on actual retrievals attempted."""
233
  n_retrieved = self.retrieval_pointer # only count retrievals that actually happened
234
  worst_case = n_retrieved * (self.max_height - 1)
235
  if worst_case == 0:
236
- return 0.99
237
- score = max(0.01, min(1.0 - self.rehandle_count / worst_case, 0.99))
 
 
238
  return round(score, 4)
239
 
240
  def get_state(self) -> dict[str, Any]:
 
229
  )
230
 
231
def score(self) -> float:
    """Normalized score strictly in (0.0, 1.0). Based on actual retrievals attempted."""
    attempted = self.retrieval_pointer  # only count retrievals that actually happened
    ceiling = attempted * (self.max_height - 1)  # worst-case rehandles for those retrievals
    if not ceiling:
        # No retrievals yet — report a neutral score.
        return 0.5
    fraction = 1.0 - self.rehandle_count / ceiling
    # Grader requires the score to stay strictly inside (0, 1), so pin to [0.01, 0.99].
    bounded = min(0.99, max(0.01, fraction))
    return round(bounded, 4)
241
 
242
  def get_state(self) -> dict[str, Any]:
tests/test_openenv_env.py CHANGED
@@ -54,7 +54,8 @@ def test_score_in_range():
54
  )
55
  obs = as_dict(env.step(ContainerAction(stack_index=chosen)))
56
  done = obs["done"]
57
- assert 0.0 <= env.score() <= 1.0
 
58
 
59
 
60
  @pytest.mark.parametrize("difficulty", ["easy", "medium", "hard"])
 
54
  )
55
  obs = as_dict(env.step(ContainerAction(stack_index=chosen)))
56
  done = obs["done"]
57
+ # Score must be strictly between 0 and 1 (grader requirement)
58
+ assert 0.0 < env.score() < 1.0
59
 
60
 
61
  @pytest.mark.parametrize("difficulty", ["easy", "medium", "hard"])