Spaces:
Sleeping
Sleeping
Commit ·
f69544d
1
Parent(s): 52c2c50
fix: enforce strict exclusive score bounds across inference and env
Browse files
- inference.py +4 -3
- models.py +1 -1
- server/environment.py +5 -3
- tests/test_openenv_env.py +2 -1
inference.py
CHANGED
|
@@ -42,7 +42,6 @@ def _load_dotenv() -> None:
|
|
| 42 |
|
| 43 |
|
| 44 |
_load_dotenv()
|
| 45 |
-
API_KEY = os.getenv('API_KEY')
|
| 46 |
# Required environment variables
|
| 47 |
HF_TOKEN = os.getenv('HF_TOKEN')
|
| 48 |
API_BASE_URL = os.getenv('API_BASE_URL', 'https://api.openai.com/v1')
|
|
@@ -215,7 +214,9 @@ async def run_episode(url: str, difficulty: str = 'medium', use_llm: bool = Fals
|
|
| 215 |
resp = json.loads(await ws.recv())
|
| 216 |
payload = resp.get('data', {})
|
| 217 |
obs = payload.get('observation', payload)
|
| 218 |
-
|
|
|
|
|
|
|
| 219 |
done = payload.get('done', obs.get('done', False))
|
| 220 |
error = payload.get('error', None)
|
| 221 |
|
|
@@ -230,7 +231,7 @@ async def run_episode(url: str, difficulty: str = 'medium', use_llm: bool = Fals
|
|
| 230 |
state_resp = json.loads(await ws.recv())
|
| 231 |
state = state_resp.get('data', {})
|
| 232 |
score = float(state.get('score', obs.get('score', 0.5)))
|
| 233 |
-
score = min(max(score, 0), 1)
|
| 234 |
|
| 235 |
success = score >= SUCCESS_SCORE_THRESHOLD
|
| 236 |
|
|
|
|
| 42 |
|
| 43 |
|
| 44 |
_load_dotenv()
|
|
|
|
| 45 |
# Required environment variables
|
| 46 |
HF_TOKEN = os.getenv('HF_TOKEN')
|
| 47 |
API_BASE_URL = os.getenv('API_BASE_URL', 'https://api.openai.com/v1')
|
|
|
|
| 214 |
resp = json.loads(await ws.recv())
|
| 215 |
payload = resp.get('data', {})
|
| 216 |
obs = payload.get('observation', payload)
|
| 217 |
+
raw_reward = float(payload.get('reward', obs.get('last_reward', 0.0)) or obs.get('last_reward', 0.0))
|
| 218 |
+
# Normalize step reward to strictly (0, 1) as required by the grader
|
| 219 |
+
reward = min(max(raw_reward, 0.01), 0.99)
|
| 220 |
done = payload.get('done', obs.get('done', False))
|
| 221 |
error = payload.get('error', None)
|
| 222 |
|
|
|
|
| 231 |
state_resp = json.loads(await ws.recv())
|
| 232 |
state = state_resp.get('data', {})
|
| 233 |
score = float(state.get('score', obs.get('score', 0.5)))
|
| 234 |
+
score = min(max(score, 0.01), 0.99)
|
| 235 |
|
| 236 |
success = score >= SUCCESS_SCORE_THRESHOLD
|
| 237 |
|
models.py
CHANGED
|
@@ -36,5 +36,5 @@ class ContainerObservation(Observation):
|
|
| 36 |
max_height: int = Field(0)
|
| 37 |
difficulty: str = Field("medium")
|
| 38 |
last_reward: float = Field(0.0)
|
| 39 |
-
score: float = Field(0.5, description="Normalized score (0.0, 1.0)")
|
| 40 |
done: bool = Field(False)
|
|
|
|
| 36 |
max_height: int = Field(0)
|
| 37 |
difficulty: str = Field("medium")
|
| 38 |
last_reward: float = Field(0.0)
|
| 39 |
+
score: float = Field(0.5, description="Normalized score strictly in (0.0, 1.0)")
|
| 40 |
done: bool = Field(False)
|
server/environment.py
CHANGED
|
@@ -229,12 +229,14 @@ class ContainerYardEnvironment(Environment):
|
|
| 229 |
)
|
| 230 |
|
| 231 |
def score(self) -> float:
|
| 232 |
-
"""Normalized score in (0.0, 1.0). Based on actual retrievals attempted."""
|
| 233 |
n_retrieved = self.retrieval_pointer # only count retrievals that actually happened
|
| 234 |
worst_case = n_retrieved * (self.max_height - 1)
|
| 235 |
if worst_case == 0:
|
| 236 |
-
return 0.
|
| 237 |
-
|
|
|
|
|
|
|
| 238 |
return round(score, 4)
|
| 239 |
|
| 240 |
def get_state(self) -> dict[str, Any]:
|
|
|
|
| 229 |
)
|
| 230 |
|
| 231 |
def score(self) -> float:
|
| 232 |
+
"""Normalized score strictly in (0.0, 1.0). Based on actual retrievals attempted."""
|
| 233 |
n_retrieved = self.retrieval_pointer # only count retrievals that actually happened
|
| 234 |
worst_case = n_retrieved * (self.max_height - 1)
|
| 235 |
if worst_case == 0:
|
| 236 |
+
return 0.5 # no retrievals yet — neutral score
|
| 237 |
+
raw = 1.0 - self.rehandle_count / worst_case
|
| 238 |
+
# Clamp strictly inside (0, 1) — grader requires score != 0.0 and score != 1.0
|
| 239 |
+
score = max(0.01, min(raw, 0.99))
|
| 240 |
return round(score, 4)
|
| 241 |
|
| 242 |
def get_state(self) -> dict[str, Any]:
|
tests/test_openenv_env.py
CHANGED
|
@@ -54,7 +54,8 @@ def test_score_in_range():
|
|
| 54 |
)
|
| 55 |
obs = as_dict(env.step(ContainerAction(stack_index=chosen)))
|
| 56 |
done = obs["done"]
|
| 57 |
-
|
|
|
|
| 58 |
|
| 59 |
|
| 60 |
@pytest.mark.parametrize("difficulty", ["easy", "medium", "hard"])
|
|
|
|
| 54 |
)
|
| 55 |
obs = as_dict(env.step(ContainerAction(stack_index=chosen)))
|
| 56 |
done = obs["done"]
|
| 57 |
+
# Score must be strictly between 0 and 1 (grader requirement)
|
| 58 |
+
assert 0.0 < env.score() < 1.0
|
| 59 |
|
| 60 |
|
| 61 |
@pytest.mark.parametrize("difficulty", ["easy", "medium", "hard"])
|