immortalindeed committed on
Commit
ee547a6
·
1 Parent(s): f63920a

chore: Apply strict min/max bound clamping (Bug #2 and Bug #3) to prevent out-of-range scores, and fix Windows encoding

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. inference.py +2 -1
  3. server/app.py +1 -1
README.md CHANGED
@@ -233,7 +233,7 @@ entropyenv/
233
 
234
  | Model | Provider | sec_easy | sec_med | sec_hard | dep_easy | dep_med | dep_hard | cli_easy | cli_med | cli_hard | **Avg** |
235
  |-------|----------|:--------:|:-------:|:--------:|:--------:|:-------:|:--------:|:--------:|:-------:|:--------:|:-------:|
236
- | *Benchmarking in progress...* | | | | | | | | | | | |
237
 
238
  **Scoring formula:** `score = 0.60 × max(step_rewards) + 0.40 × mean(step_rewards)`, clamped to `[0.01, 0.99]`
239
 
 
233
 
234
  | Model | Provider | sec_easy | sec_med | sec_hard | dep_easy | dep_med | dep_hard | cli_easy | cli_med | cli_hard | **Avg** |
235
  |-------|----------|:--------:|:-------:|:--------:|:--------:|:-------:|:--------:|:--------:|:-------:|:--------:|:-------:|
236
+ | *(Run `python unnecessary/run_14_models.py` to auto-populate this table)* | | | | | | | | | | | |
237
 
238
  **Scoring formula:** `score = 0.60 × max(step_rewards) + 0.40 × mean(step_rewards)`, clamped to `[0.01, 0.99]`
239
 
inference.py CHANGED
@@ -337,7 +337,8 @@ def run_task(client: OpenAI, task_id: str) -> tuple:
337
  rewards.append(0.01)
338
  break
339
 
340
- reward = float(step_data.get("reward", 0.0))
 
341
  done = bool(step_data.get("done", False))
342
  obs = step_data.get("observation", step_data)
343
  step_error = step_data.get("error") or error_msg
 
337
  rewards.append(0.01)
338
  break
339
 
340
+ raw_reward = float(step_data.get("reward", 0.01))
341
+ reward = round(min(max(raw_reward, 0.01), 0.99), 4)
342
  done = bool(step_data.get("done", False))
343
  obs = step_data.get("observation", step_data)
344
  step_error = step_data.get("error") or error_msg
server/app.py CHANGED
@@ -172,7 +172,7 @@ async def step(request: Request):
172
  if not valid:
173
  last_r = 0.01
174
  if session.history:
175
- last_r = max(0.01, session.history[-1].get('reward', 0.01))
176
  return {
177
  'reward': last_r,
178
  'done': False,
 
172
  if not valid:
173
  last_r = 0.01
174
  if session.history:
175
+ last_r = min(max(0.01, float(session.history[-1].get('reward', 0.01))), 0.99)
176
  return {
177
  'reward': last_r,
178
  'done': False,