hellinferno committed on
Commit
11aa990
·
1 Parent(s): 2c28868

fix: add run_episode wrapper, use .2f score format, update test for strict bounds

Browse files
Files changed (2) hide show
  1. inference.py +9 -1
  2. tests/test_inference.py +2 -2
inference.py CHANGED
@@ -77,7 +77,7 @@ def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> No
77
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
78
  print(
79
  f"[END] success={str(success).lower()} steps={steps} "
80
- f"score={score:.3f} rewards={rewards_str}",
81
  flush=True,
82
  )
83
 
@@ -223,6 +223,14 @@ async def run_episode_async(
223
  # ---------------------------------------------------------------------------
224
 
225
 
 
 
 
 
 
 
 
 
226
  def run_episode_sync(
227
  env: Any, llm_client: Any, model_name: str, task_id: str
228
  ) -> None:
 
77
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
78
  print(
79
  f"[END] success={str(success).lower()} steps={steps} "
80
+ f"score={score:.2f} rewards={rewards_str}",
81
  flush=True,
82
  )
83
 
 
223
  # ---------------------------------------------------------------------------
224
 
225
 
226
+ def run_episode(env: Any, llm_client: Any, model_name: str, task_id: str) -> None:
227
+ """Public episode runner expected by tests.
228
+
229
+ Uses the synchronous env interface: reset(task_id=...), step(...), state().
230
+ """
231
+ return run_episode_sync(env, llm_client, model_name, task_id)
232
+
233
+
234
  def run_episode_sync(
235
  env: Any, llm_client: Any, model_name: str, task_id: str
236
  ) -> None:
tests/test_inference.py CHANGED
@@ -55,7 +55,7 @@ def test_run_episode_emits_start_step_end_logs(capsys) -> None:
55
  total_reward=0.2,
56
  done=True,
57
  approved=True,
58
- final_score=1.0,
59
  )
60
 
61
  class DummyCompletions:
@@ -80,5 +80,5 @@ def test_run_episode_emits_start_step_end_logs(capsys) -> None:
80
  assert "[STEP]" in captured
81
  assert "[END]" in captured
82
  assert "success=true" in captured
83
- assert "score=1.00" in captured
84
 
 
55
  total_reward=0.2,
56
  done=True,
57
  approved=True,
58
+ final_score=0.99,
59
  )
60
 
61
  class DummyCompletions:
 
80
  assert "[STEP]" in captured
81
  assert "[END]" in captured
82
  assert "success=true" in captured
83
+ assert "score=0.99" in captured
84