nevernever69 committed on
Commit
b5f2865
·
verified ·
1 Parent(s): ea9019d

Upload inference.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. inference.py +10 -4
inference.py CHANGED
@@ -86,9 +86,15 @@ def log_start(task: str, env: str, model: str) -> None:
86
  print(f"[START] task={task} env={env} model={model}", flush=True)
87
 
88
 
 
 
 
 
 
89
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
90
  error_val = error if error else "null"
91
  done_val = str(done).lower()
 
92
  print(
93
  f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
94
  flush=True,
@@ -96,7 +102,7 @@ def log_step(step: int, action: str, reward: float, done: bool, error: Optional[
96
 
97
 
98
  def log_end(success: bool, steps: int, rewards: List[float]) -> None:
99
- rewards_str = ",".join(f"{r:.2f}" for r in rewards)
100
  print(
101
  f"[END] success={str(success).lower()} steps={steps} rewards={rewards_str}",
102
  flush=True,
@@ -193,7 +199,7 @@ def run_task(env: RedVeilEnvironment, client: OpenAI, task_id: str) -> dict:
193
  obs = env.step(action)
194
 
195
  # Track reward
196
- reward = obs.reward if obs.reward is not None else 0.0
197
  rewards.append(reward)
198
 
199
  # Log step
@@ -219,8 +225,8 @@ def run_task(env: RedVeilEnvironment, client: OpenAI, task_id: str) -> dict:
219
  # Get final score
220
  game_state = env.get_game_state()
221
  score = grade_task(game_state)
222
- score = min(max(score, 0.0), 1.0)
223
- success = score > 0.0
224
 
225
  log_end(success=success, steps=step_num, rewards=rewards)
226
 
 
86
  print(f"[START] task={task} env={env} model={model}", flush=True)
87
 
88
 
89
+ def _clamp_reward(r: float) -> float:
90
+ """Clamp reward to strictly (0, 1)."""
91
+ return max(0.01, min(0.99, r))
92
+
93
+
94
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
95
  error_val = error if error else "null"
96
  done_val = str(done).lower()
97
+ reward = _clamp_reward(reward)
98
  print(
99
  f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
100
  flush=True,
 
102
 
103
 
104
  def log_end(success: bool, steps: int, rewards: List[float]) -> None:
105
+ rewards_str = ",".join(f"{_clamp_reward(r):.2f}" for r in rewards)
106
  print(
107
  f"[END] success={str(success).lower()} steps={steps} rewards={rewards_str}",
108
  flush=True,
 
199
  obs = env.step(action)
200
 
201
  # Track reward
202
+ reward = obs.reward if obs.reward is not None else 0.01
203
  rewards.append(reward)
204
 
205
  # Log step
 
225
  # Get final score
226
  game_state = env.get_game_state()
227
  score = grade_task(game_state)
228
+ score = min(max(score, 0.01), 0.99)
229
+ success = score > 0.01
230
 
231
  log_end(success=success, steps=step_num, rewards=rewards)
232