Spaces:
Sleeping
Sleeping
Upload inference.py with huggingface_hub
Browse files
- inference.py +10 -4
inference.py
CHANGED
|
@@ -86,9 +86,15 @@ def log_start(task: str, env: str, model: str) -> None:
|
|
| 86 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 87 |
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
|
| 90 |
error_val = error if error else "null"
|
| 91 |
done_val = str(done).lower()
|
|
|
|
| 92 |
print(
|
| 93 |
f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
|
| 94 |
flush=True,
|
|
@@ -96,7 +102,7 @@ def log_step(step: int, action: str, reward: float, done: bool, error: Optional[
|
|
| 96 |
|
| 97 |
|
| 98 |
def log_end(success: bool, steps: int, rewards: List[float]) -> None:
|
| 99 |
-
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 100 |
print(
|
| 101 |
f"[END] success={str(success).lower()} steps={steps} rewards={rewards_str}",
|
| 102 |
flush=True,
|
|
@@ -193,7 +199,7 @@ def run_task(env: RedVeilEnvironment, client: OpenAI, task_id: str) -> dict:
|
|
| 193 |
obs = env.step(action)
|
| 194 |
|
| 195 |
# Track reward
|
| 196 |
-
reward = obs.reward if obs.reward is not None else 0.
|
| 197 |
rewards.append(reward)
|
| 198 |
|
| 199 |
# Log step
|
|
@@ -219,8 +225,8 @@ def run_task(env: RedVeilEnvironment, client: OpenAI, task_id: str) -> dict:
|
|
| 219 |
# Get final score
|
| 220 |
game_state = env.get_game_state()
|
| 221 |
score = grade_task(game_state)
|
| 222 |
-
score = min(max(score, 0.
|
| 223 |
-
success = score > 0.
|
| 224 |
|
| 225 |
log_end(success=success, steps=step_num, rewards=rewards)
|
| 226 |
|
|
|
|
| 86 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 87 |
|
| 88 |
|
| 89 |
+
def _clamp_reward(r: float) -> float:
|
| 90 |
+
"""Clamp reward to strictly (0, 1)."""
|
| 91 |
+
return max(0.01, min(0.99, r))
|
| 92 |
+
|
| 93 |
+
|
| 94 |
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
|
| 95 |
error_val = error if error else "null"
|
| 96 |
done_val = str(done).lower()
|
| 97 |
+
reward = _clamp_reward(reward)
|
| 98 |
print(
|
| 99 |
f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
|
| 100 |
flush=True,
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
def log_end(success: bool, steps: int, rewards: List[float]) -> None:
|
| 105 |
+
rewards_str = ",".join(f"{_clamp_reward(r):.2f}" for r in rewards)
|
| 106 |
print(
|
| 107 |
f"[END] success={str(success).lower()} steps={steps} rewards={rewards_str}",
|
| 108 |
flush=True,
|
|
|
|
| 199 |
obs = env.step(action)
|
| 200 |
|
| 201 |
# Track reward
|
| 202 |
+
reward = obs.reward if obs.reward is not None else 0.01
|
| 203 |
rewards.append(reward)
|
| 204 |
|
| 205 |
# Log step
|
|
|
|
| 225 |
# Get final score
|
| 226 |
game_state = env.get_game_state()
|
| 227 |
score = grade_task(game_state)
|
| 228 |
+
score = min(max(score, 0.01), 0.99)
|
| 229 |
+
success = score > 0.01
|
| 230 |
|
| 231 |
log_end(success=success, steps=step_num, rewards=rewards)
|
| 232 |
|