Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- inference.py +10 -3
inference.py
CHANGED
|
@@ -84,8 +84,9 @@ def log_payload_generator_methods(tool_name: str, generator_methods: list[dict[s
|
|
| 84 |
|
| 85 |
def log_step(step: int, action: str, reward: float, done: bool, error: str | None) -> None:
|
| 86 |
error_val = error if error else "null"
|
|
|
|
| 87 |
print(
|
| 88 |
-
f"[STEP] step={step} action={action} reward={
|
| 89 |
flush=True,
|
| 90 |
)
|
| 91 |
|
|
@@ -95,10 +96,16 @@ def bounded_task_score(score: float) -> float:
|
|
| 95 |
return min(1.0 - SCORE_EPSILON, max(SCORE_EPSILON, score))
|
| 96 |
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None:
|
| 99 |
-
|
|
|
|
| 100 |
print(
|
| 101 |
-
f"[END] success={str(success).lower()} steps={steps} score={
|
| 102 |
flush=True,
|
| 103 |
)
|
| 104 |
|
|
|
|
| 84 |
|
| 85 |
def log_step(step: int, action: str, reward: float, done: bool, error: str | None) -> None:
    """Print a single ``[STEP]`` log line to stdout and flush it right away.

    The reward is passed through ``bounded_log_reward`` and printed with two
    decimals, ``done`` is printed as lowercase ``true``/``false``, and a
    missing or empty ``error`` is printed as ``null``.
    """
    shown_error = error or "null"
    shown_reward = bounded_log_reward(reward)
    shown_done = str(done).lower()
    line = f"[STEP] step={step} action={action} reward={shown_reward:.2f} done={shown_done} error={shown_error}"
    print(line, flush=True)
|
| 92 |
|
|
|
|
| 96 |
return min(1.0 - SCORE_EPSILON, max(SCORE_EPSILON, score))
|
| 97 |
|
| 98 |
|
| 99 |
+
def bounded_log_reward(reward: float) -> float:
    """Limit a logged score or reward to the closed range [0.01, 0.99].

    Values below 0.01 come back as 0.01, values above 0.99 come back as
    0.99, and anything in between is returned unchanged.
    """
    # Apply the lower bound first, then the upper bound.
    floored = max(0.01, reward)
    return min(0.99, floored)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None:
    """Print the final ``[END]`` summary line to stdout and flush it right away.

    The score and every entry of ``rewards`` are passed through
    ``bounded_log_reward`` and printed with two decimals; the rewards are
    joined with commas, and ``success`` is printed as lowercase
    ``true``/``false``.
    """
    shown_score = bounded_log_reward(score)
    reward_fields = [f"{bounded_log_reward(value):.2f}" for value in rewards]
    joined_rewards = ",".join(reward_fields)
    shown_success = str(success).lower()
    line = f"[END] success={shown_success} steps={steps} score={shown_score:.2f} rewards={joined_rewards}"
    print(line, flush=True)
|
| 111 |
|