Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files - inference.py +2 -2
inference.py
CHANGED
|
@@ -707,7 +707,7 @@ async def run_episode(task_config: dict, client: OpenAI) -> dict:
|
|
| 707 |
|
| 708 |
rewards: List[float] = []
|
| 709 |
steps_taken = 0
|
| 710 |
-
score = 0.
|
| 711 |
success = False
|
| 712 |
last_result = None
|
| 713 |
history: List[dict] = []
|
|
@@ -790,7 +790,7 @@ async def run_episode(task_config: dict, client: OpenAI) -> dict:
|
|
| 790 |
# Shift by +1.5 so that the fail reward (-1.5) maps to 0 and max maps to 1.
|
| 791 |
_TEMPLATE_REWARD_CEIL = {1: 2.0, 3: 3.5, 6: 5.0}
|
| 792 |
_reward_ceil = _TEMPLATE_REWARD_CEIL.get(task_config.get("template_id"), 5.0)
|
| 793 |
-
score = max(0.
|
| 794 |
success = total_reward >= 0.5 # any positive terminal reward = success
|
| 795 |
|
| 796 |
vprint(f"\n[VERBOSE] ── episode end — {task_name} ──")
|
|
|
|
| 707 |
|
| 708 |
rewards: List[float] = []
|
| 709 |
steps_taken = 0
|
| 710 |
+
score = 0.01 # strict (0, 1) required by validator; 0.0 / 1.0 are rejected
|
| 711 |
success = False
|
| 712 |
last_result = None
|
| 713 |
history: List[dict] = []
|
|
|
|
| 790 |
# Shift by +1.5 so that the fail reward (-1.5) maps to 0 and max maps to 1; the result is then clamped to [0.01, 0.99].
|
| 791 |
_TEMPLATE_REWARD_CEIL = {1: 2.0, 3: 3.5, 6: 5.0}
|
| 792 |
_reward_ceil = _TEMPLATE_REWARD_CEIL.get(task_config.get("template_id"), 5.0)
|
| 793 |
+
score = max(0.01, min(0.99, (total_reward + 1.5) / (_reward_ceil + 1.5)))
|
| 794 |
success = total_reward >= 0.5 # any positive terminal reward = success
|
| 795 |
|
| 796 |
vprint(f"\n[VERBOSE] ── episode end — {task_name} ──")
|