kdcyberdude committed on
Commit
b987e0b
·
verified ·
1 Parent(s): 13f9ac5

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. inference.py +2 -2
inference.py CHANGED
@@ -707,7 +707,7 @@ async def run_episode(task_config: dict, client: OpenAI) -> dict:
707
 
708
  rewards: List[float] = []
709
  steps_taken = 0
710
- score = 0.001 # strict (0, 1) required by validator; 0.0 / 1.0 are rejected
711
  success = False
712
  last_result = None
713
  history: List[dict] = []
@@ -790,7 +790,7 @@ async def run_episode(task_config: dict, client: OpenAI) -> dict:
790
  # Shift by +1.5 so that the fail reward (-1.5) maps to 0 and max maps to 1.
791
  _TEMPLATE_REWARD_CEIL = {1: 2.0, 3: 3.5, 6: 5.0}
792
  _reward_ceil = _TEMPLATE_REWARD_CEIL.get(task_config.get("template_id"), 5.0)
793
- score = max(0.001, min(0.999, (total_reward + 1.5) / (_reward_ceil + 1.5)))
794
  success = total_reward >= 0.5 # any positive terminal reward = success
795
 
796
  vprint(f"\n[VERBOSE] ── episode end — {task_name} ──")
 
707
 
708
  rewards: List[float] = []
709
  steps_taken = 0
710
+ score = 0.01 # strict (0, 1) required by validator; 0.0 / 1.0 are rejected
711
  success = False
712
  last_result = None
713
  history: List[dict] = []
 
790
  # Shift by +1.5 so that the fail reward (-1.5) maps to 0 and max maps to 1.
791
  _TEMPLATE_REWARD_CEIL = {1: 2.0, 3: 3.5, 6: 5.0}
792
  _reward_ceil = _TEMPLATE_REWARD_CEIL.get(task_config.get("template_id"), 5.0)
793
+ score = max(0.01, min(0.99, (total_reward + 1.5) / (_reward_ceil + 1.5)))
794
  success = total_reward >= 0.5 # any positive terminal reward = success
795
 
796
  vprint(f"\n[VERBOSE] ── episode end — {task_name} ──")