kushalExplores committed on
Commit
4a30ec6
·
verified ·
1 Parent(s): 821b7b8

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. inference.py +9 -2
inference.py CHANGED
@@ -40,6 +40,7 @@ CONNECT_TIMEOUT_S = float(os.getenv("OPENENV_CONNECT_TIMEOUT_S", "30"))
40
  MESSAGE_TIMEOUT_S = float(os.getenv("OPENENV_MESSAGE_TIMEOUT_S", "180"))
41
  DOCKER_WAIT_TIMEOUT_S = float(os.getenv("OPENENV_DOCKER_WAIT_TIMEOUT_S", "120"))
42
  TASK_RETRY_COUNT = int(os.getenv("OPENENV_TASK_RETRY_COUNT", "1"))
 
43
 
44
  SYSTEM_PROMPT = textwrap.dedent(
45
  """
@@ -92,9 +93,15 @@ def log_step(step: int, action: str, reward: float, done: bool, error: str | Non
92
  )
93
 
94
 
 
 
 
 
 
95
  def log_end(success: bool, steps: int, score: float, method_log: list[dict[str, Any]]) -> None:
 
96
  print(
97
- f"[END] success={str(success).lower()} steps={steps} score={score:.3f} methods={len(method_log)}",
98
  flush=True,
99
  )
100
  print(json.dumps({"method_log": method_log}, indent=2), flush=True)
@@ -543,7 +550,7 @@ async def run_single_task(
543
  "difficulty": task_spec.difficulty,
544
  "objective": task_spec.objective,
545
  "grader_name": task_spec.grader_name,
546
- "normalized_score": max(0.0, min(1.0, reward)),
547
  "done": final_result.done,
548
  "success": success,
549
  "final_status": final_result.observation.status,
 
40
  MESSAGE_TIMEOUT_S = float(os.getenv("OPENENV_MESSAGE_TIMEOUT_S", "180"))
41
  DOCKER_WAIT_TIMEOUT_S = float(os.getenv("OPENENV_DOCKER_WAIT_TIMEOUT_S", "120"))
42
  TASK_RETRY_COUNT = int(os.getenv("OPENENV_TASK_RETRY_COUNT", "1"))
43
+ SCORE_EPSILON = float(os.getenv("OPENENV_SCORE_EPSILON", "0.000001"))
44
 
45
  SYSTEM_PROMPT = textwrap.dedent(
46
  """
 
93
  )
94
 
95
 
96
+ def bounded_task_score(score: float) -> float:
97
+ """Clamp task scores to the open interval (0, 1)."""
98
+ return min(1.0 - SCORE_EPSILON, max(SCORE_EPSILON, score))
99
+
100
+
101
  def log_end(success: bool, steps: int, score: float, method_log: list[dict[str, Any]]) -> None:
102
+ safe_score = bounded_task_score(score)
103
  print(
104
+ f"[END] success={str(success).lower()} steps={steps} score={safe_score:.6f} methods={len(method_log)}",
105
  flush=True,
106
  )
107
  print(json.dumps({"method_log": method_log}, indent=2), flush=True)
 
550
  "difficulty": task_spec.difficulty,
551
  "objective": task_spec.objective,
552
  "grader_name": task_spec.grader_name,
553
+ "normalized_score": bounded_task_score(reward),
554
  "done": final_result.done,
555
  "success": success,
556
  "final_status": final_result.observation.status,