Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- inference.py +9 -2
inference.py
CHANGED
|
@@ -40,6 +40,7 @@ CONNECT_TIMEOUT_S = float(os.getenv("OPENENV_CONNECT_TIMEOUT_S", "30"))
|
|
| 40 |
MESSAGE_TIMEOUT_S = float(os.getenv("OPENENV_MESSAGE_TIMEOUT_S", "180"))
|
| 41 |
DOCKER_WAIT_TIMEOUT_S = float(os.getenv("OPENENV_DOCKER_WAIT_TIMEOUT_S", "120"))
|
| 42 |
TASK_RETRY_COUNT = int(os.getenv("OPENENV_TASK_RETRY_COUNT", "1"))
|
|
|
|
| 43 |
|
| 44 |
SYSTEM_PROMPT = textwrap.dedent(
|
| 45 |
"""
|
|
@@ -92,9 +93,15 @@ def log_step(step: int, action: str, reward: float, done: bool, error: str | Non
|
|
| 92 |
)
|
| 93 |
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
def log_end(success: bool, steps: int, score: float, method_log: list[dict[str, Any]]) -> None:
|
|
|
|
| 96 |
print(
|
| 97 |
-
f"[END] success={str(success).lower()} steps={steps} score={score:.6f} methods={len(method_log)}",
|
| 98 |
flush=True,
|
| 99 |
)
|
| 100 |
print(json.dumps({"method_log": method_log}, indent=2), flush=True)
|
|
@@ -543,7 +550,7 @@ async def run_single_task(
|
|
| 543 |
"difficulty": task_spec.difficulty,
|
| 544 |
"objective": task_spec.objective,
|
| 545 |
"grader_name": task_spec.grader_name,
|
| 546 |
-
"normalized_score": reward,
|
| 547 |
"done": final_result.done,
|
| 548 |
"success": success,
|
| 549 |
"final_status": final_result.observation.status,
|
|
|
|
| 40 |
MESSAGE_TIMEOUT_S = float(os.getenv("OPENENV_MESSAGE_TIMEOUT_S", "180"))
|
| 41 |
DOCKER_WAIT_TIMEOUT_S = float(os.getenv("OPENENV_DOCKER_WAIT_TIMEOUT_S", "120"))
|
| 42 |
TASK_RETRY_COUNT = int(os.getenv("OPENENV_TASK_RETRY_COUNT", "1"))
|
| 43 |
+
SCORE_EPSILON = float(os.getenv("OPENENV_SCORE_EPSILON", "0.000001"))
|
| 44 |
|
| 45 |
SYSTEM_PROMPT = textwrap.dedent(
|
| 46 |
"""
|
|
|
|
| 93 |
)
|
| 94 |
|
| 95 |
|
| 96 |
+
def bounded_task_score(score: float) -> float:
|
| 97 |
+
"""Clamp task scores to the open interval (0, 1)."""
|
| 98 |
+
return min(1.0 - SCORE_EPSILON, max(SCORE_EPSILON, score))
|
| 99 |
+
|
| 100 |
+
|
| 101 |
def log_end(success: bool, steps: int, score: float, method_log: list[dict[str, Any]]) -> None:
|
| 102 |
+
safe_score = bounded_task_score(score)
|
| 103 |
print(
|
| 104 |
+
f"[END] success={str(success).lower()} steps={steps} score={safe_score:.6f} methods={len(method_log)}",
|
| 105 |
flush=True,
|
| 106 |
)
|
| 107 |
print(json.dumps({"method_log": method_log}, indent=2), flush=True)
|
|
|
|
| 550 |
"difficulty": task_spec.difficulty,
|
| 551 |
"objective": task_spec.objective,
|
| 552 |
"grader_name": task_spec.grader_name,
|
| 553 |
+
"normalized_score": bounded_task_score(reward),
|
| 554 |
"done": final_result.done,
|
| 555 |
"success": success,
|
| 556 |
"final_status": final_result.observation.status,
|