Spaces:
Sleeping
Sleeping
Commit ·
e7cacff
1
Parent(s): 22c0d63
Clamp the score strictly inside (0, 1), i.e. to [1e-6, 1 - 1e-6], for phase 2 evaluation
Browse files: inference.py (+4 -2)
inference.py
CHANGED
|
@@ -199,8 +199,10 @@ def run_task(task_id: str, client: OpenAI) -> Dict[str, Any]:
|
|
| 199 |
log_step(steps, action, reward, done, error)
|
| 200 |
|
| 201 |
grader = http.get("/grader").json()
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
| 204 |
|
| 205 |
except Exception as exc:
|
| 206 |
print(f"Episode error ({task_id}): {exc}", file=sys.stderr)
|
|
|
|
| 199 |
log_step(steps, action, reward, done, error)
|
| 200 |
|
| 201 |
grader = http.get("/grader").json()
|
| 202 |
+
raw_score = grader.get("score", 0.0)
|
| 203 |
+
_EPS = 1e-6
|
| 204 |
+
score = min(max(raw_score, _EPS), 1.0 - _EPS)
|
| 205 |
+
log_end(success=score > _EPS, steps=steps, score=score, rewards=rewards)
|
| 206 |
|
| 207 |
except Exception as exc:
|
| 208 |
print(f"Episode error ({task_id}): {exc}", file=sys.stderr)
|