Spaces:
Sleeping
Sleeping
Commit ·
332cfd0
1
Parent(s): 4ad3f24
Fix [END] format: restore score= field, add flush=True to all log prints
Browse files- inference.py +4 -3
inference.py
CHANGED
|
@@ -49,7 +49,7 @@ def run_task(client, model_name: str, task_config):
|
|
| 49 |
)
|
| 50 |
obs = env.reset()
|
| 51 |
|
| 52 |
-
print(f"[START] task={task_config.name} env=negotiation model={model_name}")
|
| 53 |
|
| 54 |
done = False
|
| 55 |
step_n = 0
|
|
@@ -153,14 +153,15 @@ Respond with ONLY your chosen action, nothing else."""
|
|
| 153 |
|
| 154 |
# ── Log step (stdout — parsed by judges) ──
|
| 155 |
log_action = action_str if not action_str.startswith("OFFER") else f"OFFER {action_price}"
|
| 156 |
-
print(f"[STEP] step={step_n} action={log_action} reward={reward:.2f} done={str(done).lower()} error={error_msg}")
|
| 157 |
|
| 158 |
finally:
|
| 159 |
# [END] MUST always be printed, even on exceptions
|
| 160 |
grader = get_grader(task_config)
|
| 161 |
result = grader.grade(rewards, step_n, deal_made)
|
| 162 |
rewards_str = ",".join([f"{r:.2f}" for r in rewards])
|
| 163 |
-
|
|
|
|
| 164 |
|
| 165 |
return result
|
| 166 |
|
|
|
|
| 49 |
)
|
| 50 |
obs = env.reset()
|
| 51 |
|
| 52 |
+
print(f"[START] task={task_config.name} env=negotiation model={model_name}", flush=True)
|
| 53 |
|
| 54 |
done = False
|
| 55 |
step_n = 0
|
|
|
|
| 153 |
|
| 154 |
# ── Log step (stdout — parsed by judges) ──
|
| 155 |
log_action = action_str if not action_str.startswith("OFFER") else f"OFFER {action_price}"
|
| 156 |
+
print(f"[STEP] step={step_n} action={log_action} reward={reward:.2f} done={str(done).lower()} error={error_msg}", flush=True)
|
| 157 |
|
| 158 |
finally:
|
| 159 |
# [END] MUST always be printed, even on exceptions
|
| 160 |
grader = get_grader(task_config)
|
| 161 |
result = grader.grade(rewards, step_n, deal_made)
|
| 162 |
rewards_str = ",".join([f"{r:.2f}" for r in rewards])
|
| 163 |
+
score = result['score']
|
| 164 |
+
print(f"[END] success={str(result['success']).lower()} steps={step_n} score={score:.4f} rewards={rewards_str}", flush=True)
|
| 165 |
|
| 166 |
return result
|
| 167 |
|