Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files: inference.py +8 -6
inference.py
CHANGED
|
@@ -127,7 +127,7 @@ def main():
|
|
| 127 |
rewards = []
|
| 128 |
is_done = False
|
| 129 |
steps_taken = 0
|
| 130 |
-
final_score = 0.
|
| 131 |
|
| 132 |
for turn in range(1, MAX_TURNS + 1):
|
| 133 |
try:
|
|
@@ -155,7 +155,7 @@ def main():
|
|
| 155 |
error_msg = str(e).replace('\n', ' ')
|
| 156 |
|
| 157 |
steps_taken += 1
|
| 158 |
-
reward_val = obs.reward if hasattr(obs, 'reward') else getattr(obs, 'grader_score', 0.
|
| 159 |
rewards.append(f"{reward_val:.2f}")
|
| 160 |
is_done = result.done if hasattr(result, 'done') else getattr(obs, 'done', False)
|
| 161 |
done_str = "true" if is_done else "false"
|
|
@@ -170,18 +170,20 @@ def main():
|
|
| 170 |
f"Command output:\n"
|
| 171 |
f"stdout:\n```\n{getattr(obs, 'stdout', '')}\n```\n"
|
| 172 |
f"stderr:\n```\n{getattr(obs, 'stderr', '')}\n```\n"
|
| 173 |
-
f"Current score: {getattr(obs, 'grader_score', 0.
|
| 174 |
f"Grader feedback: {getattr(obs, 'grader_feedback', '')}\n\n"
|
| 175 |
f"What command should I run next?"
|
| 176 |
),
|
| 177 |
})
|
| 178 |
|
| 179 |
-
final_score = getattr(obs, 'grader_score', 0.
|
| 180 |
-
if
|
| 181 |
break
|
| 182 |
|
|
|
|
|
|
|
| 183 |
success_str = "true" if final_score >= 0.99 else "false"
|
| 184 |
-
rewards_str = ",".join(rewards) if rewards else "0.
|
| 185 |
print(f"[END] success={success_str} steps={steps_taken} score={final_score:.2f} rewards={rewards_str}", flush=True)
|
| 186 |
except Exception as e:
|
| 187 |
# Make sure to emit END log even on catastrophic wrapper failures so Hackathon doesn't crash inference.py
|
|
|
|
| 127 |
rewards = []
|
| 128 |
is_done = False
|
| 129 |
steps_taken = 0
|
| 130 |
+
final_score = getattr(obs, 'grader_score', 0.01)
|
| 131 |
|
| 132 |
for turn in range(1, MAX_TURNS + 1):
|
| 133 |
try:
|
|
|
|
| 155 |
error_msg = str(e).replace('\n', ' ')
|
| 156 |
|
| 157 |
steps_taken += 1
|
| 158 |
+
reward_val = obs.reward if hasattr(obs, 'reward') else getattr(obs, 'grader_score', 0.01)
|
| 159 |
rewards.append(f"{reward_val:.2f}")
|
| 160 |
is_done = result.done if hasattr(result, 'done') else getattr(obs, 'done', False)
|
| 161 |
done_str = "true" if is_done else "false"
|
|
|
|
| 170 |
f"Command output:\n"
|
| 171 |
f"stdout:\n```\n{getattr(obs, 'stdout', '')}\n```\n"
|
| 172 |
f"stderr:\n```\n{getattr(obs, 'stderr', '')}\n```\n"
|
| 173 |
+
f"Current score: {getattr(obs, 'grader_score', 0.01)}/1.0\n"
|
| 174 |
f"Grader feedback: {getattr(obs, 'grader_feedback', '')}\n\n"
|
| 175 |
f"What command should I run next?"
|
| 176 |
),
|
| 177 |
})
|
| 178 |
|
| 179 |
+
final_score = getattr(obs, 'grader_score', 0.01)
|
| 180 |
+
if final_score >= 0.99 or getattr(obs, 'done', False) or (hasattr(result, 'done') and result.done):
|
| 181 |
break
|
| 182 |
|
| 183 |
+
# Clamp final score strictly within (0, 1)
|
| 184 |
+
final_score = max(0.01, min(0.99, final_score))
|
| 185 |
success_str = "true" if final_score >= 0.99 else "false"
|
| 186 |
+
rewards_str = ",".join(rewards) if rewards else "0.01"
|
| 187 |
print(f"[END] success={success_str} steps={steps_taken} score={final_score:.2f} rewards={rewards_str}", flush=True)
|
| 188 |
except Exception as e:
|
| 189 |
# Make sure to emit END log even on catastrophic wrapper failures so Hackathon doesn't crash inference.py
|