DEVessi committed on
Commit
1f6c7ae
·
verified ·
1 Parent(s): 4fd3038

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. inference.py +8 -6
inference.py CHANGED
@@ -127,7 +127,7 @@ def main():
127
  rewards = []
128
  is_done = False
129
  steps_taken = 0
130
- final_score = 0.0
131
 
132
  for turn in range(1, MAX_TURNS + 1):
133
  try:
@@ -155,7 +155,7 @@ def main():
155
  error_msg = str(e).replace('\n', ' ')
156
 
157
  steps_taken += 1
158
- reward_val = obs.reward if hasattr(obs, 'reward') else getattr(obs, 'grader_score', 0.0)
159
  rewards.append(f"{reward_val:.2f}")
160
  is_done = result.done if hasattr(result, 'done') else getattr(obs, 'done', False)
161
  done_str = "true" if is_done else "false"
@@ -170,18 +170,20 @@ def main():
170
  f"Command output:\n"
171
  f"stdout:\n```\n{getattr(obs, 'stdout', '')}\n```\n"
172
  f"stderr:\n```\n{getattr(obs, 'stderr', '')}\n```\n"
173
- f"Current score: {getattr(obs, 'grader_score', 0.0)}/1.0\n"
174
  f"Grader feedback: {getattr(obs, 'grader_feedback', '')}\n\n"
175
  f"What command should I run next?"
176
  ),
177
  })
178
 
179
- final_score = getattr(obs, 'grader_score', 0.0)
180
- if getattr(obs, 'grader_score', 0.0) >= 0.99 or getattr(obs, 'done', False) or (hasattr(result, 'done') and result.done):
181
  break
182
 
 
 
183
  success_str = "true" if final_score >= 0.99 else "false"
184
- rewards_str = ",".join(rewards) if rewards else "0.00"
185
  print(f"[END] success={success_str} steps={steps_taken} score={final_score:.2f} rewards={rewards_str}", flush=True)
186
  except Exception as e:
187
  # Make sure to emit END log even on catastrophic wrapper failures so Hackathon doesn't crash inference.py
 
127
  rewards = []
128
  is_done = False
129
  steps_taken = 0
130
+ final_score = getattr(obs, 'grader_score', 0.01)
131
 
132
  for turn in range(1, MAX_TURNS + 1):
133
  try:
 
155
  error_msg = str(e).replace('\n', ' ')
156
 
157
  steps_taken += 1
158
+ reward_val = obs.reward if hasattr(obs, 'reward') else getattr(obs, 'grader_score', 0.01)
159
  rewards.append(f"{reward_val:.2f}")
160
  is_done = result.done if hasattr(result, 'done') else getattr(obs, 'done', False)
161
  done_str = "true" if is_done else "false"
 
170
  f"Command output:\n"
171
  f"stdout:\n```\n{getattr(obs, 'stdout', '')}\n```\n"
172
  f"stderr:\n```\n{getattr(obs, 'stderr', '')}\n```\n"
173
+ f"Current score: {getattr(obs, 'grader_score', 0.01)}/1.0\n"
174
  f"Grader feedback: {getattr(obs, 'grader_feedback', '')}\n\n"
175
  f"What command should I run next?"
176
  ),
177
  })
178
 
179
+ final_score = getattr(obs, 'grader_score', 0.01)
180
+ if final_score >= 0.99 or getattr(obs, 'done', False) or (hasattr(result, 'done') and result.done):
181
  break
182
 
183
+ # Clamp final score strictly within (0, 1)
184
+ final_score = max(0.01, min(0.99, final_score))
185
  success_str = "true" if final_score >= 0.99 else "false"
186
+ rewards_str = ",".join(rewards) if rewards else "0.01"
187
  print(f"[END] success={success_str} steps={steps_taken} score={final_score:.2f} rewards={rewards_str}", flush=True)
188
  except Exception as e:
189
  # Make sure to emit END log even on catastrophic wrapper failures so Hackathon doesn't crash inference.py