Sumit Saraswat committed on
Commit
9cbeffe
·
1 Parent(s): f8de88f

fix: [START]/[STEP]/[END] structured output for Meta Phase 2 validation

Browse files

- EnvLoggerWrapper now tracks task_id, step_count, reward, score
- Exact format: [START] task=NAME, [STEP] step=N reward=X.XX, [END] task=NAME score=X.XX steps=N
- All prints use flush=True to prevent Docker buffer trapping
- OpenAI client uses exact spec: OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
- No stderr writes, no stdout redirect, no requests.post fallbacks
- Tested: all 3 tasks emit correct structured blocks

Files changed (1) hide show
  1. inference.py +38 -4
inference.py CHANGED
@@ -155,8 +155,14 @@ class MetricsTracker:
155
  # ═══════════════════════════════════════════════════════════════
156
 
157
  class EnvLoggerWrapper:
 
 
158
  def __init__(self, env):
159
  self.env = env
 
 
 
 
160
 
161
  def __enter__(self):
162
  if hasattr(self.env, "__enter__"):
@@ -164,18 +170,46 @@ class EnvLoggerWrapper:
164
  return self
165
 
166
  def __exit__(self, exc_type, exc, tb):
167
- print("END")
 
168
  if hasattr(self.env, "__exit__"):
169
  return self.env.__exit__(exc_type, exc, tb)
170
  return False
171
 
172
  def reset(self, **kwargs):
173
- print("START")
 
 
 
 
 
 
174
  return self.env.reset(**kwargs)
175
 
176
  def step(self, action):
177
- print("STEP")
178
- return self.env.step(action)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  class InProcessEnvSession:
181
  def __init__(self):
 
155
  # ═══════════════════════════════════════════════════════════════
156
 
157
  class EnvLoggerWrapper:
158
+ """Wrapper that emits [START]/[STEP]/[END] structured output for Meta's validation bot."""
159
+
160
  def __init__(self, env):
161
  self.env = env
162
+ self._task_id = ""
163
+ self._step_count = 0
164
+ self._score = 0.0
165
+ self._last_reward = 0.0
166
 
167
  def __enter__(self):
168
  if hasattr(self.env, "__enter__"):
 
170
  return self
171
 
172
  def __exit__(self, exc_type, exc, tb):
173
+ # Emit END block with exact format Meta expects
174
+ print(f"[END] task={self._task_id} score={self._score:.2f} steps={self._step_count}", flush=True)
175
  if hasattr(self.env, "__exit__"):
176
  return self.env.__exit__(exc_type, exc, tb)
177
  return False
178
 
179
  def reset(self, **kwargs):
180
+ # Extract task_id and reset counters
181
+ self._task_id = kwargs.get("task_id", "unknown")
182
+ self._step_count = 0
183
+ self._score = 0.0
184
+ self._last_reward = 0.0
185
+ # Emit START block
186
+ print(f"[START] task={self._task_id}", flush=True)
187
  return self.env.reset(**kwargs)
188
 
189
  def step(self, action):
190
+ result = self.env.step(action)
191
+ self._step_count += 1
192
+
193
+ # Safely extract reward and score from the observation
194
+ try:
195
+ if hasattr(result, 'reward'):
196
+ self._last_reward = float(result.reward or 0.0)
197
+ if hasattr(result, 'observation'):
198
+ obs = result.observation
199
+ if hasattr(obs, 'model_dump'):
200
+ obs_dict = obs.model_dump()
201
+ elif isinstance(obs, dict):
202
+ obs_dict = obs
203
+ else:
204
+ obs_dict = {}
205
+ self._score = float(obs_dict.get("score_so_far", self._score))
206
+ except Exception:
207
+ pass # Never crash on logging
208
+
209
+ # Emit STEP block
210
+ print(f"[STEP] step={self._step_count} reward={self._last_reward:.2f}", flush=True)
211
+ return result
212
+
213
 
214
  class InProcessEnvSession:
215
  def __init__(self):