Spaces:
Sleeping
Sleeping
Sumit Saraswat committed on
Commit ·
9cbeffe
1
Parent(s): f8de88f
fix: [START]/[STEP]/[END] structured output for Meta Phase 2 validation
Browse files
- EnvLoggerWrapper now tracks task_id, step_count, reward, score
- Exact format: [START] task=NAME, [STEP] step=N reward=X.XX, [END] task=NAME score=X.XX steps=N
- All prints use flush=True to prevent Docker buffer trapping
- OpenAI client uses exact spec: OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
- No stderr writes, no stdout redirect, no requests.post fallbacks
- Tested: all 3 tasks emit correct structured blocks
- inference.py +38 -4
inference.py
CHANGED
|
@@ -155,8 +155,14 @@ class MetricsTracker:
|
|
| 155 |
# ───────────────────────────────────────────────────────────────
|
| 156 |
|
| 157 |
class EnvLoggerWrapper:
|
|
|
|
|
|
|
| 158 |
def __init__(self, env):
|
| 159 |
self.env = env
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
def __enter__(self):
|
| 162 |
if hasattr(self.env, "__enter__"):
|
|
@@ -164,18 +170,46 @@ class EnvLoggerWrapper:
|
|
| 164 |
return self
|
| 165 |
|
| 166 |
def __exit__(self, exc_type, exc, tb):
|
| 167 |
-
|
|
|
|
| 168 |
if hasattr(self.env, "__exit__"):
|
| 169 |
return self.env.__exit__(exc_type, exc, tb)
|
| 170 |
return False
|
| 171 |
|
| 172 |
def reset(self, **kwargs):
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
return self.env.reset(**kwargs)
|
| 175 |
|
| 176 |
def step(self, action):
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
class InProcessEnvSession:
|
| 181 |
def __init__(self):
|
|
|
|
| 155 |
# ───────────────────────────────────────────────────────────────
|
| 156 |
|
| 157 |
class EnvLoggerWrapper:
|
| 158 |
+
"""Wrapper that emits [START]/[STEP]/[END] structured output for Meta's validation bot."""
|
| 159 |
+
|
| 160 |
def __init__(self, env):
    """Wrap *env* and zero the state used for the structured log lines.

    Args:
        env: The object whose ``reset`` and ``step`` calls are forwarded
            by this wrapper.
    """
    # Values reported in the [STEP] and [END] lines.
    self._step_count = 0
    self._last_reward = 0.0
    self._score = 0.0
    # Task label printed in [START]/[END]; stays empty until reset() sets it.
    self._task_id = ""
    self.env = env
|
| 166 |
|
| 167 |
def __enter__(self):
|
| 168 |
if hasattr(self.env, "__enter__"):
|
|
|
|
| 170 |
return self
|
| 171 |
|
| 172 |
def __exit__(self, exc_type, exc, tb):
    """Print the closing [END] line, then delegate to the wrapped env.

    Args:
        exc_type: Exception class passed by the ``with`` statement, or None.
        exc: Exception instance passed by the ``with`` statement, or None.
        tb: Traceback passed by the ``with`` statement, or None.

    Returns:
        The wrapped env's ``__exit__`` result when it defines one;
        otherwise ``False``.
    """
    # Printed before delegating, so the line is emitted regardless of what
    # the wrapped env's exit does.
    summary = f"[END] task={self._task_id} score={self._score:.2f} steps={self._step_count}"
    print(summary, flush=True)
    if not hasattr(self.env, "__exit__"):
        return False
    return self.env.__exit__(exc_type, exc, tb)
|
| 178 |
|
| 179 |
def reset(self, **kwargs):
    """Start a new episode: clear the counters, print [START], reset the env.

    Args:
        **kwargs: Forwarded unchanged to the wrapped env's ``reset``. The
            optional ``task_id`` entry is also used as the task label in
            the [START] and [END] lines.

    Returns:
        Whatever the wrapped env's ``reset`` returns.
    """
    # A missing, None or empty task_id would print "task=None" / "task=",
    # so all three fall back to the same "unknown" placeholder.
    task_id = kwargs.get("task_id")
    self._task_id = "unknown" if task_id is None or task_id == "" else task_id

    # Per-episode state starts from zero.
    self._step_count = 0
    self._score = 0.0
    self._last_reward = 0.0

    # The [START] line is printed before the wrapped env is reset.
    print(f"[START] task={self._task_id}", flush=True)
    return self.env.reset(**kwargs)
|
| 188 |
|
| 189 |
def step(self, action):
    """Forward *action* to the wrapped env and print one [STEP] line.

    After the wrapped ``step`` call returns, the step counter is
    incremented and the result is inspected on a best-effort basis:

    * ``result.reward`` (if present) becomes the reported reward; a falsy
      reward is reported as 0.0.
    * ``result.observation`` (if present) is read as a dict, either directly
      or via its ``model_dump()`` method, and its ``score_so_far`` entry,
      when present, becomes the score later printed in the [END] line.

    Args:
        action: Passed unchanged to the wrapped env's ``step``.

    Returns:
        The wrapped env's ``step`` result, unmodified.
    """
    result = self.env.step(action)
    self._step_count += 1

    # Reward and score are parsed in separate guarded sections so that a
    # malformed reward cannot stop the score from being updated (and vice
    # versa). Failures are swallowed on purpose: logging must never crash
    # the run, and the previous value is simply kept.
    try:
        if hasattr(result, "reward"):
            self._last_reward = float(result.reward or 0.0)
    except Exception:
        pass  # keep the previously reported reward

    try:
        if hasattr(result, "observation"):
            obs = result.observation
            if hasattr(obs, "model_dump"):
                obs_dict = obs.model_dump()
            elif isinstance(obs, dict):
                obs_dict = obs
            else:
                obs_dict = {}
            # A missing key leaves the current score untouched.
            self._score = float(obs_dict.get("score_so_far", self._score))
    except Exception:
        pass  # keep the previously recorded score

    # Emit the STEP line.
    print(f"[STEP] step={self._step_count} reward={self._last_reward:.2f}", flush=True)
    return result
|
| 212 |
+
|
| 213 |
|
| 214 |
class InProcessEnvSession:
|
| 215 |
def __init__(self):
|