srishtichugh committed
Commit 53ae9f0 · 1 Parent(s): fa53e30

Fix scoring
Files changed (3):
  1. inference.py          +19 -2
  2. inference_log.txt      +0 -0
  3. server/environment.py +16 -3
inference.py CHANGED
@@ -112,6 +112,23 @@ def api_get(path: str) -> dict:
     resp.raise_for_status()
     return resp.json()
 
+# ------------------------------------------------------------------
+# Score sanitizer
+# ------------------------------------------------------------------
+
+def sanitize_score(score: float) -> float:
+    """
+    Ensure the score is strictly within (0, 1),
+    as required by the hackathon validator.
+    """
+    EPS = 1e-4
+
+    if score >= 1.0:
+        return 1.0 - EPS
+    if score <= 0.0:
+        return EPS
+
+    return float(score)
 
 # ------------------------------------------------------------------
 # Agent loop
@@ -228,7 +245,7 @@ def run_task(task_id: int) -> float:
     finally:
         log_end(success=success, steps=steps_taken, rewards=rewards)
 
-    final_score = obs["current_score"]
+    final_score = sanitize_score(obs["current_score"])
     print(
         f"\n Task {task_id} final score: {final_score:.4f} (steps used: {obs['step_count']})",
         file=sys.stderr,
@@ -268,7 +285,7 @@ def main():
     print("="*60, file=sys.stderr)
     for k, v in scores.items():
         print(f" {k}: {v:.4f}", file=sys.stderr)
-    avg = sum(scores.values()) / len(scores)
+    avg = sanitize_score(sum(scores.values()) / len(scores))
     print(f" average: {avg:.4f}", file=sys.stderr)
     print("="*60, file=sys.stderr)
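The helper's clamp behavior is easy to check in isolation. A minimal standalone sketch (it restates sanitize_score from the hunk above so the asserts run on their own):

    # Standalone restatement of sanitize_score from the diff above.
    EPS = 1e-4

    def sanitize_score(score: float) -> float:
        """Ensure the score is strictly within (0, 1)."""
        if score >= 1.0:
            return 1.0 - EPS
        if score <= 0.0:
            return EPS
        return float(score)

    assert sanitize_score(1.0) == 1.0 - EPS   # upper bound pulled inside
    assert sanitize_score(0.0) == EPS         # lower bound pushed inside
    assert sanitize_score(1.7) == 1.0 - EPS   # out-of-range values are clamped too
    assert sanitize_score(0.5) == 0.5         # in-range values pass through unchanged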
inference_log.txt CHANGED
Binary files a/inference_log.txt and b/inference_log.txt differ
 
server/environment.py CHANGED
@@ -111,11 +111,24 @@ class DataCleaningEnvironment:
 
     def _compute_score(self) -> float:
         if self._task_id == 1:
-            return t1.score(self._df, self._meta)
+            raw = t1.score(self._df, self._meta)
         elif self._task_id == 2:
-            return t2.score(self._df, self._meta)
+            raw = t2.score(self._df, self._meta)
         else:
-            return t3.score(self._df, self._meta)
+            raw = t3.score(self._df, self._meta)
+
+        EPS = 1e-4
+
+        # Round first, so rounding cannot later break the clamp
+        raw = round(float(raw), 4)
+
+        # HARD clamp AFTER rounding, strictly inside (0, 1)
+        if raw >= 1.0:
+            raw = 1.0 - EPS
+        elif raw <= 0.0:
+            raw = EPS
+
+        return raw
 
     def _count_errors(self) -> int:
         if self._task_id == 1:
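The ordering in _compute_score matters: if the clamp ran before the final round, a raw score such as 0.99996 would pass the clamp untouched and then round up to exactly 1.0, which the validator rejects. A small sketch of that failure mode (the 0.99996 value is illustrative, not taken from the tasks):

    # Why the clamp must come AFTER rounding: clamp-then-round can
    # still emit an exact 1.0 on the boundary.
    EPS = 1e-4

    def clamp(x: float) -> float:
        if x >= 1.0:
            return 1.0 - EPS
        if x <= 0.0:
            return EPS
        return x

    raw = 0.99996                               # below 1.0, so the clamp leaves it alone
    assert round(clamp(raw), 4) == 1.0          # clamp-then-round leaks the boundary back in
    assert clamp(round(raw, 4)) == 1.0 - EPS    # round-then-clamp stays strictly inside (0, 1)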