Don Rishabh committed on
Commit
86be5e0
·
1 Parent(s): 433bfad

demo: filter tasks dead on target (verbose=0 AND trained=0)

Browse files
Files changed (1) hide show
  1. space-demo/app.py +18 -4
space-demo/app.py CHANGED
@@ -76,15 +76,29 @@ def load_demo_rows() -> List[Dict]:
76
  with urllib.request.urlopen(req) as r:
77
  text = r.read().decode("utf-8")
78
  rows = list(csv.DictReader(io.StringIO(text)))
 
79
 
80
- def _delta(r: Dict) -> float:
81
  try:
82
- return float(r.get("reward_delta_trained_minus_base") or 0)
83
  except ValueError:
84
  return 0.0
85
 
86
- rows.sort(key=_delta, reverse=True)
87
- print(f"[demo] loaded {len(rows)} rows", flush=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  return rows
89
 
90
 
 
76
  with urllib.request.urlopen(req) as r:
77
  text = r.read().decode("utf-8")
78
  rows = list(csv.DictReader(io.StringIO(text)))
79
+ n_total = len(rows)
80
 
81
+ def _f(r: Dict, k: str) -> float:
82
  try:
83
+ return float(r.get(k) or 0)
84
  except ValueError:
85
  return 0.0
86
 
87
+ # Filter out tasks that are dead on this target — both the human
88
+ # verbose prompt AND the trained agent's prompt (accuracy and reward) score 0. Those are
89
+ # tasks where the target genuinely can't do the task regardless of
90
+ # prompt, and they just clutter the demo UI dropdown.
91
+ def _alive(r: Dict) -> bool:
92
+ return (_f(r, "verbose_accuracy") > 0
93
+ or _f(r, "trained_accuracy") > 0
94
+ or _f(r, "trained_reward") > 0)
95
+
96
+ rows = [r for r in rows if _alive(r)]
97
+
98
+ # Sort by trained reward (desc) — most interesting tasks first
99
+ rows.sort(key=lambda r: _f(r, "trained_reward"), reverse=True)
100
+ print(f"[demo] loaded {len(rows)}/{n_total} rows "
101
+ f"(filtered out tasks dead on this target)", flush=True)
102
  return rows
103
 
104