siddeshwar-kagatikar committed on
Commit
9f98669
·
1 Parent(s): 957b4b2

Add pre/post benchmark dashboard options in Space UI

Browse files

Replace the single default dashboard entry on the Space home page with
explicit Post-Training and Pre-Training dashboard links, and wire them
to dedicated routes (/dashboard/post-training and /dashboard/pre-training).
Update the compare-dashboard path mapping so the pre-training entry uses
pre_training_benchmark_dashboard_original.html, and keep compatibility
aliases for the existing /dashboard/finetuned and /dashboard/base URLs.

Made-with: Cursor

artifacts/local_hf_eval_round_006/pre_training_benchmark_dashboard_original.html ADDED
The diff for this file is too large to render. See raw diff
 
server.py CHANGED
@@ -39,8 +39,8 @@ SPACE_DASHBOARD = Path("artifacts/space_dashboard.html")
39
  LATEST_BASELINE_OUTPUT = Path("artifacts/baselines/openai_fixed_levels_latest.json")
40
  LATEST_EVALUATION_OUTPUT = Path("artifacts/latest_evaluation.json")
41
  OPENENV_SPEC_PATH = Path("openenv.yaml")
42
- COMPARE_FINETUNED_DASHBOARD = Path("artifacts/local_hf_eval_round_006/post_training_benchmark_dashboard.html")
43
- COMPARE_BASE_DASHBOARD = Path("artifacts/local_hf_eval_round_006/post_training_benchmark_dashboard_original.html")
44
 
45
  _SESSION_LOCK = Lock()
46
  _SESSIONS: dict[str, OSINTEnvironment] = {}
@@ -253,8 +253,8 @@ def _preview_snapshot() -> dict[str, Any]:
253
  def _space_snapshot() -> dict[str, Any]:
254
  snapshot = dict(_base_environment_snapshot())
255
  compare_dashboards = {
256
- "finetuned": str(COMPARE_FINETUNED_DASHBOARD),
257
- "base": str(COMPARE_BASE_DASHBOARD),
258
  }
259
  available_compare_dashboards = {
260
  name: path for name, path in compare_dashboards.items() if Path(path).exists()
@@ -319,13 +319,13 @@ def home() -> str:
319
  compare_dashboards = snapshot.get("compare_dashboard_paths", {})
320
  compare_links_html = ""
321
  if compare_dashboards:
322
- finetuned_link = ""
323
- base_link = ""
324
- if compare_dashboards.get("finetuned"):
325
- finetuned_link = '<a class="button" href="/dashboard/finetuned">Finetuned Dashboard</a>'
326
- if compare_dashboards.get("base"):
327
- base_link = '<a class="button secondary" href="/dashboard/base">Base Dashboard</a>'
328
- compare_links_html = f"<div style=\"margin-top:10px\">{finetuned_link}{base_link}</div>"
329
  difficulty_html = "".join(
330
  f"<li><strong>{level}</strong>: {count}</li>"
331
  for level, count in sorted(snapshot["difficulty_counts"].items())
@@ -409,7 +409,6 @@ def home() -> str:
409
  <h1>OSINT OpenEnv Space</h1>
410
  <p class="muted">A containerized OpenEnv-compatible benchmark for synthetic OSINT reasoning over profiles, forum threads, posts, aliases, organizations, locations, and event links.</p>
411
  <p>The Space boots with the fixed-level benchmark so visitors get a stable environment snapshot instead of a different graph every restart.</p>
412
- <a class="button" href="/dashboard">Open Dashboard</a>
413
  <a class="link" href="/api/environment">Environment JSON</a>
414
  {compare_links_html}
415
  </section>
@@ -591,18 +590,28 @@ def dashboard() -> FileResponse:
591
  return FileResponse(snapshot["dashboard_path"], media_type="text/html")
592
 
593
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
  @app.get("/dashboard/finetuned")
595
- def dashboard_finetuned() -> FileResponse:
596
- if not COMPARE_FINETUNED_DASHBOARD.exists():
597
- raise HTTPException(status_code=404, detail="Finetuned dashboard not found")
598
- return FileResponse(COMPARE_FINETUNED_DASHBOARD, media_type="text/html")
599
 
600
 
601
  @app.get("/dashboard/base")
602
- def dashboard_base() -> FileResponse:
603
- if not COMPARE_BASE_DASHBOARD.exists():
604
- raise HTTPException(status_code=404, detail="Base dashboard not found")
605
- return FileResponse(COMPARE_BASE_DASHBOARD, media_type="text/html")
606
 
607
 
608
  if __name__ == "__main__":
 
39
  LATEST_BASELINE_OUTPUT = Path("artifacts/baselines/openai_fixed_levels_latest.json")
40
  LATEST_EVALUATION_OUTPUT = Path("artifacts/latest_evaluation.json")
41
  OPENENV_SPEC_PATH = Path("openenv.yaml")
42
+ POST_TRAINING_DASHBOARD = Path("artifacts/local_hf_eval_round_006/post_training_benchmark_dashboard.html")
43
+ PRE_TRAINING_DASHBOARD = Path("artifacts/local_hf_eval_round_006/pre_training_benchmark_dashboard_original.html")
44
 
45
  _SESSION_LOCK = Lock()
46
  _SESSIONS: dict[str, OSINTEnvironment] = {}
 
253
  def _space_snapshot() -> dict[str, Any]:
254
  snapshot = dict(_base_environment_snapshot())
255
  compare_dashboards = {
256
+ "post_training": str(POST_TRAINING_DASHBOARD),
257
+ "pre_training": str(PRE_TRAINING_DASHBOARD),
258
  }
259
  available_compare_dashboards = {
260
  name: path for name, path in compare_dashboards.items() if Path(path).exists()
 
319
  compare_dashboards = snapshot.get("compare_dashboard_paths", {})
320
  compare_links_html = ""
321
  if compare_dashboards:
322
+ post_training_link = ""
323
+ pre_training_link = ""
324
+ if compare_dashboards.get("post_training"):
325
+ post_training_link = '<a class="button" href="/dashboard/post-training">Post-Training Dashboard</a>'
326
+ if compare_dashboards.get("pre_training"):
327
+ pre_training_link = '<a class="button secondary" href="/dashboard/pre-training">Pre-Training Dashboard</a>'
328
+ compare_links_html = f"<div style=\"margin-top:10px\">{post_training_link}{pre_training_link}</div>"
329
  difficulty_html = "".join(
330
  f"<li><strong>{level}</strong>: {count}</li>"
331
  for level, count in sorted(snapshot["difficulty_counts"].items())
 
409
  <h1>OSINT OpenEnv Space</h1>
410
  <p class="muted">A containerized OpenEnv-compatible benchmark for synthetic OSINT reasoning over profiles, forum threads, posts, aliases, organizations, locations, and event links.</p>
411
  <p>The Space boots with the fixed-level benchmark so visitors get a stable environment snapshot instead of a different graph every restart.</p>
 
412
  <a class="link" href="/api/environment">Environment JSON</a>
413
  {compare_links_html}
414
  </section>
 
590
  return FileResponse(snapshot["dashboard_path"], media_type="text/html")
591
 
592
 
593
+ @app.get("/dashboard/post-training")
594
+ def dashboard_post_training() -> FileResponse:
595
+ if not POST_TRAINING_DASHBOARD.exists():
596
+ raise HTTPException(status_code=404, detail="Post-training dashboard not found")
597
+ return FileResponse(POST_TRAINING_DASHBOARD, media_type="text/html")
598
+
599
+
600
+ @app.get("/dashboard/pre-training")
601
+ def dashboard_pre_training() -> FileResponse:
602
+ if not PRE_TRAINING_DASHBOARD.exists():
603
+ raise HTTPException(status_code=404, detail="Pre-training dashboard not found")
604
+ return FileResponse(PRE_TRAINING_DASHBOARD, media_type="text/html")
605
+
606
+
607
  @app.get("/dashboard/finetuned")
608
+ def dashboard_finetuned_alias() -> FileResponse:
609
+ return dashboard_post_training()
 
 
610
 
611
 
612
  @app.get("/dashboard/base")
613
+ def dashboard_base_alias() -> FileResponse:
614
+ return dashboard_pre_training()
 
 
615
 
616
 
617
  if __name__ == "__main__":