narcolepticchicken committed on
Commit
c9f3b32
·
verified ·
1 Parent(s): 100ce6a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -7
app.py CHANGED
@@ -1,15 +1,38 @@
1
  """Gradio Space for Agent Cost Optimizer Dashboard.
2
 
3
  This app visualizes cost-quality frontiers from ACO benchmark runs.
 
4
  """
5
 
6
  import json
 
 
7
  from pathlib import Path
8
  from typing import Dict, List, Any
9
 
10
  import gradio as gr
11
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def load_results(path: str) -> Dict[str, Any]:
14
  with open(path) as f:
15
  return json.load(f)
@@ -37,14 +60,14 @@ def create_frontier_plot(results: Dict[str, Any]):
37
 
38
 
39
  def build_dashboard():
40
- results_path = Path("eval_results_v2/baseline_results.json")
41
- report_path = Path("eval_results_v2/report.txt")
42
 
43
  if not results_path.exists() or not report_path.exists():
44
- return gr.Interface(
45
- fn=lambda: "Run benchmark first: python standalone_eval_v2.py",
46
- inputs=[], outputs="text", title="ACO Dashboard (No Data)"
47
- )
 
48
 
49
  results = load_results(str(results_path))
50
  report_text = parse_report(str(report_path))
@@ -130,7 +153,7 @@ def build_dashboard():
130
  with gr.Row():
131
  with gr.Column():
132
  gr.Markdown("## Ablation Impact")
133
- gr.Markdown("Cost increase when removing each module (vs full_optimizer)")
134
 
135
  full_cost = results.get("full_optimizer", {}).get("total_cost", 0)
136
  ablation_data = []
 
1
  """Gradio Space for Agent Cost Optimizer Dashboard.
2
 
3
  This app visualizes cost-quality frontiers from ACO benchmark runs.
4
+ If no benchmark data exists, it runs the benchmark on first load.
5
  """
6
 
7
  import json
8
+ import subprocess
9
+ import sys
10
  from pathlib import Path
11
  from typing import Dict, List, Any
12
 
13
  import gradio as gr
14
 
15
 
16
def ensure_data_exists():
    """Ensure the benchmark artifacts exist, generating them if needed.

    Looks for ``eval_results_v2/baseline_results.json`` and
    ``eval_results_v2/report.txt`` relative to the current working
    directory. If either is missing, ``standalone_eval_v2.py`` is run in a
    subprocess with a 120-second timeout. Failures are reported on stdout
    and never raised, so the caller can fall back to a "no data" page.

    Returns:
        A ``(results_path, report_path)`` tuple of ``Path`` objects. The
        files are not guaranteed to exist afterwards; callers must re-check.
    """
    results_path = Path("eval_results_v2/baseline_results.json")
    report_path = Path("eval_results_v2/report.txt")

    if results_path.exists() and report_path.exists():
        return results_path, report_path

    print("Benchmark data not found. Running benchmark...")
    command = [
        sys.executable, "standalone_eval_v2.py",
        "--tasks", "2000",
        "--output", "eval_results_v2",
    ]
    try:
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=120
        )
    except Exception as e:
        # Deliberately broad: this is a best-effort step (timeout, missing
        # interpreter, undecodable output, ...) and must not crash the app.
        print(f"Benchmark failed: {e}")
        return results_path, report_path

    if completed.returncode == 0:
        print("Benchmark complete.")
    else:
        # subprocess.run() does not raise on a non-zero exit unless
        # check=True, so the failure has to be detected and reported here.
        output = (completed.stderr or completed.stdout or "").strip()
        detail = output.splitlines()[-1] if output else "no output"
        print(f"Benchmark failed: exit code {completed.returncode} ({detail})")

    return results_path, report_path
34
+
35
+
36
  def load_results(path: str) -> Dict[str, Any]:
37
  with open(path) as f:
38
  return json.load(f)
 
60
 
61
 
62
  def build_dashboard():
63
+ results_path, report_path = ensure_data_exists()
 
64
 
65
  if not results_path.exists() or not report_path.exists():
66
+ with gr.Blocks(title="Agent Cost Optimizer Dashboard") as demo:
67
+ gr.Markdown("# Agent Cost Optimizer Dashboard")
68
+ gr.Markdown("## Benchmark data not available")
69
+ gr.Markdown("Run `python standalone_eval_v2.py --tasks 2000 --output eval_results_v2` to generate data.")
70
+ return demo
71
 
72
  results = load_results(str(results_path))
73
  report_text = parse_report(str(report_path))
 
153
  with gr.Row():
154
  with gr.Column():
155
  gr.Markdown("## Ablation Impact")
156
+ gr.Markdown("Cost impact when removing each module (vs full_optimizer)")
157
 
158
  full_cost = results.get("full_optimizer", {}).get("total_cost", 0)
159
  ablation_data = []