Upload app.py
Browse files
app.py
CHANGED
|
@@ -1,15 +1,38 @@
|
|
| 1 |
"""Gradio Space for Agent Cost Optimizer Dashboard.
|
| 2 |
|
| 3 |
This app visualizes cost-quality frontiers from ACO benchmark runs.
|
|
|
|
| 4 |
"""
|
| 5 |
|
| 6 |
import json
|
|
|
|
|
|
|
| 7 |
from pathlib import Path
|
| 8 |
from typing import Dict, List, Any
|
| 9 |
|
| 10 |
import gradio as gr
|
| 11 |
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
def load_results(path: str) -> Dict[str, Any]:
    """Load benchmark results from a JSON file.

    Args:
        path: Filesystem path of the JSON results file.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default text encoding (which is not UTF-8 on every system).
    with open(path, encoding="utf-8") as f:
        return json.load(f)
|
|
@@ -37,14 +60,14 @@ def create_frontier_plot(results: Dict[str, Any]):
|
|
| 37 |
|
| 38 |
|
| 39 |
def build_dashboard():
|
| 40 |
-
results_path =
|
| 41 |
-
report_path = Path("eval_results_v2/report.txt")
|
| 42 |
|
| 43 |
if not results_path.exists() or not report_path.exists():
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
| 48 |
|
| 49 |
results = load_results(str(results_path))
|
| 50 |
report_text = parse_report(str(report_path))
|
|
@@ -130,7 +153,7 @@ def build_dashboard():
|
|
| 130 |
with gr.Row():
|
| 131 |
with gr.Column():
|
| 132 |
gr.Markdown("## Ablation Impact")
|
| 133 |
-
gr.Markdown("Cost
|
| 134 |
|
| 135 |
full_cost = results.get("full_optimizer", {}).get("total_cost", 0)
|
| 136 |
ablation_data = []
|
|
|
|
| 1 |
"""Gradio Space for Agent Cost Optimizer Dashboard.
|
| 2 |
|
| 3 |
This app visualizes cost-quality frontiers from ACO benchmark runs.
|
| 4 |
+
If no benchmark data exists, it runs the benchmark on first load.
|
| 5 |
"""
|
| 6 |
|
| 7 |
import json
|
| 8 |
+
import subprocess
|
| 9 |
+
import sys
|
| 10 |
from pathlib import Path
|
| 11 |
from typing import Dict, List, Any
|
| 12 |
|
| 13 |
import gradio as gr
|
| 14 |
|
| 15 |
|
| 16 |
+
def ensure_data_exists():
    """Make a best-effort attempt to generate benchmark data if it is missing.

    Checks for ``eval_results_v2/baseline_results.json`` and
    ``eval_results_v2/report.txt`` relative to the current working
    directory. If either is missing, runs ``standalone_eval_v2.py`` with the
    current interpreter (120 second timeout) to generate them.

    A failed benchmark run never raises: the failure is printed and the
    paths are returned anyway, so the caller must still check that the
    files exist.

    Returns:
        A ``(results_path, report_path)`` tuple of ``pathlib.Path`` objects.
    """
    results_path = Path("eval_results_v2/baseline_results.json")
    report_path = Path("eval_results_v2/report.txt")

    if results_path.exists() and report_path.exists():
        return results_path, report_path

    print("Benchmark data not found. Running benchmark...")
    try:
        # Run the benchmark generator
        completed = subprocess.run(
            [sys.executable, "standalone_eval_v2.py", "--tasks", "2000", "--output", "eval_results_v2"],
            capture_output=True, text=True, timeout=120,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # Covers a missing interpreter/launch failure and the timeout.
        print(f"Benchmark failed: {e}")
    else:
        # subprocess.run does not raise on a non-zero exit unless check=True,
        # so inspect the return code rather than assuming success.
        if completed.returncode == 0:
            print("Benchmark complete.")
        else:
            details = (completed.stderr or "").strip()
            print(f"Benchmark failed: exit code {completed.returncode}")
            if details:
                # Output was captured, so surface it; otherwise it is lost.
                print(details)

    return results_path, report_path
|
| 34 |
+
|
| 35 |
+
|
| 36 |
def load_results(path: str) -> Dict[str, Any]:
    """Load benchmark results from a JSON file.

    Args:
        path: Filesystem path of the JSON results file.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default text encoding (which is not UTF-8 on every system).
    with open(path, encoding="utf-8") as f:
        return json.load(f)
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
def build_dashboard():
|
| 63 |
+
results_path, report_path = ensure_data_exists()
|
|
|
|
| 64 |
|
| 65 |
if not results_path.exists() or not report_path.exists():
|
| 66 |
+
with gr.Blocks(title="Agent Cost Optimizer Dashboard") as demo:
|
| 67 |
+
gr.Markdown("# Agent Cost Optimizer Dashboard")
|
| 68 |
+
gr.Markdown("## Benchmark data not available")
|
| 69 |
+
gr.Markdown("Run `python standalone_eval_v2.py --tasks 2000 --output eval_results_v2` to generate data.")
|
| 70 |
+
return demo
|
| 71 |
|
| 72 |
results = load_results(str(results_path))
|
| 73 |
report_text = parse_report(str(report_path))
|
|
|
|
| 153 |
with gr.Row():
|
| 154 |
with gr.Column():
|
| 155 |
gr.Markdown("## Ablation Impact")
|
| 156 |
+
gr.Markdown("Cost impact when removing each module (vs full_optimizer)")
|
| 157 |
|
| 158 |
full_cost = results.get("full_optimizer", {}).get("total_cost", 0)
|
| 159 |
ablation_data = []
|