narcolepticchicken
/

agent-cost-optimizer

Safetensors

ml-intern

Model card Files Files and versions

xet

Community

narcolepticchicken committed on 2 days ago

Commit

c9f3b32

verified ·

1 Parent(s): 100ce6a

Upload app.py

Browse files

Files changed (1) hide show

app.py +30 -7

app.py CHANGED Viewed

@@ -1,15 +1,38 @@
 """Gradio Space for Agent Cost Optimizer Dashboard.
 This app visualizes cost-quality frontiers from ACO benchmark runs.
 """
 import json
 from pathlib import Path
 from typing import Dict, List, Any
 import gradio as gr
 def load_results(path: str) -> Dict[str, Any]:
     """Load benchmark results from a JSON file.

     Args:
         path: Filesystem path of the JSON results file.

     Returns:
         The decoded JSON document.

     Raises:
         OSError: If the file cannot be opened.
         json.JSONDecodeError: If the file does not contain valid JSON.
     """
     # JSON is UTF-8 by specification; do not depend on the locale default.
     with open(path, encoding="utf-8") as f:
         return json.load(f)
@@ -37,14 +60,14 @@ def create_frontier_plot(results: Dict[str, Any]):
 def build_dashboard():
-    results_path = Path("eval_results_v2/baseline_results.json")
-    report_path = Path("eval_results_v2/report.txt")
     if not results_path.exists() or not report_path.exists():
-        return gr.Interface(
-            fn=lambda: "Run benchmark first: python standalone_eval_v2.py",
-            inputs=[], outputs="text", title="ACO Dashboard (No Data)"
-        )
     results = load_results(str(results_path))
     report_text = parse_report(str(report_path))
@@ -130,7 +153,7 @@ def build_dashboard():
         with gr.Row():
             with gr.Column():
                 gr.Markdown("## Ablation Impact")
-                gr.Markdown("Cost increase when removing each module (vs full_optimizer)")
                 full_cost = results.get("full_optimizer", {}).get("total_cost", 0)
                 ablation_data = []

 """Gradio Space for Agent Cost Optimizer Dashboard.
 This app visualizes cost-quality frontiers from ACO benchmark runs.
+If no benchmark data exists, it runs the benchmark on first load.
 """
 import json
+import subprocess
+import sys
 from pathlib import Path
 from typing import Dict, List, Any
 import gradio as gr
+def ensure_data_exists():
+    """Run benchmark if data doesn't exist."""
+    results_path = Path("eval_results_v2/baseline_results.json")
+    report_path = Path("eval_results_v2/report.txt")
+    if not results_path.exists() or not report_path.exists():
+        print("Benchmark data not found. Running benchmark...")
+        try:
+            # Run the benchmark generator
+            subprocess.run(
+                [sys.executable, "standalone_eval_v2.py", "--tasks", "2000", "--output", "eval_results_v2"],
+                capture_output=True, text=True, timeout=120
+            )
+            print("Benchmark complete.")
+        except Exception as e:
+            print(f"Benchmark failed: {e}")
+    return results_path, report_path
 def load_results(path: str) -> Dict[str, Any]:
     """Load benchmark results from a JSON file.

     Args:
         path: Filesystem path of the JSON results file.

     Returns:
         The decoded JSON document.

     Raises:
         OSError: If the file cannot be opened.
         json.JSONDecodeError: If the file does not contain valid JSON.
     """
     # JSON is UTF-8 by specification; do not depend on the locale default.
     with open(path, encoding="utf-8") as f:
         return json.load(f)
 def build_dashboard():
+    results_path, report_path = ensure_data_exists()
     if not results_path.exists() or not report_path.exists():
+        with gr.Blocks(title="Agent Cost Optimizer Dashboard") as demo:
+            gr.Markdown("# Agent Cost Optimizer Dashboard")
+            gr.Markdown("## Benchmark data not available")
+            gr.Markdown("Run `python standalone_eval_v2.py --tasks 2000 --output eval_results_v2` to generate data.")
+        return demo
     results = load_results(str(results_path))
     report_text = parse_report(str(report_path))
         with gr.Row():
             with gr.Column():
                 gr.Markdown("## Ablation Impact")
+                gr.Markdown("Cost impact when removing each module (vs full_optimizer)")
                 full_cost = results.get("full_optimizer", {}).get("total_cost", 0)
                 ablation_data = []