agent-cost-optimizer / dashboard.py
narcolepticchicken's picture
Upload dashboard.py
11d5cdf verified
"""Gradio Dashboard for Agent Cost Optimizer.
Visualizes:
- Cost-quality frontier (tables: success rate vs. average cost per success)
- Baseline comparison table
- Per-scenario breakdown
- Module ablation impact
"""
import json
import sys
from pathlib import Path
from typing import Dict, List, Any
import gradio as gr
def load_results(path: str) -> Dict[str, Any]:
    """Load the baseline results JSON file at *path*.

    The file is decoded explicitly as UTF-8 rather than with the platform's
    locale encoding, so the same results file parses identically on every
    machine.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)
def parse_report(report_path: str) -> str:
    """Return the full text of the report file at *report_path*.

    No parsing is performed; the file content is returned verbatim. The file
    is decoded explicitly as UTF-8 rather than with the platform's locale
    encoding, so non-ASCII characters in the report are read consistently.

    Args:
        report_path: Location of the text report to read.

    Returns:
        The entire file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(report_path, encoding="utf-8") as f:
        return f.read()
def create_frontier_plot(results: Dict[str, Any]):
    """Compute cost-quality points and their Pareto frontier.

    For every baseline in *results* a point is built with:

    - ``baseline``: the baseline name (the dict key),
    - ``success_rate``: ``(num_success + num_partial) / num_tasks``, where a
      missing or non-positive ``num_tasks`` is treated as 1,
    - ``cost_per_success``: the baseline's ``avg_cost_success`` (0 if absent).

    Args:
        results: Mapping of baseline name to its metrics dict.

    Returns:
        A ``(points, frontier)`` pair of lists of point dicts. ``points`` holds
        every baseline, sorted by descending success rate and then ascending
        cost. ``frontier`` is the subset of ``points`` (same order) that no
        other baseline dominates.
    """
    points = []
    for name, data in results.items():
        solved = data.get("num_success", 0) + data.get("num_partial", 0)
        points.append({
            "baseline": name,
            "success_rate": solved / max(data.get("num_tasks", 1), 1),
            "cost_per_success": data.get("avg_cost_success", 0),
        })

    # Best success rate first; among equal rates the cheapest comes first.
    points.sort(key=lambda p: (-p["success_rate"], p["cost_per_success"]))

    frontier = []
    min_cost = float("inf")
    for p in points:
        cost = p["cost_per_success"]
        # Every earlier point has a success rate at least as high, so a point
        # is non-dominated only if it is strictly cheaper than all of them.
        # An exact tie (same rate and same cost as the last frontier point) is
        # kept too, since neither point dominates the other.
        is_tie = (
            bool(frontier)
            and cost == min_cost
            and p["success_rate"] == frontier[-1]["success_rate"]
        )
        if not frontier or cost < min_cost or is_tie:
            frontier.append(p)
            min_cost = cost
    return points, frontier
def _success_rate(data: Dict[str, Any]) -> float:
    """Return (successes + partial successes) / tasks, with tasks floored at 1."""
    solved = data.get("num_success", 0) + data.get("num_partial", 0)
    return solved / max(data.get("num_tasks", 1), 1)


def _frontier_rows(points: List[Dict[str, Any]]) -> List[List[str]]:
    """Format cost-quality points as [baseline, success rate, cost] table rows."""
    return [
        [p["baseline"], f"{p['success_rate']:.1%}", f"${p['cost_per_success']:.4f}"]
        for p in points
    ]


def _comparison_rows(results: Dict[str, Any]) -> List[List[str]]:
    """Format one baseline-comparison table row per entry in *results*."""
    return [
        [
            name,
            f"{_success_rate(data):.1%}",
            f"${data.get('avg_cost_success', 0):.4f}",
            f"${data.get('total_cost', 0):.2f}",
            f"{data.get('cost_reduction_vs_frontier', 0):.1%}",
            f"{data.get('false_done_rate', 0):.1%}",
            f"{data.get('unsafe_cheap_miss_rate', 0):.1%}",
            f"{data.get('regression_rate', 0):.1%}",
        ]
        for name, data in results.items()
    ]


def _scenario_rows(scenario_stats: Dict[str, Any]) -> List[List[str]]:
    """Format per-scenario stats as [scenario, count, success rate, cost] rows."""
    rows = []
    for scenario, stats in scenario_stats.items():
        count = stats.get("count", 0)
        success = stats.get("success", 0)
        cost = stats.get("cost", 0)
        rows.append([
            scenario,
            str(count),
            f"{success / max(count, 1):.1%}",
            f"${cost:.2f}",
        ])
    return rows


def _ablation_rows(results: Dict[str, Any]) -> List[List[str]]:
    """Format the cost delta of each ``no_*`` baseline vs ``full_optimizer``.

    Rows are ordered by descending cost increase. Sorting happens on the
    numeric delta before formatting, so differences smaller than the displayed
    two decimals still order correctly.
    """
    full_cost = results.get("full_optimizer", {}).get("total_cost", 0)
    deltas = [
        (name, data.get("total_cost", 0) - full_cost)
        for name, data in results.items()
        if name.startswith("no_")
    ]
    deltas.sort(key=lambda item: item[1], reverse=True)
    # Floor the denominator so a missing or zero full_optimizer cost cannot
    # cause a division by zero.
    denominator = max(full_cost, 0.001)
    return [
        [name, f"${delta:.2f}", f"{(delta / denominator) * 100:.1f}%"]
        for name, delta in deltas
    ]


def build_dashboard(results_path: str, report_path: str):
    """Build the Gradio dashboard for the benchmark results.

    The layout consists of: the cost-quality tables (all baselines and the
    Pareto frontier) next to a list of frontier baseline names, a baseline
    comparison table, a per-scenario table for the ``full_optimizer`` baseline
    (only when it has ``per_scenario_stats``), an ablation table for baselines
    whose name starts with ``no_`` (only when any exist), and the raw report
    text.

    Args:
        results_path: Path to the baseline results JSON file.
        report_path: Path to the plain-text benchmark report.

    Returns:
        The constructed ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    results = load_results(results_path)
    report_text = parse_report(report_path)
    points, frontier = create_frontier_plot(results)

    frontier_headers = ["Baseline", "Success Rate", "Cost per Success"]

    with gr.Blocks(title="Agent Cost Optimizer Dashboard") as demo:
        gr.Markdown("# Agent Cost Optimizer - Cost-Quality Dashboard")
        gr.Markdown("Visualize cost-quality tradeoffs across routing strategies and ablations.")

        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown("## Cost-Quality Frontier")
                gr.Markdown("**X-axis**: Average cost per successful task | **Y-axis**: Success rate")
                # The frontier data is rendered as tables using native Gradio components.
                gr.Dataframe(
                    headers=frontier_headers,
                    value=_frontier_rows(points),
                    label="All Baselines",
                )
                gr.Dataframe(
                    headers=frontier_headers,
                    value=_frontier_rows(frontier),
                    label="Pareto Frontier",
                )
            with gr.Column(scale=1):
                gr.Markdown("## Pareto Frontier Baselines")
                for p in frontier:
                    gr.Markdown(f"- **{p['baseline']}**")

        with gr.Row():
            with gr.Column():
                gr.Markdown("## Baseline Comparison")
                gr.Dataframe(
                    headers=["Baseline", "Success", "Cost/Success", "Total Cost", "Cost Reduction", "False-DONE", "Cheap Miss", "Regression"],
                    value=_comparison_rows(results),
                )

        with gr.Row():
            with gr.Column():
                gr.Markdown("## Per-Scenario Breakdown (Full Optimizer)")
                scenario_stats = results.get("full_optimizer", {}).get("per_scenario_stats", {})
                if scenario_stats:
                    gr.Dataframe(
                        headers=["Scenario", "Count", "Success Rate", "Total Cost"],
                        value=_scenario_rows(scenario_stats),
                    )

        with gr.Row():
            with gr.Column():
                gr.Markdown("## Ablation Impact")
                gr.Markdown("Cost increase when removing each module (vs full_optimizer)")
                ablation_data = _ablation_rows(results)
                if ablation_data:
                    gr.Dataframe(
                        headers=["Module Removed", "Cost Increase", "% Increase"],
                        value=ablation_data,
                    )

        with gr.Row():
            with gr.Column():
                gr.Markdown("## Full Report")
                gr.Textbox(report_text, lines=40, label="Benchmark Report")

    return demo
def main():
    """Parse command-line options, build the dashboard, and serve it.

    Options:
        --results: Path to the baseline results JSON file.
        --report:  Path to the plain-text report file.
        --port:    Port to serve on (default 7860).
        --host:    Address to bind to. Defaults to "0.0.0.0", which listens on
                   all interfaces; pass "127.0.0.1" to restrict access to the
                   local machine.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--results", default="./eval_results_v2/baseline_results.json",
                        help="Path to baseline results JSON")
    parser.add_argument("--report", default="./eval_results_v2/report.txt",
                        help="Path to report text file")
    parser.add_argument("--port", type=int, default=7860,
                        help="Port to serve the dashboard on")
    parser.add_argument("--host", default="0.0.0.0",
                        help="Address to bind the server to")
    args = parser.parse_args()

    demo = build_dashboard(args.results, args.report)
    demo.launch(server_name=args.host, server_port=args.port)


# Start the dashboard only when this file is executed as a script.
if __name__ == "__main__":
    main()