| """CLI for Agent Cost Optimizer.""" |
|
|
| import argparse |
| import json |
| import sys |
| from pathlib import Path |
|
|
| from aco.optimizer import AgentCostOptimizer |
| from aco.config import ACOConfig |
| from aco.benchmarks.benchmark_suite import BenchmarkSuite |
|
|
|
|
def main(argv=None):
    """Parse command-line arguments and dispatch to the chosen subcommand.

    Three subcommands are registered: ``optimize``, ``benchmark`` and
    ``report``. When no subcommand is supplied the help text is printed and
    the process exits with status 1.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, so existing
            callers that invoke ``main()`` with no arguments are unaffected.
            Passing a list allows programmatic invocation.

    Raises:
        SystemExit: With status 1 when no subcommand is given. argparse
            itself also exits for ``--help`` (status 0) and for invalid or
            missing arguments (status 2).
    """
    parser = argparse.ArgumentParser(description="Agent Cost Optimizer")
    subparsers = parser.add_subparsers(dest="command", help="Command to run")

    # optimize: route a single request and emit the decision as JSON.
    opt_parser = subparsers.add_parser("optimize", help="Optimize an agent request")
    opt_parser.add_argument("--config", "-c", default="config.yaml", help="Config file path")
    opt_parser.add_argument("--request", "-r", required=True, help="User request text")
    opt_parser.add_argument("--output", "-o", default="-", help="Output file (default: stdout)")

    # benchmark: generate synthetic traces and run the baselines on them.
    bench_parser = subparsers.add_parser("benchmark", help="Run benchmark suite")
    bench_parser.add_argument("--config", "-c", default="config.yaml", help="Config file path")
    bench_parser.add_argument("--tasks", "-n", type=int, default=1000, help="Number of tasks")
    bench_parser.add_argument("--output", "-o", default="benchmark_results.json", help="Output path")
    bench_parser.add_argument("--ablations", action="store_true", help="Run ablation study")

    # report: render a report from a previously saved results file.
    report_parser = subparsers.add_parser("report", help="Generate report from benchmark results")
    report_parser.add_argument("--input", "-i", required=True, help="Benchmark results JSON")
    report_parser.add_argument("--output", "-o", default="-", help="Output file")

    args = parser.parse_args(argv)

    # The subparser is optional, so ``args.command`` is None when the user
    # gave no subcommand; that case falls through to the help/exit branch.
    if args.command == "optimize":
        _cmd_optimize(args)
    elif args.command == "benchmark":
        _cmd_benchmark(args)
    elif args.command == "report":
        _cmd_report(args)
    else:
        parser.print_help()
        sys.exit(1)
|
|
|
|
def _cmd_optimize(args):
    """Handle the ``optimize`` subcommand.

    Builds the configuration from ``args.config`` when that path exists and
    falls back to a default ``ACOConfig()`` otherwise, runs the optimizer on
    ``args.request``, and emits a JSON summary of the result.

    Args:
        args: Parsed argparse namespace providing ``config``, ``request``
            and ``output``. An ``output`` of ``"-"`` means stdout; any other
            value is treated as a file path to (over)write.
    """
    if Path(args.config).exists():
        config = ACOConfig.from_yaml(args.config)
    else:
        # A missing config file is not an error: defaults are used instead.
        config = ACOConfig()

    result = AgentCostOptimizer(config).optimize(args.request)

    # Optional sub-results are reported as None when they are falsy/absent.
    verifier_value = None
    if result.verifier_decision:
        verifier_value = result.verifier_decision.decision.value

    doom_value = None
    if result.doom_assessment:
        doom_value = result.doom_assessment.confidence

    tool_rows = [
        {
            "tool": item.tool_name,
            "decision": item.decision.value,
            "cost": item.estimated_cost,
        }
        for item in result.tool_decisions
    ]

    summary = {
        "trace_id": result.trace_id,
        "model": result.routing_decision.model_id,
        "tier": result.routing_decision.tier,
        "estimated_cost": result.estimated_cost,
        "estimated_latency_ms": result.estimated_latency_ms,
        "confidence": result.confidence,
        "reasoning": result.reasoning,
        "tool_decisions": tool_rows,
        "verifier": verifier_value,
        "doom_score": doom_value,
        "meta_tool_match": result.meta_tool_match is not None,
    }
    payload = json.dumps(summary, indent=2)

    if args.output == "-":
        print(payload)
        return

    with open(args.output, "w") as out_file:
        out_file.write(payload)
|
|
|
|
def _cmd_benchmark(args):
    """Handle the ``benchmark`` subcommand.

    Generates ``args.tasks`` synthetic traces, runs all baselines on them
    (plus the ablations when ``args.ablations`` is set), exports the combined
    results to ``args.output`` and prints the suite's report to stdout.

    Args:
        args: Parsed argparse namespace providing ``config``, ``tasks``,
            ``output`` and ``ablations``.
    """
    if Path(args.config).exists():
        config = ACOConfig.from_yaml(args.config)
    else:
        # A missing config file is not an error: defaults are used instead.
        config = ACOConfig()
    suite = BenchmarkSuite(config)

    print(f"Generating {args.tasks} synthetic traces...")
    traces = suite.generate_benchmark_data(args.tasks)

    print("Running baselines...")
    results = suite.run_all_baselines(traces)

    if args.ablations:
        print("Running ablations...")
        # Ablation entries are merged into the same mapping as the baselines.
        results.update(suite.run_ablations(traces))

    # Persist first, then show the human-readable report.
    suite.export(results, args.output)
    print(suite.report(results))

    print(f"\nResults saved to {args.output}")
|
|
|
|
def _cmd_report(args):
    """Handle the ``report`` subcommand.

    Reads the JSON file at ``args.input``, rebuilds one ``BenchmarkResult``
    per top-level entry, and renders them with ``BenchmarkSuite.report``.

    Args:
        args: Parsed argparse namespace providing ``input`` and ``output``.
            An ``output`` of ``"-"`` means stdout; any other value is treated
            as a file path to (over)write.
    """
    with open(args.input, "r") as in_file:
        raw = json.load(in_file)

    # Imported here rather than at module level, as in the original.
    from aco.benchmarks.benchmark_suite import BenchmarkResult

    # Each value is expanded as keyword arguments to BenchmarkResult.
    results = {label: BenchmarkResult(**fields) for label, fields in raw.items()}

    report = BenchmarkSuite().report(results)

    if args.output == "-":
        print(report)
        return

    with open(args.output, "w") as out_file:
        out_file.write(report)
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|