"""CLI for Agent Cost Optimizer."""
import argparse
import json
import sys
from pathlib import Path

from aco.optimizer import AgentCostOptimizer
from aco.config import ACOConfig
from aco.benchmarks.benchmark_suite import BenchmarkSuite
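
# Illustrative invocations, derived from the subcommands defined below. The module
# can always be run as `python -m aco.cli`; whether a console-script entry point
# also exists is not determined by this file.
#
#   python -m aco.cli optimize --request "Summarize the repo" --config config.yaml
#   python -m aco.cli benchmark --tasks 1000 --ablations --output benchmark_results.json
#   python -m aco.cli report --input benchmark_results.json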


def main():
    """Parse command-line arguments and dispatch to the selected subcommand."""
    parser = argparse.ArgumentParser(description="Agent Cost Optimizer")
    subparsers = parser.add_subparsers(dest="command", help="Command to run")

    # Optimize command
    opt_parser = subparsers.add_parser("optimize", help="Optimize an agent request")
    opt_parser.add_argument("--config", "-c", default="config.yaml", help="Config file path")
    opt_parser.add_argument("--request", "-r", required=True, help="User request text")
    opt_parser.add_argument("--output", "-o", default="-", help="Output file (default: stdout)")

    # Benchmark command
    bench_parser = subparsers.add_parser("benchmark", help="Run benchmark suite")
    bench_parser.add_argument("--config", "-c", default="config.yaml", help="Config file path")
    bench_parser.add_argument("--tasks", "-n", type=int, default=1000, help="Number of tasks")
    bench_parser.add_argument("--output", "-o", default="benchmark_results.json", help="Output path")
    bench_parser.add_argument("--ablations", action="store_true", help="Run ablation study")

    # Report command
    report_parser = subparsers.add_parser("report", help="Generate report from benchmark results")
    report_parser.add_argument("--input", "-i", required=True, help="Benchmark results JSON")
    report_parser.add_argument("--output", "-o", default="-", help="Output file")

    args = parser.parse_args()

    if args.command == "optimize":
        _cmd_optimize(args)
    elif args.command == "benchmark":
        _cmd_benchmark(args)
    elif args.command == "report":
        _cmd_report(args)
    else:
        parser.print_help()
        sys.exit(1)


def _cmd_optimize(args):
    """Run the optimizer on a single request and emit a JSON summary."""
    config = ACOConfig.from_yaml(args.config) if Path(args.config).exists() else ACOConfig()
    optimizer = AgentCostOptimizer(config)
    result = optimizer.optimize(args.request)

    output = {
        "trace_id": result.trace_id,
        "model": result.routing_decision.model_id,
        "tier": result.routing_decision.tier,
        "estimated_cost": result.estimated_cost,
        "estimated_latency_ms": result.estimated_latency_ms,
        "confidence": result.confidence,
        "reasoning": result.reasoning,
        "tool_decisions": [
            {"tool": d.tool_name, "decision": d.decision.value, "cost": d.estimated_cost}
            for d in result.tool_decisions
        ],
        "verifier": result.verifier_decision.decision.value if result.verifier_decision else None,
        "doom_score": result.doom_assessment.confidence if result.doom_assessment else None,
        "meta_tool_match": result.meta_tool_match is not None,
    }

    json_str = json.dumps(output, indent=2)
    if args.output == "-":
        print(json_str)
    else:
        with open(args.output, "w") as f:
            f.write(json_str)


def _cmd_benchmark(args):
    """Generate synthetic traces, run baselines (and optional ablations), and export results."""
    config = ACOConfig.from_yaml(args.config) if Path(args.config).exists() else ACOConfig()
    suite = BenchmarkSuite(config)

    print(f"Generating {args.tasks} synthetic traces...")
    traces = suite.generate_benchmark_data(args.tasks)

    print("Running baselines...")
    results = suite.run_all_baselines(traces)

    if args.ablations:
        print("Running ablations...")
        ablation_results = suite.run_ablations(traces)
        results.update(ablation_results)

    suite.export(results, args.output)

    # Print report
    report = suite.report(results)
    print(report)
    print(f"\nResults saved to {args.output}")


def _cmd_report(args):
    """Regenerate a human-readable report from previously exported benchmark results."""
    with open(args.input, "r") as f:
        data = json.load(f)

    # Reconstruct BenchmarkResult objects for reporting
    from aco.benchmarks.benchmark_suite import BenchmarkResult

    results = {}
    for name, d in data.items():
        results[name] = BenchmarkResult(**d)

    suite = BenchmarkSuite()
    report = suite.report(results)
    if args.output == "-":
        print(report)
    else:
        with open(args.output, "w") as f:
            f.write(report)


if __name__ == "__main__":
    main()