"""CLI for Agent Cost Optimizer."""

import argparse
import json
import sys
from pathlib import Path

from aco.optimizer import AgentCostOptimizer
from aco.config import ACOConfig
from aco.benchmarks.benchmark_suite import BenchmarkSuite


def main():
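    """Parse command-line arguments and dispatch to the selected subcommand."""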
    parser = argparse.ArgumentParser(description="Agent Cost Optimizer")
    subparsers = parser.add_subparsers(dest="command", help="Command to run")
    
    # Optimize command
    opt_parser = subparsers.add_parser("optimize", help="Optimize an agent request")
    opt_parser.add_argument("--config", "-c", default="config.yaml", help="Config file path")
    opt_parser.add_argument("--request", "-r", required=True, help="User request text")
    opt_parser.add_argument("--output", "-o", default="-", help="Output file (default: stdout)")
    
    # Benchmark command
    bench_parser = subparsers.add_parser("benchmark", help="Run benchmark suite")
    bench_parser.add_argument("--config", "-c", default="config.yaml", help="Config file path")
    bench_parser.add_argument("--tasks", "-n", type=int, default=1000, help="Number of tasks")
    bench_parser.add_argument("--output", "-o", default="benchmark_results.json", help="Output path")
    bench_parser.add_argument("--ablations", action="store_true", help="Run ablation study")
    
    # Report command
    report_parser = subparsers.add_parser("report", help="Generate report from benchmark results")
    report_parser.add_argument("--input", "-i", required=True, help="Benchmark results JSON")
    report_parser.add_argument("--output", "-o", default="-", help="Output file")
    
    args = parser.parse_args()
    
    if args.command == "optimize":
        _cmd_optimize(args)
    elif args.command == "benchmark":
        _cmd_benchmark(args)
    elif args.command == "report":
        _cmd_report(args)
    else:
        parser.print_help()
        sys.exit(1)


def _cmd_optimize(args):
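    """Run the optimizer on a single request and write a JSON summary to stdout or a file."""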
    config = ACOConfig.from_yaml(args.config) if Path(args.config).exists() else ACOConfig()
    optimizer = AgentCostOptimizer(config)
    result = optimizer.optimize(args.request)
    
    output = {
        "trace_id": result.trace_id,
        "model": result.routing_decision.model_id,
        "tier": result.routing_decision.tier,
        "estimated_cost": result.estimated_cost,
        "estimated_latency_ms": result.estimated_latency_ms,
        "confidence": result.confidence,
        "reasoning": result.reasoning,
        "tool_decisions": [
            {"tool": d.tool_name, "decision": d.decision.value, "cost": d.estimated_cost}
            for d in result.tool_decisions
        ],
        "verifier": result.verifier_decision.decision.value if result.verifier_decision else None,
        "doom_score": result.doom_assessment.confidence if result.doom_assessment else None,
        "meta_tool_match": result.meta_tool_match is not None,
    }
    
    json_str = json.dumps(output, indent=2)
    if args.output == "-":
        print(json_str)
    else:
        with open(args.output, "w") as f:
            f.write(json_str)


def _cmd_benchmark(args):
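    """Generate synthetic traces, run the baselines (plus optional ablations), and export the results."""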
    config = ACOConfig.from_yaml(args.config) if Path(args.config).exists() else ACOConfig()
    suite = BenchmarkSuite(config)
    
    print(f"Generating {args.tasks} synthetic traces...")
    traces = suite.generate_benchmark_data(args.tasks)
    
    print("Running baselines...")
    results = suite.run_all_baselines(traces)
    
    if args.ablations:
        print("Running ablations...")
        ablation_results = suite.run_ablations(traces)
        results.update(ablation_results)
    
    suite.export(results, args.output)
    
    # Print report
    report = suite.report(results)
    print(report)
    
    print(f"\nResults saved to {args.output}")


def _cmd_report(args):
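    """Rebuild BenchmarkResult objects from a saved results JSON and render the report."""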
    with open(args.input, "r") as f:
        data = json.load(f)
    
    # Reconstruct BenchmarkResult objects for reporting
    from aco.benchmarks.benchmark_suite import BenchmarkResult
    results = {}
    for name, d in data.items():
        results[name] = BenchmarkResult(**d)
    
    suite = BenchmarkSuite()
    report = suite.report(results)
    
    if args.output == "-":
        print(report)
    else:
        with open(args.output, "w") as f:
            f.write(report)


if __name__ == "__main__":
    main()