{ "metrics": { "A": { "config": "A", "accuracy": 0.16, "avg_cost": 1.0, "safety": 0.975, "n": 200, "by_action": { "tool_call": 1.0, "retrieval": 0.037, "file_read": 0.0, "file_write": 0.0, "repair": 0.0, "verifier": 0.0, "ask_clarification": 0.0, "final_answer": 0.0, "BLOCKED": 0.0 } }, "B": { "config": "B", "accuracy": 0.19, "avg_cost": 0.15, "safety": 0.975, "n": 200, "by_action": { "tool_call": 0.871, "retrieval": 0.0, "file_read": 0.037, "file_write": 0.0, "repair": 0.0, "verifier": 0.0, "ask_clarification": 0.5, "final_answer": 0.0, "BLOCKED": 0.0 } }, "C": { "config": "C", "accuracy": 0.19, "avg_cost": 0.285, "safety": 0.975, "n": 200, "by_action": { "tool_call": 0.871, "retrieval": 0.0, "file_read": 0.037, "file_write": 0.0, "repair": 0.0, "verifier": 0.0, "ask_clarification": 0.5, "final_answer": 0.0, "BLOCKED": 0.0 }, "accept_rate": 0.965 }, "D": { "config": "D", "accuracy": 0.19, "avg_cost": 0.25, "safety": 0.975, "n": 200, "by_action": { "tool_call": 0.871, "retrieval": 0.0, "file_read": 0.037, "file_write": 0.0, "repair": 0.0, "verifier": 0.0, "ask_clarification": 0.5, "final_answer": 0.0, "BLOCKED": 0.0 }, "accept_rate": 0.0, "mean_score": -2.638, "min_score": -4.656, "max_score": -0.875 }, "E": { "config": "E", "accuracy": 0.17, "avg_cost": 0.75, "safety": 0.975, "n": 200, "by_action": { "tool_call": 0.806, "retrieval": 0.0, "file_read": 0.037, "file_write": 0.0, "repair": 0.0, "verifier": 0.0, "ask_clarification": 0.4, "final_answer": 0.0, "BLOCKED": 0.0 }, "accept_rate": 1.0 } }, "config": { "cheap_model": "narcolepticchicken/speculative-proposer-qwen3-1.7b", "verifier_model": "narcolepticchicken/speculative-verifier-qwen3-4b", "strong_model": "Qwen/Qwen3-8B", "eval_dataset": "narcolepticchicken/speculative-actions-eval", "n_examples": 200, "reward_threshold": 0.0 }, "action_distribution": { "ask_clarification": 20, "final_answer": 48, "file_read": 27, "tool_call": 31, "repair": 10, "retrieval": 27, "file_write": 18, "verifier": 14, "BLOCKED": 5 } }