File size: 2,717 Bytes
a008fa8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
{
  "metrics": {
    "A": {
      "config": "A",
      "accuracy": 0.16,
      "avg_cost": 1.0,
      "safety": 0.975,
      "n": 200,
      "by_action": {
        "tool_call": 1.0,
        "retrieval": 0.037,
        "file_read": 0.0,
        "file_write": 0.0,
        "repair": 0.0,
        "verifier": 0.0,
        "ask_clarification": 0.0,
        "final_answer": 0.0,
        "BLOCKED": 0.0
      }
    },
    "B": {
      "config": "B",
      "accuracy": 0.19,
      "avg_cost": 0.15,
      "safety": 0.975,
      "n": 200,
      "by_action": {
        "tool_call": 0.871,
        "retrieval": 0.0,
        "file_read": 0.037,
        "file_write": 0.0,
        "repair": 0.0,
        "verifier": 0.0,
        "ask_clarification": 0.5,
        "final_answer": 0.0,
        "BLOCKED": 0.0
      }
    },
    "C": {
      "config": "C",
      "accuracy": 0.19,
      "avg_cost": 0.285,
      "safety": 0.975,
      "n": 200,
      "by_action": {
        "tool_call": 0.871,
        "retrieval": 0.0,
        "file_read": 0.037,
        "file_write": 0.0,
        "repair": 0.0,
        "verifier": 0.0,
        "ask_clarification": 0.5,
        "final_answer": 0.0,
        "BLOCKED": 0.0
      },
      "accept_rate": 0.965
    },
    "D": {
      "config": "D",
      "accuracy": 0.19,
      "avg_cost": 0.25,
      "safety": 0.975,
      "n": 200,
      "by_action": {
        "tool_call": 0.871,
        "retrieval": 0.0,
        "file_read": 0.037,
        "file_write": 0.0,
        "repair": 0.0,
        "verifier": 0.0,
        "ask_clarification": 0.5,
        "final_answer": 0.0,
        "BLOCKED": 0.0
      },
      "accept_rate": 0.0,
      "mean_score": -2.638,
      "min_score": -4.656,
      "max_score": -0.875
    },
    "E": {
      "config": "E",
      "accuracy": 0.17,
      "avg_cost": 0.75,
      "safety": 0.975,
      "n": 200,
      "by_action": {
        "tool_call": 0.806,
        "retrieval": 0.0,
        "file_read": 0.037,
        "file_write": 0.0,
        "repair": 0.0,
        "verifier": 0.0,
        "ask_clarification": 0.4,
        "final_answer": 0.0,
        "BLOCKED": 0.0
      },
      "accept_rate": 1.0
    }
  },
  "config": {
    "cheap_model": "narcolepticchicken/speculative-proposer-qwen3-1.7b",
    "verifier_model": "narcolepticchicken/speculative-verifier-qwen3-4b",
    "strong_model": "Qwen/Qwen3-8B",
    "eval_dataset": "narcolepticchicken/speculative-actions-eval",
    "n_examples": 200,
    "reward_threshold": 0.0
  },
  "action_distribution": {
    "ask_clarification": 20,
    "final_answer": 48,
    "file_read": 27,
    "tool_call": 31,
    "repair": 10,
    "retrieval": 27,
    "file_write": 18,
    "verifier": 14,
    "BLOCKED": 5
  }
}