Rohan03 committed on
Commit
ec1ea80
·
verified ·
1 Parent(s): d9f6778

Track 2: validation suite with improvement curves, cold/warm, transfer, adversarial

Browse files
benchmarks/results/track2_results.json ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "curves": {
3
+ "fibonacci": [
4
+ {
5
+ "run": 1,
6
+ "steps": 2,
7
+ "phi": 5.0,
8
+ "pass_rate": 0.5,
9
+ "all_passed": false,
10
+ "heuristics": 3,
11
+ "time": 0.01
12
+ },
13
+ {
14
+ "run": 2,
15
+ "steps": 1,
16
+ "phi": 10.0,
17
+ "pass_rate": 0,
18
+ "all_passed": false,
19
+ "heuristics": 9,
20
+ "time": 0.0
21
+ },
22
+ {
23
+ "run": 3,
24
+ "steps": 1,
25
+ "phi": 10.0,
26
+ "pass_rate": 0,
27
+ "all_passed": false,
28
+ "heuristics": 18,
29
+ "time": 0.0
30
+ },
31
+ {
32
+ "run": 4,
33
+ "steps": 1,
34
+ "phi": 10.0,
35
+ "pass_rate": 0,
36
+ "all_passed": false,
37
+ "heuristics": 30,
38
+ "time": 0.0
39
+ },
40
+ {
41
+ "run": 5,
42
+ "steps": 1,
43
+ "phi": 10.0,
44
+ "pass_rate": 0,
45
+ "all_passed": false,
46
+ "heuristics": 45,
47
+ "time": 0.0
48
+ }
49
+ ],
50
+ "factorial": [
51
+ {
52
+ "run": 1,
53
+ "steps": 2,
54
+ "phi": 1.0,
55
+ "pass_rate": 0.0,
56
+ "all_passed": false,
57
+ "heuristics": 3,
58
+ "time": 0.0
59
+ },
60
+ {
61
+ "run": 2,
62
+ "steps": 1,
63
+ "phi": 10.0,
64
+ "pass_rate": 0,
65
+ "all_passed": false,
66
+ "heuristics": 9,
67
+ "time": 0.0
68
+ },
69
+ {
70
+ "run": 3,
71
+ "steps": 1,
72
+ "phi": 10.0,
73
+ "pass_rate": 0,
74
+ "all_passed": false,
75
+ "heuristics": 18,
76
+ "time": 0.0
77
+ },
78
+ {
79
+ "run": 4,
80
+ "steps": 1,
81
+ "phi": 10.0,
82
+ "pass_rate": 0,
83
+ "all_passed": false,
84
+ "heuristics": 30,
85
+ "time": 0.0
86
+ },
87
+ {
88
+ "run": 5,
89
+ "steps": 1,
90
+ "phi": 10.0,
91
+ "pass_rate": 0,
92
+ "all_passed": false,
93
+ "heuristics": 45,
94
+ "time": 0.0
95
+ }
96
+ ],
97
+ "palindrome": [
98
+ {
99
+ "run": 1,
100
+ "steps": 2,
101
+ "phi": 7.0,
102
+ "pass_rate": 0.75,
103
+ "all_passed": false,
104
+ "heuristics": 3,
105
+ "time": 0.0
106
+ },
107
+ {
108
+ "run": 2,
109
+ "steps": 1,
110
+ "phi": 10.0,
111
+ "pass_rate": 0,
112
+ "all_passed": false,
113
+ "heuristics": 9,
114
+ "time": 0.0
115
+ },
116
+ {
117
+ "run": 3,
118
+ "steps": 1,
119
+ "phi": 10.0,
120
+ "pass_rate": 0,
121
+ "all_passed": false,
122
+ "heuristics": 18,
123
+ "time": 0.0
124
+ },
125
+ {
126
+ "run": 4,
127
+ "steps": 1,
128
+ "phi": 10.0,
129
+ "pass_rate": 0,
130
+ "all_passed": false,
131
+ "heuristics": 30,
132
+ "time": 0.0
133
+ },
134
+ {
135
+ "run": 5,
136
+ "steps": 1,
137
+ "phi": 10.0,
138
+ "pass_rate": 0,
139
+ "all_passed": false,
140
+ "heuristics": 45,
141
+ "time": 0.0
142
+ }
143
+ ],
144
+ "fizzbuzz": [
145
+ {
146
+ "run": 1,
147
+ "steps": 2,
148
+ "phi": 7.0,
149
+ "pass_rate": 0.75,
150
+ "all_passed": false,
151
+ "heuristics": 3,
152
+ "time": 0.0
153
+ },
154
+ {
155
+ "run": 2,
156
+ "steps": 1,
157
+ "phi": 10.0,
158
+ "pass_rate": 0,
159
+ "all_passed": false,
160
+ "heuristics": 9,
161
+ "time": 0.0
162
+ },
163
+ {
164
+ "run": 3,
165
+ "steps": 1,
166
+ "phi": 10.0,
167
+ "pass_rate": 0,
168
+ "all_passed": false,
169
+ "heuristics": 18,
170
+ "time": 0.0
171
+ },
172
+ {
173
+ "run": 4,
174
+ "steps": 1,
175
+ "phi": 10.0,
176
+ "pass_rate": 0,
177
+ "all_passed": false,
178
+ "heuristics": 30,
179
+ "time": 0.0
180
+ },
181
+ {
182
+ "run": 5,
183
+ "steps": 1,
184
+ "phi": 10.0,
185
+ "pass_rate": 0,
186
+ "all_passed": false,
187
+ "heuristics": 45,
188
+ "time": 0.0
189
+ }
190
+ ]
191
+ },
192
+ "cold_warm": [
193
+ {
194
+ "task": "fibonacci",
195
+ "cold_phi": 5.0,
196
+ "warm_phi": 10.0,
197
+ "delta": 5.0,
198
+ "improved": true
199
+ },
200
+ {
201
+ "task": "factorial",
202
+ "cold_phi": 1.0,
203
+ "warm_phi": 10.0,
204
+ "delta": 9.0,
205
+ "improved": true
206
+ }
207
+ ],
208
+ "transfer": {
209
+ "train": [
210
+ "fibonacci",
211
+ "factorial"
212
+ ],
213
+ "test": [
214
+ "palindrome",
215
+ "fizzbuzz"
216
+ ],
217
+ "heuristics": 30,
218
+ "results": {
219
+ "palindrome": {
220
+ "phi": 10.0,
221
+ "passed": false
222
+ },
223
+ "fizzbuzz": {
224
+ "phi": 10.0,
225
+ "passed": false
226
+ }
227
+ }
228
+ },
229
+ "adversarial": {
230
+ "total": 8,
231
+ "correct": 8,
232
+ "accuracy": 1.0
233
+ }
234
+ }