kmchiti commited on
Commit
57a8c18
·
verified ·
1 Parent(s): 03b1c59

Upload results/eval_difficulty/summary.json

Browse files
Files changed (1) hide show
  1. results/eval_difficulty/summary.json +645 -0
results/eval_difficulty/summary.json ADDED
@@ -0,0 +1,645 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "checkpoint": "checkpoint-18779",
4
+ "id_acc": 0.9630347222222222,
5
+ "ood_acc": 0.25220703125,
6
+ "total_acc": 0.588914884868421,
7
+ "id_avg_loss": 6.54602900314331,
8
+ "ood_avg_loss": 5.825457085227966,
9
+ "total_avg_loss": 6.166780625293129,
10
+ "id_avg_resp_len": 201.21272916666666,
11
+ "ood_avg_resp_len": 284.85050390625,
12
+ "count_id": 1152000,
13
+ "count_ood": 1280000,
14
+ "count_total": 2432000,
15
+ "template_metrics": {
16
+ "crazy_zootopia": {
17
+ "count": 809728,
18
+ "correct": 477128,
19
+ "answer_accuracy": 0.5892447834334492,
20
+ "avg_response_len": 235.475637498024,
21
+ "resp_tokens_sum": 190671217,
22
+ "pass_at_k": {
23
+ "pass@1": 0.5892447834334492,
24
+ "pass@2": 0.6242494417489666,
25
+ "pass@4": 0.6517067209785661,
26
+ "pass@8": 0.6752650274111465,
27
+ "pass@16": 0.6966098480320178,
28
+ "pass@32": 0.7161446042348242,
29
+ "pass@64": 0.7340453705919688,
30
+ "pass@128": 0.7510275055327221
31
+ },
32
+ "per_op_pass_at_k": {
33
+ "10": {
34
+ "pass@1": 0.8994976032448377,
35
+ "pass@2": 0.9366747090795067,
36
+ "pass@4": 0.954640098361598,
37
+ "pass@8": 0.9671935395338649,
38
+ "pass@16": 0.9780409476989385,
39
+ "pass@32": 0.9873672705205916,
40
+ "pass@64": 0.9931124421272916,
41
+ "pass@128": 0.9941002949852508
42
+ },
43
+ "2": {
44
+ "pass@1": 0.9997322819314641,
45
+ "pass@2": 0.9999915679348491,
46
+ "pass@4": 0.9999999897793149,
47
+ "pass@8": 1.0,
48
+ "pass@16": 1.0,
49
+ "pass@32": 1.0,
50
+ "pass@64": 1.0,
51
+ "pass@128": 1.0
52
+ },
53
+ "3": {
54
+ "pass@1": 0.9875525611620795,
55
+ "pass@2": 0.9937483445303281,
56
+ "pass@4": 0.9960841566249173,
57
+ "pass@8": 0.9973945614382809,
58
+ "pass@16": 0.9984052318072489,
59
+ "pass@32": 0.9992931754002767,
60
+ "pass@64": 0.9999118688143109,
61
+ "pass@128": 1.0
62
+ },
63
+ "4": {
64
+ "pass@1": 0.996337890625,
65
+ "pass@2": 0.9982602577509845,
66
+ "pass@4": 0.9992694307742784,
67
+ "pass@8": 0.9998455780548049,
68
+ "pass@16": 0.9999940908757206,
69
+ "pass@32": 0.9999999967265157,
70
+ "pass@64": 1.0,
71
+ "pass@128": 1.0
72
+ },
73
+ "5": {
74
+ "pass@1": 0.9481150793650793,
75
+ "pass@2": 0.9641533089613797,
76
+ "pass@4": 0.9757381440415184,
77
+ "pass@8": 0.9837977641761237,
78
+ "pass@16": 0.9891567790393069,
79
+ "pass@32": 0.9924331098404072,
80
+ "pass@64": 0.9948057827917357,
81
+ "pass@128": 0.9968253968253968
82
+ },
83
+ "6": {
84
+ "pass@1": 0.9665746631736527,
85
+ "pass@2": 0.9810815131547932,
86
+ "pass@4": 0.9889092237347574,
87
+ "pass@8": 0.992817054998713,
88
+ "pass@16": 0.9942118482404367,
89
+ "pass@32": 0.9947566055669427,
90
+ "pass@64": 0.9955089818359227,
91
+ "pass@128": 0.9970059880239521
92
+ },
93
+ "7": {
94
+ "pass@1": 0.9575397559171598,
95
+ "pass@2": 0.9706686361412663,
96
+ "pass@4": 0.9781720887884576,
97
+ "pass@8": 0.9836457175352227,
98
+ "pass@16": 0.9877211930993773,
99
+ "pass@32": 0.991030454111008,
100
+ "pass@64": 0.9934560055493304,
101
+ "pass@128": 0.9940828402366864
102
+ },
103
+ "8": {
104
+ "pass@1": 0.9534755608974359,
105
+ "pass@2": 0.9758759684786995,
106
+ "pass@4": 0.9847553208493165,
107
+ "pass@8": 0.9897007291187125,
108
+ "pass@16": 0.9935694000922215,
109
+ "pass@32": 0.9965151373391712,
110
+ "pass@64": 0.998300039303945,
111
+ "pass@128": 1.0
112
+ },
113
+ "9": {
114
+ "pass@1": 0.9408450704225352,
115
+ "pass@2": 0.9764160751913055,
116
+ "pass@4": 0.9897490211610868,
117
+ "pass@8": 0.9954928597779902,
118
+ "pass@16": 0.9986146363237696,
119
+ "pass@32": 0.9998472651017836,
120
+ "pass@64": 0.9999990904114461,
121
+ "pass@128": 1.0
122
+ },
123
+ "11": {
124
+ "pass@1": 0.7521689093484419,
125
+ "pass@2": 0.8267172826838576,
126
+ "pass@4": 0.8749963240253604,
127
+ "pass@8": 0.9072539747181511,
128
+ "pass@16": 0.9305891455160197,
129
+ "pass@32": 0.9482171914771189,
130
+ "pass@64": 0.9598296757598728,
131
+ "pass@128": 0.9660056657223796
132
+ },
133
+ "12": {
134
+ "pass@1": 0.4268626412429379,
135
+ "pass@2": 0.5226548250589441,
136
+ "pass@4": 0.5947451512946471,
137
+ "pass@8": 0.6499540700619184,
138
+ "pass@16": 0.697671407935311,
139
+ "pass@32": 0.7391764349175001,
140
+ "pass@64": 0.7728700105439386,
141
+ "pass@128": 0.8022598870056498
142
+ },
143
+ "13": {
144
+ "pass@1": 0.226048197492163,
145
+ "pass@2": 0.2745025516500875,
146
+ "pass@4": 0.32170087379359735,
147
+ "pass@8": 0.36726090847720233,
148
+ "pass@16": 0.4118837384913241,
149
+ "pass@32": 0.4577964268716136,
150
+ "pass@64": 0.5060004900301981,
151
+ "pass@128": 0.554858934169279
152
+ },
153
+ "14": {
154
+ "pass@1": 0.1970404984423676,
155
+ "pass@2": 0.2447659871955257,
156
+ "pass@4": 0.2905214605183697,
157
+ "pass@8": 0.33432116638118264,
158
+ "pass@16": 0.37689919457954874,
159
+ "pass@32": 0.41849874775311363,
160
+ "pass@64": 0.46201871752633594,
161
+ "pass@128": 0.5109034267912772
162
+ },
163
+ "15": {
164
+ "pass@1": 0.190774024566474,
165
+ "pass@2": 0.24494681910245306,
166
+ "pass@4": 0.29680882700356104,
167
+ "pass@8": 0.3440605324830605,
168
+ "pass@16": 0.38846742937996726,
169
+ "pass@32": 0.43008799604881653,
170
+ "pass@64": 0.4654618905817254,
171
+ "pass@128": 0.4913294797687861
172
+ },
173
+ "16": {
174
+ "pass@1": 0.1630796370967742,
175
+ "pass@2": 0.1964297053594107,
176
+ "pass@4": 0.23033826426938572,
177
+ "pass@8": 0.2660781043146322,
178
+ "pass@16": 0.3019958633560044,
179
+ "pass@32": 0.33569879011745574,
180
+ "pass@64": 0.3686862876847773,
181
+ "pass@128": 0.4064516129032258
182
+ },
183
+ "17": {
184
+ "pass@1": 0.1676300578034682,
185
+ "pass@2": 0.20699551397296437,
186
+ "pass@4": 0.2408987041359715,
187
+ "pass@8": 0.271017564290959,
188
+ "pass@16": 0.29876652985892266,
189
+ "pass@32": 0.3264747152113181,
190
+ "pass@64": 0.3549548508714773,
191
+ "pass@128": 0.38439306358381503
192
+ },
193
+ "18": {
194
+ "pass@1": 0.1616517857142857,
195
+ "pass@2": 0.19595648200224966,
196
+ "pass@4": 0.23106420322459695,
197
+ "pass@8": 0.26888622832851616,
198
+ "pass@16": 0.3071732341417159,
199
+ "pass@32": 0.3420263056744111,
200
+ "pass@64": 0.3714714335108666,
201
+ "pass@128": 0.39714285714285713
202
+ },
203
+ "19": {
204
+ "pass@1": 0.15040822072072071,
205
+ "pass@2": 0.1915057281691139,
206
+ "pass@4": 0.2273034600967671,
207
+ "pass@8": 0.2598221642368353,
208
+ "pass@16": 0.2905735616301428,
209
+ "pass@32": 0.32042698971733125,
210
+ "pass@64": 0.35124146512270843,
211
+ "pass@128": 0.3843843843843844
212
+ },
213
+ "20": {
214
+ "pass@1": 0.15052552552552553,
215
+ "pass@2": 0.18885223806483653,
216
+ "pass@4": 0.22942468621602477,
217
+ "pass@8": 0.26979999128432536,
218
+ "pass@16": 0.30689533475643643,
219
+ "pass@32": 0.3397581402934893,
220
+ "pass@64": 0.370991289408613,
221
+ "pass@128": 0.4024024024024024
222
+ }
223
+ }
224
+ },
225
+ "teachers_in_school": {
226
+ "count": 821120,
227
+ "correct": 483803,
228
+ "answer_accuracy": 0.5891988990646921,
229
+ "avg_response_len": 242.341766124318,
230
+ "resp_tokens_sum": 198991671,
231
+ "pass_at_k": {
232
+ "pass@1": 0.5891988990646921,
233
+ "pass@2": 0.6227068003142299,
234
+ "pass@4": 0.650107249088997,
235
+ "pass@8": 0.6741994105674554,
236
+ "pass@16": 0.6956359816626693,
237
+ "pass@32": 0.7149534992963424,
238
+ "pass@64": 0.7325456550204769,
239
+ "pass@128": 0.7480904130943102
240
+ },
241
+ "per_op_pass_at_k": {
242
+ "10": {
243
+ "pass@1": 0.8878930214723927,
244
+ "pass@2": 0.9240016333751996,
245
+ "pass@4": 0.9427751138086263,
246
+ "pass@8": 0.9559837013558444,
247
+ "pass@16": 0.9669589357176731,
248
+ "pass@32": 0.9757400698048759,
249
+ "pass@64": 0.9820426961144421,
250
+ "pass@128": 0.9877300613496932
251
+ },
252
+ "2": {
253
+ "pass@1": 0.9987177051671733,
254
+ "pass@2": 0.9996395064978579,
255
+ "pass@4": 0.9999613177152248,
256
+ "pass@8": 0.9999996232076699,
257
+ "pass@16": 0.9999999999864305,
258
+ "pass@32": 1.0,
259
+ "pass@64": 1.0,
260
+ "pass@128": 1.0
261
+ },
262
+ "3": {
263
+ "pass@1": 0.9907670454545454,
264
+ "pass@2": 0.9960163892865667,
265
+ "pass@4": 0.9985778993534897,
266
+ "pass@8": 0.9996915957072215,
267
+ "pass@16": 0.9999815253902985,
268
+ "pass@32": 0.99999995938087,
269
+ "pass@64": 0.9999999999999987,
270
+ "pass@128": 1.0
271
+ },
272
+ "4": {
273
+ "pass@1": 0.9982664571005917,
274
+ "pass@2": 0.9998132687648512,
275
+ "pass@4": 0.999995557138198,
276
+ "pass@8": 0.9999999967656203,
277
+ "pass@16": 0.9999999999999998,
278
+ "pass@32": 1.0,
279
+ "pass@64": 1.0,
280
+ "pass@128": 1.0
281
+ },
282
+ "5": {
283
+ "pass@1": 0.9600317028985508,
284
+ "pass@2": 0.9691975493552438,
285
+ "pass@4": 0.9763000869456536,
286
+ "pass@8": 0.9828931121195829,
287
+ "pass@16": 0.989036439654314,
288
+ "pass@32": 0.9944838684919953,
289
+ "pass@64": 0.9981653755234783,
290
+ "pass@128": 1.0
291
+ },
292
+ "6": {
293
+ "pass@1": 0.9823379297994269,
294
+ "pass@2": 0.9910194058389551,
295
+ "pass@4": 0.9946701218221643,
296
+ "pass@8": 0.9965445299840145,
297
+ "pass@16": 0.9976712739922713,
298
+ "pass@32": 0.9983904225804993,
299
+ "pass@64": 0.9992893080034889,
300
+ "pass@128": 1.0
301
+ },
302
+ "7": {
303
+ "pass@1": 0.9681855130057804,
304
+ "pass@2": 0.9832421146056164,
305
+ "pass@4": 0.9914642009163593,
306
+ "pass@8": 0.9964758591421767,
307
+ "pass@16": 0.9990374180123244,
308
+ "pass@32": 0.9999126056145744,
309
+ "pass@64": 0.9999997855847184,
310
+ "pass@128": 1.0
311
+ },
312
+ "8": {
313
+ "pass@1": 0.9455765845070423,
314
+ "pass@2": 0.9712158284351782,
315
+ "pass@4": 0.9835400283591312,
316
+ "pass@8": 0.9910390348371692,
317
+ "pass@16": 0.9955726997113149,
318
+ "pass@32": 0.9980539040490879,
319
+ "pass@64": 0.9995741614459047,
320
+ "pass@128": 1.0
321
+ },
322
+ "9": {
323
+ "pass@1": 0.9390437874251497,
324
+ "pass@2": 0.9670264539818006,
325
+ "pass@4": 0.9783135702722786,
326
+ "pass@8": 0.9840906736702117,
327
+ "pass@16": 0.9879928605789021,
328
+ "pass@32": 0.9908513468886352,
329
+ "pass@64": 0.9930496212972972,
330
+ "pass@128": 0.9940119760479041
331
+ },
332
+ "11": {
333
+ "pass@1": 0.7309864457831325,
334
+ "pass@2": 0.811214839673655,
335
+ "pass@4": 0.8645121111555325,
336
+ "pass@8": 0.9001346785530842,
337
+ "pass@16": 0.9235781504819824,
338
+ "pass@32": 0.939017807747773,
339
+ "pass@64": 0.9485292552001391,
340
+ "pass@128": 0.9548192771084337
341
+ },
342
+ "12": {
343
+ "pass@1": 0.439042907523511,
344
+ "pass@2": 0.5317378835188704,
345
+ "pass@4": 0.6075961422729685,
346
+ "pass@8": 0.6683062360710545,
347
+ "pass@16": 0.7177272091161964,
348
+ "pass@32": 0.7598458853246168,
349
+ "pass@64": 0.7942021269976,
350
+ "pass@128": 0.8213166144200627
351
+ },
352
+ "13": {
353
+ "pass@1": 0.19227065826330533,
354
+ "pass@2": 0.23474754074858276,
355
+ "pass@4": 0.27791213361985206,
356
+ "pass@8": 0.32123267772057174,
357
+ "pass@16": 0.36547236780648024,
358
+ "pass@32": 0.412591392141311,
359
+ "pass@64": 0.46083134462740805,
360
+ "pass@128": 0.5042016806722689
361
+ },
362
+ "14": {
363
+ "pass@1": 0.20837902046783627,
364
+ "pass@2": 0.2522699670764838,
365
+ "pass@4": 0.2964683594923442,
366
+ "pass@8": 0.3416618793696011,
367
+ "pass@16": 0.3841336779597966,
368
+ "pass@32": 0.42375806001618826,
369
+ "pass@64": 0.4632207639904677,
370
+ "pass@128": 0.5029239766081871
371
+ },
372
+ "15": {
373
+ "pass@1": 0.19093276515151514,
374
+ "pass@2": 0.24199400501073715,
375
+ "pass@4": 0.2891956014020975,
376
+ "pass@8": 0.3326345282383242,
377
+ "pass@16": 0.37366558323041027,
378
+ "pass@32": 0.4128573661812689,
379
+ "pass@64": 0.4483166035224402,
380
+ "pass@128": 0.4727272727272727
381
+ },
382
+ "16": {
383
+ "pass@1": 0.15642806267806267,
384
+ "pass@2": 0.1904278691926329,
385
+ "pass@4": 0.22132350069489173,
386
+ "pass@8": 0.251268169779741,
387
+ "pass@16": 0.28180434335829546,
388
+ "pass@32": 0.3122157586188942,
389
+ "pass@64": 0.34211985927431315,
390
+ "pass@128": 0.3732193732193732
391
+ },
392
+ "17": {
393
+ "pass@1": 0.16779891304347827,
394
+ "pass@2": 0.21377104526336374,
395
+ "pass@4": 0.2616579632708955,
396
+ "pass@8": 0.3084358591590647,
397
+ "pass@16": 0.3485515761094292,
398
+ "pass@32": 0.38192734451252297,
399
+ "pass@64": 0.4116145438455866,
400
+ "pass@128": 0.43788819875776397
401
+ },
402
+ "18": {
403
+ "pass@1": 0.1518612132352941,
404
+ "pass@2": 0.1958187384205651,
405
+ "pass@4": 0.23828893419572547,
406
+ "pass@8": 0.27971765090438966,
407
+ "pass@16": 0.31909231139592203,
408
+ "pass@32": 0.3541919542192856,
409
+ "pass@64": 0.3879892602530647,
410
+ "pass@128": 0.4235294117647059
411
+ },
412
+ "19": {
413
+ "pass@1": 0.14004371279761904,
414
+ "pass@2": 0.17627703763592056,
415
+ "pass@4": 0.21033196631671033,
416
+ "pass@8": 0.24241868591666452,
417
+ "pass@16": 0.2725452956766527,
418
+ "pass@32": 0.3023936104609465,
419
+ "pass@64": 0.331830773125219,
420
+ "pass@128": 0.3601190476190476
421
+ },
422
+ "20": {
423
+ "pass@1": 0.1367421407185629,
424
+ "pass@2": 0.17771825934744678,
425
+ "pass@4": 0.21898993408009634,
426
+ "pass@8": 0.260066157867715,
427
+ "pass@16": 0.2989195527821917,
428
+ "pass@32": 0.3334463816599912,
429
+ "pass@64": 0.3631967204725436,
430
+ "pass@128": 0.38622754491017963
431
+ }
432
+ }
433
+ },
434
+ "movie_festival_awards": {
435
+ "count": 801152,
436
+ "correct": 471310,
437
+ "answer_accuracy": 0.5882903618788944,
438
+ "avg_response_len": 258.0569242790382,
439
+ "resp_tokens_sum": 206742821,
440
+ "pass_at_k": {
441
+ "pass@1": 0.5882903618788944,
442
+ "pass@2": 0.6212685622467475,
443
+ "pass@4": 0.6478714293112718,
444
+ "pass@8": 0.6709216329453926,
445
+ "pass@16": 0.6916683710373397,
446
+ "pass@32": 0.7106315691905148,
447
+ "pass@64": 0.728740000525103,
448
+ "pass@128": 0.7466048889598977
449
+ },
450
+ "per_op_pass_at_k": {
451
+ "10": {
452
+ "pass@1": 0.8983908582089553,
453
+ "pass@2": 0.9340132506757551,
454
+ "pass@4": 0.9530650669599133,
455
+ "pass@8": 0.9652801856862316,
456
+ "pass@16": 0.9745693164759084,
457
+ "pass@32": 0.9818461284408381,
458
+ "pass@64": 0.9871328155545905,
459
+ "pass@128": 0.991044776119403
460
+ },
461
+ "2": {
462
+ "pass@1": 0.9998660714285714,
463
+ "pass@2": 0.9999992969628797,
464
+ "pass@4": 1.0,
465
+ "pass@8": 1.0,
466
+ "pass@16": 1.0,
467
+ "pass@32": 1.0,
468
+ "pass@64": 1.0,
469
+ "pass@128": 1.0
470
+ },
471
+ "3": {
472
+ "pass@1": 0.9907069970845481,
473
+ "pass@2": 0.9970669721769471,
474
+ "pass@4": 0.9993563997831172,
475
+ "pass@8": 0.9999514691641971,
476
+ "pass@16": 0.9999996499221584,
477
+ "pass@32": 0.9999999999946803,
478
+ "pass@64": 1.0,
479
+ "pass@128": 1.0
480
+ },
481
+ "4": {
482
+ "pass@1": 0.9958196271929824,
483
+ "pass@2": 0.9977559342911083,
484
+ "pass@4": 0.9990211264710332,
485
+ "pass@8": 0.9997663197662907,
486
+ "pass@16": 0.9999856176620865,
487
+ "pass@32": 0.9999999740185194,
488
+ "pass@64": 0.9999999999999997,
489
+ "pass@128": 1.0
490
+ },
491
+ "5": {
492
+ "pass@1": 0.9548943014705882,
493
+ "pass@2": 0.9685079174386292,
494
+ "pass@4": 0.9784535587463331,
495
+ "pass@8": 0.9869293632462349,
496
+ "pass@16": 0.9934826994856965,
497
+ "pass@32": 0.9970310861536675,
498
+ "pass@64": 0.9985066302193607,
499
+ "pass@128": 1.0
500
+ },
501
+ "6": {
502
+ "pass@1": 0.9775483044164038,
503
+ "pass@2": 0.9891293716932861,
504
+ "pass@4": 0.9959075131381444,
505
+ "pass@8": 0.9991092920224423,
506
+ "pass@16": 0.9999352040645176,
507
+ "pass@32": 0.9999996662295679,
508
+ "pass@64": 0.9999999999997393,
509
+ "pass@128": 1.0
510
+ },
511
+ "7": {
512
+ "pass@1": 0.9744115901898734,
513
+ "pass@2": 0.9867456923402768,
514
+ "pass@4": 0.9912063912303684,
515
+ "pass@8": 0.9935151685224703,
516
+ "pass@16": 0.9954041601504686,
517
+ "pass@32": 0.9972167179953726,
518
+ "pass@64": 0.9990265956175474,
519
+ "pass@128": 1.0
520
+ },
521
+ "8": {
522
+ "pass@1": 0.9515531156156156,
523
+ "pass@2": 0.9743795222387744,
524
+ "pass@4": 0.9852795146102226,
525
+ "pass@8": 0.9899431408996546,
526
+ "pass@16": 0.9925003924523234,
527
+ "pass@32": 0.9950902478963195,
528
+ "pass@64": 0.9976578190444356,
529
+ "pass@128": 1.0
530
+ },
531
+ "9": {
532
+ "pass@1": 0.9379521704180064,
533
+ "pass@2": 0.9720591912043948,
534
+ "pass@4": 0.9858236963747689,
535
+ "pass@8": 0.9932814976290268,
536
+ "pass@16": 0.9968830356374474,
537
+ "pass@32": 0.9981725276413653,
538
+ "pass@64": 0.9992024635603814,
539
+ "pass@128": 1.0
540
+ },
541
+ "11": {
542
+ "pass@1": 0.7650545634920635,
543
+ "pass@2": 0.8431473409573809,
544
+ "pass@4": 0.8877359086066622,
545
+ "pass@8": 0.9145899228176874,
546
+ "pass@16": 0.9334761048558039,
547
+ "pass@32": 0.9489928847114996,
548
+ "pass@64": 0.9642010673849086,
549
+ "pass@128": 0.9809523809523809
550
+ },
551
+ "12": {
552
+ "pass@1": 0.4054615825688073,
553
+ "pass@2": 0.49957183534397626,
554
+ "pass@4": 0.5768995727598271,
555
+ "pass@8": 0.639355428961972,
556
+ "pass@16": 0.6924379381519293,
557
+ "pass@32": 0.7389465717283092,
558
+ "pass@64": 0.7833812207530858,
559
+ "pass@128": 0.8256880733944955
560
+ },
561
+ "13": {
562
+ "pass@1": 0.21805073302469136,
563
+ "pass@2": 0.2614506628511712,
564
+ "pass@4": 0.30509700407819385,
565
+ "pass@8": 0.34843945585161107,
566
+ "pass@16": 0.3892841325907048,
567
+ "pass@32": 0.42707158587605587,
568
+ "pass@64": 0.46350207303509766,
569
+ "pass@128": 0.5
570
+ },
571
+ "14": {
572
+ "pass@1": 0.19482566765578635,
573
+ "pass@2": 0.24658432440010283,
574
+ "pass@4": 0.2977982871762691,
575
+ "pass@8": 0.34710213397424194,
576
+ "pass@16": 0.39305571585018145,
577
+ "pass@32": 0.4354334888531387,
578
+ "pass@64": 0.4760201367236571,
579
+ "pass@128": 0.516320474777448
580
+ },
581
+ "15": {
582
+ "pass@1": 0.15263310185185186,
583
+ "pass@2": 0.20657633724603863,
584
+ "pass@4": 0.26040626837154607,
585
+ "pass@8": 0.31066200412971595,
586
+ "pass@16": 0.3575054534309268,
587
+ "pass@32": 0.40181191645690056,
588
+ "pass@64": 0.4460592807353767,
589
+ "pass@128": 0.49074074074074076
590
+ },
591
+ "16": {
592
+ "pass@1": 0.15613477138643067,
593
+ "pass@2": 0.1863313822497852,
594
+ "pass@4": 0.21273005393131156,
595
+ "pass@8": 0.23865739981671225,
596
+ "pass@16": 0.2676959517806339,
597
+ "pass@32": 0.3011310763692745,
598
+ "pass@64": 0.3362407178410369,
599
+ "pass@128": 0.37168141592920356
600
+ },
601
+ "17": {
602
+ "pass@1": 0.1378012048192771,
603
+ "pass@2": 0.17401619272365054,
604
+ "pass@4": 0.21124320340981484,
605
+ "pass@8": 0.24873698272180494,
606
+ "pass@16": 0.28468715470923933,
607
+ "pass@32": 0.31684364503562207,
608
+ "pass@64": 0.3460631347877581,
609
+ "pass@128": 0.37650602409638556
610
+ },
611
+ "18": {
612
+ "pass@1": 0.16940524193548387,
613
+ "pass@2": 0.20793830962661927,
614
+ "pass@4": 0.24442847708552565,
615
+ "pass@8": 0.27696590133200394,
616
+ "pass@16": 0.3047539165997947,
617
+ "pass@32": 0.329140512075412,
618
+ "pass@64": 0.3525837977178592,
619
+ "pass@128": 0.3774193548387097
620
+ },
621
+ "19": {
622
+ "pass@1": 0.13812311178247735,
623
+ "pass@2": 0.16759305790137258,
624
+ "pass@4": 0.19758813267676884,
625
+ "pass@8": 0.2284911534750015,
626
+ "pass@16": 0.2603002938970115,
627
+ "pass@32": 0.29249731296624343,
628
+ "pass@64": 0.3260332407783438,
629
+ "pass@128": 0.36253776435045315
630
+ },
631
+ "20": {
632
+ "pass@1": 0.13072447447447447,
633
+ "pass@2": 0.16936079780567978,
634
+ "pass@4": 0.2094049935762534,
635
+ "pass@8": 0.25098284669882537,
636
+ "pass@16": 0.291417867530838,
637
+ "pass@32": 0.32739810777438544,
638
+ "pass@64": 0.35806392636422313,
639
+ "pass@128": 0.3813813813813814
640
+ }
641
+ }
642
+ }
643
+ }
644
+ }
645
+ ]