amirali1985 commited on
Commit
e692933
·
verified ·
1 Parent(s): 0c51d15

Upload add_sub_sorl_v1_abs30_K1_100K_1L2H256d/metrics.json with huggingface_hub

Browse files
add_sub_sorl_v1_abs30_K1_100K_1L2H256d/metrics.json CHANGED
@@ -5030,502 +5030,567 @@
5030
  "K": null,
5031
  "mode": "sft",
5032
  "n_digits": 6,
5033
- "n_per_split": 100
5034
  },
5035
  "splits": {
5036
  "add_S0": {
5037
- "full_accuracy": 0.73,
5038
- "n_examples": 100,
 
5039
  "per_subtask": {
5040
  "SA": {
5041
- "accuracy": 0.9520661157024793,
5042
- "count": 605
5043
  },
5044
  "SS": {
5045
- "accuracy": 0.9789473684210527,
5046
- "count": 95
5047
  }
5048
  }
5049
  },
5050
  "add_S1": {
5051
- "full_accuracy": 0.59,
5052
- "n_examples": 100,
 
5053
  "per_subtask": {
5054
  "SA": {
5055
- "accuracy": 0.9362745098039216,
5056
- "count": 204
5057
  },
5058
  "SC": {
5059
- "accuracy": 0.9349112426035503,
5060
- "count": 169
5061
  },
5062
  "SS": {
5063
- "accuracy": 0.9354838709677419,
5064
- "count": 31
5065
  },
5066
  "UC": {
5067
- "accuracy": 0.918918918918919,
5068
- "count": 296
5069
  }
5070
  }
5071
  },
5072
  "add_S2": {
5073
- "full_accuracy": 0.3,
5074
- "n_examples": 100,
 
5075
  "per_subtask": {
5076
  "SA": {
5077
- "accuracy": 0.9386503067484663,
5078
- "count": 163
5079
  },
5080
  "SC": {
5081
- "accuracy": 0.8692307692307693,
5082
- "count": 130
5083
  },
5084
  "SS": {
5085
- "accuracy": 0.8735632183908046,
5086
- "count": 87
5087
  },
5088
  "UC": {
5089
- "accuracy": 0.7192118226600985,
5090
- "count": 203
5091
  },
5092
  "US": {
5093
- "accuracy": 0.9401709401709402,
5094
- "count": 117
5095
  }
5096
  }
5097
  },
5098
  "add_S3": {
5099
- "full_accuracy": 0.21,
5100
- "n_examples": 100,
 
5101
  "per_subtask": {
5102
  "SA": {
5103
- "accuracy": 0.9669421487603306,
5104
- "count": 121
5105
  },
5106
  "SC": {
5107
- "accuracy": 0.8925619834710744,
5108
- "count": 121
5109
  },
5110
  "SS": {
5111
- "accuracy": 0.8775510204081632,
5112
- "count": 49
5113
  },
5114
  "UC": {
5115
- "accuracy": 0.6720430107526881,
5116
- "count": 186
5117
  },
5118
  "US": {
5119
- "accuracy": 0.7937219730941704,
5120
- "count": 223
5121
  }
5122
  }
5123
  },
5124
  "add_S4": {
5125
- "full_accuracy": 0.2,
5126
- "n_examples": 100,
 
5127
  "per_subtask": {
5128
  "SA": {
5129
- "accuracy": 0.9519230769230769,
5130
- "count": 104
5131
  },
5132
  "SC": {
5133
- "accuracy": 0.8773584905660378,
5134
- "count": 106
5135
  },
5136
  "SS": {
5137
- "accuracy": 0.9565217391304348,
5138
- "count": 23
5139
  },
5140
  "UC": {
5141
- "accuracy": 0.7375,
5142
- "count": 160
5143
  },
5144
  "US": {
5145
- "accuracy": 0.6416938110749185,
5146
- "count": 307
5147
  }
5148
  }
5149
  },
5150
  "add_S5": {
5151
- "full_accuracy": 0.07,
5152
- "n_examples": 100,
 
5153
  "per_subtask": {
5154
  "SA": {
5155
  "accuracy": 1.0,
5156
- "count": 100
5157
  },
5158
  "SC": {
5159
  "accuracy": 0.98,
5160
- "count": 100
5161
  },
5162
  "UC": {
5163
- "accuracy": 0.29,
5164
- "count": 100
5165
  },
5166
  "US": {
5167
- "accuracy": 0.245,
5168
- "count": 400
5169
  }
5170
  }
5171
  },
5172
  "add_S6": {
5173
- "full_accuracy": 0.28,
5174
- "n_examples": 100,
 
5175
  "per_subtask": {
5176
  "SC": {
5177
  "accuracy": 1.0,
5178
- "count": 100
5179
  },
5180
  "UC": {
5181
- "accuracy": 0.57,
5182
- "count": 100
5183
  },
5184
  "US": {
5185
- "accuracy": 0.482,
5186
- "count": 500
5187
  }
5188
  }
5189
  },
5190
  "add_random": {
5191
- "full_accuracy": 0.575,
 
5192
  "n_examples": 200,
5193
  "per_subtask": {
5194
  "SA": {
5195
- "accuracy": 0.9686800894854586,
5196
- "count": 447
5197
  },
5198
  "SC": {
5199
- "accuracy": 0.925,
5200
- "count": 320
5201
  },
5202
  "SS": {
5203
- "accuracy": 0.8928571428571429,
5204
- "count": 56
5205
  },
5206
  "UC": {
5207
- "accuracy": 0.8960302457466919,
5208
- "count": 529
5209
  },
5210
  "US": {
5211
- "accuracy": 0.7708333333333334,
5212
- "count": 48
5213
  }
5214
  }
5215
  },
5216
- "add_C3": {
5217
- "full_accuracy": 0.37,
5218
- "n_examples": 100,
 
5219
  "per_subtask": {
5220
  "SA": {
5221
- "accuracy": 0.9966666666666667,
5222
- "count": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5223
  },
5224
  "SC": {
5225
  "accuracy": 0.98,
5226
- "count": 100
5227
  },
5228
  "UC": {
5229
- "accuracy": 0.7098445595854922,
5230
- "count": 193
5231
  },
5232
  "US": {
5233
- "accuracy": 0.6635514018691588,
5234
- "count": 107
5235
  }
5236
  }
5237
  },
5238
- "add_C4": {
5239
- "full_accuracy": 0.37,
5240
- "n_examples": 100,
 
5241
  "per_subtask": {
5242
  "SA": {
5243
- "accuracy": 0.995,
5244
- "count": 200
5245
  },
5246
  "SC": {
5247
- "accuracy": 0.99,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5248
  "count": 100
5249
  },
 
 
 
 
5250
  "UC": {
5251
- "accuracy": 0.7421875,
5252
- "count": 256
5253
  },
5254
  "US": {
5255
- "accuracy": 0.7083333333333334,
5256
- "count": 144
5257
  }
5258
  }
5259
  },
5260
  "add_C5": {
5261
- "full_accuracy": 0.34,
5262
- "n_examples": 100,
 
5263
  "per_subtask": {
5264
  "SA": {
5265
  "accuracy": 1.0,
5266
- "count": 100
5267
  },
5268
  "SC": {
5269
- "accuracy": 0.99,
5270
- "count": 100
5271
  },
5272
  "UC": {
5273
- "accuracy": 0.7483660130718954,
5274
- "count": 306
5275
  },
5276
  "US": {
5277
- "accuracy": 0.7835051546391752,
5278
- "count": 194
5279
  }
5280
  }
5281
  },
5282
  "add_C6": {
5283
- "full_accuracy": 0.39,
5284
- "n_examples": 100,
 
5285
  "per_subtask": {
5286
  "SC": {
5287
  "accuracy": 1.0,
5288
- "count": 100
5289
  },
5290
  "UC": {
5291
- "accuracy": 0.825136612021858,
5292
- "count": 366
5293
  },
5294
  "US": {
5295
- "accuracy": 0.8760683760683761,
5296
- "count": 234
5297
  }
5298
  }
5299
  },
5300
  "sub_M0": {
5301
- "full_accuracy": 0.66,
5302
- "n_examples": 100,
 
5303
  "per_subtask": {
5304
  "MD": {
5305
- "accuracy": 0.9384359400998337,
5306
- "count": 601
5307
  },
5308
  "ME": {
5309
- "accuracy": 0.9696969696969697,
5310
- "count": 99
5311
  }
5312
  }
5313
  },
5314
  "sub_M1": {
5315
- "full_accuracy": 0.45,
5316
- "n_examples": 100,
 
5317
  "per_subtask": {
5318
  "MD": {
5319
- "accuracy": 0.956989247311828,
5320
- "count": 279
5321
  },
5322
  "MB": {
5323
- "accuracy": 0.9517241379310345,
5324
- "count": 145
5325
  },
5326
  "ME": {
5327
- "accuracy": 0.875,
5328
- "count": 24
5329
  },
5330
  "UB": {
5331
- "accuracy": 0.8015873015873016,
5332
- "count": 252
5333
  }
5334
  }
5335
  },
5336
  "sub_M2": {
5337
- "full_accuracy": 0.18,
5338
- "n_examples": 100,
 
5339
  "per_subtask": {
5340
  "MD": {
5341
- "accuracy": 0.9624413145539906,
5342
- "count": 213
5343
  },
5344
  "MB": {
5345
- "accuracy": 0.8761061946902655,
5346
- "count": 113
5347
  },
5348
  "ME": {
5349
- "accuracy": 0.9411764705882353,
5350
- "count": 85
5351
  },
5352
  "UB": {
5353
- "accuracy": 0.6353591160220995,
5354
- "count": 181
5355
  },
5356
  "UD": {
5357
- "accuracy": 0.7685185185185185,
5358
- "count": 108
5359
  }
5360
  }
5361
  },
5362
  "sub_M3": {
5363
- "full_accuracy": 0.07,
5364
- "n_examples": 100,
 
5365
  "per_subtask": {
5366
  "MD": {
5367
- "accuracy": 0.9832402234636871,
5368
- "count": 179
5369
  },
5370
  "MB": {
5371
- "accuracy": 0.8737864077669902,
5372
- "count": 103
5373
  },
5374
  "ME": {
5375
- "accuracy": 0.9821428571428571,
5376
- "count": 56
5377
  },
5378
  "UB": {
5379
- "accuracy": 0.47651006711409394,
5380
- "count": 149
5381
  },
5382
  "UD": {
5383
- "accuracy": 0.49295774647887325,
5384
- "count": 213
5385
  }
5386
  }
5387
  },
5388
  "sub_M4": {
5389
- "full_accuracy": 0.07,
5390
- "n_examples": 100,
 
5391
  "per_subtask": {
5392
  "MD": {
5393
  "accuracy": 0.96,
5394
- "count": 200
5395
  },
5396
  "MB": {
5397
- "accuracy": 0.96,
5398
- "count": 100
5399
  },
5400
  "UB": {
5401
- "accuracy": 0.29,
5402
- "count": 100
5403
  },
5404
  "UD": {
5405
- "accuracy": 0.30333333333333334,
5406
- "count": 300
5407
  }
5408
  }
5409
  },
5410
  "sub_M5": {
5411
  "full_accuracy": 0.04,
5412
- "n_examples": 100,
 
5413
  "per_subtask": {
5414
  "MD": {
5415
  "accuracy": 1.0,
5416
- "count": 100
5417
  },
5418
  "MB": {
5419
  "accuracy": 1.0,
5420
- "count": 100
5421
  },
5422
  "UB": {
5423
- "accuracy": 0.37,
5424
- "count": 100
5425
  },
5426
  "UD": {
5427
- "accuracy": 0.2275,
5428
- "count": 400
5429
  }
5430
  }
5431
  },
5432
  "sub_random": {
5433
- "full_accuracy": 0.465,
 
5434
  "n_examples": 200,
5435
  "per_subtask": {
5436
  "MD": {
5437
- "accuracy": 0.9433333333333334,
5438
- "count": 600
5439
  },
5440
  "MB": {
5441
- "accuracy": 0.9213483146067416,
5442
- "count": 267
5443
  },
5444
  "ME": {
5445
- "accuracy": 0.9245283018867925,
5446
  "count": 53
5447
  },
5448
  "UB": {
5449
- "accuracy": 0.806378132118451,
5450
- "count": 439
5451
  },
5452
  "UD": {
5453
- "accuracy": 0.7804878048780488,
5454
- "count": 41
5455
  }
5456
  }
5457
  },
5458
  "sub_B3": {
5459
- "full_accuracy": 0.21,
5460
- "n_examples": 100,
 
5461
  "per_subtask": {
5462
  "MD": {
5463
- "accuracy": 0.9433333333333334,
5464
- "count": 300
5465
  },
5466
  "MB": {
5467
- "accuracy": 0.95,
5468
- "count": 100
5469
  },
5470
  "UB": {
5471
- "accuracy": 0.5939086294416244,
5472
- "count": 197
5473
  },
5474
  "UD": {
5475
- "accuracy": 0.6019417475728155,
5476
- "count": 103
5477
  }
5478
  }
5479
  },
5480
  "sub_B4": {
5481
- "full_accuracy": 0.17,
5482
- "n_examples": 100,
 
5483
  "per_subtask": {
5484
  "MD": {
5485
- "accuracy": 0.925,
5486
- "count": 200
5487
  },
5488
  "MB": {
5489
  "accuracy": 0.98,
5490
- "count": 100
5491
  },
5492
  "UB": {
5493
- "accuracy": 0.5951417004048583,
5494
- "count": 247
5495
  },
5496
  "UD": {
5497
- "accuracy": 0.5947712418300654,
5498
- "count": 153
5499
  }
5500
  }
5501
  },
5502
  "sub_B5": {
5503
- "full_accuracy": 0.1,
5504
- "n_examples": 100,
 
5505
  "per_subtask": {
5506
  "MD": {
5507
  "accuracy": 1.0,
5508
- "count": 100
5509
  },
5510
  "MB": {
5511
  "accuracy": 1.0,
5512
- "count": 100
5513
  },
5514
  "UB": {
5515
- "accuracy": 0.5838926174496645,
5516
- "count": 298
5517
  },
5518
  "UD": {
5519
- "accuracy": 0.49504950495049505,
5520
- "count": 202
5521
  }
5522
  }
5523
  }
5524
  },
5525
  "summary": {
5526
- "overall_accuracy": 0.3279166666666667,
5527
- "total_examples": 2400,
5528
- "n_splits": 22
 
5529
  }
5530
  },
5531
  "sorl_eval": {
@@ -5534,502 +5599,567 @@
5534
  "K": 1,
5535
  "mode": "sorl",
5536
  "n_digits": 6,
5537
- "n_per_split": 100
5538
  },
5539
  "splits": {
5540
  "add_S0": {
5541
- "full_accuracy": 0.91,
5542
- "n_examples": 100,
 
5543
  "per_subtask": {
5544
  "SA": {
5545
- "accuracy": 0.9867768595041322,
5546
- "count": 605
5547
  },
5548
  "SS": {
5549
- "accuracy": 0.9894736842105263,
5550
- "count": 95
5551
  }
5552
  }
5553
  },
5554
  "add_S1": {
5555
- "full_accuracy": 0.64,
5556
- "n_examples": 100,
 
5557
  "per_subtask": {
5558
  "SA": {
5559
- "accuracy": 0.9705882352941176,
5560
- "count": 204
5561
  },
5562
  "SC": {
5563
- "accuracy": 0.9526627218934911,
5564
- "count": 169
5565
  },
5566
  "SS": {
5567
- "accuracy": 0.967741935483871,
5568
- "count": 31
5569
  },
5570
  "UC": {
5571
- "accuracy": 0.918918918918919,
5572
- "count": 296
5573
  }
5574
  }
5575
  },
5576
  "add_S2": {
5577
- "full_accuracy": 0.55,
5578
- "n_examples": 100,
 
5579
  "per_subtask": {
5580
  "SA": {
5581
- "accuracy": 0.9877300613496932,
5582
- "count": 163
5583
  },
5584
  "SC": {
5585
- "accuracy": 0.9230769230769231,
5586
- "count": 130
5587
  },
5588
  "SS": {
5589
- "accuracy": 0.9310344827586207,
5590
- "count": 87
5591
  },
5592
  "UC": {
5593
- "accuracy": 0.8374384236453202,
5594
- "count": 203
5595
  },
5596
  "US": {
5597
- "accuracy": 0.9572649572649573,
5598
- "count": 117
5599
  }
5600
  }
5601
  },
5602
  "add_S3": {
5603
- "full_accuracy": 0.47,
5604
- "n_examples": 100,
 
5605
  "per_subtask": {
5606
  "SA": {
5607
  "accuracy": 1.0,
5608
- "count": 121
5609
  },
5610
  "SC": {
5611
- "accuracy": 0.9752066115702479,
5612
- "count": 121
5613
  },
5614
  "SS": {
5615
- "accuracy": 0.9183673469387755,
5616
- "count": 49
5617
  },
5618
  "UC": {
5619
- "accuracy": 0.7634408602150538,
5620
- "count": 186
5621
  },
5622
  "US": {
5623
- "accuracy": 0.8565022421524664,
5624
- "count": 223
5625
  }
5626
  }
5627
  },
5628
  "add_S4": {
5629
  "full_accuracy": 0.4,
5630
- "n_examples": 100,
 
5631
  "per_subtask": {
5632
  "SA": {
5633
- "accuracy": 0.9903846153846154,
5634
- "count": 104
5635
  },
5636
  "SC": {
5637
- "accuracy": 0.9339622641509434,
5638
- "count": 106
5639
  },
5640
  "SS": {
5641
  "accuracy": 1.0,
5642
- "count": 23
5643
  },
5644
  "UC": {
5645
- "accuracy": 0.7125,
5646
- "count": 160
5647
  },
5648
  "US": {
5649
- "accuracy": 0.7850162866449512,
5650
- "count": 307
5651
  }
5652
  }
5653
  },
5654
  "add_S5": {
5655
- "full_accuracy": 0.1,
5656
- "n_examples": 100,
 
5657
  "per_subtask": {
5658
  "SA": {
5659
  "accuracy": 1.0,
5660
- "count": 100
5661
  },
5662
  "SC": {
5663
- "accuracy": 1.0,
5664
- "count": 100
5665
  },
5666
  "UC": {
5667
- "accuracy": 0.4,
5668
- "count": 100
5669
  },
5670
  "US": {
5671
- "accuracy": 0.525,
5672
- "count": 400
5673
  }
5674
  }
5675
  },
5676
  "add_S6": {
5677
- "full_accuracy": 0.19,
5678
- "n_examples": 100,
 
5679
  "per_subtask": {
5680
  "SC": {
5681
  "accuracy": 1.0,
5682
- "count": 100
5683
  },
5684
  "UC": {
5685
- "accuracy": 0.35,
5686
- "count": 100
5687
  },
5688
  "US": {
5689
- "accuracy": 0.534,
5690
- "count": 500
5691
  }
5692
  }
5693
  },
5694
  "add_random": {
5695
- "full_accuracy": 0.73,
 
5696
  "n_examples": 200,
5697
  "per_subtask": {
5698
  "SA": {
5699
- "accuracy": 0.9753914988814317,
5700
- "count": 447
5701
  },
5702
  "SC": {
5703
- "accuracy": 0.96875,
5704
- "count": 320
5705
  },
5706
  "SS": {
5707
- "accuracy": 0.8571428571428571,
5708
- "count": 56
5709
  },
5710
  "UC": {
5711
- "accuracy": 0.9376181474480151,
5712
- "count": 529
5713
  },
5714
  "US": {
5715
- "accuracy": 0.8541666666666666,
5716
- "count": 48
5717
  }
5718
  }
5719
  },
5720
- "add_C3": {
5721
- "full_accuracy": 0.43,
5722
- "n_examples": 100,
 
5723
  "per_subtask": {
5724
  "SA": {
5725
- "accuracy": 0.9966666666666667,
5726
- "count": 300
5727
  },
5728
  "SC": {
5729
- "accuracy": 0.99,
5730
- "count": 100
5731
  },
5732
  "UC": {
5733
- "accuracy": 0.7305699481865285,
5734
- "count": 193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5735
  },
5736
  "US": {
5737
- "accuracy": 0.7476635514018691,
5738
- "count": 107
5739
  }
5740
  }
5741
  },
5742
- "add_C4": {
5743
- "full_accuracy": 0.52,
5744
- "n_examples": 100,
 
5745
  "per_subtask": {
5746
  "SA": {
5747
  "accuracy": 1.0,
5748
- "count": 200
5749
  },
5750
  "SC": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5751
  "accuracy": 0.99,
5752
  "count": 100
5753
  },
 
 
 
 
5754
  "UC": {
5755
- "accuracy": 0.83203125,
5756
- "count": 256
5757
  },
5758
  "US": {
5759
- "accuracy": 0.8402777777777778,
5760
- "count": 144
5761
  }
5762
  }
5763
  },
5764
  "add_C5": {
5765
- "full_accuracy": 0.41,
5766
- "n_examples": 100,
 
5767
  "per_subtask": {
5768
  "SA": {
5769
  "accuracy": 1.0,
5770
- "count": 100
5771
  },
5772
  "SC": {
5773
- "accuracy": 0.99,
5774
- "count": 100
5775
  },
5776
  "UC": {
5777
- "accuracy": 0.7973856209150327,
5778
- "count": 306
5779
  },
5780
  "US": {
5781
- "accuracy": 0.8247422680412371,
5782
- "count": 194
5783
  }
5784
  }
5785
  },
5786
  "add_C6": {
5787
- "full_accuracy": 0.49,
5788
- "n_examples": 100,
 
5789
  "per_subtask": {
5790
  "SC": {
5791
  "accuracy": 1.0,
5792
- "count": 100
5793
  },
5794
  "UC": {
5795
- "accuracy": 0.8797814207650273,
5796
- "count": 366
5797
  },
5798
  "US": {
5799
- "accuracy": 0.8888888888888888,
5800
- "count": 234
5801
  }
5802
  }
5803
  },
5804
  "sub_M0": {
5805
- "full_accuracy": 0.93,
5806
- "n_examples": 100,
 
5807
  "per_subtask": {
5808
  "MD": {
5809
- "accuracy": 0.9883527454242929,
5810
- "count": 601
5811
  },
5812
  "ME": {
5813
  "accuracy": 1.0,
5814
- "count": 99
5815
  }
5816
  }
5817
  },
5818
  "sub_M1": {
5819
- "full_accuracy": 0.63,
5820
- "n_examples": 100,
 
5821
  "per_subtask": {
5822
  "MD": {
5823
- "accuracy": 0.985663082437276,
5824
- "count": 279
5825
  },
5826
  "MB": {
5827
- "accuracy": 0.9586206896551724,
5828
- "count": 145
5829
  },
5830
  "ME": {
5831
  "accuracy": 1.0,
5832
- "count": 24
5833
  },
5834
  "UB": {
5835
  "accuracy": 0.8571428571428571,
5836
- "count": 252
5837
  }
5838
  }
5839
  },
5840
  "sub_M2": {
5841
- "full_accuracy": 0.29,
5842
- "n_examples": 100,
 
5843
  "per_subtask": {
5844
  "MD": {
5845
- "accuracy": 0.9671361502347418,
5846
- "count": 213
5847
  },
5848
  "MB": {
5849
- "accuracy": 0.9203539823008849,
5850
- "count": 113
5851
  },
5852
  "ME": {
5853
- "accuracy": 0.9764705882352941,
5854
- "count": 85
5855
  },
5856
  "UB": {
5857
- "accuracy": 0.6243093922651933,
5858
- "count": 181
5859
  },
5860
  "UD": {
5861
- "accuracy": 0.8333333333333334,
5862
- "count": 108
5863
  }
5864
  }
5865
  },
5866
  "sub_M3": {
5867
- "full_accuracy": 0.25,
5868
- "n_examples": 100,
 
5869
  "per_subtask": {
5870
  "MD": {
5871
- "accuracy": 0.994413407821229,
5872
- "count": 179
5873
  },
5874
  "MB": {
5875
- "accuracy": 0.9223300970873787,
5876
- "count": 103
5877
  },
5878
  "ME": {
5879
- "accuracy": 1.0,
5880
- "count": 56
5881
  },
5882
  "UB": {
5883
- "accuracy": 0.5436241610738255,
5884
- "count": 149
5885
  },
5886
  "UD": {
5887
- "accuracy": 0.6384976525821596,
5888
- "count": 213
5889
  }
5890
  }
5891
  },
5892
  "sub_M4": {
5893
- "full_accuracy": 0.07,
5894
- "n_examples": 100,
 
5895
  "per_subtask": {
5896
  "MD": {
5897
- "accuracy": 0.99,
5898
- "count": 200
5899
  },
5900
  "MB": {
5901
- "accuracy": 0.95,
5902
- "count": 100
5903
  },
5904
  "UB": {
5905
- "accuracy": 0.41,
5906
- "count": 100
5907
  },
5908
  "UD": {
5909
- "accuracy": 0.34,
5910
- "count": 300
5911
  }
5912
  }
5913
  },
5914
  "sub_M5": {
5915
- "full_accuracy": 0.05,
5916
- "n_examples": 100,
 
5917
  "per_subtask": {
5918
  "MD": {
5919
  "accuracy": 1.0,
5920
- "count": 100
5921
  },
5922
  "MB": {
5923
- "accuracy": 0.83,
5924
- "count": 100
5925
  },
5926
  "UB": {
5927
- "accuracy": 0.34,
5928
- "count": 100
5929
  },
5930
  "UD": {
5931
- "accuracy": 0.2375,
5932
- "count": 400
5933
  }
5934
  }
5935
  },
5936
  "sub_random": {
5937
- "full_accuracy": 0.585,
 
5938
  "n_examples": 200,
5939
  "per_subtask": {
5940
  "MD": {
5941
- "accuracy": 0.9666666666666667,
5942
- "count": 600
5943
  },
5944
  "MB": {
5945
- "accuracy": 0.9700374531835206,
5946
- "count": 267
5947
  },
5948
  "ME": {
5949
- "accuracy": 0.9245283018867925,
5950
  "count": 53
5951
  },
5952
  "UB": {
5953
- "accuracy": 0.8405466970387244,
5954
- "count": 439
5955
  },
5956
  "UD": {
5957
- "accuracy": 0.8048780487804879,
5958
- "count": 41
5959
  }
5960
  }
5961
  },
5962
  "sub_B3": {
5963
- "full_accuracy": 0.34,
5964
- "n_examples": 100,
 
5965
  "per_subtask": {
5966
  "MD": {
5967
- "accuracy": 0.98,
5968
- "count": 300
5969
  },
5970
  "MB": {
5971
- "accuracy": 0.96,
5972
- "count": 100
5973
  },
5974
  "UB": {
5975
- "accuracy": 0.6598984771573604,
5976
- "count": 197
5977
  },
5978
  "UD": {
5979
- "accuracy": 0.6699029126213593,
5980
- "count": 103
5981
  }
5982
  }
5983
  },
5984
  "sub_B4": {
5985
- "full_accuracy": 0.19,
5986
- "n_examples": 100,
 
5987
  "per_subtask": {
5988
  "MD": {
5989
- "accuracy": 0.98,
5990
- "count": 200
5991
- },
5992
- "MB": {
5993
  "accuracy": 0.99,
5994
  "count": 100
5995
  },
 
 
 
 
5996
  "UB": {
5997
- "accuracy": 0.631578947368421,
5998
- "count": 247
5999
  },
6000
  "UD": {
6001
- "accuracy": 0.5555555555555556,
6002
- "count": 153
6003
  }
6004
  }
6005
  },
6006
  "sub_B5": {
6007
- "full_accuracy": 0.17,
6008
- "n_examples": 100,
 
6009
  "per_subtask": {
6010
  "MD": {
6011
  "accuracy": 1.0,
6012
- "count": 100
6013
  },
6014
  "MB": {
6015
- "accuracy": 0.99,
6016
- "count": 100
6017
  },
6018
  "UB": {
6019
- "accuracy": 0.6610738255033557,
6020
- "count": 298
6021
  },
6022
  "UD": {
6023
- "accuracy": 0.5544554455445545,
6024
- "count": 202
6025
  }
6026
  }
6027
  }
6028
  },
6029
  "summary": {
6030
- "overall_accuracy": 0.44375,
6031
- "total_examples": 2400,
6032
- "n_splits": 22
 
6033
  }
6034
  },
6035
  "sorl_overall_accuracy": 0.44375,
 
5030
  "K": null,
5031
  "mode": "sft",
5032
  "n_digits": 6,
5033
+ "n_per_split": 50
5034
  },
5035
  "splits": {
5036
  "add_S0": {
5037
+ "full_accuracy": 0.72,
5038
+ "digit_accuracy": 0.9514285714285714,
5039
+ "n_examples": 50,
5040
  "per_subtask": {
5041
  "SA": {
5042
+ "accuracy": 0.9457627118644067,
5043
+ "count": 295
5044
  },
5045
  "SS": {
5046
+ "accuracy": 0.9818181818181818,
5047
+ "count": 55
5048
  }
5049
  }
5050
  },
5051
  "add_S1": {
5052
+ "full_accuracy": 0.6,
5053
+ "digit_accuracy": 0.9314285714285714,
5054
+ "n_examples": 50,
5055
  "per_subtask": {
5056
  "SA": {
5057
+ "accuracy": 0.9444444444444444,
5058
+ "count": 126
5059
  },
5060
  "SC": {
5061
+ "accuracy": 0.9620253164556962,
5062
+ "count": 79
5063
  },
5064
  "SS": {
5065
+ "accuracy": 1.0,
5066
+ "count": 21
5067
  },
5068
  "UC": {
5069
+ "accuracy": 0.8870967741935484,
5070
+ "count": 124
5071
  }
5072
  }
5073
  },
5074
  "add_S2": {
5075
+ "full_accuracy": 0.2,
5076
+ "digit_accuracy": 0.8085714285714286,
5077
+ "n_examples": 50,
5078
  "per_subtask": {
5079
  "SA": {
5080
+ "accuracy": 0.9066666666666666,
5081
+ "count": 75
5082
  },
5083
  "SC": {
5084
+ "accuracy": 0.8387096774193549,
5085
+ "count": 62
5086
  },
5087
  "SS": {
5088
+ "accuracy": 0.6410256410256411,
5089
+ "count": 39
5090
  },
5091
  "UC": {
5092
+ "accuracy": 0.7117117117117117,
5093
+ "count": 111
5094
  },
5095
  "US": {
5096
+ "accuracy": 0.9365079365079365,
5097
+ "count": 63
5098
  }
5099
  }
5100
  },
5101
  "add_S3": {
5102
+ "full_accuracy": 0.24,
5103
+ "digit_accuracy": 0.8085714285714286,
5104
+ "n_examples": 50,
5105
  "per_subtask": {
5106
  "SA": {
5107
+ "accuracy": 0.95,
5108
+ "count": 60
5109
  },
5110
  "SC": {
5111
+ "accuracy": 0.8596491228070176,
5112
+ "count": 57
5113
  },
5114
  "SS": {
5115
+ "accuracy": 1.0,
5116
+ "count": 19
5117
  },
5118
  "UC": {
5119
+ "accuracy": 0.7115384615384616,
5120
+ "count": 104
5121
  },
5122
  "US": {
5123
+ "accuracy": 0.7636363636363637,
5124
+ "count": 110
5125
  }
5126
  }
5127
  },
5128
  "add_S4": {
5129
+ "full_accuracy": 0.24,
5130
+ "digit_accuracy": 0.7657142857142857,
5131
+ "n_examples": 50,
5132
  "per_subtask": {
5133
  "SA": {
5134
+ "accuracy": 1.0,
5135
+ "count": 48
5136
  },
5137
  "SC": {
5138
+ "accuracy": 0.9038461538461539,
5139
+ "count": 52
5140
  },
5141
  "SS": {
5142
+ "accuracy": 1.0,
5143
+ "count": 7
5144
  },
5145
  "UC": {
5146
+ "accuracy": 0.7078651685393258,
5147
+ "count": 89
5148
  },
5149
  "US": {
5150
+ "accuracy": 0.6688311688311688,
5151
+ "count": 154
5152
  }
5153
  }
5154
  },
5155
  "add_S5": {
5156
+ "full_accuracy": 0.08,
5157
+ "digit_accuracy": 0.5228571428571429,
5158
+ "n_examples": 50,
5159
  "per_subtask": {
5160
  "SA": {
5161
  "accuracy": 1.0,
5162
+ "count": 50
5163
  },
5164
  "SC": {
5165
  "accuracy": 0.98,
5166
+ "count": 50
5167
  },
5168
  "UC": {
5169
+ "accuracy": 0.34,
5170
+ "count": 50
5171
  },
5172
  "US": {
5173
+ "accuracy": 0.335,
5174
+ "count": 200
5175
  }
5176
  }
5177
  },
5178
  "add_S6": {
5179
+ "full_accuracy": 0.24,
5180
+ "digit_accuracy": 0.52,
5181
+ "n_examples": 50,
5182
  "per_subtask": {
5183
  "SC": {
5184
  "accuracy": 1.0,
5185
+ "count": 50
5186
  },
5187
  "UC": {
5188
+ "accuracy": 0.52,
5189
+ "count": 50
5190
  },
5191
  "US": {
5192
+ "accuracy": 0.424,
5193
+ "count": 250
5194
  }
5195
  }
5196
  },
5197
  "add_random": {
5198
+ "full_accuracy": 0.5,
5199
+ "digit_accuracy": 0.9157142857142857,
5200
  "n_examples": 200,
5201
  "per_subtask": {
5202
  "SA": {
5203
+ "accuracy": 0.9675174013921114,
5204
+ "count": 431
5205
  },
5206
  "SC": {
5207
+ "accuracy": 0.9240506329113924,
5208
+ "count": 316
5209
  },
5210
  "SS": {
5211
+ "accuracy": 0.9743589743589743,
5212
+ "count": 39
5213
  },
5214
  "UC": {
5215
+ "accuracy": 0.8732142857142857,
5216
+ "count": 560
5217
  },
5218
  "US": {
5219
+ "accuracy": 0.8518518518518519,
5220
+ "count": 54
5221
  }
5222
  }
5223
  },
5224
+ "add_C1": {
5225
+ "full_accuracy": 0.68,
5226
+ "digit_accuracy": 0.9457142857142857,
5227
+ "n_examples": 50,
5228
  "per_subtask": {
5229
  "SA": {
5230
+ "accuracy": 0.992,
5231
+ "count": 250
5232
+ },
5233
+ "SC": {
5234
+ "accuracy": 0.94,
5235
+ "count": 50
5236
+ },
5237
+ "UC": {
5238
+ "accuracy": 0.72,
5239
+ "count": 50
5240
+ }
5241
+ }
5242
+ },
5243
+ "add_C2": {
5244
+ "full_accuracy": 0.6,
5245
+ "digit_accuracy": 0.9342857142857143,
5246
+ "n_examples": 50,
5247
+ "per_subtask": {
5248
+ "SA": {
5249
+ "accuracy": 0.995,
5250
+ "count": 200
5251
  },
5252
  "SC": {
5253
  "accuracy": 0.98,
5254
+ "count": 50
5255
  },
5256
  "UC": {
5257
+ "accuracy": 0.7831325301204819,
5258
+ "count": 83
5259
  },
5260
  "US": {
5261
+ "accuracy": 0.8235294117647058,
5262
+ "count": 17
5263
  }
5264
  }
5265
  },
5266
+ "add_C3": {
5267
+ "full_accuracy": 0.3,
5268
+ "digit_accuracy": 0.8685714285714285,
5269
+ "n_examples": 50,
5270
  "per_subtask": {
5271
  "SA": {
5272
+ "accuracy": 0.9933333333333333,
5273
+ "count": 150
5274
  },
5275
  "SC": {
5276
+ "accuracy": 0.96,
5277
+ "count": 50
5278
+ },
5279
+ "UC": {
5280
+ "accuracy": 0.7,
5281
+ "count": 100
5282
+ },
5283
+ "US": {
5284
+ "accuracy": 0.74,
5285
+ "count": 50
5286
+ }
5287
+ }
5288
+ },
5289
+ "add_C4": {
5290
+ "full_accuracy": 0.38,
5291
+ "digit_accuracy": 0.86,
5292
+ "n_examples": 50,
5293
+ "per_subtask": {
5294
+ "SA": {
5295
+ "accuracy": 1.0,
5296
  "count": 100
5297
  },
5298
+ "SC": {
5299
+ "accuracy": 1.0,
5300
+ "count": 50
5301
+ },
5302
  "UC": {
5303
+ "accuracy": 0.75,
5304
+ "count": 132
5305
  },
5306
  "US": {
5307
+ "accuracy": 0.7647058823529411,
5308
+ "count": 68
5309
  }
5310
  }
5311
  },
5312
  "add_C5": {
5313
+ "full_accuracy": 0.42,
5314
+ "digit_accuracy": 0.8285714285714286,
5315
+ "n_examples": 50,
5316
  "per_subtask": {
5317
  "SA": {
5318
  "accuracy": 1.0,
5319
+ "count": 50
5320
  },
5321
  "SC": {
5322
+ "accuracy": 1.0,
5323
+ "count": 50
5324
  },
5325
  "UC": {
5326
+ "accuracy": 0.7671232876712328,
5327
+ "count": 146
5328
  },
5329
  "US": {
5330
+ "accuracy": 0.75,
5331
+ "count": 104
5332
  }
5333
  }
5334
  },
5335
  "add_C6": {
5336
+ "full_accuracy": 0.36,
5337
+ "digit_accuracy": 0.8714285714285714,
5338
+ "n_examples": 50,
5339
  "per_subtask": {
5340
  "SC": {
5341
  "accuracy": 1.0,
5342
+ "count": 50
5343
  },
5344
  "UC": {
5345
+ "accuracy": 0.8253968253968254,
5346
+ "count": 189
5347
  },
5348
  "US": {
5349
+ "accuracy": 0.8918918918918919,
5350
+ "count": 111
5351
  }
5352
  }
5353
  },
5354
  "sub_M0": {
5355
+ "full_accuracy": 0.6,
5356
+ "digit_accuracy": 0.9257142857142857,
5357
+ "n_examples": 50,
5358
  "per_subtask": {
5359
  "MD": {
5360
+ "accuracy": 0.9174917491749175,
5361
+ "count": 303
5362
  },
5363
  "ME": {
5364
+ "accuracy": 0.9787234042553191,
5365
+ "count": 47
5366
  }
5367
  }
5368
  },
5369
  "sub_M1": {
5370
+ "full_accuracy": 0.42,
5371
+ "digit_accuracy": 0.8914285714285715,
5372
+ "n_examples": 50,
5373
  "per_subtask": {
5374
  "MD": {
5375
+ "accuracy": 0.9645390070921985,
5376
+ "count": 141
5377
  },
5378
  "MB": {
5379
+ "accuracy": 0.8611111111111112,
5380
+ "count": 72
5381
  },
5382
  "ME": {
5383
+ "accuracy": 0.9444444444444444,
5384
+ "count": 18
5385
  },
5386
  "UB": {
5387
+ "accuracy": 0.8151260504201681,
5388
+ "count": 119
5389
  }
5390
  }
5391
  },
5392
  "sub_M2": {
5393
+ "full_accuracy": 0.16,
5394
+ "digit_accuracy": 0.8085714285714286,
5395
+ "n_examples": 50,
5396
  "per_subtask": {
5397
  "MD": {
5398
+ "accuracy": 0.9464285714285714,
5399
+ "count": 112
5400
  },
5401
  "MB": {
5402
+ "accuracy": 0.6981132075471698,
5403
+ "count": 53
5404
  },
5405
  "ME": {
5406
+ "accuracy": 0.9361702127659575,
5407
+ "count": 47
5408
  },
5409
  "UB": {
5410
+ "accuracy": 0.6352941176470588,
5411
+ "count": 85
5412
  },
5413
  "UD": {
5414
+ "accuracy": 0.7924528301886793,
5415
+ "count": 53
5416
  }
5417
  }
5418
  },
5419
  "sub_M3": {
5420
+ "full_accuracy": 0.06,
5421
+ "digit_accuracy": 0.7285714285714285,
5422
+ "n_examples": 50,
5423
  "per_subtask": {
5424
  "MD": {
5425
+ "accuracy": 0.979381443298969,
5426
+ "count": 97
5427
  },
5428
  "MB": {
5429
+ "accuracy": 0.9215686274509803,
5430
+ "count": 51
5431
  },
5432
  "ME": {
5433
+ "accuracy": 0.9629629629629629,
5434
+ "count": 27
5435
  },
5436
  "UB": {
5437
+ "accuracy": 0.5,
5438
+ "count": 74
5439
  },
5440
  "UD": {
5441
+ "accuracy": 0.49504950495049505,
5442
+ "count": 101
5443
  }
5444
  }
5445
  },
5446
  "sub_M4": {
5447
+ "full_accuracy": 0.04,
5448
+ "digit_accuracy": 0.58,
5449
+ "n_examples": 50,
5450
  "per_subtask": {
5451
  "MD": {
5452
  "accuracy": 0.96,
5453
+ "count": 100
5454
  },
5455
  "MB": {
5456
+ "accuracy": 0.98,
5457
+ "count": 50
5458
  },
5459
  "UB": {
5460
+ "accuracy": 0.4,
5461
+ "count": 50
5462
  },
5463
  "UD": {
5464
+ "accuracy": 0.25333333333333335,
5465
+ "count": 150
5466
  }
5467
  }
5468
  },
5469
  "sub_M5": {
5470
  "full_accuracy": 0.04,
5471
+ "digit_accuracy": 0.4514285714285714,
5472
+ "n_examples": 50,
5473
  "per_subtask": {
5474
  "MD": {
5475
  "accuracy": 1.0,
5476
+ "count": 50
5477
  },
5478
  "MB": {
5479
  "accuracy": 1.0,
5480
+ "count": 50
5481
  },
5482
  "UB": {
5483
+ "accuracy": 0.34,
5484
+ "count": 50
5485
  },
5486
  "UD": {
5487
+ "accuracy": 0.205,
5488
+ "count": 200
5489
  }
5490
  }
5491
  },
5492
  "sub_random": {
5493
+ "full_accuracy": 0.415,
5494
+ "digit_accuracy": 0.8857142857142857,
5495
  "n_examples": 200,
5496
  "per_subtask": {
5497
  "MD": {
5498
+ "accuracy": 0.9631578947368421,
5499
+ "count": 570
5500
  },
5501
  "MB": {
5502
+ "accuracy": 0.8772563176895307,
5503
+ "count": 277
5504
  },
5505
  "ME": {
5506
+ "accuracy": 0.9433962264150944,
5507
  "count": 53
5508
  },
5509
  "UB": {
5510
+ "accuracy": 0.7983014861995754,
5511
+ "count": 471
5512
  },
5513
  "UD": {
5514
+ "accuracy": 0.7586206896551724,
5515
+ "count": 29
5516
  }
5517
  }
5518
  },
5519
  "sub_B3": {
5520
+ "full_accuracy": 0.2,
5521
+ "digit_accuracy": 0.8171428571428572,
5522
+ "n_examples": 50,
5523
  "per_subtask": {
5524
  "MD": {
5525
+ "accuracy": 0.9466666666666667,
5526
+ "count": 150
5527
  },
5528
  "MB": {
5529
+ "accuracy": 0.96,
5530
+ "count": 50
5531
  },
5532
  "UB": {
5533
+ "accuracy": 0.7029702970297029,
5534
+ "count": 101
5535
  },
5536
  "UD": {
5537
+ "accuracy": 0.5102040816326531,
5538
+ "count": 49
5539
  }
5540
  }
5541
  },
5542
  "sub_B4": {
5543
+ "full_accuracy": 0.2,
5544
+ "digit_accuracy": 0.7142857142857143,
5545
+ "n_examples": 50,
5546
  "per_subtask": {
5547
  "MD": {
5548
+ "accuracy": 0.95,
5549
+ "count": 100
5550
  },
5551
  "MB": {
5552
  "accuracy": 0.98,
5553
+ "count": 50
5554
  },
5555
  "UB": {
5556
+ "accuracy": 0.5950413223140496,
5557
+ "count": 121
5558
  },
5559
  "UD": {
5560
+ "accuracy": 0.43037974683544306,
5561
+ "count": 79
5562
  }
5563
  }
5564
  },
5565
  "sub_B5": {
5566
+ "full_accuracy": 0.16,
5567
+ "digit_accuracy": 0.6914285714285714,
5568
+ "n_examples": 50,
5569
  "per_subtask": {
5570
  "MD": {
5571
  "accuracy": 1.0,
5572
+ "count": 50
5573
  },
5574
  "MB": {
5575
  "accuracy": 1.0,
5576
+ "count": 50
5577
  },
5578
  "UB": {
5579
+ "accuracy": 0.625,
5580
+ "count": 152
5581
  },
5582
  "UD": {
5583
+ "accuracy": 0.47959183673469385,
5584
+ "count": 98
5585
  }
5586
  }
5587
  }
5588
  },
5589
  "summary": {
5590
+ "overall_accuracy": 0.35333333333333333,
5591
+ "digit_accuracy": 0.8141904761904762,
5592
+ "total_examples": 1500,
5593
+ "n_splits": 24
5594
  }
5595
  },
5596
  "sorl_eval": {
 
5599
  "K": 1,
5600
  "mode": "sorl",
5601
  "n_digits": 6,
5602
+ "n_per_split": 50
5603
  },
5604
  "splits": {
5605
  "add_S0": {
5606
+ "full_accuracy": 0.86,
5607
+ "digit_accuracy": 0.98,
5608
+ "n_examples": 50,
5609
  "per_subtask": {
5610
  "SA": {
5611
+ "accuracy": 0.9796610169491525,
5612
+ "count": 295
5613
  },
5614
  "SS": {
5615
+ "accuracy": 0.9818181818181818,
5616
+ "count": 55
5617
  }
5618
  }
5619
  },
5620
  "add_S1": {
5621
+ "full_accuracy": 0.74,
5622
+ "digit_accuracy": 0.96,
5623
+ "n_examples": 50,
5624
  "per_subtask": {
5625
  "SA": {
5626
+ "accuracy": 0.9761904761904762,
5627
+ "count": 126
5628
  },
5629
  "SC": {
5630
+ "accuracy": 0.9873417721518988,
5631
+ "count": 79
5632
  },
5633
  "SS": {
5634
+ "accuracy": 1.0,
5635
+ "count": 21
5636
  },
5637
  "UC": {
5638
+ "accuracy": 0.9193548387096774,
5639
+ "count": 124
5640
  }
5641
  }
5642
  },
5643
  "add_S2": {
5644
+ "full_accuracy": 0.5,
5645
+ "digit_accuracy": 0.9142857142857143,
5646
+ "n_examples": 50,
5647
  "per_subtask": {
5648
  "SA": {
5649
+ "accuracy": 1.0,
5650
+ "count": 75
5651
  },
5652
  "SC": {
5653
+ "accuracy": 0.9354838709677419,
5654
+ "count": 62
5655
  },
5656
  "SS": {
5657
+ "accuracy": 0.9487179487179487,
5658
+ "count": 39
5659
  },
5660
  "UC": {
5661
+ "accuracy": 0.8108108108108109,
5662
+ "count": 111
5663
  },
5664
  "US": {
5665
+ "accuracy": 0.9523809523809523,
5666
+ "count": 63
5667
  }
5668
  }
5669
  },
5670
  "add_S3": {
5671
+ "full_accuracy": 0.54,
5672
+ "digit_accuracy": 0.9,
5673
+ "n_examples": 50,
5674
  "per_subtask": {
5675
  "SA": {
5676
  "accuracy": 1.0,
5677
+ "count": 60
5678
  },
5679
  "SC": {
5680
+ "accuracy": 0.9824561403508771,
5681
+ "count": 57
5682
  },
5683
  "SS": {
5684
+ "accuracy": 1.0,
5685
+ "count": 19
5686
  },
5687
  "UC": {
5688
+ "accuracy": 0.7980769230769231,
5689
+ "count": 104
5690
  },
5691
  "US": {
5692
+ "accuracy": 0.8818181818181818,
5693
+ "count": 110
5694
  }
5695
  }
5696
  },
5697
  "add_S4": {
5698
  "full_accuracy": 0.4,
5699
+ "digit_accuracy": 0.8485714285714285,
5700
+ "n_examples": 50,
5701
  "per_subtask": {
5702
  "SA": {
5703
+ "accuracy": 1.0,
5704
+ "count": 48
5705
  },
5706
  "SC": {
5707
+ "accuracy": 0.9615384615384616,
5708
+ "count": 52
5709
  },
5710
  "SS": {
5711
  "accuracy": 1.0,
5712
+ "count": 7
5713
  },
5714
  "UC": {
5715
+ "accuracy": 0.7528089887640449,
5716
+ "count": 89
5717
  },
5718
  "US": {
5719
+ "accuracy": 0.8116883116883117,
5720
+ "count": 154
5721
  }
5722
  }
5723
  },
5724
  "add_S5": {
5725
+ "full_accuracy": 0.06,
5726
+ "digit_accuracy": 0.58,
5727
+ "n_examples": 50,
5728
  "per_subtask": {
5729
  "SA": {
5730
  "accuracy": 1.0,
5731
+ "count": 50
5732
  },
5733
  "SC": {
5734
+ "accuracy": 0.98,
5735
+ "count": 50
5736
  },
5737
  "UC": {
5738
+ "accuracy": 0.26,
5739
+ "count": 50
5740
  },
5741
  "US": {
5742
+ "accuracy": 0.455,
5743
+ "count": 200
5744
  }
5745
  }
5746
  },
5747
  "add_S6": {
5748
+ "full_accuracy": 0.14,
5749
+ "digit_accuracy": 0.5285714285714286,
5750
+ "n_examples": 50,
5751
  "per_subtask": {
5752
  "SC": {
5753
  "accuracy": 1.0,
5754
+ "count": 50
5755
  },
5756
  "UC": {
5757
+ "accuracy": 0.24,
5758
+ "count": 50
5759
  },
5760
  "US": {
5761
+ "accuracy": 0.492,
5762
+ "count": 250
5763
  }
5764
  }
5765
  },
5766
  "add_random": {
5767
+ "full_accuracy": 0.71,
5768
+ "digit_accuracy": 0.9528571428571428,
5769
  "n_examples": 200,
5770
  "per_subtask": {
5771
  "SA": {
5772
+ "accuracy": 0.9791183294663574,
5773
+ "count": 431
5774
  },
5775
  "SC": {
5776
+ "accuracy": 0.9430379746835443,
5777
+ "count": 316
5778
  },
5779
  "SS": {
5780
+ "accuracy": 0.9743589743589743,
5781
+ "count": 39
5782
  },
5783
  "UC": {
5784
+ "accuracy": 0.9357142857142857,
5785
+ "count": 560
5786
  },
5787
  "US": {
5788
+ "accuracy": 0.9629629629629629,
5789
+ "count": 54
5790
  }
5791
  }
5792
  },
5793
+ "add_C1": {
5794
+ "full_accuracy": 0.78,
5795
+ "digit_accuracy": 0.9685714285714285,
5796
+ "n_examples": 50,
5797
  "per_subtask": {
5798
  "SA": {
5799
+ "accuracy": 1.0,
5800
+ "count": 250
5801
  },
5802
  "SC": {
5803
+ "accuracy": 1.0,
5804
+ "count": 50
5805
  },
5806
  "UC": {
5807
+ "accuracy": 0.78,
5808
+ "count": 50
5809
+ }
5810
+ }
5811
+ },
5812
+ "add_C2": {
5813
+ "full_accuracy": 0.58,
5814
+ "digit_accuracy": 0.9228571428571428,
5815
+ "n_examples": 50,
5816
+ "per_subtask": {
5817
+ "SA": {
5818
+ "accuracy": 0.985,
5819
+ "count": 200
5820
+ },
5821
+ "SC": {
5822
+ "accuracy": 0.96,
5823
+ "count": 50
5824
+ },
5825
+ "UC": {
5826
+ "accuracy": 0.7831325301204819,
5827
+ "count": 83
5828
  },
5829
  "US": {
5830
+ "accuracy": 0.7647058823529411,
5831
+ "count": 17
5832
  }
5833
  }
5834
  },
5835
+ "add_C3": {
5836
+ "full_accuracy": 0.5,
5837
+ "digit_accuracy": 0.8942857142857142,
5838
+ "n_examples": 50,
5839
  "per_subtask": {
5840
  "SA": {
5841
  "accuracy": 1.0,
5842
+ "count": 150
5843
  },
5844
  "SC": {
5845
+ "accuracy": 1.0,
5846
+ "count": 50
5847
+ },
5848
+ "UC": {
5849
+ "accuracy": 0.75,
5850
+ "count": 100
5851
+ },
5852
+ "US": {
5853
+ "accuracy": 0.76,
5854
+ "count": 50
5855
+ }
5856
+ }
5857
+ },
5858
+ "add_C4": {
5859
+ "full_accuracy": 0.54,
5860
+ "digit_accuracy": 0.9057142857142857,
5861
+ "n_examples": 50,
5862
+ "per_subtask": {
5863
+ "SA": {
5864
  "accuracy": 0.99,
5865
  "count": 100
5866
  },
5867
+ "SC": {
5868
+ "accuracy": 1.0,
5869
+ "count": 50
5870
+ },
5871
  "UC": {
5872
+ "accuracy": 0.8636363636363636,
5873
+ "count": 132
5874
  },
5875
  "US": {
5876
+ "accuracy": 0.7941176470588235,
5877
+ "count": 68
5878
  }
5879
  }
5880
  },
5881
  "add_C5": {
5882
+ "full_accuracy": 0.54,
5883
+ "digit_accuracy": 0.88,
5884
+ "n_examples": 50,
5885
  "per_subtask": {
5886
  "SA": {
5887
  "accuracy": 1.0,
5888
+ "count": 50
5889
  },
5890
  "SC": {
5891
+ "accuracy": 1.0,
5892
+ "count": 50
5893
  },
5894
  "UC": {
5895
+ "accuracy": 0.815068493150685,
5896
+ "count": 146
5897
  },
5898
  "US": {
5899
+ "accuracy": 0.8557692307692307,
5900
+ "count": 104
5901
  }
5902
  }
5903
  },
5904
  "add_C6": {
5905
+ "full_accuracy": 0.48,
5906
+ "digit_accuracy": 0.8771428571428571,
5907
+ "n_examples": 50,
5908
  "per_subtask": {
5909
  "SC": {
5910
  "accuracy": 1.0,
5911
+ "count": 50
5912
  },
5913
  "UC": {
5914
+ "accuracy": 0.8518518518518519,
5915
+ "count": 189
5916
  },
5917
  "US": {
5918
+ "accuracy": 0.8648648648648649,
5919
+ "count": 111
5920
  }
5921
  }
5922
  },
5923
  "sub_M0": {
5924
+ "full_accuracy": 0.88,
5925
+ "digit_accuracy": 0.98,
5926
+ "n_examples": 50,
5927
  "per_subtask": {
5928
  "MD": {
5929
+ "accuracy": 0.976897689768977,
5930
+ "count": 303
5931
  },
5932
  "ME": {
5933
  "accuracy": 1.0,
5934
+ "count": 47
5935
  }
5936
  }
5937
  },
5938
  "sub_M1": {
5939
+ "full_accuracy": 0.56,
5940
+ "digit_accuracy": 0.9257142857142857,
5941
+ "n_examples": 50,
5942
  "per_subtask": {
5943
  "MD": {
5944
+ "accuracy": 0.9645390070921985,
5945
+ "count": 141
5946
  },
5947
  "MB": {
5948
+ "accuracy": 0.9444444444444444,
5949
+ "count": 72
5950
  },
5951
  "ME": {
5952
  "accuracy": 1.0,
5953
+ "count": 18
5954
  },
5955
  "UB": {
5956
  "accuracy": 0.8571428571428571,
5957
+ "count": 119
5958
  }
5959
  }
5960
  },
5961
  "sub_M2": {
5962
+ "full_accuracy": 0.3,
5963
+ "digit_accuracy": 0.8542857142857143,
5964
+ "n_examples": 50,
5965
  "per_subtask": {
5966
  "MD": {
5967
+ "accuracy": 0.9910714285714286,
5968
+ "count": 112
5969
  },
5970
  "MB": {
5971
+ "accuracy": 0.8301886792452831,
5972
+ "count": 53
5973
  },
5974
  "ME": {
5975
+ "accuracy": 0.9787234042553191,
5976
+ "count": 47
5977
  },
5978
  "UB": {
5979
+ "accuracy": 0.6,
5980
+ "count": 85
5981
  },
5982
  "UD": {
5983
+ "accuracy": 0.8867924528301887,
5984
+ "count": 53
5985
  }
5986
  }
5987
  },
5988
  "sub_M3": {
5989
+ "full_accuracy": 0.24,
5990
+ "digit_accuracy": 0.7971428571428572,
5991
+ "n_examples": 50,
5992
  "per_subtask": {
5993
  "MD": {
5994
+ "accuracy": 0.9896907216494846,
5995
+ "count": 97
5996
  },
5997
  "MB": {
5998
+ "accuracy": 0.9607843137254902,
5999
+ "count": 51
6000
  },
6001
  "ME": {
6002
+ "accuracy": 0.9629629629629629,
6003
+ "count": 27
6004
  },
6005
  "UB": {
6006
+ "accuracy": 0.5405405405405406,
6007
+ "count": 74
6008
  },
6009
  "UD": {
6010
+ "accuracy": 0.6732673267326733,
6011
+ "count": 101
6012
  }
6013
  }
6014
  },
6015
  "sub_M4": {
6016
+ "full_accuracy": 0.1,
6017
+ "digit_accuracy": 0.6257142857142857,
6018
+ "n_examples": 50,
6019
  "per_subtask": {
6020
  "MD": {
6021
+ "accuracy": 0.98,
6022
+ "count": 100
6023
  },
6024
  "MB": {
6025
+ "accuracy": 0.96,
6026
+ "count": 50
6027
  },
6028
  "UB": {
6029
+ "accuracy": 0.36,
6030
+ "count": 50
6031
  },
6032
  "UD": {
6033
+ "accuracy": 0.36666666666666664,
6034
+ "count": 150
6035
  }
6036
  }
6037
  },
6038
  "sub_M5": {
6039
+ "full_accuracy": 0.06,
6040
+ "digit_accuracy": 0.48857142857142855,
6041
+ "n_examples": 50,
6042
  "per_subtask": {
6043
  "MD": {
6044
  "accuracy": 1.0,
6045
+ "count": 50
6046
  },
6047
  "MB": {
6048
+ "accuracy": 0.88,
6049
+ "count": 50
6050
  },
6051
  "UB": {
6052
+ "accuracy": 0.3,
6053
+ "count": 50
6054
  },
6055
  "UD": {
6056
+ "accuracy": 0.31,
6057
+ "count": 200
6058
  }
6059
  }
6060
  },
6061
  "sub_random": {
6062
+ "full_accuracy": 0.59,
6063
+ "digit_accuracy": 0.9321428571428572,
6064
  "n_examples": 200,
6065
  "per_subtask": {
6066
  "MD": {
6067
+ "accuracy": 0.9754385964912281,
6068
+ "count": 570
6069
  },
6070
  "MB": {
6071
+ "accuracy": 0.9638989169675091,
6072
+ "count": 277
6073
  },
6074
  "ME": {
6075
+ "accuracy": 0.9811320754716981,
6076
  "count": 53
6077
  },
6078
  "UB": {
6079
+ "accuracy": 0.8598726114649682,
6080
+ "count": 471
6081
  },
6082
  "UD": {
6083
+ "accuracy": 0.8620689655172413,
6084
+ "count": 29
6085
  }
6086
  }
6087
  },
6088
  "sub_B3": {
6089
+ "full_accuracy": 0.26,
6090
+ "digit_accuracy": 0.8142857142857143,
6091
+ "n_examples": 50,
6092
  "per_subtask": {
6093
  "MD": {
6094
+ "accuracy": 0.9666666666666667,
6095
+ "count": 150
6096
  },
6097
  "MB": {
6098
+ "accuracy": 0.98,
6099
+ "count": 50
6100
  },
6101
  "UB": {
6102
+ "accuracy": 0.6237623762376238,
6103
+ "count": 101
6104
  },
6105
  "UD": {
6106
+ "accuracy": 0.5714285714285714,
6107
+ "count": 49
6108
  }
6109
  }
6110
  },
6111
  "sub_B4": {
6112
+ "full_accuracy": 0.28,
6113
+ "digit_accuracy": 0.7857142857142857,
6114
+ "n_examples": 50,
6115
  "per_subtask": {
6116
  "MD": {
 
 
 
 
6117
  "accuracy": 0.99,
6118
  "count": 100
6119
  },
6120
+ "MB": {
6121
+ "accuracy": 0.98,
6122
+ "count": 50
6123
+ },
6124
  "UB": {
6125
+ "accuracy": 0.6694214876033058,
6126
+ "count": 121
6127
  },
6128
  "UD": {
6129
+ "accuracy": 0.5822784810126582,
6130
+ "count": 79
6131
  }
6132
  }
6133
  },
6134
  "sub_B5": {
6135
+ "full_accuracy": 0.14,
6136
+ "digit_accuracy": 0.7571428571428571,
6137
+ "n_examples": 50,
6138
  "per_subtask": {
6139
  "MD": {
6140
  "accuracy": 1.0,
6141
+ "count": 50
6142
  },
6143
  "MB": {
6144
+ "accuracy": 1.0,
6145
+ "count": 50
6146
  },
6147
  "UB": {
6148
+ "accuracy": 0.7171052631578947,
6149
+ "count": 152
6150
  },
6151
  "UD": {
6152
+ "accuracy": 0.5714285714285714,
6153
+ "count": 98
6154
  }
6155
  }
6156
  }
6157
  },
6158
  "summary": {
6159
+ "overall_accuracy": 0.4886666666666667,
6160
+ "digit_accuracy": 0.8573333333333333,
6161
+ "total_examples": 1500,
6162
+ "n_splits": 24
6163
  }
6164
  },
6165
  "sorl_overall_accuracy": 0.44375,