amirali1985 commited on
Commit
bfc88f8
·
verified ·
1 Parent(s): ef7c6f6

Upload add_sub_sorl_v1_abs30_K1_10K/metrics.json with huggingface_hub

Browse files
add_sub_sorl_v1_abs30_K1_10K/metrics.json CHANGED
@@ -550,502 +550,567 @@
550
  "K": null,
551
  "mode": "sft",
552
  "n_digits": 6,
553
- "n_per_split": 100
554
  },
555
  "splits": {
556
  "add_S0": {
557
- "full_accuracy": 0.97,
558
- "n_examples": 100,
 
559
  "per_subtask": {
560
  "SA": {
561
- "accuracy": 0.9950413223140496,
562
- "count": 605
563
  },
564
  "SS": {
565
  "accuracy": 1.0,
566
- "count": 95
567
  }
568
  }
569
  },
570
  "add_S1": {
571
  "full_accuracy": 1.0,
572
- "n_examples": 100,
 
573
  "per_subtask": {
574
  "SA": {
575
  "accuracy": 1.0,
576
- "count": 204
577
  },
578
  "SC": {
579
  "accuracy": 1.0,
580
- "count": 169
581
  },
582
  "SS": {
583
  "accuracy": 1.0,
584
- "count": 31
585
  },
586
  "UC": {
587
  "accuracy": 1.0,
588
- "count": 296
589
  }
590
  }
591
  },
592
  "add_S2": {
593
  "full_accuracy": 0.96,
594
- "n_examples": 100,
 
595
  "per_subtask": {
596
  "SA": {
597
  "accuracy": 1.0,
598
- "count": 163
599
  },
600
  "SC": {
601
- "accuracy": 0.9769230769230769,
602
- "count": 130
603
  },
604
  "SS": {
605
- "accuracy": 0.9770114942528736,
606
- "count": 87
607
  },
608
  "UC": {
609
  "accuracy": 1.0,
610
- "count": 203
611
  },
612
  "US": {
613
  "accuracy": 1.0,
614
- "count": 117
615
  }
616
  }
617
  },
618
  "add_S3": {
619
- "full_accuracy": 0.64,
620
- "n_examples": 100,
 
621
  "per_subtask": {
622
  "SA": {
623
  "accuracy": 1.0,
624
- "count": 121
625
  },
626
  "SC": {
627
- "accuracy": 0.9834710743801653,
628
- "count": 121
629
  },
630
  "SS": {
631
  "accuracy": 1.0,
632
- "count": 49
633
  },
634
  "UC": {
635
- "accuracy": 0.8172043010752689,
636
- "count": 186
637
  },
638
  "US": {
639
  "accuracy": 1.0,
640
- "count": 223
641
  }
642
  }
643
  },
644
  "add_S4": {
645
- "full_accuracy": 0.59,
646
- "n_examples": 100,
 
647
  "per_subtask": {
648
  "SA": {
649
  "accuracy": 1.0,
650
- "count": 104
651
  },
652
  "SC": {
653
  "accuracy": 1.0,
654
- "count": 106
655
  },
656
  "SS": {
657
  "accuracy": 1.0,
658
- "count": 23
659
  },
660
  "UC": {
661
- "accuracy": 0.8125,
662
- "count": 160
663
  },
664
  "US": {
665
- "accuracy": 0.9153094462540716,
666
- "count": 307
667
  }
668
  }
669
  },
670
  "add_S5": {
671
- "full_accuracy": 0.62,
672
- "n_examples": 100,
 
673
  "per_subtask": {
674
  "SA": {
675
  "accuracy": 1.0,
676
- "count": 100
677
  },
678
  "SC": {
679
  "accuracy": 1.0,
680
- "count": 100
681
  },
682
  "UC": {
683
- "accuracy": 0.71,
684
- "count": 100
685
  },
686
  "US": {
687
- "accuracy": 0.86,
688
- "count": 400
689
  }
690
  }
691
  },
692
  "add_S6": {
693
  "full_accuracy": 1.0,
694
- "n_examples": 100,
 
695
  "per_subtask": {
696
  "SC": {
697
  "accuracy": 1.0,
698
- "count": 100
699
  },
700
  "UC": {
701
  "accuracy": 1.0,
702
- "count": 100
703
  },
704
  "US": {
705
  "accuracy": 1.0,
706
- "count": 500
707
  }
708
  }
709
  },
710
  "add_random": {
711
  "full_accuracy": 0.99,
 
712
  "n_examples": 200,
713
  "per_subtask": {
714
  "SA": {
715
  "accuracy": 1.0,
716
- "count": 447
717
  },
718
  "SC": {
719
- "accuracy": 0.996875,
720
- "count": 320
721
  },
722
  "SS": {
723
  "accuracy": 1.0,
724
- "count": 56
725
  },
726
  "UC": {
727
- "accuracy": 0.998109640831758,
728
- "count": 529
729
  },
730
  "US": {
731
  "accuracy": 1.0,
732
- "count": 48
733
  }
734
  }
735
  },
736
- "add_C3": {
737
- "full_accuracy": 0.84,
738
- "n_examples": 100,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
739
  "per_subtask": {
740
  "SA": {
741
  "accuracy": 1.0,
742
- "count": 300
743
  },
744
  "SC": {
745
  "accuracy": 1.0,
746
- "count": 100
747
  },
748
  "UC": {
749
- "accuracy": 0.917098445595855,
750
- "count": 193
751
  },
752
  "US": {
753
  "accuracy": 1.0,
754
- "count": 107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
755
  }
756
  }
757
  },
758
  "add_C4": {
759
- "full_accuracy": 0.78,
760
- "n_examples": 100,
 
761
  "per_subtask": {
762
  "SA": {
763
  "accuracy": 1.0,
764
- "count": 200
765
  },
766
  "SC": {
767
  "accuracy": 1.0,
768
- "count": 100
769
  },
770
  "UC": {
771
- "accuracy": 0.92578125,
772
- "count": 256
773
  },
774
  "US": {
775
- "accuracy": 0.9583333333333334,
776
- "count": 144
777
  }
778
  }
779
  },
780
  "add_C5": {
781
- "full_accuracy": 0.72,
782
- "n_examples": 100,
 
783
  "per_subtask": {
784
  "SA": {
785
  "accuracy": 1.0,
786
- "count": 100
787
  },
788
  "SC": {
789
  "accuracy": 1.0,
790
- "count": 100
791
  },
792
  "UC": {
793
- "accuracy": 0.9313725490196079,
794
- "count": 306
795
  },
796
  "US": {
797
- "accuracy": 0.9536082474226805,
798
- "count": 194
799
  }
800
  }
801
  },
802
  "add_C6": {
803
- "full_accuracy": 0.82,
804
- "n_examples": 100,
 
805
  "per_subtask": {
806
  "SC": {
807
  "accuracy": 1.0,
808
- "count": 100
809
  },
810
  "UC": {
811
- "accuracy": 0.953551912568306,
812
- "count": 366
813
  },
814
  "US": {
815
- "accuracy": 0.9957264957264957,
816
- "count": 234
817
  }
818
  }
819
  },
820
  "sub_M0": {
821
- "full_accuracy": 0.96,
822
- "n_examples": 100,
 
823
  "per_subtask": {
824
  "MD": {
825
- "accuracy": 0.9933444259567388,
826
- "count": 601
827
  },
828
  "ME": {
829
  "accuracy": 1.0,
830
- "count": 99
831
  }
832
  }
833
  },
834
  "sub_M1": {
835
  "full_accuracy": 1.0,
836
- "n_examples": 100,
 
837
  "per_subtask": {
838
  "MD": {
839
  "accuracy": 1.0,
840
- "count": 279
841
  },
842
  "MB": {
843
  "accuracy": 1.0,
844
- "count": 145
845
  },
846
  "ME": {
847
  "accuracy": 1.0,
848
- "count": 24
849
  },
850
  "UB": {
851
  "accuracy": 1.0,
852
- "count": 252
853
  }
854
  }
855
  },
856
  "sub_M2": {
857
  "full_accuracy": 1.0,
858
- "n_examples": 100,
 
859
  "per_subtask": {
860
  "MD": {
861
  "accuracy": 1.0,
862
- "count": 213
863
  },
864
  "MB": {
865
  "accuracy": 1.0,
866
- "count": 113
867
  },
868
  "ME": {
869
  "accuracy": 1.0,
870
- "count": 85
871
  },
872
  "UB": {
873
  "accuracy": 1.0,
874
- "count": 181
875
  },
876
  "UD": {
877
  "accuracy": 1.0,
878
- "count": 108
879
  }
880
  }
881
  },
882
  "sub_M3": {
883
- "full_accuracy": 0.35,
884
- "n_examples": 100,
 
885
  "per_subtask": {
886
  "MD": {
887
  "accuracy": 1.0,
888
- "count": 179
889
  },
890
  "MB": {
891
  "accuracy": 1.0,
892
- "count": 103
893
  },
894
  "ME": {
895
  "accuracy": 1.0,
896
- "count": 56
897
  },
898
  "UB": {
899
- "accuracy": 0.5637583892617449,
900
- "count": 149
901
  },
902
  "UD": {
903
  "accuracy": 1.0,
904
- "count": 213
905
  }
906
  }
907
  },
908
  "sub_M4": {
909
- "full_accuracy": 0.03,
910
- "n_examples": 100,
 
911
  "per_subtask": {
912
  "MD": {
913
  "accuracy": 1.0,
914
- "count": 200
915
  },
916
  "MB": {
917
  "accuracy": 1.0,
918
- "count": 100
919
  },
920
  "UB": {
921
- "accuracy": 0.49,
922
- "count": 100
923
  },
924
  "UD": {
925
- "accuracy": 0.6766666666666666,
926
- "count": 300
927
  }
928
  }
929
  },
930
  "sub_M5": {
931
  "full_accuracy": 0.0,
932
- "n_examples": 100,
 
933
  "per_subtask": {
934
  "MD": {
935
  "accuracy": 1.0,
936
- "count": 100
937
  },
938
  "MB": {
939
  "accuracy": 1.0,
940
- "count": 100
941
  },
942
  "UB": {
943
- "accuracy": 0.38,
944
- "count": 100
945
  },
946
  "UD": {
947
- "accuracy": 0.4975,
948
- "count": 400
949
  }
950
  }
951
  },
952
  "sub_random": {
953
- "full_accuracy": 0.995,
 
954
  "n_examples": 200,
955
  "per_subtask": {
956
  "MD": {
957
  "accuracy": 1.0,
958
- "count": 600
959
  },
960
  "MB": {
961
  "accuracy": 1.0,
962
- "count": 267
963
  },
964
  "ME": {
965
  "accuracy": 1.0,
966
  "count": 53
967
  },
968
  "UB": {
969
- "accuracy": 0.9977220956719818,
970
- "count": 439
971
  },
972
  "UD": {
973
  "accuracy": 1.0,
974
- "count": 41
975
  }
976
  }
977
  },
978
  "sub_B3": {
979
- "full_accuracy": 0.84,
980
- "n_examples": 100,
 
981
  "per_subtask": {
982
  "MD": {
983
  "accuracy": 1.0,
984
- "count": 300
985
  },
986
  "MB": {
987
  "accuracy": 1.0,
988
- "count": 100
989
  },
990
  "UB": {
991
- "accuracy": 0.9187817258883249,
992
- "count": 197
993
  },
994
  "UD": {
995
  "accuracy": 1.0,
996
- "count": 103
997
  }
998
  }
999
  },
1000
  "sub_B4": {
1001
- "full_accuracy": 0.76,
1002
- "n_examples": 100,
 
1003
  "per_subtask": {
1004
  "MD": {
1005
  "accuracy": 1.0,
1006
- "count": 200
1007
  },
1008
  "MB": {
1009
  "accuracy": 1.0,
1010
- "count": 100
1011
  },
1012
  "UB": {
1013
- "accuracy": 0.9311740890688259,
1014
- "count": 247
1015
  },
1016
  "UD": {
1017
- "accuracy": 0.9084967320261438,
1018
- "count": 153
1019
  }
1020
  }
1021
  },
1022
  "sub_B5": {
1023
- "full_accuracy": 0.59,
1024
- "n_examples": 100,
 
1025
  "per_subtask": {
1026
  "MD": {
1027
  "accuracy": 1.0,
1028
- "count": 100
1029
  },
1030
  "MB": {
1031
  "accuracy": 1.0,
1032
- "count": 100
1033
  },
1034
  "UB": {
1035
- "accuracy": 0.8859060402684564,
1036
- "count": 298
1037
  },
1038
  "UD": {
1039
- "accuracy": 0.8861386138613861,
1040
- "count": 202
1041
  }
1042
  }
1043
  }
1044
  },
1045
  "summary": {
1046
- "overall_accuracy": 0.7683333333333333,
1047
- "total_examples": 2400,
1048
- "n_splits": 22
 
1049
  }
1050
  },
1051
  "sorl_eval": {
@@ -1054,416 +1119,477 @@
1054
  "K": 1,
1055
  "mode": "sorl",
1056
  "n_digits": 6,
1057
- "n_per_split": 100
1058
  },
1059
  "splits": {
1060
  "add_S0": {
1061
  "full_accuracy": 1.0,
1062
- "n_examples": 100,
 
1063
  "per_subtask": {
1064
  "SA": {
1065
  "accuracy": 1.0,
1066
- "count": 605
1067
  },
1068
  "SS": {
1069
  "accuracy": 1.0,
1070
- "count": 95
1071
  }
1072
  }
1073
  },
1074
  "add_S1": {
1075
  "full_accuracy": 1.0,
1076
- "n_examples": 100,
 
1077
  "per_subtask": {
1078
  "SA": {
1079
  "accuracy": 1.0,
1080
- "count": 204
1081
  },
1082
  "SC": {
1083
  "accuracy": 1.0,
1084
- "count": 169
1085
  },
1086
  "SS": {
1087
  "accuracy": 1.0,
1088
- "count": 31
1089
  },
1090
  "UC": {
1091
  "accuracy": 1.0,
1092
- "count": 296
1093
  }
1094
  }
1095
  },
1096
  "add_S2": {
1097
  "full_accuracy": 1.0,
1098
- "n_examples": 100,
 
1099
  "per_subtask": {
1100
  "SA": {
1101
  "accuracy": 1.0,
1102
- "count": 163
1103
  },
1104
  "SC": {
1105
  "accuracy": 1.0,
1106
- "count": 130
1107
  },
1108
  "SS": {
1109
  "accuracy": 1.0,
1110
- "count": 87
1111
  },
1112
  "UC": {
1113
  "accuracy": 1.0,
1114
- "count": 203
1115
  },
1116
  "US": {
1117
  "accuracy": 1.0,
1118
- "count": 117
1119
  }
1120
  }
1121
  },
1122
  "add_S3": {
1123
  "full_accuracy": 1.0,
1124
- "n_examples": 100,
 
1125
  "per_subtask": {
1126
  "SA": {
1127
  "accuracy": 1.0,
1128
- "count": 121
1129
  },
1130
  "SC": {
1131
  "accuracy": 1.0,
1132
- "count": 121
1133
  },
1134
  "SS": {
1135
  "accuracy": 1.0,
1136
- "count": 49
1137
  },
1138
  "UC": {
1139
  "accuracy": 1.0,
1140
- "count": 186
1141
  },
1142
  "US": {
1143
  "accuracy": 1.0,
1144
- "count": 223
1145
  }
1146
  }
1147
  },
1148
  "add_S4": {
1149
  "full_accuracy": 1.0,
1150
- "n_examples": 100,
 
1151
  "per_subtask": {
1152
  "SA": {
1153
  "accuracy": 1.0,
1154
- "count": 104
1155
  },
1156
  "SC": {
1157
  "accuracy": 1.0,
1158
- "count": 106
1159
  },
1160
  "SS": {
1161
  "accuracy": 1.0,
1162
- "count": 23
1163
  },
1164
  "UC": {
1165
  "accuracy": 1.0,
1166
- "count": 160
1167
  },
1168
  "US": {
1169
  "accuracy": 1.0,
1170
- "count": 307
1171
  }
1172
  }
1173
  },
1174
  "add_S5": {
1175
- "full_accuracy": 0.99,
1176
- "n_examples": 100,
 
1177
  "per_subtask": {
1178
  "SA": {
1179
  "accuracy": 1.0,
1180
- "count": 100
1181
  },
1182
  "SC": {
1183
  "accuracy": 1.0,
1184
- "count": 100
1185
  },
1186
  "UC": {
1187
- "accuracy": 0.99,
1188
- "count": 100
1189
  },
1190
  "US": {
1191
  "accuracy": 1.0,
1192
- "count": 400
1193
  }
1194
  }
1195
  },
1196
  "add_S6": {
1197
  "full_accuracy": 1.0,
1198
- "n_examples": 100,
 
1199
  "per_subtask": {
1200
  "SC": {
1201
  "accuracy": 1.0,
1202
- "count": 100
1203
  },
1204
  "UC": {
1205
  "accuracy": 1.0,
1206
- "count": 100
1207
  },
1208
  "US": {
1209
  "accuracy": 1.0,
1210
- "count": 500
1211
  }
1212
  }
1213
  },
1214
  "add_random": {
1215
  "full_accuracy": 1.0,
 
1216
  "n_examples": 200,
1217
  "per_subtask": {
1218
  "SA": {
1219
  "accuracy": 1.0,
1220
- "count": 447
1221
  },
1222
  "SC": {
1223
  "accuracy": 1.0,
1224
- "count": 320
1225
  },
1226
  "SS": {
1227
  "accuracy": 1.0,
1228
- "count": 56
1229
  },
1230
  "UC": {
1231
  "accuracy": 1.0,
1232
- "count": 529
1233
  },
1234
  "US": {
1235
  "accuracy": 1.0,
1236
- "count": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1237
  }
1238
  }
1239
  },
1240
  "add_C3": {
1241
  "full_accuracy": 1.0,
1242
- "n_examples": 100,
 
1243
  "per_subtask": {
1244
  "SA": {
1245
  "accuracy": 1.0,
1246
- "count": 300
1247
  },
1248
  "SC": {
1249
  "accuracy": 1.0,
1250
- "count": 100
1251
  },
1252
  "UC": {
1253
  "accuracy": 1.0,
1254
- "count": 193
1255
  },
1256
  "US": {
1257
  "accuracy": 1.0,
1258
- "count": 107
1259
  }
1260
  }
1261
  },
1262
  "add_C4": {
1263
  "full_accuracy": 1.0,
1264
- "n_examples": 100,
 
1265
  "per_subtask": {
1266
  "SA": {
1267
  "accuracy": 1.0,
1268
- "count": 200
1269
  },
1270
  "SC": {
1271
  "accuracy": 1.0,
1272
- "count": 100
1273
  },
1274
  "UC": {
1275
  "accuracy": 1.0,
1276
- "count": 256
1277
  },
1278
  "US": {
1279
  "accuracy": 1.0,
1280
- "count": 144
1281
  }
1282
  }
1283
  },
1284
  "add_C5": {
1285
  "full_accuracy": 1.0,
1286
- "n_examples": 100,
 
1287
  "per_subtask": {
1288
  "SA": {
1289
  "accuracy": 1.0,
1290
- "count": 100
1291
  },
1292
  "SC": {
1293
  "accuracy": 1.0,
1294
- "count": 100
1295
  },
1296
  "UC": {
1297
  "accuracy": 1.0,
1298
- "count": 306
1299
  },
1300
  "US": {
1301
  "accuracy": 1.0,
1302
- "count": 194
1303
  }
1304
  }
1305
  },
1306
  "add_C6": {
1307
  "full_accuracy": 1.0,
1308
- "n_examples": 100,
 
1309
  "per_subtask": {
1310
  "SC": {
1311
  "accuracy": 1.0,
1312
- "count": 100
1313
  },
1314
  "UC": {
1315
  "accuracy": 1.0,
1316
- "count": 366
1317
  },
1318
  "US": {
1319
  "accuracy": 1.0,
1320
- "count": 234
1321
  }
1322
  }
1323
  },
1324
  "sub_M0": {
1325
  "full_accuracy": 1.0,
1326
- "n_examples": 100,
 
1327
  "per_subtask": {
1328
  "MD": {
1329
  "accuracy": 1.0,
1330
- "count": 601
1331
  },
1332
  "ME": {
1333
  "accuracy": 1.0,
1334
- "count": 99
1335
  }
1336
  }
1337
  },
1338
  "sub_M1": {
1339
  "full_accuracy": 1.0,
1340
- "n_examples": 100,
 
1341
  "per_subtask": {
1342
  "MD": {
1343
  "accuracy": 1.0,
1344
- "count": 279
1345
  },
1346
  "MB": {
1347
  "accuracy": 1.0,
1348
- "count": 145
1349
  },
1350
  "ME": {
1351
  "accuracy": 1.0,
1352
- "count": 24
1353
  },
1354
  "UB": {
1355
  "accuracy": 1.0,
1356
- "count": 252
1357
  }
1358
  }
1359
  },
1360
  "sub_M2": {
1361
  "full_accuracy": 1.0,
1362
- "n_examples": 100,
 
1363
  "per_subtask": {
1364
  "MD": {
1365
  "accuracy": 1.0,
1366
- "count": 213
1367
  },
1368
  "MB": {
1369
  "accuracy": 1.0,
1370
- "count": 113
1371
  },
1372
  "ME": {
1373
  "accuracy": 1.0,
1374
- "count": 85
1375
  },
1376
  "UB": {
1377
  "accuracy": 1.0,
1378
- "count": 181
1379
  },
1380
  "UD": {
1381
  "accuracy": 1.0,
1382
- "count": 108
1383
  }
1384
  }
1385
  },
1386
  "sub_M3": {
1387
  "full_accuracy": 1.0,
1388
- "n_examples": 100,
 
1389
  "per_subtask": {
1390
  "MD": {
1391
  "accuracy": 1.0,
1392
- "count": 179
1393
  },
1394
  "MB": {
1395
  "accuracy": 1.0,
1396
- "count": 103
1397
  },
1398
  "ME": {
1399
  "accuracy": 1.0,
1400
- "count": 56
1401
  },
1402
  "UB": {
1403
  "accuracy": 1.0,
1404
- "count": 149
1405
  },
1406
  "UD": {
1407
  "accuracy": 1.0,
1408
- "count": 213
1409
  }
1410
  }
1411
  },
1412
  "sub_M4": {
1413
  "full_accuracy": 1.0,
1414
- "n_examples": 100,
 
1415
  "per_subtask": {
1416
  "MD": {
1417
  "accuracy": 1.0,
1418
- "count": 200
1419
  },
1420
  "MB": {
1421
  "accuracy": 1.0,
1422
- "count": 100
1423
  },
1424
  "UB": {
1425
  "accuracy": 1.0,
1426
- "count": 100
1427
  },
1428
  "UD": {
1429
  "accuracy": 1.0,
1430
- "count": 300
1431
  }
1432
  }
1433
  },
1434
  "sub_M5": {
1435
- "full_accuracy": 0.14,
1436
- "n_examples": 100,
 
1437
  "per_subtask": {
1438
  "MD": {
1439
  "accuracy": 1.0,
1440
- "count": 100
1441
  },
1442
  "MB": {
1443
  "accuracy": 1.0,
1444
- "count": 100
1445
  },
1446
  "UB": {
1447
- "accuracy": 0.14,
1448
- "count": 100
1449
  },
1450
  "UD": {
1451
  "accuracy": 1.0,
1452
- "count": 400
1453
  }
1454
  }
1455
  },
1456
  "sub_random": {
1457
  "full_accuracy": 1.0,
 
1458
  "n_examples": 200,
1459
  "per_subtask": {
1460
  "MD": {
1461
  "accuracy": 1.0,
1462
- "count": 600
1463
  },
1464
  "MB": {
1465
  "accuracy": 1.0,
1466
- "count": 267
1467
  },
1468
  "ME": {
1469
  "accuracy": 1.0,
@@ -1471,85 +1597,89 @@
1471
  },
1472
  "UB": {
1473
  "accuracy": 1.0,
1474
- "count": 439
1475
  },
1476
  "UD": {
1477
  "accuracy": 1.0,
1478
- "count": 41
1479
  }
1480
  }
1481
  },
1482
  "sub_B3": {
1483
  "full_accuracy": 1.0,
1484
- "n_examples": 100,
 
1485
  "per_subtask": {
1486
  "MD": {
1487
  "accuracy": 1.0,
1488
- "count": 300
1489
  },
1490
  "MB": {
1491
  "accuracy": 1.0,
1492
- "count": 100
1493
  },
1494
  "UB": {
1495
  "accuracy": 1.0,
1496
- "count": 197
1497
  },
1498
  "UD": {
1499
  "accuracy": 1.0,
1500
- "count": 103
1501
  }
1502
  }
1503
  },
1504
  "sub_B4": {
1505
  "full_accuracy": 1.0,
1506
- "n_examples": 100,
 
1507
  "per_subtask": {
1508
  "MD": {
1509
  "accuracy": 1.0,
1510
- "count": 200
1511
  },
1512
  "MB": {
1513
  "accuracy": 1.0,
1514
- "count": 100
1515
  },
1516
  "UB": {
1517
  "accuracy": 1.0,
1518
- "count": 247
1519
  },
1520
  "UD": {
1521
  "accuracy": 1.0,
1522
- "count": 153
1523
  }
1524
  }
1525
  },
1526
  "sub_B5": {
1527
- "full_accuracy": 0.94,
1528
- "n_examples": 100,
 
1529
  "per_subtask": {
1530
  "MD": {
1531
  "accuracy": 1.0,
1532
- "count": 100
1533
  },
1534
  "MB": {
1535
  "accuracy": 1.0,
1536
- "count": 100
1537
  },
1538
  "UB": {
1539
- "accuracy": 0.9798657718120806,
1540
- "count": 298
1541
  },
1542
  "UD": {
1543
  "accuracy": 1.0,
1544
- "count": 202
1545
  }
1546
  }
1547
  }
1548
  },
1549
  "summary": {
1550
- "overall_accuracy": 0.96125,
1551
- "total_examples": 2400,
1552
- "n_splits": 22
 
1553
  }
1554
  },
1555
  "sorl_overall_accuracy": 0.96125,
 
550
  "K": null,
551
  "mode": "sft",
552
  "n_digits": 6,
553
+ "n_per_split": 50
554
  },
555
  "splits": {
556
  "add_S0": {
557
+ "full_accuracy": 0.94,
558
+ "digit_accuracy": 0.9914285714285714,
559
+ "n_examples": 50,
560
  "per_subtask": {
561
  "SA": {
562
+ "accuracy": 0.9898305084745763,
563
+ "count": 295
564
  },
565
  "SS": {
566
  "accuracy": 1.0,
567
+ "count": 55
568
  }
569
  }
570
  },
571
  "add_S1": {
572
  "full_accuracy": 1.0,
573
+ "digit_accuracy": 1.0,
574
+ "n_examples": 50,
575
  "per_subtask": {
576
  "SA": {
577
  "accuracy": 1.0,
578
+ "count": 126
579
  },
580
  "SC": {
581
  "accuracy": 1.0,
582
+ "count": 79
583
  },
584
  "SS": {
585
  "accuracy": 1.0,
586
+ "count": 21
587
  },
588
  "UC": {
589
  "accuracy": 1.0,
590
+ "count": 124
591
  }
592
  }
593
  },
594
  "add_S2": {
595
  "full_accuracy": 0.96,
596
+ "digit_accuracy": 0.9942857142857143,
597
+ "n_examples": 50,
598
  "per_subtask": {
599
  "SA": {
600
  "accuracy": 1.0,
601
+ "count": 75
602
  },
603
  "SC": {
604
+ "accuracy": 0.967741935483871,
605
+ "count": 62
606
  },
607
  "SS": {
608
+ "accuracy": 1.0,
609
+ "count": 39
610
  },
611
  "UC": {
612
  "accuracy": 1.0,
613
+ "count": 111
614
  },
615
  "US": {
616
  "accuracy": 1.0,
617
+ "count": 63
618
  }
619
  }
620
  },
621
  "add_S3": {
622
+ "full_accuracy": 0.68,
623
+ "digit_accuracy": 0.9542857142857143,
624
+ "n_examples": 50,
625
  "per_subtask": {
626
  "SA": {
627
  "accuracy": 1.0,
628
+ "count": 60
629
  },
630
  "SC": {
631
+ "accuracy": 0.9824561403508771,
632
+ "count": 57
633
  },
634
  "SS": {
635
  "accuracy": 1.0,
636
+ "count": 19
637
  },
638
  "UC": {
639
+ "accuracy": 0.8557692307692307,
640
+ "count": 104
641
  },
642
  "US": {
643
  "accuracy": 1.0,
644
+ "count": 110
645
  }
646
  }
647
  },
648
  "add_S4": {
649
+ "full_accuracy": 0.66,
650
+ "digit_accuracy": 0.94,
651
+ "n_examples": 50,
652
  "per_subtask": {
653
  "SA": {
654
  "accuracy": 1.0,
655
+ "count": 48
656
  },
657
  "SC": {
658
  "accuracy": 1.0,
659
+ "count": 52
660
  },
661
  "SS": {
662
  "accuracy": 1.0,
663
+ "count": 7
664
  },
665
  "UC": {
666
+ "accuracy": 0.898876404494382,
667
+ "count": 89
668
  },
669
  "US": {
670
+ "accuracy": 0.922077922077922,
671
+ "count": 154
672
  }
673
  }
674
  },
675
  "add_S5": {
676
+ "full_accuracy": 0.68,
677
+ "digit_accuracy": 0.8714285714285714,
678
+ "n_examples": 50,
679
  "per_subtask": {
680
  "SA": {
681
  "accuracy": 1.0,
682
+ "count": 50
683
  },
684
  "SC": {
685
  "accuracy": 1.0,
686
+ "count": 50
687
  },
688
  "UC": {
689
+ "accuracy": 0.74,
690
+ "count": 50
691
  },
692
  "US": {
693
+ "accuracy": 0.84,
694
+ "count": 200
695
  }
696
  }
697
  },
698
  "add_S6": {
699
  "full_accuracy": 1.0,
700
+ "digit_accuracy": 1.0,
701
+ "n_examples": 50,
702
  "per_subtask": {
703
  "SC": {
704
  "accuracy": 1.0,
705
+ "count": 50
706
  },
707
  "UC": {
708
  "accuracy": 1.0,
709
+ "count": 50
710
  },
711
  "US": {
712
  "accuracy": 1.0,
713
+ "count": 250
714
  }
715
  }
716
  },
717
  "add_random": {
718
  "full_accuracy": 0.99,
719
+ "digit_accuracy": 0.9985714285714286,
720
  "n_examples": 200,
721
  "per_subtask": {
722
  "SA": {
723
  "accuracy": 1.0,
724
+ "count": 431
725
  },
726
  "SC": {
727
+ "accuracy": 1.0,
728
+ "count": 316
729
  },
730
  "SS": {
731
  "accuracy": 1.0,
732
+ "count": 39
733
  },
734
  "UC": {
735
+ "accuracy": 0.9964285714285714,
736
+ "count": 560
737
  },
738
  "US": {
739
  "accuracy": 1.0,
740
+ "count": 54
741
  }
742
  }
743
  },
744
+ "add_C1": {
745
+ "full_accuracy": 0.98,
746
+ "digit_accuracy": 0.9971428571428571,
747
+ "n_examples": 50,
748
+ "per_subtask": {
749
+ "SA": {
750
+ "accuracy": 0.996,
751
+ "count": 250
752
+ },
753
+ "SC": {
754
+ "accuracy": 1.0,
755
+ "count": 50
756
+ },
757
+ "UC": {
758
+ "accuracy": 1.0,
759
+ "count": 50
760
+ }
761
+ }
762
+ },
763
+ "add_C2": {
764
+ "full_accuracy": 1.0,
765
+ "digit_accuracy": 1.0,
766
+ "n_examples": 50,
767
  "per_subtask": {
768
  "SA": {
769
  "accuracy": 1.0,
770
+ "count": 200
771
  },
772
  "SC": {
773
  "accuracy": 1.0,
774
+ "count": 50
775
  },
776
  "UC": {
777
+ "accuracy": 1.0,
778
+ "count": 83
779
  },
780
  "US": {
781
  "accuracy": 1.0,
782
+ "count": 17
783
+ }
784
+ }
785
+ },
786
+ "add_C3": {
787
+ "full_accuracy": 0.8,
788
+ "digit_accuracy": 0.9714285714285714,
789
+ "n_examples": 50,
790
+ "per_subtask": {
791
+ "SA": {
792
+ "accuracy": 0.9933333333333333,
793
+ "count": 150
794
+ },
795
+ "SC": {
796
+ "accuracy": 1.0,
797
+ "count": 50
798
+ },
799
+ "UC": {
800
+ "accuracy": 0.91,
801
+ "count": 100
802
+ },
803
+ "US": {
804
+ "accuracy": 1.0,
805
+ "count": 50
806
  }
807
  }
808
  },
809
  "add_C4": {
810
+ "full_accuracy": 0.86,
811
+ "digit_accuracy": 0.9771428571428571,
812
+ "n_examples": 50,
813
  "per_subtask": {
814
  "SA": {
815
  "accuracy": 1.0,
816
+ "count": 100
817
  },
818
  "SC": {
819
  "accuracy": 1.0,
820
+ "count": 50
821
  },
822
  "UC": {
823
+ "accuracy": 0.9545454545454546,
824
+ "count": 132
825
  },
826
  "US": {
827
+ "accuracy": 0.9705882352941176,
828
+ "count": 68
829
  }
830
  }
831
  },
832
  "add_C5": {
833
+ "full_accuracy": 0.76,
834
+ "digit_accuracy": 0.9571428571428572,
835
+ "n_examples": 50,
836
  "per_subtask": {
837
  "SA": {
838
  "accuracy": 1.0,
839
+ "count": 50
840
  },
841
  "SC": {
842
  "accuracy": 1.0,
843
+ "count": 50
844
  },
845
  "UC": {
846
+ "accuracy": 0.9383561643835616,
847
+ "count": 146
848
  },
849
  "US": {
850
+ "accuracy": 0.9423076923076923,
851
+ "count": 104
852
  }
853
  }
854
  },
855
  "add_C6": {
856
+ "full_accuracy": 0.92,
857
+ "digit_accuracy": 0.9885714285714285,
858
+ "n_examples": 50,
859
  "per_subtask": {
860
  "SC": {
861
  "accuracy": 1.0,
862
+ "count": 50
863
  },
864
  "UC": {
865
+ "accuracy": 0.9788359788359788,
866
+ "count": 189
867
  },
868
  "US": {
869
+ "accuracy": 1.0,
870
+ "count": 111
871
  }
872
  }
873
  },
874
  "sub_M0": {
875
+ "full_accuracy": 1.0,
876
+ "digit_accuracy": 1.0,
877
+ "n_examples": 50,
878
  "per_subtask": {
879
  "MD": {
880
+ "accuracy": 1.0,
881
+ "count": 303
882
  },
883
  "ME": {
884
  "accuracy": 1.0,
885
+ "count": 47
886
  }
887
  }
888
  },
889
  "sub_M1": {
890
  "full_accuracy": 1.0,
891
+ "digit_accuracy": 1.0,
892
+ "n_examples": 50,
893
  "per_subtask": {
894
  "MD": {
895
  "accuracy": 1.0,
896
+ "count": 141
897
  },
898
  "MB": {
899
  "accuracy": 1.0,
900
+ "count": 72
901
  },
902
  "ME": {
903
  "accuracy": 1.0,
904
+ "count": 18
905
  },
906
  "UB": {
907
  "accuracy": 1.0,
908
+ "count": 119
909
  }
910
  }
911
  },
912
  "sub_M2": {
913
  "full_accuracy": 1.0,
914
+ "digit_accuracy": 1.0,
915
+ "n_examples": 50,
916
  "per_subtask": {
917
  "MD": {
918
  "accuracy": 1.0,
919
+ "count": 112
920
  },
921
  "MB": {
922
  "accuracy": 1.0,
923
+ "count": 53
924
  },
925
  "ME": {
926
  "accuracy": 1.0,
927
+ "count": 47
928
  },
929
  "UB": {
930
  "accuracy": 1.0,
931
+ "count": 85
932
  },
933
  "UD": {
934
  "accuracy": 1.0,
935
+ "count": 53
936
  }
937
  }
938
  },
939
  "sub_M3": {
940
+ "full_accuracy": 0.32,
941
+ "digit_accuracy": 0.9028571428571428,
942
+ "n_examples": 50,
943
  "per_subtask": {
944
  "MD": {
945
  "accuracy": 1.0,
946
+ "count": 97
947
  },
948
  "MB": {
949
  "accuracy": 1.0,
950
+ "count": 51
951
  },
952
  "ME": {
953
  "accuracy": 1.0,
954
+ "count": 27
955
  },
956
  "UB": {
957
+ "accuracy": 0.5405405405405406,
958
+ "count": 74
959
  },
960
  "UD": {
961
  "accuracy": 1.0,
962
+ "count": 101
963
  }
964
  }
965
  },
966
  "sub_M4": {
967
+ "full_accuracy": 0.0,
968
+ "digit_accuracy": 0.8028571428571428,
969
+ "n_examples": 50,
970
  "per_subtask": {
971
  "MD": {
972
  "accuracy": 1.0,
973
+ "count": 100
974
  },
975
  "MB": {
976
  "accuracy": 1.0,
977
+ "count": 50
978
  },
979
  "UB": {
980
+ "accuracy": 0.6,
981
+ "count": 50
982
  },
983
  "UD": {
984
+ "accuracy": 0.6733333333333333,
985
+ "count": 150
986
  }
987
  }
988
  },
989
  "sub_M5": {
990
  "full_accuracy": 0.0,
991
+ "digit_accuracy": 0.6428571428571429,
992
+ "n_examples": 50,
993
  "per_subtask": {
994
  "MD": {
995
  "accuracy": 1.0,
996
+ "count": 50
997
  },
998
  "MB": {
999
  "accuracy": 1.0,
1000
+ "count": 50
1001
  },
1002
  "UB": {
1003
+ "accuracy": 0.46,
1004
+ "count": 50
1005
  },
1006
  "UD": {
1007
+ "accuracy": 0.51,
1008
+ "count": 200
1009
  }
1010
  }
1011
  },
1012
  "sub_random": {
1013
+ "full_accuracy": 1.0,
1014
+ "digit_accuracy": 1.0,
1015
  "n_examples": 200,
1016
  "per_subtask": {
1017
  "MD": {
1018
  "accuracy": 1.0,
1019
+ "count": 570
1020
  },
1021
  "MB": {
1022
  "accuracy": 1.0,
1023
+ "count": 277
1024
  },
1025
  "ME": {
1026
  "accuracy": 1.0,
1027
  "count": 53
1028
  },
1029
  "UB": {
1030
+ "accuracy": 1.0,
1031
+ "count": 471
1032
  },
1033
  "UD": {
1034
  "accuracy": 1.0,
1035
+ "count": 29
1036
  }
1037
  }
1038
  },
1039
  "sub_B3": {
1040
+ "full_accuracy": 0.9,
1041
+ "digit_accuracy": 0.9857142857142858,
1042
+ "n_examples": 50,
1043
  "per_subtask": {
1044
  "MD": {
1045
  "accuracy": 1.0,
1046
+ "count": 150
1047
  },
1048
  "MB": {
1049
  "accuracy": 1.0,
1050
+ "count": 50
1051
  },
1052
  "UB": {
1053
+ "accuracy": 0.9504950495049505,
1054
+ "count": 101
1055
  },
1056
  "UD": {
1057
  "accuracy": 1.0,
1058
+ "count": 49
1059
  }
1060
  }
1061
  },
1062
  "sub_B4": {
1063
+ "full_accuracy": 0.68,
1064
+ "digit_accuracy": 0.9457142857142857,
1065
+ "n_examples": 50,
1066
  "per_subtask": {
1067
  "MD": {
1068
  "accuracy": 1.0,
1069
+ "count": 100
1070
  },
1071
  "MB": {
1072
  "accuracy": 1.0,
1073
+ "count": 50
1074
  },
1075
  "UB": {
1076
+ "accuracy": 0.9008264462809917,
1077
+ "count": 121
1078
  },
1079
  "UD": {
1080
+ "accuracy": 0.9113924050632911,
1081
+ "count": 79
1082
  }
1083
  }
1084
  },
1085
  "sub_B5": {
1086
+ "full_accuracy": 0.74,
1087
+ "digit_accuracy": 0.9514285714285714,
1088
+ "n_examples": 50,
1089
  "per_subtask": {
1090
  "MD": {
1091
  "accuracy": 1.0,
1092
+ "count": 50
1093
  },
1094
  "MB": {
1095
  "accuracy": 1.0,
1096
+ "count": 50
1097
  },
1098
  "UB": {
1099
+ "accuracy": 0.9407894736842105,
1100
+ "count": 152
1101
  },
1102
  "UD": {
1103
+ "accuracy": 0.9183673469387755,
1104
+ "count": 98
1105
  }
1106
  }
1107
  }
1108
  },
1109
  "summary": {
1110
+ "overall_accuracy": 0.828,
1111
+ "digit_accuracy": 0.9622857142857143,
1112
+ "total_examples": 1500,
1113
+ "n_splits": 24
1114
  }
1115
  },
1116
  "sorl_eval": {
 
1119
  "K": 1,
1120
  "mode": "sorl",
1121
  "n_digits": 6,
1122
+ "n_per_split": 50
1123
  },
1124
  "splits": {
1125
  "add_S0": {
1126
  "full_accuracy": 1.0,
1127
+ "digit_accuracy": 1.0,
1128
+ "n_examples": 50,
1129
  "per_subtask": {
1130
  "SA": {
1131
  "accuracy": 1.0,
1132
+ "count": 295
1133
  },
1134
  "SS": {
1135
  "accuracy": 1.0,
1136
+ "count": 55
1137
  }
1138
  }
1139
  },
1140
  "add_S1": {
1141
  "full_accuracy": 1.0,
1142
+ "digit_accuracy": 1.0,
1143
+ "n_examples": 50,
1144
  "per_subtask": {
1145
  "SA": {
1146
  "accuracy": 1.0,
1147
+ "count": 126
1148
  },
1149
  "SC": {
1150
  "accuracy": 1.0,
1151
+ "count": 79
1152
  },
1153
  "SS": {
1154
  "accuracy": 1.0,
1155
+ "count": 21
1156
  },
1157
  "UC": {
1158
  "accuracy": 1.0,
1159
+ "count": 124
1160
  }
1161
  }
1162
  },
1163
  "add_S2": {
1164
  "full_accuracy": 1.0,
1165
+ "digit_accuracy": 1.0,
1166
+ "n_examples": 50,
1167
  "per_subtask": {
1168
  "SA": {
1169
  "accuracy": 1.0,
1170
+ "count": 75
1171
  },
1172
  "SC": {
1173
  "accuracy": 1.0,
1174
+ "count": 62
1175
  },
1176
  "SS": {
1177
  "accuracy": 1.0,
1178
+ "count": 39
1179
  },
1180
  "UC": {
1181
  "accuracy": 1.0,
1182
+ "count": 111
1183
  },
1184
  "US": {
1185
  "accuracy": 1.0,
1186
+ "count": 63
1187
  }
1188
  }
1189
  },
1190
  "add_S3": {
1191
  "full_accuracy": 1.0,
1192
+ "digit_accuracy": 1.0,
1193
+ "n_examples": 50,
1194
  "per_subtask": {
1195
  "SA": {
1196
  "accuracy": 1.0,
1197
+ "count": 60
1198
  },
1199
  "SC": {
1200
  "accuracy": 1.0,
1201
+ "count": 57
1202
  },
1203
  "SS": {
1204
  "accuracy": 1.0,
1205
+ "count": 19
1206
  },
1207
  "UC": {
1208
  "accuracy": 1.0,
1209
+ "count": 104
1210
  },
1211
  "US": {
1212
  "accuracy": 1.0,
1213
+ "count": 110
1214
  }
1215
  }
1216
  },
1217
  "add_S4": {
1218
  "full_accuracy": 1.0,
1219
+ "digit_accuracy": 1.0,
1220
+ "n_examples": 50,
1221
  "per_subtask": {
1222
  "SA": {
1223
  "accuracy": 1.0,
1224
+ "count": 48
1225
  },
1226
  "SC": {
1227
  "accuracy": 1.0,
1228
+ "count": 52
1229
  },
1230
  "SS": {
1231
  "accuracy": 1.0,
1232
+ "count": 7
1233
  },
1234
  "UC": {
1235
  "accuracy": 1.0,
1236
+ "count": 89
1237
  },
1238
  "US": {
1239
  "accuracy": 1.0,
1240
+ "count": 154
1241
  }
1242
  }
1243
  },
1244
  "add_S5": {
1245
+ "full_accuracy": 0.98,
1246
+ "digit_accuracy": 0.9971428571428571,
1247
+ "n_examples": 50,
1248
  "per_subtask": {
1249
  "SA": {
1250
  "accuracy": 1.0,
1251
+ "count": 50
1252
  },
1253
  "SC": {
1254
  "accuracy": 1.0,
1255
+ "count": 50
1256
  },
1257
  "UC": {
1258
+ "accuracy": 0.98,
1259
+ "count": 50
1260
  },
1261
  "US": {
1262
  "accuracy": 1.0,
1263
+ "count": 200
1264
  }
1265
  }
1266
  },
1267
  "add_S6": {
1268
  "full_accuracy": 1.0,
1269
+ "digit_accuracy": 1.0,
1270
+ "n_examples": 50,
1271
  "per_subtask": {
1272
  "SC": {
1273
  "accuracy": 1.0,
1274
+ "count": 50
1275
  },
1276
  "UC": {
1277
  "accuracy": 1.0,
1278
+ "count": 50
1279
  },
1280
  "US": {
1281
  "accuracy": 1.0,
1282
+ "count": 250
1283
  }
1284
  }
1285
  },
1286
  "add_random": {
1287
  "full_accuracy": 1.0,
1288
+ "digit_accuracy": 1.0,
1289
  "n_examples": 200,
1290
  "per_subtask": {
1291
  "SA": {
1292
  "accuracy": 1.0,
1293
+ "count": 431
1294
  },
1295
  "SC": {
1296
  "accuracy": 1.0,
1297
+ "count": 316
1298
  },
1299
  "SS": {
1300
  "accuracy": 1.0,
1301
+ "count": 39
1302
  },
1303
  "UC": {
1304
  "accuracy": 1.0,
1305
+ "count": 560
1306
  },
1307
  "US": {
1308
  "accuracy": 1.0,
1309
+ "count": 54
1310
+ }
1311
+ }
1312
+ },
1313
+ "add_C1": {
1314
+ "full_accuracy": 1.0,
1315
+ "digit_accuracy": 1.0,
1316
+ "n_examples": 50,
1317
+ "per_subtask": {
1318
+ "SA": {
1319
+ "accuracy": 1.0,
1320
+ "count": 250
1321
+ },
1322
+ "SC": {
1323
+ "accuracy": 1.0,
1324
+ "count": 50
1325
+ },
1326
+ "UC": {
1327
+ "accuracy": 1.0,
1328
+ "count": 50
1329
+ }
1330
+ }
1331
+ },
1332
+ "add_C2": {
1333
+ "full_accuracy": 1.0,
1334
+ "digit_accuracy": 1.0,
1335
+ "n_examples": 50,
1336
+ "per_subtask": {
1337
+ "SA": {
1338
+ "accuracy": 1.0,
1339
+ "count": 200
1340
+ },
1341
+ "SC": {
1342
+ "accuracy": 1.0,
1343
+ "count": 50
1344
+ },
1345
+ "UC": {
1346
+ "accuracy": 1.0,
1347
+ "count": 83
1348
+ },
1349
+ "US": {
1350
+ "accuracy": 1.0,
1351
+ "count": 17
1352
  }
1353
  }
1354
  },
1355
  "add_C3": {
1356
  "full_accuracy": 1.0,
1357
+ "digit_accuracy": 1.0,
1358
+ "n_examples": 50,
1359
  "per_subtask": {
1360
  "SA": {
1361
  "accuracy": 1.0,
1362
+ "count": 150
1363
  },
1364
  "SC": {
1365
  "accuracy": 1.0,
1366
+ "count": 50
1367
  },
1368
  "UC": {
1369
  "accuracy": 1.0,
1370
+ "count": 100
1371
  },
1372
  "US": {
1373
  "accuracy": 1.0,
1374
+ "count": 50
1375
  }
1376
  }
1377
  },
1378
  "add_C4": {
1379
  "full_accuracy": 1.0,
1380
+ "digit_accuracy": 1.0,
1381
+ "n_examples": 50,
1382
  "per_subtask": {
1383
  "SA": {
1384
  "accuracy": 1.0,
1385
+ "count": 100
1386
  },
1387
  "SC": {
1388
  "accuracy": 1.0,
1389
+ "count": 50
1390
  },
1391
  "UC": {
1392
  "accuracy": 1.0,
1393
+ "count": 132
1394
  },
1395
  "US": {
1396
  "accuracy": 1.0,
1397
+ "count": 68
1398
  }
1399
  }
1400
  },
1401
  "add_C5": {
1402
  "full_accuracy": 1.0,
1403
+ "digit_accuracy": 1.0,
1404
+ "n_examples": 50,
1405
  "per_subtask": {
1406
  "SA": {
1407
  "accuracy": 1.0,
1408
+ "count": 50
1409
  },
1410
  "SC": {
1411
  "accuracy": 1.0,
1412
+ "count": 50
1413
  },
1414
  "UC": {
1415
  "accuracy": 1.0,
1416
+ "count": 146
1417
  },
1418
  "US": {
1419
  "accuracy": 1.0,
1420
+ "count": 104
1421
  }
1422
  }
1423
  },
1424
  "add_C6": {
1425
  "full_accuracy": 1.0,
1426
+ "digit_accuracy": 1.0,
1427
+ "n_examples": 50,
1428
  "per_subtask": {
1429
  "SC": {
1430
  "accuracy": 1.0,
1431
+ "count": 50
1432
  },
1433
  "UC": {
1434
  "accuracy": 1.0,
1435
+ "count": 189
1436
  },
1437
  "US": {
1438
  "accuracy": 1.0,
1439
+ "count": 111
1440
  }
1441
  }
1442
  },
1443
  "sub_M0": {
1444
  "full_accuracy": 1.0,
1445
+ "digit_accuracy": 1.0,
1446
+ "n_examples": 50,
1447
  "per_subtask": {
1448
  "MD": {
1449
  "accuracy": 1.0,
1450
+ "count": 303
1451
  },
1452
  "ME": {
1453
  "accuracy": 1.0,
1454
+ "count": 47
1455
  }
1456
  }
1457
  },
1458
  "sub_M1": {
1459
  "full_accuracy": 1.0,
1460
+ "digit_accuracy": 1.0,
1461
+ "n_examples": 50,
1462
  "per_subtask": {
1463
  "MD": {
1464
  "accuracy": 1.0,
1465
+ "count": 141
1466
  },
1467
  "MB": {
1468
  "accuracy": 1.0,
1469
+ "count": 72
1470
  },
1471
  "ME": {
1472
  "accuracy": 1.0,
1473
+ "count": 18
1474
  },
1475
  "UB": {
1476
  "accuracy": 1.0,
1477
+ "count": 119
1478
  }
1479
  }
1480
  },
1481
  "sub_M2": {
1482
  "full_accuracy": 1.0,
1483
+ "digit_accuracy": 1.0,
1484
+ "n_examples": 50,
1485
  "per_subtask": {
1486
  "MD": {
1487
  "accuracy": 1.0,
1488
+ "count": 112
1489
  },
1490
  "MB": {
1491
  "accuracy": 1.0,
1492
+ "count": 53
1493
  },
1494
  "ME": {
1495
  "accuracy": 1.0,
1496
+ "count": 47
1497
  },
1498
  "UB": {
1499
  "accuracy": 1.0,
1500
+ "count": 85
1501
  },
1502
  "UD": {
1503
  "accuracy": 1.0,
1504
+ "count": 53
1505
  }
1506
  }
1507
  },
1508
  "sub_M3": {
1509
  "full_accuracy": 1.0,
1510
+ "digit_accuracy": 1.0,
1511
+ "n_examples": 50,
1512
  "per_subtask": {
1513
  "MD": {
1514
  "accuracy": 1.0,
1515
+ "count": 97
1516
  },
1517
  "MB": {
1518
  "accuracy": 1.0,
1519
+ "count": 51
1520
  },
1521
  "ME": {
1522
  "accuracy": 1.0,
1523
+ "count": 27
1524
  },
1525
  "UB": {
1526
  "accuracy": 1.0,
1527
+ "count": 74
1528
  },
1529
  "UD": {
1530
  "accuracy": 1.0,
1531
+ "count": 101
1532
  }
1533
  }
1534
  },
1535
  "sub_M4": {
1536
  "full_accuracy": 1.0,
1537
+ "digit_accuracy": 1.0,
1538
+ "n_examples": 50,
1539
  "per_subtask": {
1540
  "MD": {
1541
  "accuracy": 1.0,
1542
+ "count": 100
1543
  },
1544
  "MB": {
1545
  "accuracy": 1.0,
1546
+ "count": 50
1547
  },
1548
  "UB": {
1549
  "accuracy": 1.0,
1550
+ "count": 50
1551
  },
1552
  "UD": {
1553
  "accuracy": 1.0,
1554
+ "count": 150
1555
  }
1556
  }
1557
  },
1558
  "sub_M5": {
1559
+ "full_accuracy": 0.06,
1560
+ "digit_accuracy": 0.8657142857142858,
1561
+ "n_examples": 50,
1562
  "per_subtask": {
1563
  "MD": {
1564
  "accuracy": 1.0,
1565
+ "count": 50
1566
  },
1567
  "MB": {
1568
  "accuracy": 1.0,
1569
+ "count": 50
1570
  },
1571
  "UB": {
1572
+ "accuracy": 0.06,
1573
+ "count": 50
1574
  },
1575
  "UD": {
1576
  "accuracy": 1.0,
1577
+ "count": 200
1578
  }
1579
  }
1580
  },
1581
  "sub_random": {
1582
  "full_accuracy": 1.0,
1583
+ "digit_accuracy": 1.0,
1584
  "n_examples": 200,
1585
  "per_subtask": {
1586
  "MD": {
1587
  "accuracy": 1.0,
1588
+ "count": 570
1589
  },
1590
  "MB": {
1591
  "accuracy": 1.0,
1592
+ "count": 277
1593
  },
1594
  "ME": {
1595
  "accuracy": 1.0,
 
1597
  },
1598
  "UB": {
1599
  "accuracy": 1.0,
1600
+ "count": 471
1601
  },
1602
  "UD": {
1603
  "accuracy": 1.0,
1604
+ "count": 29
1605
  }
1606
  }
1607
  },
1608
  "sub_B3": {
1609
  "full_accuracy": 1.0,
1610
+ "digit_accuracy": 1.0,
1611
+ "n_examples": 50,
1612
  "per_subtask": {
1613
  "MD": {
1614
  "accuracy": 1.0,
1615
+ "count": 150
1616
  },
1617
  "MB": {
1618
  "accuracy": 1.0,
1619
+ "count": 50
1620
  },
1621
  "UB": {
1622
  "accuracy": 1.0,
1623
+ "count": 101
1624
  },
1625
  "UD": {
1626
  "accuracy": 1.0,
1627
+ "count": 49
1628
  }
1629
  }
1630
  },
1631
  "sub_B4": {
1632
  "full_accuracy": 1.0,
1633
+ "digit_accuracy": 1.0,
1634
+ "n_examples": 50,
1635
  "per_subtask": {
1636
  "MD": {
1637
  "accuracy": 1.0,
1638
+ "count": 100
1639
  },
1640
  "MB": {
1641
  "accuracy": 1.0,
1642
+ "count": 50
1643
  },
1644
  "UB": {
1645
  "accuracy": 1.0,
1646
+ "count": 121
1647
  },
1648
  "UD": {
1649
  "accuracy": 1.0,
1650
+ "count": 79
1651
  }
1652
  }
1653
  },
1654
  "sub_B5": {
1655
+ "full_accuracy": 0.98,
1656
+ "digit_accuracy": 0.9971428571428571,
1657
+ "n_examples": 50,
1658
  "per_subtask": {
1659
  "MD": {
1660
  "accuracy": 1.0,
1661
+ "count": 50
1662
  },
1663
  "MB": {
1664
  "accuracy": 1.0,
1665
+ "count": 50
1666
  },
1667
  "UB": {
1668
+ "accuracy": 0.993421052631579,
1669
+ "count": 152
1670
  },
1671
  "UD": {
1672
  "accuracy": 1.0,
1673
+ "count": 98
1674
  }
1675
  }
1676
  }
1677
  },
1678
  "summary": {
1679
+ "overall_accuracy": 0.9673333333333334,
1680
+ "digit_accuracy": 0.9953333333333333,
1681
+ "total_examples": 1500,
1682
+ "n_splits": 24
1683
  }
1684
  },
1685
  "sorl_overall_accuracy": 0.96125,