amirali1985 commited on
Commit
ebdaf4d
·
verified ·
1 Parent(s): 2c1ef04

Upload add_sub_sorl_v1_abs10_K1_25K_2L1H128d/metrics.json with huggingface_hub

Browse files
add_sub_sorl_v1_abs10_K1_25K_2L1H128d/metrics.json CHANGED
@@ -610,502 +610,567 @@
610
  "K": null,
611
  "mode": "sft",
612
  "n_digits": 6,
613
- "n_per_split": 100
614
  },
615
  "splits": {
616
  "add_S0": {
617
  "full_accuracy": 0.0,
618
- "n_examples": 100,
 
619
  "per_subtask": {
620
  "SA": {
621
- "accuracy": 0.2066115702479339,
622
- "count": 605
623
  },
624
  "SS": {
625
  "accuracy": 1.0,
626
- "count": 95
627
  }
628
  }
629
  },
630
  "add_S1": {
631
  "full_accuracy": 0.0,
632
- "n_examples": 100,
 
633
  "per_subtask": {
634
  "SA": {
635
- "accuracy": 0.2549019607843137,
636
- "count": 204
637
  },
638
  "SC": {
639
- "accuracy": 0.03550295857988166,
640
- "count": 169
641
  },
642
  "SS": {
643
- "accuracy": 0.8064516129032258,
644
- "count": 31
645
  },
646
  "UC": {
647
- "accuracy": 0.14527027027027026,
648
- "count": 296
649
  }
650
  }
651
  },
652
  "add_S2": {
653
  "full_accuracy": 0.0,
654
- "n_examples": 100,
 
655
  "per_subtask": {
656
  "SA": {
657
- "accuracy": 0.4662576687116564,
658
- "count": 163
659
  },
660
  "SC": {
661
- "accuracy": 0.07692307692307693,
662
- "count": 130
663
  },
664
  "SS": {
665
- "accuracy": 0.5977011494252874,
666
- "count": 87
667
  },
668
  "UC": {
669
- "accuracy": 0.26108374384236455,
670
- "count": 203
671
  },
672
  "US": {
673
- "accuracy": 0.4017094017094017,
674
- "count": 117
675
  }
676
  }
677
  },
678
  "add_S3": {
679
  "full_accuracy": 0.0,
680
- "n_examples": 100,
 
681
  "per_subtask": {
682
  "SA": {
683
- "accuracy": 0.5206611570247934,
684
- "count": 121
685
  },
686
  "SC": {
687
- "accuracy": 0.024793388429752067,
688
- "count": 121
689
  },
690
  "SS": {
691
- "accuracy": 0.8571428571428571,
692
- "count": 49
693
  },
694
  "UC": {
695
- "accuracy": 0.24731182795698925,
696
- "count": 186
697
  },
698
  "US": {
699
- "accuracy": 0.35874439461883406,
700
- "count": 223
701
  }
702
  }
703
  },
704
  "add_S4": {
705
  "full_accuracy": 0.0,
706
- "n_examples": 100,
 
707
  "per_subtask": {
708
  "SA": {
709
- "accuracy": 0.5576923076923077,
710
- "count": 104
711
  },
712
  "SC": {
713
- "accuracy": 0.04716981132075472,
714
- "count": 106
715
  },
716
  "SS": {
717
- "accuracy": 0.8695652173913043,
718
- "count": 23
719
  },
720
  "UC": {
721
- "accuracy": 0.25,
722
- "count": 160
723
  },
724
  "US": {
725
- "accuracy": 0.3583061889250814,
726
- "count": 307
727
  }
728
  }
729
  },
730
  "add_S5": {
731
  "full_accuracy": 0.0,
732
- "n_examples": 100,
 
733
  "per_subtask": {
734
  "SA": {
735
- "accuracy": 0.45,
736
- "count": 100
737
  },
738
  "SC": {
739
  "accuracy": 0.0,
740
- "count": 100
741
  },
742
  "UC": {
743
- "accuracy": 0.11,
744
- "count": 100
745
  },
746
  "US": {
747
- "accuracy": 0.04,
748
- "count": 400
749
  }
750
  }
751
  },
752
  "add_S6": {
753
  "full_accuracy": 0.0,
754
- "n_examples": 100,
 
755
  "per_subtask": {
756
  "SC": {
757
  "accuracy": 0.0,
758
- "count": 100
759
  },
760
  "UC": {
761
- "accuracy": 0.26,
762
- "count": 100
763
  },
764
  "US": {
765
- "accuracy": 0.26,
766
- "count": 500
767
  }
768
  }
769
  },
770
  "add_random": {
771
  "full_accuracy": 0.0,
 
772
  "n_examples": 200,
773
  "per_subtask": {
774
  "SA": {
775
- "accuracy": 0.27069351230425054,
776
- "count": 447
777
  },
778
  "SC": {
779
- "accuracy": 0.053125,
780
- "count": 320
781
  },
782
  "SS": {
783
- "accuracy": 0.7321428571428571,
784
- "count": 56
785
  },
786
  "UC": {
787
- "accuracy": 0.166351606805293,
788
- "count": 529
789
  },
790
  "US": {
791
- "accuracy": 0.3333333333333333,
792
- "count": 48
793
  }
794
  }
795
  },
796
- "add_C3": {
797
  "full_accuracy": 0.0,
798
- "n_examples": 100,
 
799
  "per_subtask": {
800
  "SA": {
801
- "accuracy": 0.24,
802
- "count": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
803
  },
804
  "SC": {
805
  "accuracy": 0.02,
806
- "count": 100
807
  },
808
  "UC": {
809
- "accuracy": 0.06735751295336788,
810
- "count": 193
811
  },
812
  "US": {
813
- "accuracy": 0.06542056074766354,
814
- "count": 107
815
  }
816
  }
817
  },
818
- "add_C4": {
819
  "full_accuracy": 0.0,
820
- "n_examples": 100,
 
821
  "per_subtask": {
822
  "SA": {
823
- "accuracy": 0.335,
824
- "count": 200
825
  },
826
  "SC": {
827
- "accuracy": 0.01,
 
 
 
 
828
  "count": 100
829
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
830
  "UC": {
831
- "accuracy": 0.04296875,
832
- "count": 256
833
  },
834
  "US": {
835
- "accuracy": 0.06944444444444445,
836
- "count": 144
837
  }
838
  }
839
  },
840
  "add_C5": {
841
  "full_accuracy": 0.0,
842
- "n_examples": 100,
 
843
  "per_subtask": {
844
  "SA": {
845
- "accuracy": 0.49,
846
- "count": 100
847
  },
848
  "SC": {
849
- "accuracy": 0.05,
850
- "count": 100
851
  },
852
  "UC": {
853
- "accuracy": 0.08169934640522876,
854
- "count": 306
855
  },
856
  "US": {
857
- "accuracy": 0.23195876288659795,
858
- "count": 194
859
  }
860
  }
861
  },
862
  "add_C6": {
863
  "full_accuracy": 0.0,
864
- "n_examples": 100,
 
865
  "per_subtask": {
866
  "SC": {
867
- "accuracy": 0.01,
868
- "count": 100
869
  },
870
  "UC": {
871
- "accuracy": 0.1912568306010929,
872
- "count": 366
873
  },
874
  "US": {
875
- "accuracy": 0.6239316239316239,
876
- "count": 234
877
  }
878
  }
879
  },
880
  "sub_M0": {
881
  "full_accuracy": 0.0,
882
- "n_examples": 100,
 
883
  "per_subtask": {
884
  "MD": {
885
- "accuracy": 0.20465890183028287,
886
- "count": 601
887
  },
888
  "ME": {
889
  "accuracy": 1.0,
890
- "count": 99
891
  }
892
  }
893
  },
894
  "sub_M1": {
895
  "full_accuracy": 0.0,
896
- "n_examples": 100,
 
897
  "per_subtask": {
898
  "MD": {
899
- "accuracy": 0.3835125448028674,
900
- "count": 279
901
  },
902
  "MB": {
903
  "accuracy": 0.0,
904
- "count": 145
905
  },
906
  "ME": {
907
  "accuracy": 1.0,
908
- "count": 24
909
  },
910
  "UB": {
911
- "accuracy": 0.09523809523809523,
912
- "count": 252
913
  }
914
  }
915
  },
916
  "sub_M2": {
917
  "full_accuracy": 0.0,
918
- "n_examples": 100,
 
919
  "per_subtask": {
920
  "MD": {
921
- "accuracy": 0.6150234741784038,
922
- "count": 213
923
  },
924
  "MB": {
925
  "accuracy": 0.0,
926
- "count": 113
927
  },
928
  "ME": {
929
  "accuracy": 1.0,
930
- "count": 85
931
  },
932
  "UB": {
933
- "accuracy": 0.16574585635359115,
934
- "count": 181
935
  },
936
  "UD": {
937
  "accuracy": 0.0,
938
- "count": 108
939
  }
940
  }
941
  },
942
  "sub_M3": {
943
  "full_accuracy": 0.0,
944
- "n_examples": 100,
 
945
  "per_subtask": {
946
  "MD": {
947
- "accuracy": 0.7597765363128491,
948
- "count": 179
949
  },
950
  "MB": {
951
  "accuracy": 0.0,
952
- "count": 103
953
  },
954
  "ME": {
955
  "accuracy": 1.0,
956
- "count": 56
957
  },
958
  "UB": {
959
- "accuracy": 0.12080536912751678,
960
- "count": 149
961
  },
962
  "UD": {
963
  "accuracy": 0.0,
964
- "count": 213
965
  }
966
  }
967
  },
968
  "sub_M4": {
969
  "full_accuracy": 0.0,
970
- "n_examples": 100,
 
971
  "per_subtask": {
972
  "MD": {
973
  "accuracy": 0.5,
974
- "count": 200
975
  },
976
  "MB": {
977
  "accuracy": 0.0,
978
- "count": 100
979
  },
980
  "UB": {
981
- "accuracy": 0.3,
982
- "count": 100
983
  },
984
  "UD": {
985
  "accuracy": 0.0,
986
- "count": 300
987
  }
988
  }
989
  },
990
  "sub_M5": {
991
  "full_accuracy": 0.0,
992
- "n_examples": 100,
 
993
  "per_subtask": {
994
  "MD": {
995
  "accuracy": 1.0,
996
- "count": 100
997
  },
998
  "MB": {
999
  "accuracy": 0.0,
1000
- "count": 100
1001
  },
1002
  "UB": {
1003
- "accuracy": 0.31,
1004
- "count": 100
1005
  },
1006
  "UD": {
1007
  "accuracy": 0.0,
1008
- "count": 400
1009
  }
1010
  }
1011
  },
1012
  "sub_random": {
1013
  "full_accuracy": 0.0,
 
1014
  "n_examples": 200,
1015
  "per_subtask": {
1016
  "MD": {
1017
- "accuracy": 0.3616666666666667,
1018
- "count": 600
1019
  },
1020
  "MB": {
1021
  "accuracy": 0.0,
1022
- "count": 267
1023
  },
1024
  "ME": {
1025
  "accuracy": 1.0,
1026
  "count": 53
1027
  },
1028
  "UB": {
1029
- "accuracy": 0.12072892938496584,
1030
- "count": 439
1031
  },
1032
  "UD": {
1033
  "accuracy": 0.0,
1034
- "count": 41
1035
  }
1036
  }
1037
  },
1038
  "sub_B3": {
1039
  "full_accuracy": 0.0,
1040
- "n_examples": 100,
 
1041
  "per_subtask": {
1042
  "MD": {
1043
  "accuracy": 0.3333333333333333,
1044
- "count": 300
1045
  },
1046
  "MB": {
1047
  "accuracy": 0.0,
1048
- "count": 100
1049
  },
1050
  "UB": {
1051
- "accuracy": 0.17766497461928935,
1052
- "count": 197
1053
  },
1054
  "UD": {
1055
  "accuracy": 0.0,
1056
- "count": 103
1057
  }
1058
  }
1059
  },
1060
  "sub_B4": {
1061
  "full_accuracy": 0.0,
1062
- "n_examples": 100,
 
1063
  "per_subtask": {
1064
  "MD": {
1065
  "accuracy": 0.5,
1066
- "count": 200
1067
  },
1068
  "MB": {
1069
  "accuracy": 0.0,
1070
- "count": 100
1071
  },
1072
  "UB": {
1073
- "accuracy": 0.145748987854251,
1074
- "count": 247
1075
  },
1076
  "UD": {
1077
  "accuracy": 0.0,
1078
- "count": 153
1079
  }
1080
  }
1081
  },
1082
  "sub_B5": {
1083
  "full_accuracy": 0.0,
1084
- "n_examples": 100,
 
1085
  "per_subtask": {
1086
  "MD": {
1087
  "accuracy": 1.0,
1088
- "count": 100
1089
  },
1090
  "MB": {
1091
  "accuracy": 0.0,
1092
- "count": 100
1093
  },
1094
  "UB": {
1095
- "accuracy": 0.11073825503355705,
1096
- "count": 298
1097
  },
1098
  "UD": {
1099
  "accuracy": 0.0,
1100
- "count": 202
1101
  }
1102
  }
1103
  }
1104
  },
1105
  "summary": {
1106
  "overall_accuracy": 0.0,
1107
- "total_examples": 2400,
1108
- "n_splits": 22
 
1109
  }
1110
  },
1111
  "sorl_eval": {
@@ -1114,502 +1179,567 @@
1114
  "K": 1,
1115
  "mode": "sorl",
1116
  "n_digits": 6,
1117
- "n_per_split": 100
1118
  },
1119
  "splits": {
1120
  "add_S0": {
1121
- "full_accuracy": 0.09,
1122
- "n_examples": 100,
 
1123
  "per_subtask": {
1124
  "SA": {
1125
- "accuracy": 0.6776859504132231,
1126
- "count": 605
1127
  },
1128
  "SS": {
1129
- "accuracy": 0.8947368421052632,
1130
- "count": 95
1131
  }
1132
  }
1133
  },
1134
  "add_S1": {
1135
- "full_accuracy": 0.03,
1136
- "n_examples": 100,
 
1137
  "per_subtask": {
1138
  "SA": {
1139
- "accuracy": 0.696078431372549,
1140
- "count": 204
1141
  },
1142
  "SC": {
1143
- "accuracy": 0.6804733727810651,
1144
- "count": 169
1145
  },
1146
  "SS": {
1147
- "accuracy": 0.8387096774193549,
1148
- "count": 31
1149
  },
1150
  "UC": {
1151
- "accuracy": 0.4831081081081081,
1152
- "count": 296
1153
  }
1154
  }
1155
  },
1156
  "add_S2": {
1157
- "full_accuracy": 0.05,
1158
- "n_examples": 100,
 
1159
  "per_subtask": {
1160
  "SA": {
1161
- "accuracy": 0.7423312883435583,
1162
- "count": 163
1163
  },
1164
  "SC": {
1165
- "accuracy": 0.6615384615384615,
1166
- "count": 130
1167
  },
1168
  "SS": {
1169
- "accuracy": 0.7471264367816092,
1170
- "count": 87
1171
  },
1172
  "UC": {
1173
- "accuracy": 0.37438423645320196,
1174
- "count": 203
1175
  },
1176
  "US": {
1177
- "accuracy": 0.49572649572649574,
1178
- "count": 117
1179
  }
1180
  }
1181
  },
1182
  "add_S3": {
1183
- "full_accuracy": 0.01,
1184
- "n_examples": 100,
 
1185
  "per_subtask": {
1186
  "SA": {
1187
- "accuracy": 0.743801652892562,
1188
- "count": 121
1189
  },
1190
  "SC": {
1191
- "accuracy": 0.6611570247933884,
1192
- "count": 121
1193
  },
1194
  "SS": {
1195
- "accuracy": 0.8367346938775511,
1196
- "count": 49
1197
  },
1198
  "UC": {
1199
- "accuracy": 0.3709677419354839,
1200
- "count": 186
1201
  },
1202
  "US": {
1203
- "accuracy": 0.3811659192825112,
1204
- "count": 223
1205
  }
1206
  }
1207
  },
1208
  "add_S4": {
1209
- "full_accuracy": 0.03,
1210
- "n_examples": 100,
 
1211
  "per_subtask": {
1212
  "SA": {
1213
- "accuracy": 0.7307692307692307,
1214
- "count": 104
1215
  },
1216
  "SC": {
1217
- "accuracy": 0.7075471698113207,
1218
- "count": 106
1219
  },
1220
  "SS": {
1221
- "accuracy": 0.9565217391304348,
1222
- "count": 23
1223
  },
1224
  "UC": {
1225
- "accuracy": 0.325,
1226
- "count": 160
1227
  },
1228
  "US": {
1229
- "accuracy": 0.3322475570032573,
1230
- "count": 307
1231
  }
1232
  }
1233
  },
1234
  "add_S5": {
1235
- "full_accuracy": 0.03,
1236
- "n_examples": 100,
 
1237
  "per_subtask": {
1238
  "SA": {
1239
- "accuracy": 0.65,
1240
- "count": 100
1241
  },
1242
  "SC": {
1243
- "accuracy": 0.63,
1244
- "count": 100
1245
  },
1246
  "UC": {
1247
- "accuracy": 0.13,
1248
- "count": 100
1249
  },
1250
  "US": {
1251
- "accuracy": 0.285,
1252
- "count": 400
1253
  }
1254
  }
1255
  },
1256
  "add_S6": {
1257
  "full_accuracy": 0.0,
1258
- "n_examples": 100,
 
1259
  "per_subtask": {
1260
  "SC": {
1261
- "accuracy": 0.48,
1262
- "count": 100
1263
  },
1264
  "UC": {
1265
  "accuracy": 0.0,
1266
- "count": 100
1267
  },
1268
  "US": {
1269
- "accuracy": 0.036,
1270
- "count": 500
1271
  }
1272
  }
1273
  },
1274
  "add_random": {
1275
- "full_accuracy": 0.04,
 
1276
  "n_examples": 200,
1277
  "per_subtask": {
1278
  "SA": {
1279
- "accuracy": 0.6487695749440716,
1280
- "count": 447
1281
  },
1282
  "SC": {
1283
- "accuracy": 0.70625,
1284
- "count": 320
1285
  },
1286
  "SS": {
1287
- "accuracy": 0.8571428571428571,
1288
- "count": 56
1289
  },
1290
  "UC": {
1291
- "accuracy": 0.46313799621928164,
1292
- "count": 529
1293
  },
1294
  "US": {
1295
- "accuracy": 0.625,
1296
- "count": 48
1297
  }
1298
  }
1299
  },
1300
- "add_C3": {
1301
  "full_accuracy": 0.02,
1302
- "n_examples": 100,
 
1303
  "per_subtask": {
1304
  "SA": {
1305
- "accuracy": 0.8033333333333333,
1306
- "count": 300
1307
  },
1308
  "SC": {
1309
  "accuracy": 0.78,
1310
- "count": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1311
  },
1312
  "UC": {
1313
- "accuracy": 0.19170984455958548,
1314
- "count": 193
1315
  },
1316
  "US": {
1317
- "accuracy": 0.5327102803738317,
1318
- "count": 107
1319
  }
1320
  }
1321
  },
1322
- "add_C4": {
1323
- "full_accuracy": 0.01,
1324
- "n_examples": 100,
 
1325
  "per_subtask": {
1326
  "SA": {
1327
  "accuracy": 0.8,
1328
- "count": 200
1329
  },
1330
  "SC": {
1331
- "accuracy": 0.75,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1332
  "count": 100
1333
  },
 
 
 
 
1334
  "UC": {
1335
- "accuracy": 0.1875,
1336
- "count": 256
1337
  },
1338
  "US": {
1339
- "accuracy": 0.5833333333333334,
1340
- "count": 144
1341
  }
1342
  }
1343
  },
1344
  "add_C5": {
1345
  "full_accuracy": 0.02,
1346
- "n_examples": 100,
 
1347
  "per_subtask": {
1348
  "SA": {
1349
- "accuracy": 0.79,
1350
- "count": 100
1351
  },
1352
  "SC": {
1353
- "accuracy": 0.65,
1354
- "count": 100
1355
  },
1356
  "UC": {
1357
- "accuracy": 0.23529411764705882,
1358
- "count": 306
1359
  },
1360
  "US": {
1361
- "accuracy": 0.5979381443298969,
1362
- "count": 194
1363
  }
1364
  }
1365
  },
1366
  "add_C6": {
1367
- "full_accuracy": 0.01,
1368
- "n_examples": 100,
 
1369
  "per_subtask": {
1370
  "SC": {
1371
- "accuracy": 0.6,
1372
- "count": 100
1373
  },
1374
  "UC": {
1375
- "accuracy": 0.319672131147541,
1376
- "count": 366
1377
  },
1378
  "US": {
1379
- "accuracy": 0.6282051282051282,
1380
- "count": 234
1381
  }
1382
  }
1383
  },
1384
  "sub_M0": {
1385
- "full_accuracy": 0.01,
1386
- "n_examples": 100,
 
1387
  "per_subtask": {
1388
  "MD": {
1389
- "accuracy": 0.5391014975041597,
1390
- "count": 601
1391
  },
1392
  "ME": {
1393
- "accuracy": 0.9595959595959596,
1394
- "count": 99
1395
  }
1396
  }
1397
  },
1398
  "sub_M1": {
1399
  "full_accuracy": 0.0,
1400
- "n_examples": 100,
 
1401
  "per_subtask": {
1402
  "MD": {
1403
- "accuracy": 0.6666666666666666,
1404
- "count": 279
1405
  },
1406
  "MB": {
1407
- "accuracy": 0.38620689655172413,
1408
- "count": 145
1409
  },
1410
  "ME": {
1411
- "accuracy": 0.9583333333333334,
1412
- "count": 24
1413
  },
1414
  "UB": {
1415
- "accuracy": 0.5198412698412699,
1416
- "count": 252
1417
  }
1418
  }
1419
  },
1420
  "sub_M2": {
1421
- "full_accuracy": 0.0,
1422
- "n_examples": 100,
 
1423
  "per_subtask": {
1424
  "MD": {
1425
- "accuracy": 0.755868544600939,
1426
- "count": 213
1427
  },
1428
  "MB": {
1429
- "accuracy": 0.26548672566371684,
1430
- "count": 113
1431
  },
1432
  "ME": {
1433
- "accuracy": 0.9764705882352941,
1434
- "count": 85
1435
  },
1436
  "UB": {
1437
- "accuracy": 0.5524861878453039,
1438
- "count": 181
1439
  },
1440
  "UD": {
1441
- "accuracy": 0.2037037037037037,
1442
- "count": 108
1443
  }
1444
  }
1445
  },
1446
  "sub_M3": {
1447
- "full_accuracy": 0.01,
1448
- "n_examples": 100,
 
1449
  "per_subtask": {
1450
  "MD": {
1451
- "accuracy": 0.8547486033519553,
1452
- "count": 179
1453
  },
1454
  "MB": {
1455
- "accuracy": 0.3300970873786408,
1456
- "count": 103
1457
  },
1458
  "ME": {
1459
- "accuracy": 0.9821428571428571,
1460
- "count": 56
1461
  },
1462
  "UB": {
1463
- "accuracy": 0.5838926174496645,
1464
- "count": 149
1465
  },
1466
  "UD": {
1467
- "accuracy": 0.08450704225352113,
1468
- "count": 213
1469
  }
1470
  }
1471
  },
1472
  "sub_M4": {
1473
- "full_accuracy": 0.01,
1474
- "n_examples": 100,
 
1475
  "per_subtask": {
1476
  "MD": {
1477
- "accuracy": 0.65,
1478
- "count": 200
1479
  },
1480
  "MB": {
1481
- "accuracy": 0.21,
1482
- "count": 100
1483
  },
1484
  "UB": {
1485
- "accuracy": 0.67,
1486
- "count": 100
1487
  },
1488
  "UD": {
1489
- "accuracy": 0.06333333333333334,
1490
- "count": 300
1491
  }
1492
  }
1493
  },
1494
  "sub_M5": {
1495
- "full_accuracy": 0.06,
1496
- "n_examples": 100,
 
1497
  "per_subtask": {
1498
  "MD": {
1499
  "accuracy": 1.0,
1500
- "count": 100
1501
  },
1502
  "MB": {
1503
- "accuracy": 0.13,
1504
- "count": 100
1505
  },
1506
  "UB": {
1507
- "accuracy": 0.72,
1508
- "count": 100
1509
  },
1510
  "UD": {
1511
- "accuracy": 0.145,
1512
- "count": 400
1513
  }
1514
  }
1515
  },
1516
  "sub_random": {
1517
- "full_accuracy": 0.01,
 
1518
  "n_examples": 200,
1519
  "per_subtask": {
1520
  "MD": {
1521
- "accuracy": 0.635,
1522
- "count": 600
1523
  },
1524
  "MB": {
1525
- "accuracy": 0.45318352059925093,
1526
- "count": 267
1527
  },
1528
  "ME": {
1529
- "accuracy": 0.9811320754716981,
1530
  "count": 53
1531
  },
1532
  "UB": {
1533
- "accuracy": 0.5444191343963554,
1534
- "count": 439
1535
  },
1536
  "UD": {
1537
- "accuracy": 0.1951219512195122,
1538
- "count": 41
1539
  }
1540
  }
1541
  },
1542
  "sub_B3": {
1543
  "full_accuracy": 0.0,
1544
- "n_examples": 100,
 
1545
  "per_subtask": {
1546
  "MD": {
1547
- "accuracy": 0.5266666666666666,
1548
- "count": 300
1549
  },
1550
  "MB": {
1551
- "accuracy": 0.41,
1552
- "count": 100
1553
  },
1554
  "UB": {
1555
- "accuracy": 0.49746192893401014,
1556
- "count": 197
1557
  },
1558
  "UD": {
1559
- "accuracy": 0.11650485436893204,
1560
- "count": 103
1561
  }
1562
  }
1563
  },
1564
  "sub_B4": {
1565
  "full_accuracy": 0.0,
1566
- "n_examples": 100,
 
1567
  "per_subtask": {
1568
  "MD": {
1569
- "accuracy": 0.67,
1570
- "count": 200
1571
  },
1572
  "MB": {
1573
- "accuracy": 0.35,
1574
- "count": 100
1575
  },
1576
  "UB": {
1577
- "accuracy": 0.4898785425101215,
1578
- "count": 247
1579
  },
1580
  "UD": {
1581
- "accuracy": 0.16993464052287582,
1582
- "count": 153
1583
  }
1584
  }
1585
  },
1586
  "sub_B5": {
1587
  "full_accuracy": 0.0,
1588
- "n_examples": 100,
 
1589
  "per_subtask": {
1590
  "MD": {
1591
  "accuracy": 1.0,
1592
- "count": 100
1593
  },
1594
  "MB": {
1595
- "accuracy": 0.15,
1596
- "count": 100
1597
  },
1598
  "UB": {
1599
- "accuracy": 0.4395973154362416,
1600
- "count": 298
1601
  },
1602
  "UD": {
1603
- "accuracy": 0.1782178217821782,
1604
- "count": 202
1605
  }
1606
  }
1607
  }
1608
  },
1609
  "summary": {
1610
- "overall_accuracy": 0.020416666666666666,
1611
- "total_examples": 2400,
1612
- "n_splits": 22
 
1613
  }
1614
  },
1615
  "sorl_overall_accuracy": 0.020416666666666666,
 
610
  "K": null,
611
  "mode": "sft",
612
  "n_digits": 6,
613
+ "n_per_split": 50
614
  },
615
  "splits": {
616
  "add_S0": {
617
  "full_accuracy": 0.0,
618
+ "digit_accuracy": 0.3457142857142857,
619
+ "n_examples": 50,
620
  "per_subtask": {
621
  "SA": {
622
+ "accuracy": 0.22372881355932203,
623
+ "count": 295
624
  },
625
  "SS": {
626
  "accuracy": 1.0,
627
+ "count": 55
628
  }
629
  }
630
  },
631
  "add_S1": {
632
  "full_accuracy": 0.0,
633
+ "digit_accuracy": 0.19714285714285715,
634
+ "n_examples": 50,
635
  "per_subtask": {
636
  "SA": {
637
+ "accuracy": 0.25396825396825395,
638
+ "count": 126
639
  },
640
  "SC": {
641
+ "accuracy": 0.05063291139240506,
642
+ "count": 79
643
  },
644
  "SS": {
645
+ "accuracy": 0.8571428571428571,
646
+ "count": 21
647
  },
648
  "UC": {
649
+ "accuracy": 0.12096774193548387,
650
+ "count": 124
651
  }
652
  }
653
  },
654
  "add_S2": {
655
  "full_accuracy": 0.0,
656
+ "digit_accuracy": 0.37142857142857144,
657
+ "n_examples": 50,
658
  "per_subtask": {
659
  "SA": {
660
+ "accuracy": 0.48,
661
+ "count": 75
662
  },
663
  "SC": {
664
+ "accuracy": 0.08064516129032258,
665
+ "count": 62
666
  },
667
  "SS": {
668
+ "accuracy": 0.8717948717948718,
669
+ "count": 39
670
  },
671
  "UC": {
672
+ "accuracy": 0.23423423423423423,
673
+ "count": 111
674
  },
675
  "US": {
676
+ "accuracy": 0.4603174603174603,
677
+ "count": 63
678
  }
679
  }
680
  },
681
  "add_S3": {
682
  "full_accuracy": 0.0,
683
+ "digit_accuracy": 0.3057142857142857,
684
+ "n_examples": 50,
685
  "per_subtask": {
686
  "SA": {
687
+ "accuracy": 0.5833333333333334,
688
+ "count": 60
689
  },
690
  "SC": {
691
+ "accuracy": 0.05263157894736842,
692
+ "count": 57
693
  },
694
  "SS": {
695
+ "accuracy": 0.8947368421052632,
696
+ "count": 19
697
  },
698
  "UC": {
699
+ "accuracy": 0.16346153846153846,
700
+ "count": 104
701
  },
702
  "US": {
703
+ "accuracy": 0.3181818181818182,
704
+ "count": 110
705
  }
706
  }
707
  },
708
  "add_S4": {
709
  "full_accuracy": 0.0,
710
+ "digit_accuracy": 0.29714285714285715,
711
+ "n_examples": 50,
712
  "per_subtask": {
713
  "SA": {
714
+ "accuracy": 0.5208333333333334,
715
+ "count": 48
716
  },
717
  "SC": {
718
+ "accuracy": 0.057692307692307696,
719
+ "count": 52
720
  },
721
  "SS": {
722
+ "accuracy": 0.7142857142857143,
723
+ "count": 7
724
  },
725
  "UC": {
726
+ "accuracy": 0.21348314606741572,
727
+ "count": 89
728
  },
729
  "US": {
730
+ "accuracy": 0.33766233766233766,
731
+ "count": 154
732
  }
733
  }
734
  },
735
  "add_S5": {
736
  "full_accuracy": 0.0,
737
+ "digit_accuracy": 0.09142857142857143,
738
+ "n_examples": 50,
739
  "per_subtask": {
740
  "SA": {
741
+ "accuracy": 0.5,
742
+ "count": 50
743
  },
744
  "SC": {
745
  "accuracy": 0.0,
746
+ "count": 50
747
  },
748
  "UC": {
749
+ "accuracy": 0.06,
750
+ "count": 50
751
  },
752
  "US": {
753
+ "accuracy": 0.02,
754
+ "count": 200
755
  }
756
  }
757
  },
758
  "add_S6": {
759
  "full_accuracy": 0.0,
760
+ "digit_accuracy": 0.12,
761
+ "n_examples": 50,
762
  "per_subtask": {
763
  "SC": {
764
  "accuracy": 0.0,
765
+ "count": 50
766
  },
767
  "UC": {
768
+ "accuracy": 0.14,
769
+ "count": 50
770
  },
771
  "US": {
772
+ "accuracy": 0.14,
773
+ "count": 250
774
  }
775
  }
776
  },
777
  "add_random": {
778
  "full_accuracy": 0.0,
779
+ "digit_accuracy": 0.19857142857142857,
780
  "n_examples": 200,
781
  "per_subtask": {
782
  "SA": {
783
+ "accuracy": 0.2482598607888631,
784
+ "count": 431
785
  },
786
  "SC": {
787
+ "accuracy": 0.05379746835443038,
788
+ "count": 316
789
  },
790
  "SS": {
791
+ "accuracy": 0.8461538461538461,
792
+ "count": 39
793
  },
794
  "UC": {
795
+ "accuracy": 0.17142857142857143,
796
+ "count": 560
797
  },
798
  "US": {
799
+ "accuracy": 0.46296296296296297,
800
+ "count": 54
801
  }
802
  }
803
  },
804
+ "add_C1": {
805
  "full_accuracy": 0.0,
806
+ "digit_accuracy": 0.12285714285714286,
807
+ "n_examples": 50,
808
  "per_subtask": {
809
  "SA": {
810
+ "accuracy": 0.172,
811
+ "count": 250
812
+ },
813
+ "SC": {
814
+ "accuracy": 0.0,
815
+ "count": 50
816
+ },
817
+ "UC": {
818
+ "accuracy": 0.0,
819
+ "count": 50
820
+ }
821
+ }
822
+ },
823
+ "add_C2": {
824
+ "full_accuracy": 0.0,
825
+ "digit_accuracy": 0.12285714285714286,
826
+ "n_examples": 50,
827
+ "per_subtask": {
828
+ "SA": {
829
+ "accuracy": 0.19,
830
+ "count": 200
831
  },
832
  "SC": {
833
  "accuracy": 0.02,
834
+ "count": 50
835
  },
836
  "UC": {
837
+ "accuracy": 0.04819277108433735,
838
+ "count": 83
839
  },
840
  "US": {
841
+ "accuracy": 0.0,
842
+ "count": 17
843
  }
844
  }
845
  },
846
+ "add_C3": {
847
  "full_accuracy": 0.0,
848
+ "digit_accuracy": 0.13428571428571429,
849
+ "n_examples": 50,
850
  "per_subtask": {
851
  "SA": {
852
+ "accuracy": 0.24666666666666667,
853
+ "count": 150
854
  },
855
  "SC": {
856
+ "accuracy": 0.0,
857
+ "count": 50
858
+ },
859
+ "UC": {
860
+ "accuracy": 0.06,
861
  "count": 100
862
  },
863
+ "US": {
864
+ "accuracy": 0.08,
865
+ "count": 50
866
+ }
867
+ }
868
+ },
869
+ "add_C4": {
870
+ "full_accuracy": 0.0,
871
+ "digit_accuracy": 0.12857142857142856,
872
+ "n_examples": 50,
873
+ "per_subtask": {
874
+ "SA": {
875
+ "accuracy": 0.35,
876
+ "count": 100
877
+ },
878
+ "SC": {
879
+ "accuracy": 0.0,
880
+ "count": 50
881
+ },
882
  "UC": {
883
+ "accuracy": 0.03787878787878788,
884
+ "count": 132
885
  },
886
  "US": {
887
+ "accuracy": 0.07352941176470588,
888
+ "count": 68
889
  }
890
  }
891
  },
892
  "add_C5": {
893
  "full_accuracy": 0.0,
894
+ "digit_accuracy": 0.21428571428571427,
895
+ "n_examples": 50,
896
  "per_subtask": {
897
  "SA": {
898
+ "accuracy": 0.36,
899
+ "count": 50
900
  },
901
  "SC": {
902
+ "accuracy": 0.04,
903
+ "count": 50
904
  },
905
  "UC": {
906
+ "accuracy": 0.13013698630136986,
907
+ "count": 146
908
  },
909
  "US": {
910
+ "accuracy": 0.34615384615384615,
911
+ "count": 104
912
  }
913
  }
914
  },
915
  "add_C6": {
916
  "full_accuracy": 0.0,
917
+ "digit_accuracy": 0.3,
918
+ "n_examples": 50,
919
  "per_subtask": {
920
  "SC": {
921
+ "accuracy": 0.0,
922
+ "count": 50
923
  },
924
  "UC": {
925
+ "accuracy": 0.20105820105820105,
926
+ "count": 189
927
  },
928
  "US": {
929
+ "accuracy": 0.6036036036036037,
930
+ "count": 111
931
  }
932
  }
933
  },
934
  "sub_M0": {
935
  "full_accuracy": 0.0,
936
+ "digit_accuracy": 0.3057142857142857,
937
+ "n_examples": 50,
938
  "per_subtask": {
939
  "MD": {
940
+ "accuracy": 0.19801980198019803,
941
+ "count": 303
942
  },
943
  "ME": {
944
  "accuracy": 1.0,
945
+ "count": 47
946
  }
947
  }
948
  },
949
  "sub_M1": {
950
  "full_accuracy": 0.0,
951
+ "digit_accuracy": 0.26285714285714284,
952
+ "n_examples": 50,
953
  "per_subtask": {
954
  "MD": {
955
+ "accuracy": 0.3900709219858156,
956
+ "count": 141
957
  },
958
  "MB": {
959
  "accuracy": 0.0,
960
+ "count": 72
961
  },
962
  "ME": {
963
  "accuracy": 1.0,
964
+ "count": 18
965
  },
966
  "UB": {
967
+ "accuracy": 0.15966386554621848,
968
+ "count": 119
969
  }
970
  }
971
  },
972
  "sub_M2": {
973
  "full_accuracy": 0.0,
974
+ "digit_accuracy": 0.38285714285714284,
975
+ "n_examples": 50,
976
  "per_subtask": {
977
  "MD": {
978
+ "accuracy": 0.6428571428571429,
979
+ "count": 112
980
  },
981
  "MB": {
982
  "accuracy": 0.0,
983
+ "count": 53
984
  },
985
  "ME": {
986
  "accuracy": 1.0,
987
+ "count": 47
988
  },
989
  "UB": {
990
+ "accuracy": 0.17647058823529413,
991
+ "count": 85
992
  },
993
  "UD": {
994
  "accuracy": 0.0,
995
+ "count": 53
996
  }
997
  }
998
  },
999
  "sub_M3": {
1000
  "full_accuracy": 0.0,
1001
+ "digit_accuracy": 0.28,
1002
+ "n_examples": 50,
1003
  "per_subtask": {
1004
  "MD": {
1005
+ "accuracy": 0.6494845360824743,
1006
+ "count": 97
1007
  },
1008
  "MB": {
1009
  "accuracy": 0.0,
1010
+ "count": 51
1011
  },
1012
  "ME": {
1013
  "accuracy": 1.0,
1014
+ "count": 27
1015
  },
1016
  "UB": {
1017
+ "accuracy": 0.10810810810810811,
1018
+ "count": 74
1019
  },
1020
  "UD": {
1021
  "accuracy": 0.0,
1022
+ "count": 101
1023
  }
1024
  }
1025
  },
1026
  "sub_M4": {
1027
  "full_accuracy": 0.0,
1028
+ "digit_accuracy": 0.21142857142857144,
1029
+ "n_examples": 50,
1030
  "per_subtask": {
1031
  "MD": {
1032
  "accuracy": 0.5,
1033
+ "count": 100
1034
  },
1035
  "MB": {
1036
  "accuracy": 0.0,
1037
+ "count": 50
1038
  },
1039
  "UB": {
1040
+ "accuracy": 0.48,
1041
+ "count": 50
1042
  },
1043
  "UD": {
1044
  "accuracy": 0.0,
1045
+ "count": 150
1046
  }
1047
  }
1048
  },
1049
  "sub_M5": {
1050
  "full_accuracy": 0.0,
1051
+ "digit_accuracy": 0.18285714285714286,
1052
+ "n_examples": 50,
1053
  "per_subtask": {
1054
  "MD": {
1055
  "accuracy": 1.0,
1056
+ "count": 50
1057
  },
1058
  "MB": {
1059
  "accuracy": 0.0,
1060
+ "count": 50
1061
  },
1062
  "UB": {
1063
+ "accuracy": 0.28,
1064
+ "count": 50
1065
  },
1066
  "UD": {
1067
  "accuracy": 0.0,
1068
+ "count": 200
1069
  }
1070
  }
1071
  },
1072
  "sub_random": {
1073
  "full_accuracy": 0.0,
1074
+ "digit_accuracy": 0.2307142857142857,
1075
  "n_examples": 200,
1076
  "per_subtask": {
1077
  "MD": {
1078
+ "accuracy": 0.37719298245614036,
1079
+ "count": 570
1080
  },
1081
  "MB": {
1082
  "accuracy": 0.0,
1083
+ "count": 277
1084
  },
1085
  "ME": {
1086
  "accuracy": 1.0,
1087
  "count": 53
1088
  },
1089
  "UB": {
1090
+ "accuracy": 0.11677282377919321,
1091
+ "count": 471
1092
  },
1093
  "UD": {
1094
  "accuracy": 0.0,
1095
+ "count": 29
1096
  }
1097
  }
1098
  },
1099
  "sub_B3": {
1100
  "full_accuracy": 0.0,
1101
+ "digit_accuracy": 0.18857142857142858,
1102
+ "n_examples": 50,
1103
  "per_subtask": {
1104
  "MD": {
1105
  "accuracy": 0.3333333333333333,
1106
+ "count": 150
1107
  },
1108
  "MB": {
1109
  "accuracy": 0.0,
1110
+ "count": 50
1111
  },
1112
  "UB": {
1113
+ "accuracy": 0.15841584158415842,
1114
+ "count": 101
1115
  },
1116
  "UD": {
1117
  "accuracy": 0.0,
1118
+ "count": 49
1119
  }
1120
  }
1121
  },
1122
  "sub_B4": {
1123
  "full_accuracy": 0.0,
1124
+ "digit_accuracy": 0.17714285714285713,
1125
+ "n_examples": 50,
1126
  "per_subtask": {
1127
  "MD": {
1128
  "accuracy": 0.5,
1129
+ "count": 100
1130
  },
1131
  "MB": {
1132
  "accuracy": 0.0,
1133
+ "count": 50
1134
  },
1135
  "UB": {
1136
+ "accuracy": 0.09917355371900827,
1137
+ "count": 121
1138
  },
1139
  "UD": {
1140
  "accuracy": 0.0,
1141
+ "count": 79
1142
  }
1143
  }
1144
  },
1145
  "sub_B5": {
1146
  "full_accuracy": 0.0,
1147
+ "digit_accuracy": 0.18,
1148
+ "n_examples": 50,
1149
  "per_subtask": {
1150
  "MD": {
1151
  "accuracy": 1.0,
1152
+ "count": 50
1153
  },
1154
  "MB": {
1155
  "accuracy": 0.0,
1156
+ "count": 50
1157
  },
1158
  "UB": {
1159
+ "accuracy": 0.08552631578947369,
1160
+ "count": 152
1161
  },
1162
  "UD": {
1163
  "accuracy": 0.0,
1164
+ "count": 98
1165
  }
1166
  }
1167
  }
1168
  },
1169
  "summary": {
1170
  "overall_accuracy": 0.0,
1171
+ "digit_accuracy": 0.22114285714285714,
1172
+ "total_examples": 1500,
1173
+ "n_splits": 24
1174
  }
1175
  },
1176
  "sorl_eval": {
 
1179
  "K": 1,
1180
  "mode": "sorl",
1181
  "n_digits": 6,
1182
+ "n_per_split": 50
1183
  },
1184
  "splits": {
1185
  "add_S0": {
1186
+ "full_accuracy": 0.08,
1187
+ "digit_accuracy": 0.7114285714285714,
1188
+ "n_examples": 50,
1189
  "per_subtask": {
1190
  "SA": {
1191
+ "accuracy": 0.688135593220339,
1192
+ "count": 295
1193
  },
1194
  "SS": {
1195
+ "accuracy": 0.8363636363636363,
1196
+ "count": 55
1197
  }
1198
  }
1199
  },
1200
  "add_S1": {
1201
+ "full_accuracy": 0.0,
1202
+ "digit_accuracy": 0.6228571428571429,
1203
+ "n_examples": 50,
1204
  "per_subtask": {
1205
  "SA": {
1206
+ "accuracy": 0.7142857142857143,
1207
+ "count": 126
1208
  },
1209
  "SC": {
1210
+ "accuracy": 0.7215189873417721,
1211
+ "count": 79
1212
  },
1213
  "SS": {
1214
+ "accuracy": 0.8571428571428571,
1215
+ "count": 21
1216
  },
1217
  "UC": {
1218
+ "accuracy": 0.4274193548387097,
1219
+ "count": 124
1220
  }
1221
  }
1222
  },
1223
  "add_S2": {
1224
+ "full_accuracy": 0.02,
1225
+ "digit_accuracy": 0.5857142857142857,
1226
+ "n_examples": 50,
1227
  "per_subtask": {
1228
  "SA": {
1229
+ "accuracy": 0.7333333333333333,
1230
+ "count": 75
1231
  },
1232
  "SC": {
1233
+ "accuracy": 0.7096774193548387,
1234
+ "count": 62
1235
  },
1236
  "SS": {
1237
+ "accuracy": 0.7435897435897436,
1238
+ "count": 39
1239
  },
1240
  "UC": {
1241
+ "accuracy": 0.36936936936936937,
1242
+ "count": 111
1243
  },
1244
  "US": {
1245
+ "accuracy": 0.5714285714285714,
1246
+ "count": 63
1247
  }
1248
  }
1249
  },
1250
  "add_S3": {
1251
+ "full_accuracy": 0.02,
1252
+ "digit_accuracy": 0.5485714285714286,
1253
+ "n_examples": 50,
1254
  "per_subtask": {
1255
  "SA": {
1256
+ "accuracy": 0.7833333333333333,
1257
+ "count": 60
1258
  },
1259
  "SC": {
1260
+ "accuracy": 0.631578947368421,
1261
+ "count": 57
1262
  },
1263
  "SS": {
1264
+ "accuracy": 0.7894736842105263,
1265
+ "count": 19
1266
  },
1267
  "UC": {
1268
+ "accuracy": 0.3942307692307692,
1269
+ "count": 104
1270
  },
1271
  "US": {
1272
+ "accuracy": 0.4818181818181818,
1273
+ "count": 110
1274
  }
1275
  }
1276
  },
1277
  "add_S4": {
1278
+ "full_accuracy": 0.04,
1279
+ "digit_accuracy": 0.46,
1280
+ "n_examples": 50,
1281
  "per_subtask": {
1282
  "SA": {
1283
+ "accuracy": 0.7708333333333334,
1284
+ "count": 48
1285
  },
1286
  "SC": {
1287
+ "accuracy": 0.6923076923076923,
1288
+ "count": 52
1289
  },
1290
  "SS": {
1291
+ "accuracy": 0.7142857142857143,
1292
+ "count": 7
1293
  },
1294
  "UC": {
1295
+ "accuracy": 0.3258426966292135,
1296
+ "count": 89
1297
  },
1298
  "US": {
1299
+ "accuracy": 0.35064935064935066,
1300
+ "count": 154
1301
  }
1302
  }
1303
  },
1304
  "add_S5": {
1305
+ "full_accuracy": 0.02,
1306
+ "digit_accuracy": 0.3314285714285714,
1307
+ "n_examples": 50,
1308
  "per_subtask": {
1309
  "SA": {
1310
+ "accuracy": 0.68,
1311
+ "count": 50
1312
  },
1313
  "SC": {
1314
+ "accuracy": 0.72,
1315
+ "count": 50
1316
  },
1317
  "UC": {
1318
+ "accuracy": 0.12,
1319
+ "count": 50
1320
  },
1321
  "US": {
1322
+ "accuracy": 0.2,
1323
+ "count": 200
1324
  }
1325
  }
1326
  },
1327
  "add_S6": {
1328
  "full_accuracy": 0.0,
1329
+ "digit_accuracy": 0.09142857142857143,
1330
+ "n_examples": 50,
1331
  "per_subtask": {
1332
  "SC": {
1333
+ "accuracy": 0.52,
1334
+ "count": 50
1335
  },
1336
  "UC": {
1337
  "accuracy": 0.0,
1338
+ "count": 50
1339
  },
1340
  "US": {
1341
+ "accuracy": 0.024,
1342
+ "count": 250
1343
  }
1344
  }
1345
  },
1346
  "add_random": {
1347
+ "full_accuracy": 0.025,
1348
+ "digit_accuracy": 0.5957142857142858,
1349
  "n_examples": 200,
1350
  "per_subtask": {
1351
  "SA": {
1352
+ "accuracy": 0.6682134570765661,
1353
+ "count": 431
1354
  },
1355
  "SC": {
1356
+ "accuracy": 0.6930379746835443,
1357
+ "count": 316
1358
  },
1359
  "SS": {
1360
+ "accuracy": 0.8974358974358975,
1361
+ "count": 39
1362
  },
1363
  "UC": {
1364
+ "accuracy": 0.4589285714285714,
1365
+ "count": 560
1366
  },
1367
  "US": {
1368
+ "accuracy": 0.6481481481481481,
1369
+ "count": 54
1370
  }
1371
  }
1372
  },
1373
+ "add_C1": {
1374
  "full_accuracy": 0.02,
1375
+ "digit_accuracy": 0.7228571428571429,
1376
+ "n_examples": 50,
1377
  "per_subtask": {
1378
  "SA": {
1379
+ "accuracy": 0.82,
1380
+ "count": 250
1381
  },
1382
  "SC": {
1383
  "accuracy": 0.78,
1384
+ "count": 50
1385
+ },
1386
+ "UC": {
1387
+ "accuracy": 0.18,
1388
+ "count": 50
1389
+ }
1390
+ }
1391
+ },
1392
+ "add_C2": {
1393
+ "full_accuracy": 0.0,
1394
+ "digit_accuracy": 0.6485714285714286,
1395
+ "n_examples": 50,
1396
+ "per_subtask": {
1397
+ "SA": {
1398
+ "accuracy": 0.81,
1399
+ "count": 200
1400
+ },
1401
+ "SC": {
1402
+ "accuracy": 0.76,
1403
+ "count": 50
1404
  },
1405
  "UC": {
1406
+ "accuracy": 0.20481927710843373,
1407
+ "count": 83
1408
  },
1409
  "US": {
1410
+ "accuracy": 0.5882352941176471,
1411
+ "count": 17
1412
  }
1413
  }
1414
  },
1415
+ "add_C3": {
1416
+ "full_accuracy": 0.04,
1417
+ "digit_accuracy": 0.62,
1418
+ "n_examples": 50,
1419
  "per_subtask": {
1420
  "SA": {
1421
  "accuracy": 0.8,
1422
+ "count": 150
1423
  },
1424
  "SC": {
1425
+ "accuracy": 0.78,
1426
+ "count": 50
1427
+ },
1428
+ "UC": {
1429
+ "accuracy": 0.28,
1430
+ "count": 100
1431
+ },
1432
+ "US": {
1433
+ "accuracy": 0.6,
1434
+ "count": 50
1435
+ }
1436
+ }
1437
+ },
1438
+ "add_C4": {
1439
+ "full_accuracy": 0.0,
1440
+ "digit_accuracy": 0.52,
1441
+ "n_examples": 50,
1442
+ "per_subtask": {
1443
+ "SA": {
1444
+ "accuracy": 0.76,
1445
  "count": 100
1446
  },
1447
+ "SC": {
1448
+ "accuracy": 0.68,
1449
+ "count": 50
1450
+ },
1451
  "UC": {
1452
+ "accuracy": 0.15151515151515152,
1453
+ "count": 132
1454
  },
1455
  "US": {
1456
+ "accuracy": 0.7647058823529411,
1457
+ "count": 68
1458
  }
1459
  }
1460
  },
1461
  "add_C5": {
1462
  "full_accuracy": 0.02,
1463
+ "digit_accuracy": 0.5,
1464
+ "n_examples": 50,
1465
  "per_subtask": {
1466
  "SA": {
1467
+ "accuracy": 0.76,
1468
+ "count": 50
1469
  },
1470
  "SC": {
1471
+ "accuracy": 0.72,
1472
+ "count": 50
1473
  },
1474
  "UC": {
1475
+ "accuracy": 0.2465753424657534,
1476
+ "count": 146
1477
  },
1478
  "US": {
1479
+ "accuracy": 0.625,
1480
+ "count": 104
1481
  }
1482
  }
1483
  },
1484
  "add_C6": {
1485
+ "full_accuracy": 0.0,
1486
+ "digit_accuracy": 0.44571428571428573,
1487
+ "n_examples": 50,
1488
  "per_subtask": {
1489
  "SC": {
1490
+ "accuracy": 0.66,
1491
+ "count": 50
1492
  },
1493
  "UC": {
1494
+ "accuracy": 0.37037037037037035,
1495
+ "count": 189
1496
  },
1497
  "US": {
1498
+ "accuracy": 0.4774774774774775,
1499
+ "count": 111
1500
  }
1501
  }
1502
  },
1503
  "sub_M0": {
1504
+ "full_accuracy": 0.06,
1505
+ "digit_accuracy": 0.6371428571428571,
1506
+ "n_examples": 50,
1507
  "per_subtask": {
1508
  "MD": {
1509
+ "accuracy": 0.5808580858085809,
1510
+ "count": 303
1511
  },
1512
  "ME": {
1513
+ "accuracy": 1.0,
1514
+ "count": 47
1515
  }
1516
  }
1517
  },
1518
  "sub_M1": {
1519
  "full_accuracy": 0.0,
1520
+ "digit_accuracy": 0.5828571428571429,
1521
+ "n_examples": 50,
1522
  "per_subtask": {
1523
  "MD": {
1524
+ "accuracy": 0.6453900709219859,
1525
+ "count": 141
1526
  },
1527
  "MB": {
1528
+ "accuracy": 0.4027777777777778,
1529
+ "count": 72
1530
  },
1531
  "ME": {
1532
+ "accuracy": 0.8888888888888888,
1533
+ "count": 18
1534
  },
1535
  "UB": {
1536
+ "accuracy": 0.5714285714285714,
1537
+ "count": 119
1538
  }
1539
  }
1540
  },
1541
  "sub_M2": {
1542
+ "full_accuracy": 0.02,
1543
+ "digit_accuracy": 0.5885714285714285,
1544
+ "n_examples": 50,
1545
  "per_subtask": {
1546
  "MD": {
1547
+ "accuracy": 0.8035714285714286,
1548
+ "count": 112
1549
  },
1550
  "MB": {
1551
+ "accuracy": 0.33962264150943394,
1552
+ "count": 53
1553
  },
1554
  "ME": {
1555
+ "accuracy": 0.9574468085106383,
1556
+ "count": 47
1557
  },
1558
  "UB": {
1559
+ "accuracy": 0.5058823529411764,
1560
+ "count": 85
1561
  },
1562
  "UD": {
1563
+ "accuracy": 0.18867924528301888,
1564
+ "count": 53
1565
  }
1566
  }
1567
  },
1568
  "sub_M3": {
1569
+ "full_accuracy": 0.0,
1570
+ "digit_accuracy": 0.44857142857142857,
1571
+ "n_examples": 50,
1572
  "per_subtask": {
1573
  "MD": {
1574
+ "accuracy": 0.7628865979381443,
1575
+ "count": 97
1576
  },
1577
  "MB": {
1578
+ "accuracy": 0.19607843137254902,
1579
+ "count": 51
1580
  },
1581
  "ME": {
1582
+ "accuracy": 0.9629629629629629,
1583
+ "count": 27
1584
  },
1585
  "UB": {
1586
+ "accuracy": 0.5675675675675675,
1587
+ "count": 74
1588
  },
1589
  "UD": {
1590
+ "accuracy": 0.04950495049504951,
1591
+ "count": 101
1592
  }
1593
  }
1594
  },
1595
  "sub_M4": {
1596
+ "full_accuracy": 0.02,
1597
+ "digit_accuracy": 0.3485714285714286,
1598
+ "n_examples": 50,
1599
  "per_subtask": {
1600
  "MD": {
1601
+ "accuracy": 0.62,
1602
+ "count": 100
1603
  },
1604
  "MB": {
1605
+ "accuracy": 0.18,
1606
+ "count": 50
1607
  },
1608
  "UB": {
1609
+ "accuracy": 0.78,
1610
+ "count": 50
1611
  },
1612
  "UD": {
1613
+ "accuracy": 0.08,
1614
+ "count": 150
1615
  }
1616
  }
1617
  },
1618
  "sub_M5": {
1619
+ "full_accuracy": 0.02,
1620
+ "digit_accuracy": 0.28285714285714286,
1621
+ "n_examples": 50,
1622
  "per_subtask": {
1623
  "MD": {
1624
  "accuracy": 1.0,
1625
+ "count": 50
1626
  },
1627
  "MB": {
1628
+ "accuracy": 0.02,
1629
+ "count": 50
1630
  },
1631
  "UB": {
1632
+ "accuracy": 0.74,
1633
+ "count": 50
1634
  },
1635
  "UD": {
1636
+ "accuracy": 0.055,
1637
+ "count": 200
1638
  }
1639
  }
1640
  },
1641
  "sub_random": {
1642
+ "full_accuracy": 0.025,
1643
+ "digit_accuracy": 0.5657142857142857,
1644
  "n_examples": 200,
1645
  "per_subtask": {
1646
  "MD": {
1647
+ "accuracy": 0.6385964912280702,
1648
+ "count": 570
1649
  },
1650
  "MB": {
1651
+ "accuracy": 0.45126353790613716,
1652
+ "count": 277
1653
  },
1654
  "ME": {
1655
+ "accuracy": 0.9245283018867925,
1656
  "count": 53
1657
  },
1658
  "UB": {
1659
+ "accuracy": 0.5329087048832272,
1660
+ "count": 471
1661
  },
1662
  "UD": {
1663
+ "accuracy": 0.10344827586206896,
1664
+ "count": 29
1665
  }
1666
  }
1667
  },
1668
  "sub_B3": {
1669
  "full_accuracy": 0.0,
1670
+ "digit_accuracy": 0.4542857142857143,
1671
+ "n_examples": 50,
1672
  "per_subtask": {
1673
  "MD": {
1674
+ "accuracy": 0.5666666666666667,
1675
+ "count": 150
1676
  },
1677
  "MB": {
1678
+ "accuracy": 0.36,
1679
+ "count": 50
1680
  },
1681
  "UB": {
1682
+ "accuracy": 0.5148514851485149,
1683
+ "count": 101
1684
  },
1685
  "UD": {
1686
+ "accuracy": 0.08163265306122448,
1687
+ "count": 49
1688
  }
1689
  }
1690
  },
1691
  "sub_B4": {
1692
  "full_accuracy": 0.0,
1693
+ "digit_accuracy": 0.4342857142857143,
1694
+ "n_examples": 50,
1695
  "per_subtask": {
1696
  "MD": {
1697
+ "accuracy": 0.6,
1698
+ "count": 100
1699
  },
1700
  "MB": {
1701
+ "accuracy": 0.36,
1702
+ "count": 50
1703
  },
1704
  "UB": {
1705
+ "accuracy": 0.4793388429752066,
1706
+ "count": 121
1707
  },
1708
  "UD": {
1709
+ "accuracy": 0.20253164556962025,
1710
+ "count": 79
1711
  }
1712
  }
1713
  },
1714
  "sub_B5": {
1715
  "full_accuracy": 0.0,
1716
+ "digit_accuracy": 0.46,
1717
+ "n_examples": 50,
1718
  "per_subtask": {
1719
  "MD": {
1720
  "accuracy": 1.0,
1721
+ "count": 50
1722
  },
1723
  "MB": {
1724
+ "accuracy": 0.2,
1725
+ "count": 50
1726
  },
1727
  "UB": {
1728
+ "accuracy": 0.4407894736842105,
1729
+ "count": 152
1730
  },
1731
  "UD": {
1732
+ "accuracy": 0.3469387755102041,
1733
+ "count": 98
1734
  }
1735
  }
1736
  }
1737
  },
1738
  "summary": {
1739
+ "overall_accuracy": 0.019333333333333334,
1740
+ "digit_accuracy": 0.522952380952381,
1741
+ "total_examples": 1500,
1742
+ "n_splits": 24
1743
  }
1744
  },
1745
  "sorl_overall_accuracy": 0.020416666666666666,