amirali1985 commited on
Commit
a5704aa
·
verified ·
1 Parent(s): 29e5bb1

Upload add_sub_baseline_25K/metrics.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. add_sub_baseline_25K/metrics.json +211 -146
add_sub_baseline_25K/metrics.json CHANGED
@@ -706,416 +706,477 @@
706
  "K": null,
707
  "mode": "sft",
708
  "n_digits": 6,
709
- "n_per_split": 100
710
  },
711
  "splits": {
712
  "add_S0": {
713
  "full_accuracy": 1.0,
714
- "n_examples": 100,
 
715
  "per_subtask": {
716
  "SA": {
717
  "accuracy": 1.0,
718
- "count": 605
719
  },
720
  "SS": {
721
  "accuracy": 1.0,
722
- "count": 95
723
  }
724
  }
725
  },
726
  "add_S1": {
727
- "full_accuracy": 0.99,
728
- "n_examples": 100,
 
729
  "per_subtask": {
730
  "SA": {
731
- "accuracy": 0.9950980392156863,
732
- "count": 204
733
  },
734
  "SC": {
735
  "accuracy": 1.0,
736
- "count": 169
737
  },
738
  "SS": {
739
  "accuracy": 1.0,
740
- "count": 31
741
  },
742
  "UC": {
743
  "accuracy": 1.0,
744
- "count": 296
745
  }
746
  }
747
  },
748
  "add_S2": {
749
  "full_accuracy": 1.0,
750
- "n_examples": 100,
 
751
  "per_subtask": {
752
  "SA": {
753
  "accuracy": 1.0,
754
- "count": 163
755
  },
756
  "SC": {
757
  "accuracy": 1.0,
758
- "count": 130
759
  },
760
  "SS": {
761
  "accuracy": 1.0,
762
- "count": 87
763
  },
764
  "UC": {
765
  "accuracy": 1.0,
766
- "count": 203
767
  },
768
  "US": {
769
  "accuracy": 1.0,
770
- "count": 117
771
  }
772
  }
773
  },
774
  "add_S3": {
775
- "full_accuracy": 0.98,
776
- "n_examples": 100,
 
777
  "per_subtask": {
778
  "SA": {
779
  "accuracy": 1.0,
780
- "count": 121
781
  },
782
  "SC": {
783
  "accuracy": 1.0,
784
- "count": 121
785
  },
786
  "SS": {
787
  "accuracy": 1.0,
788
- "count": 49
789
  },
790
  "UC": {
791
- "accuracy": 0.989247311827957,
792
- "count": 186
793
  },
794
  "US": {
795
  "accuracy": 1.0,
796
- "count": 223
797
  }
798
  }
799
  },
800
  "add_S4": {
801
- "full_accuracy": 0.88,
802
- "n_examples": 100,
 
803
  "per_subtask": {
804
  "SA": {
805
  "accuracy": 1.0,
806
- "count": 104
807
  },
808
  "SC": {
809
  "accuracy": 1.0,
810
- "count": 106
811
  },
812
  "SS": {
813
  "accuracy": 1.0,
814
- "count": 23
815
  },
816
  "UC": {
817
- "accuracy": 0.925,
818
- "count": 160
819
  },
820
  "US": {
821
- "accuracy": 1.0,
822
- "count": 307
823
  }
824
  }
825
  },
826
  "add_S5": {
827
- "full_accuracy": 0.54,
828
- "n_examples": 100,
 
829
  "per_subtask": {
830
  "SA": {
831
  "accuracy": 1.0,
832
- "count": 100
833
  },
834
  "SC": {
835
  "accuracy": 1.0,
836
- "count": 100
837
  },
838
  "UC": {
839
- "accuracy": 0.66,
840
- "count": 100
841
  },
842
  "US": {
843
- "accuracy": 0.89,
844
- "count": 400
845
  }
846
  }
847
  },
848
  "add_S6": {
849
- "full_accuracy": 0.98,
850
- "n_examples": 100,
 
851
  "per_subtask": {
852
  "SC": {
853
  "accuracy": 1.0,
854
- "count": 100
855
  },
856
  "UC": {
857
- "accuracy": 0.99,
858
- "count": 100
859
  },
860
  "US": {
861
- "accuracy": 0.994,
862
- "count": 500
863
  }
864
  }
865
  },
866
  "add_random": {
867
  "full_accuracy": 1.0,
 
868
  "n_examples": 200,
869
  "per_subtask": {
870
  "SA": {
871
  "accuracy": 1.0,
872
- "count": 447
873
  },
874
  "SC": {
875
  "accuracy": 1.0,
876
- "count": 320
877
  },
878
  "SS": {
879
  "accuracy": 1.0,
880
- "count": 56
881
  },
882
  "UC": {
883
  "accuracy": 1.0,
884
- "count": 529
885
  },
886
  "US": {
887
  "accuracy": 1.0,
888
- "count": 48
889
  }
890
  }
891
  },
892
- "add_C3": {
893
  "full_accuracy": 1.0,
894
- "n_examples": 100,
 
895
  "per_subtask": {
896
  "SA": {
897
  "accuracy": 1.0,
898
- "count": 300
899
  },
900
  "SC": {
901
  "accuracy": 1.0,
902
- "count": 100
903
  },
904
  "UC": {
905
  "accuracy": 1.0,
906
- "count": 193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
907
  },
908
  "US": {
909
  "accuracy": 1.0,
910
- "count": 107
911
  }
912
  }
913
  },
914
- "add_C4": {
915
- "full_accuracy": 0.95,
916
- "n_examples": 100,
 
917
  "per_subtask": {
918
  "SA": {
919
  "accuracy": 1.0,
920
- "count": 200
921
  },
922
  "SC": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
923
  "accuracy": 1.0,
924
  "count": 100
925
  },
 
 
 
 
926
  "UC": {
927
- "accuracy": 0.98046875,
928
- "count": 256
929
  },
930
  "US": {
931
  "accuracy": 1.0,
932
- "count": 144
933
  }
934
  }
935
  },
936
  "add_C5": {
937
- "full_accuracy": 0.91,
938
- "n_examples": 100,
 
939
  "per_subtask": {
940
  "SA": {
941
  "accuracy": 1.0,
942
- "count": 100
943
  },
944
  "SC": {
945
  "accuracy": 1.0,
946
- "count": 100
947
  },
948
  "UC": {
949
- "accuracy": 0.9738562091503268,
950
- "count": 306
951
  },
952
  "US": {
953
- "accuracy": 0.9896907216494846,
954
- "count": 194
955
  }
956
  }
957
  },
958
  "add_C6": {
959
  "full_accuracy": 1.0,
960
- "n_examples": 100,
 
961
  "per_subtask": {
962
  "SC": {
963
  "accuracy": 1.0,
964
- "count": 100
965
  },
966
  "UC": {
967
  "accuracy": 1.0,
968
- "count": 366
969
  },
970
  "US": {
971
  "accuracy": 1.0,
972
- "count": 234
973
  }
974
  }
975
  },
976
  "sub_M0": {
977
  "full_accuracy": 1.0,
978
- "n_examples": 100,
 
979
  "per_subtask": {
980
  "MD": {
981
  "accuracy": 1.0,
982
- "count": 601
983
  },
984
  "ME": {
985
  "accuracy": 1.0,
986
- "count": 99
987
  }
988
  }
989
  },
990
  "sub_M1": {
991
  "full_accuracy": 1.0,
992
- "n_examples": 100,
 
993
  "per_subtask": {
994
  "MD": {
995
  "accuracy": 1.0,
996
- "count": 279
997
  },
998
  "MB": {
999
  "accuracy": 1.0,
1000
- "count": 145
1001
  },
1002
  "ME": {
1003
  "accuracy": 1.0,
1004
- "count": 24
1005
  },
1006
  "UB": {
1007
  "accuracy": 1.0,
1008
- "count": 252
1009
  }
1010
  }
1011
  },
1012
  "sub_M2": {
1013
  "full_accuracy": 1.0,
1014
- "n_examples": 100,
 
1015
  "per_subtask": {
1016
  "MD": {
1017
  "accuracy": 1.0,
1018
- "count": 213
1019
  },
1020
  "MB": {
1021
  "accuracy": 1.0,
1022
- "count": 113
1023
  },
1024
  "ME": {
1025
  "accuracy": 1.0,
1026
- "count": 85
1027
  },
1028
  "UB": {
1029
  "accuracy": 1.0,
1030
- "count": 181
1031
  },
1032
  "UD": {
1033
  "accuracy": 1.0,
1034
- "count": 108
1035
  }
1036
  }
1037
  },
1038
  "sub_M3": {
1039
- "full_accuracy": 1.0,
1040
- "n_examples": 100,
 
1041
  "per_subtask": {
1042
  "MD": {
1043
  "accuracy": 1.0,
1044
- "count": 179
1045
  },
1046
  "MB": {
1047
  "accuracy": 1.0,
1048
- "count": 103
1049
  },
1050
  "ME": {
1051
  "accuracy": 1.0,
1052
- "count": 56
1053
  },
1054
  "UB": {
1055
- "accuracy": 1.0,
1056
- "count": 149
1057
  },
1058
  "UD": {
1059
  "accuracy": 1.0,
1060
- "count": 213
1061
  }
1062
  }
1063
  },
1064
  "sub_M4": {
1065
- "full_accuracy": 0.56,
1066
- "n_examples": 100,
 
1067
  "per_subtask": {
1068
  "MD": {
1069
  "accuracy": 1.0,
1070
- "count": 200
1071
  },
1072
  "MB": {
1073
  "accuracy": 1.0,
1074
- "count": 100
1075
  },
1076
  "UB": {
1077
- "accuracy": 0.57,
1078
- "count": 100
1079
  },
1080
  "UD": {
1081
- "accuracy": 0.9933333333333333,
1082
- "count": 300
1083
  }
1084
  }
1085
  },
1086
  "sub_M5": {
1087
- "full_accuracy": 0.34,
1088
- "n_examples": 100,
 
1089
  "per_subtask": {
1090
  "MD": {
1091
  "accuracy": 1.0,
1092
- "count": 100
1093
  },
1094
  "MB": {
1095
  "accuracy": 1.0,
1096
- "count": 100
1097
  },
1098
  "UB": {
1099
- "accuracy": 0.57,
1100
- "count": 100
1101
  },
1102
  "UD": {
1103
  "accuracy": 0.87,
1104
- "count": 400
1105
  }
1106
  }
1107
  },
1108
  "sub_random": {
1109
  "full_accuracy": 1.0,
 
1110
  "n_examples": 200,
1111
  "per_subtask": {
1112
  "MD": {
1113
  "accuracy": 1.0,
1114
- "count": 600
1115
  },
1116
  "MB": {
1117
  "accuracy": 1.0,
1118
- "count": 267
1119
  },
1120
  "ME": {
1121
  "accuracy": 1.0,
@@ -1123,85 +1184,89 @@
1123
  },
1124
  "UB": {
1125
  "accuracy": 1.0,
1126
- "count": 439
1127
  },
1128
  "UD": {
1129
  "accuracy": 1.0,
1130
- "count": 41
1131
  }
1132
  }
1133
  },
1134
  "sub_B3": {
1135
- "full_accuracy": 0.98,
1136
- "n_examples": 100,
 
1137
  "per_subtask": {
1138
  "MD": {
1139
- "accuracy": 0.9933333333333333,
1140
- "count": 300
1141
  },
1142
  "MB": {
1143
  "accuracy": 1.0,
1144
- "count": 100
1145
  },
1146
  "UB": {
1147
  "accuracy": 1.0,
1148
- "count": 197
1149
  },
1150
  "UD": {
1151
  "accuracy": 1.0,
1152
- "count": 103
1153
  }
1154
  }
1155
  },
1156
  "sub_B4": {
1157
- "full_accuracy": 0.93,
1158
- "n_examples": 100,
 
1159
  "per_subtask": {
1160
  "MD": {
1161
- "accuracy": 0.995,
1162
- "count": 200
1163
  },
1164
  "MB": {
1165
  "accuracy": 1.0,
1166
- "count": 100
1167
  },
1168
  "UB": {
1169
- "accuracy": 0.9757085020242915,
1170
- "count": 247
1171
  },
1172
  "UD": {
1173
  "accuracy": 1.0,
1174
- "count": 153
1175
  }
1176
  }
1177
  },
1178
  "sub_B5": {
1179
- "full_accuracy": 0.95,
1180
- "n_examples": 100,
 
1181
  "per_subtask": {
1182
  "MD": {
1183
  "accuracy": 1.0,
1184
- "count": 100
1185
  },
1186
  "MB": {
1187
  "accuracy": 1.0,
1188
- "count": 100
1189
  },
1190
  "UB": {
1191
- "accuracy": 0.9832214765100671,
1192
- "count": 298
1193
  },
1194
  "UD": {
1195
- "accuracy": 0.9900990099009901,
1196
- "count": 202
1197
  }
1198
  }
1199
  }
1200
  },
1201
  "summary": {
1202
- "overall_accuracy": 0.91625,
1203
- "total_examples": 2400,
1204
- "n_splits": 22
 
1205
  }
1206
  }
1207
  }
 
706
  "K": null,
707
  "mode": "sft",
708
  "n_digits": 6,
709
+ "n_per_split": 50
710
  },
711
  "splits": {
712
  "add_S0": {
713
  "full_accuracy": 1.0,
714
+ "digit_accuracy": 1.0,
715
+ "n_examples": 50,
716
  "per_subtask": {
717
  "SA": {
718
  "accuracy": 1.0,
719
+ "count": 295
720
  },
721
  "SS": {
722
  "accuracy": 1.0,
723
+ "count": 55
724
  }
725
  }
726
  },
727
  "add_S1": {
728
+ "full_accuracy": 1.0,
729
+ "digit_accuracy": 1.0,
730
+ "n_examples": 50,
731
  "per_subtask": {
732
  "SA": {
733
+ "accuracy": 1.0,
734
+ "count": 126
735
  },
736
  "SC": {
737
  "accuracy": 1.0,
738
+ "count": 79
739
  },
740
  "SS": {
741
  "accuracy": 1.0,
742
+ "count": 21
743
  },
744
  "UC": {
745
  "accuracy": 1.0,
746
+ "count": 124
747
  }
748
  }
749
  },
750
  "add_S2": {
751
  "full_accuracy": 1.0,
752
+ "digit_accuracy": 1.0,
753
+ "n_examples": 50,
754
  "per_subtask": {
755
  "SA": {
756
  "accuracy": 1.0,
757
+ "count": 75
758
  },
759
  "SC": {
760
  "accuracy": 1.0,
761
+ "count": 62
762
  },
763
  "SS": {
764
  "accuracy": 1.0,
765
+ "count": 39
766
  },
767
  "UC": {
768
  "accuracy": 1.0,
769
+ "count": 111
770
  },
771
  "US": {
772
  "accuracy": 1.0,
773
+ "count": 63
774
  }
775
  }
776
  },
777
  "add_S3": {
778
+ "full_accuracy": 1.0,
779
+ "digit_accuracy": 1.0,
780
+ "n_examples": 50,
781
  "per_subtask": {
782
  "SA": {
783
  "accuracy": 1.0,
784
+ "count": 60
785
  },
786
  "SC": {
787
  "accuracy": 1.0,
788
+ "count": 57
789
  },
790
  "SS": {
791
  "accuracy": 1.0,
792
+ "count": 19
793
  },
794
  "UC": {
795
+ "accuracy": 1.0,
796
+ "count": 104
797
  },
798
  "US": {
799
  "accuracy": 1.0,
800
+ "count": 110
801
  }
802
  }
803
  },
804
  "add_S4": {
805
+ "full_accuracy": 0.9,
806
+ "digit_accuracy": 0.9857142857142858,
807
+ "n_examples": 50,
808
  "per_subtask": {
809
  "SA": {
810
  "accuracy": 1.0,
811
+ "count": 48
812
  },
813
  "SC": {
814
  "accuracy": 1.0,
815
+ "count": 52
816
  },
817
  "SS": {
818
  "accuracy": 1.0,
819
+ "count": 7
820
  },
821
  "UC": {
822
+ "accuracy": 0.9550561797752809,
823
+ "count": 89
824
  },
825
  "US": {
826
+ "accuracy": 0.9935064935064936,
827
+ "count": 154
828
  }
829
  }
830
  },
831
  "add_S5": {
832
+ "full_accuracy": 0.7,
833
+ "digit_accuracy": 0.9314285714285714,
834
+ "n_examples": 50,
835
  "per_subtask": {
836
  "SA": {
837
  "accuracy": 1.0,
838
+ "count": 50
839
  },
840
  "SC": {
841
  "accuracy": 1.0,
842
+ "count": 50
843
  },
844
  "UC": {
845
+ "accuracy": 0.8,
846
+ "count": 50
847
  },
848
  "US": {
849
+ "accuracy": 0.93,
850
+ "count": 200
851
  }
852
  }
853
  },
854
  "add_S6": {
855
+ "full_accuracy": 0.96,
856
+ "digit_accuracy": 0.9857142857142858,
857
+ "n_examples": 50,
858
  "per_subtask": {
859
  "SC": {
860
  "accuracy": 1.0,
861
+ "count": 50
862
  },
863
  "UC": {
864
+ "accuracy": 0.96,
865
+ "count": 50
866
  },
867
  "US": {
868
+ "accuracy": 0.988,
869
+ "count": 250
870
  }
871
  }
872
  },
873
  "add_random": {
874
  "full_accuracy": 1.0,
875
+ "digit_accuracy": 1.0,
876
  "n_examples": 200,
877
  "per_subtask": {
878
  "SA": {
879
  "accuracy": 1.0,
880
+ "count": 431
881
  },
882
  "SC": {
883
  "accuracy": 1.0,
884
+ "count": 316
885
  },
886
  "SS": {
887
  "accuracy": 1.0,
888
+ "count": 39
889
  },
890
  "UC": {
891
  "accuracy": 1.0,
892
+ "count": 560
893
  },
894
  "US": {
895
  "accuracy": 1.0,
896
+ "count": 54
897
  }
898
  }
899
  },
900
+ "add_C1": {
901
  "full_accuracy": 1.0,
902
+ "digit_accuracy": 1.0,
903
+ "n_examples": 50,
904
  "per_subtask": {
905
  "SA": {
906
  "accuracy": 1.0,
907
+ "count": 250
908
  },
909
  "SC": {
910
  "accuracy": 1.0,
911
+ "count": 50
912
  },
913
  "UC": {
914
  "accuracy": 1.0,
915
+ "count": 50
916
+ }
917
+ }
918
+ },
919
+ "add_C2": {
920
+ "full_accuracy": 1.0,
921
+ "digit_accuracy": 1.0,
922
+ "n_examples": 50,
923
+ "per_subtask": {
924
+ "SA": {
925
+ "accuracy": 1.0,
926
+ "count": 200
927
+ },
928
+ "SC": {
929
+ "accuracy": 1.0,
930
+ "count": 50
931
+ },
932
+ "UC": {
933
+ "accuracy": 1.0,
934
+ "count": 83
935
  },
936
  "US": {
937
  "accuracy": 1.0,
938
+ "count": 17
939
  }
940
  }
941
  },
942
+ "add_C3": {
943
+ "full_accuracy": 0.96,
944
+ "digit_accuracy": 0.9942857142857143,
945
+ "n_examples": 50,
946
  "per_subtask": {
947
  "SA": {
948
  "accuracy": 1.0,
949
+ "count": 150
950
  },
951
  "SC": {
952
+ "accuracy": 1.0,
953
+ "count": 50
954
+ },
955
+ "UC": {
956
+ "accuracy": 0.98,
957
+ "count": 100
958
+ },
959
+ "US": {
960
+ "accuracy": 1.0,
961
+ "count": 50
962
+ }
963
+ }
964
+ },
965
+ "add_C4": {
966
+ "full_accuracy": 0.96,
967
+ "digit_accuracy": 0.9942857142857143,
968
+ "n_examples": 50,
969
+ "per_subtask": {
970
+ "SA": {
971
  "accuracy": 1.0,
972
  "count": 100
973
  },
974
+ "SC": {
975
+ "accuracy": 1.0,
976
+ "count": 50
977
+ },
978
  "UC": {
979
+ "accuracy": 0.9848484848484849,
980
+ "count": 132
981
  },
982
  "US": {
983
  "accuracy": 1.0,
984
+ "count": 68
985
  }
986
  }
987
  },
988
  "add_C5": {
989
+ "full_accuracy": 0.94,
990
+ "digit_accuracy": 0.9914285714285714,
991
+ "n_examples": 50,
992
  "per_subtask": {
993
  "SA": {
994
  "accuracy": 1.0,
995
+ "count": 50
996
  },
997
  "SC": {
998
  "accuracy": 1.0,
999
+ "count": 50
1000
  },
1001
  "UC": {
1002
+ "accuracy": 0.9863013698630136,
1003
+ "count": 146
1004
  },
1005
  "US": {
1006
+ "accuracy": 0.9903846153846154,
1007
+ "count": 104
1008
  }
1009
  }
1010
  },
1011
  "add_C6": {
1012
  "full_accuracy": 1.0,
1013
+ "digit_accuracy": 1.0,
1014
+ "n_examples": 50,
1015
  "per_subtask": {
1016
  "SC": {
1017
  "accuracy": 1.0,
1018
+ "count": 50
1019
  },
1020
  "UC": {
1021
  "accuracy": 1.0,
1022
+ "count": 189
1023
  },
1024
  "US": {
1025
  "accuracy": 1.0,
1026
+ "count": 111
1027
  }
1028
  }
1029
  },
1030
  "sub_M0": {
1031
  "full_accuracy": 1.0,
1032
+ "digit_accuracy": 1.0,
1033
+ "n_examples": 50,
1034
  "per_subtask": {
1035
  "MD": {
1036
  "accuracy": 1.0,
1037
+ "count": 303
1038
  },
1039
  "ME": {
1040
  "accuracy": 1.0,
1041
+ "count": 47
1042
  }
1043
  }
1044
  },
1045
  "sub_M1": {
1046
  "full_accuracy": 1.0,
1047
+ "digit_accuracy": 1.0,
1048
+ "n_examples": 50,
1049
  "per_subtask": {
1050
  "MD": {
1051
  "accuracy": 1.0,
1052
+ "count": 141
1053
  },
1054
  "MB": {
1055
  "accuracy": 1.0,
1056
+ "count": 72
1057
  },
1058
  "ME": {
1059
  "accuracy": 1.0,
1060
+ "count": 18
1061
  },
1062
  "UB": {
1063
  "accuracy": 1.0,
1064
+ "count": 119
1065
  }
1066
  }
1067
  },
1068
  "sub_M2": {
1069
  "full_accuracy": 1.0,
1070
+ "digit_accuracy": 1.0,
1071
+ "n_examples": 50,
1072
  "per_subtask": {
1073
  "MD": {
1074
  "accuracy": 1.0,
1075
+ "count": 112
1076
  },
1077
  "MB": {
1078
  "accuracy": 1.0,
1079
+ "count": 53
1080
  },
1081
  "ME": {
1082
  "accuracy": 1.0,
1083
+ "count": 47
1084
  },
1085
  "UB": {
1086
  "accuracy": 1.0,
1087
+ "count": 85
1088
  },
1089
  "UD": {
1090
  "accuracy": 1.0,
1091
+ "count": 53
1092
  }
1093
  }
1094
  },
1095
  "sub_M3": {
1096
+ "full_accuracy": 0.98,
1097
+ "digit_accuracy": 0.9971428571428571,
1098
+ "n_examples": 50,
1099
  "per_subtask": {
1100
  "MD": {
1101
  "accuracy": 1.0,
1102
+ "count": 97
1103
  },
1104
  "MB": {
1105
  "accuracy": 1.0,
1106
+ "count": 51
1107
  },
1108
  "ME": {
1109
  "accuracy": 1.0,
1110
+ "count": 27
1111
  },
1112
  "UB": {
1113
+ "accuracy": 0.9864864864864865,
1114
+ "count": 74
1115
  },
1116
  "UD": {
1117
  "accuracy": 1.0,
1118
+ "count": 101
1119
  }
1120
  }
1121
  },
1122
  "sub_M4": {
1123
+ "full_accuracy": 0.58,
1124
+ "digit_accuracy": 0.94,
1125
+ "n_examples": 50,
1126
  "per_subtask": {
1127
  "MD": {
1128
  "accuracy": 1.0,
1129
+ "count": 100
1130
  },
1131
  "MB": {
1132
  "accuracy": 1.0,
1133
+ "count": 50
1134
  },
1135
  "UB": {
1136
+ "accuracy": 0.58,
1137
+ "count": 50
1138
  },
1139
  "UD": {
1140
+ "accuracy": 1.0,
1141
+ "count": 150
1142
  }
1143
  }
1144
  },
1145
  "sub_M5": {
1146
+ "full_accuracy": 0.36,
1147
+ "digit_accuracy": 0.86,
1148
+ "n_examples": 50,
1149
  "per_subtask": {
1150
  "MD": {
1151
  "accuracy": 1.0,
1152
+ "count": 50
1153
  },
1154
  "MB": {
1155
  "accuracy": 1.0,
1156
+ "count": 50
1157
  },
1158
  "UB": {
1159
+ "accuracy": 0.54,
1160
+ "count": 50
1161
  },
1162
  "UD": {
1163
  "accuracy": 0.87,
1164
+ "count": 200
1165
  }
1166
  }
1167
  },
1168
  "sub_random": {
1169
  "full_accuracy": 1.0,
1170
+ "digit_accuracy": 1.0,
1171
  "n_examples": 200,
1172
  "per_subtask": {
1173
  "MD": {
1174
  "accuracy": 1.0,
1175
+ "count": 570
1176
  },
1177
  "MB": {
1178
  "accuracy": 1.0,
1179
+ "count": 277
1180
  },
1181
  "ME": {
1182
  "accuracy": 1.0,
 
1184
  },
1185
  "UB": {
1186
  "accuracy": 1.0,
1187
+ "count": 471
1188
  },
1189
  "UD": {
1190
  "accuracy": 1.0,
1191
+ "count": 29
1192
  }
1193
  }
1194
  },
1195
  "sub_B3": {
1196
+ "full_accuracy": 1.0,
1197
+ "digit_accuracy": 1.0,
1198
+ "n_examples": 50,
1199
  "per_subtask": {
1200
  "MD": {
1201
+ "accuracy": 1.0,
1202
+ "count": 150
1203
  },
1204
  "MB": {
1205
  "accuracy": 1.0,
1206
+ "count": 50
1207
  },
1208
  "UB": {
1209
  "accuracy": 1.0,
1210
+ "count": 101
1211
  },
1212
  "UD": {
1213
  "accuracy": 1.0,
1214
+ "count": 49
1215
  }
1216
  }
1217
  },
1218
  "sub_B4": {
1219
+ "full_accuracy": 0.9,
1220
+ "digit_accuracy": 0.9857142857142858,
1221
+ "n_examples": 50,
1222
  "per_subtask": {
1223
  "MD": {
1224
+ "accuracy": 1.0,
1225
+ "count": 100
1226
  },
1227
  "MB": {
1228
  "accuracy": 1.0,
1229
+ "count": 50
1230
  },
1231
  "UB": {
1232
+ "accuracy": 0.9586776859504132,
1233
+ "count": 121
1234
  },
1235
  "UD": {
1236
  "accuracy": 1.0,
1237
+ "count": 79
1238
  }
1239
  }
1240
  },
1241
  "sub_B5": {
1242
+ "full_accuracy": 0.92,
1243
+ "digit_accuracy": 0.9885714285714285,
1244
+ "n_examples": 50,
1245
  "per_subtask": {
1246
  "MD": {
1247
  "accuracy": 1.0,
1248
+ "count": 50
1249
  },
1250
  "MB": {
1251
  "accuracy": 1.0,
1252
+ "count": 50
1253
  },
1254
  "UB": {
1255
+ "accuracy": 0.9736842105263158,
1256
+ "count": 152
1257
  },
1258
  "UD": {
1259
+ "accuracy": 1.0,
1260
+ "count": 98
1261
  }
1262
  }
1263
  }
1264
  },
1265
  "summary": {
1266
+ "overall_accuracy": 0.938,
1267
+ "digit_accuracy": 0.9884761904761905,
1268
+ "total_examples": 1500,
1269
+ "n_splits": 24
1270
  }
1271
  }
1272
  }