amirali1985 commited on
Commit
2f7ecac
·
verified ·
1 Parent(s): 6d2f17e

Upload add_sub_sorl_v1_abs16_K1_100K/metrics.json with huggingface_hub

Browse files
add_sub_sorl_v1_abs16_K1_100K/metrics.json CHANGED
@@ -2530,416 +2530,477 @@
2530
  "K": null,
2531
  "mode": "sft",
2532
  "n_digits": 6,
2533
- "n_per_split": 100
2534
  },
2535
  "splits": {
2536
  "add_S0": {
2537
  "full_accuracy": 1.0,
2538
- "n_examples": 100,
 
2539
  "per_subtask": {
2540
  "SA": {
2541
  "accuracy": 1.0,
2542
- "count": 605
2543
  },
2544
  "SS": {
2545
  "accuracy": 1.0,
2546
- "count": 95
2547
  }
2548
  }
2549
  },
2550
  "add_S1": {
2551
  "full_accuracy": 1.0,
2552
- "n_examples": 100,
 
2553
  "per_subtask": {
2554
  "SA": {
2555
  "accuracy": 1.0,
2556
- "count": 204
2557
  },
2558
  "SC": {
2559
  "accuracy": 1.0,
2560
- "count": 169
2561
  },
2562
  "SS": {
2563
  "accuracy": 1.0,
2564
- "count": 31
2565
  },
2566
  "UC": {
2567
  "accuracy": 1.0,
2568
- "count": 296
2569
  }
2570
  }
2571
  },
2572
  "add_S2": {
2573
  "full_accuracy": 1.0,
2574
- "n_examples": 100,
 
2575
  "per_subtask": {
2576
  "SA": {
2577
  "accuracy": 1.0,
2578
- "count": 163
2579
  },
2580
  "SC": {
2581
  "accuracy": 1.0,
2582
- "count": 130
2583
  },
2584
  "SS": {
2585
  "accuracy": 1.0,
2586
- "count": 87
2587
  },
2588
  "UC": {
2589
  "accuracy": 1.0,
2590
- "count": 203
2591
  },
2592
  "US": {
2593
  "accuracy": 1.0,
2594
- "count": 117
2595
  }
2596
  }
2597
  },
2598
  "add_S3": {
2599
- "full_accuracy": 0.98,
2600
- "n_examples": 100,
 
2601
  "per_subtask": {
2602
  "SA": {
2603
  "accuracy": 1.0,
2604
- "count": 121
2605
  },
2606
  "SC": {
2607
  "accuracy": 1.0,
2608
- "count": 121
2609
  },
2610
  "SS": {
2611
  "accuracy": 1.0,
2612
- "count": 49
2613
  },
2614
  "UC": {
2615
- "accuracy": 0.989247311827957,
2616
- "count": 186
2617
  },
2618
  "US": {
2619
  "accuracy": 1.0,
2620
- "count": 223
2621
  }
2622
  }
2623
  },
2624
  "add_S4": {
2625
- "full_accuracy": 0.79,
2626
- "n_examples": 100,
 
2627
  "per_subtask": {
2628
  "SA": {
2629
  "accuracy": 1.0,
2630
- "count": 104
2631
  },
2632
  "SC": {
2633
  "accuracy": 1.0,
2634
- "count": 106
2635
  },
2636
  "SS": {
2637
  "accuracy": 1.0,
2638
- "count": 23
2639
  },
2640
  "UC": {
2641
- "accuracy": 0.86875,
2642
- "count": 160
2643
  },
2644
  "US": {
2645
- "accuracy": 0.9869706840390879,
2646
- "count": 307
2647
  }
2648
  }
2649
  },
2650
  "add_S5": {
2651
- "full_accuracy": 0.64,
2652
- "n_examples": 100,
 
2653
  "per_subtask": {
2654
  "SA": {
2655
  "accuracy": 1.0,
2656
- "count": 100
2657
  },
2658
  "SC": {
2659
  "accuracy": 1.0,
2660
- "count": 100
2661
  },
2662
  "UC": {
2663
- "accuracy": 0.71,
2664
- "count": 100
2665
  },
2666
  "US": {
2667
- "accuracy": 0.9025,
2668
- "count": 400
2669
  }
2670
  }
2671
  },
2672
  "add_S6": {
2673
- "full_accuracy": 0.88,
2674
- "n_examples": 100,
 
2675
  "per_subtask": {
2676
  "SC": {
2677
  "accuracy": 1.0,
2678
- "count": 100
2679
  },
2680
  "UC": {
2681
  "accuracy": 0.94,
2682
- "count": 100
2683
  },
2684
  "US": {
2685
- "accuracy": 0.96,
2686
- "count": 500
2687
  }
2688
  }
2689
  },
2690
  "add_random": {
2691
  "full_accuracy": 1.0,
 
2692
  "n_examples": 200,
2693
  "per_subtask": {
2694
  "SA": {
2695
  "accuracy": 1.0,
2696
- "count": 447
2697
  },
2698
  "SC": {
2699
  "accuracy": 1.0,
2700
- "count": 320
2701
  },
2702
  "SS": {
2703
  "accuracy": 1.0,
2704
- "count": 56
2705
  },
2706
  "UC": {
2707
  "accuracy": 1.0,
2708
- "count": 529
2709
  },
2710
  "US": {
2711
  "accuracy": 1.0,
2712
- "count": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2713
  }
2714
  }
2715
  },
2716
  "add_C3": {
2717
- "full_accuracy": 0.99,
2718
- "n_examples": 100,
 
2719
  "per_subtask": {
2720
  "SA": {
2721
  "accuracy": 1.0,
2722
- "count": 300
2723
  },
2724
  "SC": {
2725
  "accuracy": 1.0,
2726
- "count": 100
2727
  },
2728
  "UC": {
2729
- "accuracy": 0.9948186528497409,
2730
- "count": 193
2731
  },
2732
  "US": {
2733
  "accuracy": 1.0,
2734
- "count": 107
2735
  }
2736
  }
2737
  },
2738
  "add_C4": {
2739
- "full_accuracy": 0.94,
2740
- "n_examples": 100,
 
2741
  "per_subtask": {
2742
  "SA": {
2743
- "accuracy": 0.995,
2744
- "count": 200
2745
  },
2746
  "SC": {
2747
  "accuracy": 1.0,
2748
- "count": 100
2749
  },
2750
  "UC": {
2751
- "accuracy": 0.98046875,
2752
- "count": 256
2753
  },
2754
  "US": {
2755
- "accuracy": 0.9930555555555556,
2756
- "count": 144
2757
  }
2758
  }
2759
  },
2760
  "add_C5": {
2761
- "full_accuracy": 0.92,
2762
- "n_examples": 100,
 
2763
  "per_subtask": {
2764
  "SA": {
2765
  "accuracy": 1.0,
2766
- "count": 100
2767
  },
2768
  "SC": {
2769
  "accuracy": 1.0,
2770
- "count": 100
2771
  },
2772
  "UC": {
2773
- "accuracy": 0.9738562091503268,
2774
- "count": 306
2775
  },
2776
  "US": {
2777
- "accuracy": 0.9948453608247423,
2778
- "count": 194
2779
  }
2780
  }
2781
  },
2782
  "add_C6": {
2783
  "full_accuracy": 1.0,
2784
- "n_examples": 100,
 
2785
  "per_subtask": {
2786
  "SC": {
2787
  "accuracy": 1.0,
2788
- "count": 100
2789
  },
2790
  "UC": {
2791
  "accuracy": 1.0,
2792
- "count": 366
2793
  },
2794
  "US": {
2795
  "accuracy": 1.0,
2796
- "count": 234
2797
  }
2798
  }
2799
  },
2800
  "sub_M0": {
2801
  "full_accuracy": 1.0,
2802
- "n_examples": 100,
 
2803
  "per_subtask": {
2804
  "MD": {
2805
  "accuracy": 1.0,
2806
- "count": 601
2807
  },
2808
  "ME": {
2809
  "accuracy": 1.0,
2810
- "count": 99
2811
  }
2812
  }
2813
  },
2814
  "sub_M1": {
2815
- "full_accuracy": 0.99,
2816
- "n_examples": 100,
 
2817
  "per_subtask": {
2818
  "MD": {
2819
  "accuracy": 1.0,
2820
- "count": 279
2821
  },
2822
  "MB": {
2823
  "accuracy": 1.0,
2824
- "count": 145
2825
  },
2826
  "ME": {
2827
  "accuracy": 1.0,
2828
- "count": 24
2829
  },
2830
  "UB": {
2831
- "accuracy": 0.996031746031746,
2832
- "count": 252
2833
  }
2834
  }
2835
  },
2836
  "sub_M2": {
2837
  "full_accuracy": 1.0,
2838
- "n_examples": 100,
 
2839
  "per_subtask": {
2840
  "MD": {
2841
  "accuracy": 1.0,
2842
- "count": 213
2843
  },
2844
  "MB": {
2845
  "accuracy": 1.0,
2846
- "count": 113
2847
  },
2848
  "ME": {
2849
  "accuracy": 1.0,
2850
- "count": 85
2851
  },
2852
  "UB": {
2853
  "accuracy": 1.0,
2854
- "count": 181
2855
  },
2856
  "UD": {
2857
  "accuracy": 1.0,
2858
- "count": 108
2859
  }
2860
  }
2861
  },
2862
  "sub_M3": {
2863
  "full_accuracy": 1.0,
2864
- "n_examples": 100,
 
2865
  "per_subtask": {
2866
  "MD": {
2867
  "accuracy": 1.0,
2868
- "count": 179
2869
  },
2870
  "MB": {
2871
  "accuracy": 1.0,
2872
- "count": 103
2873
  },
2874
  "ME": {
2875
  "accuracy": 1.0,
2876
- "count": 56
2877
  },
2878
  "UB": {
2879
  "accuracy": 1.0,
2880
- "count": 149
2881
  },
2882
  "UD": {
2883
  "accuracy": 1.0,
2884
- "count": 213
2885
  }
2886
  }
2887
  },
2888
  "sub_M4": {
2889
- "full_accuracy": 0.57,
2890
- "n_examples": 100,
 
2891
  "per_subtask": {
2892
  "MD": {
2893
  "accuracy": 1.0,
2894
- "count": 200
2895
  },
2896
  "MB": {
2897
  "accuracy": 1.0,
2898
- "count": 100
2899
  },
2900
  "UB": {
2901
- "accuracy": 0.58,
2902
- "count": 100
2903
  },
2904
  "UD": {
2905
- "accuracy": 0.9966666666666667,
2906
- "count": 300
2907
  }
2908
  }
2909
  },
2910
  "sub_M5": {
2911
- "full_accuracy": 0.35,
2912
- "n_examples": 100,
 
2913
  "per_subtask": {
2914
  "MD": {
2915
  "accuracy": 1.0,
2916
- "count": 100
2917
  },
2918
  "MB": {
2919
  "accuracy": 1.0,
2920
- "count": 100
2921
  },
2922
  "UB": {
2923
- "accuracy": 0.61,
2924
- "count": 100
2925
  },
2926
  "UD": {
2927
- "accuracy": 0.86,
2928
- "count": 400
2929
  }
2930
  }
2931
  },
2932
  "sub_random": {
2933
  "full_accuracy": 1.0,
 
2934
  "n_examples": 200,
2935
  "per_subtask": {
2936
  "MD": {
2937
  "accuracy": 1.0,
2938
- "count": 600
2939
  },
2940
  "MB": {
2941
  "accuracy": 1.0,
2942
- "count": 267
2943
  },
2944
  "ME": {
2945
  "accuracy": 1.0,
@@ -2947,85 +3008,89 @@
2947
  },
2948
  "UB": {
2949
  "accuracy": 1.0,
2950
- "count": 439
2951
  },
2952
  "UD": {
2953
  "accuracy": 1.0,
2954
- "count": 41
2955
  }
2956
  }
2957
  },
2958
  "sub_B3": {
2959
- "full_accuracy": 0.99,
2960
- "n_examples": 100,
 
2961
  "per_subtask": {
2962
  "MD": {
2963
  "accuracy": 1.0,
2964
- "count": 300
2965
  },
2966
  "MB": {
2967
  "accuracy": 1.0,
2968
- "count": 100
2969
  },
2970
  "UB": {
2971
- "accuracy": 0.9949238578680203,
2972
- "count": 197
2973
  },
2974
  "UD": {
2975
  "accuracy": 1.0,
2976
- "count": 103
2977
  }
2978
  }
2979
  },
2980
  "sub_B4": {
2981
  "full_accuracy": 0.94,
2982
- "n_examples": 100,
 
2983
  "per_subtask": {
2984
  "MD": {
2985
  "accuracy": 1.0,
2986
- "count": 200
2987
  },
2988
  "MB": {
2989
  "accuracy": 1.0,
2990
- "count": 100
2991
  },
2992
  "UB": {
2993
- "accuracy": 0.9757085020242915,
2994
- "count": 247
2995
  },
2996
  "UD": {
2997
  "accuracy": 1.0,
2998
- "count": 153
2999
  }
3000
  }
3001
  },
3002
  "sub_B5": {
3003
  "full_accuracy": 0.98,
3004
- "n_examples": 100,
 
3005
  "per_subtask": {
3006
  "MD": {
3007
  "accuracy": 1.0,
3008
- "count": 100
3009
  },
3010
  "MB": {
3011
  "accuracy": 1.0,
3012
- "count": 100
3013
  },
3014
  "UB": {
3015
- "accuracy": 0.9966442953020134,
3016
- "count": 298
3017
  },
3018
  "UD": {
3019
- "accuracy": 0.995049504950495,
3020
- "count": 202
3021
  }
3022
  }
3023
  }
3024
  },
3025
  "summary": {
3026
- "overall_accuracy": 0.9145833333333333,
3027
- "total_examples": 2400,
3028
- "n_splits": 22
 
3029
  }
3030
  },
3031
  "sorl_eval": {
@@ -3034,416 +3099,477 @@
3034
  "K": 1,
3035
  "mode": "sorl",
3036
  "n_digits": 6,
3037
- "n_per_split": 100
3038
  },
3039
  "splits": {
3040
  "add_S0": {
3041
  "full_accuracy": 1.0,
3042
- "n_examples": 100,
 
3043
  "per_subtask": {
3044
  "SA": {
3045
  "accuracy": 1.0,
3046
- "count": 605
3047
  },
3048
  "SS": {
3049
  "accuracy": 1.0,
3050
- "count": 95
3051
  }
3052
  }
3053
  },
3054
  "add_S1": {
3055
  "full_accuracy": 1.0,
3056
- "n_examples": 100,
 
3057
  "per_subtask": {
3058
  "SA": {
3059
  "accuracy": 1.0,
3060
- "count": 204
3061
  },
3062
  "SC": {
3063
  "accuracy": 1.0,
3064
- "count": 169
3065
  },
3066
  "SS": {
3067
  "accuracy": 1.0,
3068
- "count": 31
3069
  },
3070
  "UC": {
3071
  "accuracy": 1.0,
3072
- "count": 296
3073
  }
3074
  }
3075
  },
3076
  "add_S2": {
3077
  "full_accuracy": 1.0,
3078
- "n_examples": 100,
 
3079
  "per_subtask": {
3080
  "SA": {
3081
  "accuracy": 1.0,
3082
- "count": 163
3083
  },
3084
  "SC": {
3085
  "accuracy": 1.0,
3086
- "count": 130
3087
  },
3088
  "SS": {
3089
  "accuracy": 1.0,
3090
- "count": 87
3091
  },
3092
  "UC": {
3093
  "accuracy": 1.0,
3094
- "count": 203
3095
  },
3096
  "US": {
3097
  "accuracy": 1.0,
3098
- "count": 117
3099
  }
3100
  }
3101
  },
3102
  "add_S3": {
3103
  "full_accuracy": 1.0,
3104
- "n_examples": 100,
 
3105
  "per_subtask": {
3106
  "SA": {
3107
  "accuracy": 1.0,
3108
- "count": 121
3109
  },
3110
  "SC": {
3111
  "accuracy": 1.0,
3112
- "count": 121
3113
  },
3114
  "SS": {
3115
  "accuracy": 1.0,
3116
- "count": 49
3117
  },
3118
  "UC": {
3119
  "accuracy": 1.0,
3120
- "count": 186
3121
  },
3122
  "US": {
3123
  "accuracy": 1.0,
3124
- "count": 223
3125
  }
3126
  }
3127
  },
3128
  "add_S4": {
3129
  "full_accuracy": 1.0,
3130
- "n_examples": 100,
 
3131
  "per_subtask": {
3132
  "SA": {
3133
  "accuracy": 1.0,
3134
- "count": 104
3135
  },
3136
  "SC": {
3137
  "accuracy": 1.0,
3138
- "count": 106
3139
  },
3140
  "SS": {
3141
  "accuracy": 1.0,
3142
- "count": 23
3143
  },
3144
  "UC": {
3145
  "accuracy": 1.0,
3146
- "count": 160
3147
  },
3148
  "US": {
3149
  "accuracy": 1.0,
3150
- "count": 307
3151
  }
3152
  }
3153
  },
3154
  "add_S5": {
3155
- "full_accuracy": 0.99,
3156
- "n_examples": 100,
 
3157
  "per_subtask": {
3158
  "SA": {
3159
  "accuracy": 1.0,
3160
- "count": 100
3161
  },
3162
  "SC": {
3163
  "accuracy": 1.0,
3164
- "count": 100
3165
  },
3166
  "UC": {
3167
- "accuracy": 0.99,
3168
- "count": 100
3169
  },
3170
  "US": {
3171
  "accuracy": 1.0,
3172
- "count": 400
3173
  }
3174
  }
3175
  },
3176
  "add_S6": {
3177
  "full_accuracy": 0.96,
3178
- "n_examples": 100,
 
3179
  "per_subtask": {
3180
  "SC": {
3181
  "accuracy": 1.0,
3182
- "count": 100
3183
  },
3184
  "UC": {
3185
  "accuracy": 0.96,
3186
- "count": 100
3187
  },
3188
  "US": {
3189
- "accuracy": 0.998,
3190
- "count": 500
3191
  }
3192
  }
3193
  },
3194
  "add_random": {
3195
  "full_accuracy": 1.0,
 
3196
  "n_examples": 200,
3197
  "per_subtask": {
3198
  "SA": {
3199
  "accuracy": 1.0,
3200
- "count": 447
3201
  },
3202
  "SC": {
3203
  "accuracy": 1.0,
3204
- "count": 320
3205
  },
3206
  "SS": {
3207
  "accuracy": 1.0,
3208
- "count": 56
3209
  },
3210
  "UC": {
3211
  "accuracy": 1.0,
3212
- "count": 529
3213
  },
3214
  "US": {
3215
  "accuracy": 1.0,
3216
- "count": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3217
  }
3218
  }
3219
  },
3220
  "add_C3": {
3221
  "full_accuracy": 1.0,
3222
- "n_examples": 100,
 
3223
  "per_subtask": {
3224
  "SA": {
3225
  "accuracy": 1.0,
3226
- "count": 300
3227
  },
3228
  "SC": {
3229
  "accuracy": 1.0,
3230
- "count": 100
3231
  },
3232
  "UC": {
3233
  "accuracy": 1.0,
3234
- "count": 193
3235
  },
3236
  "US": {
3237
  "accuracy": 1.0,
3238
- "count": 107
3239
  }
3240
  }
3241
  },
3242
  "add_C4": {
3243
- "full_accuracy": 0.99,
3244
- "n_examples": 100,
 
3245
  "per_subtask": {
3246
  "SA": {
3247
  "accuracy": 1.0,
3248
- "count": 200
3249
  },
3250
  "SC": {
3251
  "accuracy": 1.0,
3252
- "count": 100
3253
  },
3254
  "UC": {
3255
- "accuracy": 0.99609375,
3256
- "count": 256
3257
  },
3258
  "US": {
3259
  "accuracy": 1.0,
3260
- "count": 144
3261
  }
3262
  }
3263
  },
3264
  "add_C5": {
3265
  "full_accuracy": 1.0,
3266
- "n_examples": 100,
 
3267
  "per_subtask": {
3268
  "SA": {
3269
  "accuracy": 1.0,
3270
- "count": 100
3271
  },
3272
  "SC": {
3273
  "accuracy": 1.0,
3274
- "count": 100
3275
  },
3276
  "UC": {
3277
  "accuracy": 1.0,
3278
- "count": 306
3279
  },
3280
  "US": {
3281
  "accuracy": 1.0,
3282
- "count": 194
3283
  }
3284
  }
3285
  },
3286
  "add_C6": {
3287
  "full_accuracy": 1.0,
3288
- "n_examples": 100,
 
3289
  "per_subtask": {
3290
  "SC": {
3291
  "accuracy": 1.0,
3292
- "count": 100
3293
  },
3294
  "UC": {
3295
  "accuracy": 1.0,
3296
- "count": 366
3297
  },
3298
  "US": {
3299
  "accuracy": 1.0,
3300
- "count": 234
3301
  }
3302
  }
3303
  },
3304
  "sub_M0": {
3305
  "full_accuracy": 1.0,
3306
- "n_examples": 100,
 
3307
  "per_subtask": {
3308
  "MD": {
3309
  "accuracy": 1.0,
3310
- "count": 601
3311
  },
3312
  "ME": {
3313
  "accuracy": 1.0,
3314
- "count": 99
3315
  }
3316
  }
3317
  },
3318
  "sub_M1": {
3319
  "full_accuracy": 1.0,
3320
- "n_examples": 100,
 
3321
  "per_subtask": {
3322
  "MD": {
3323
  "accuracy": 1.0,
3324
- "count": 279
3325
  },
3326
  "MB": {
3327
  "accuracy": 1.0,
3328
- "count": 145
3329
  },
3330
  "ME": {
3331
  "accuracy": 1.0,
3332
- "count": 24
3333
  },
3334
  "UB": {
3335
  "accuracy": 1.0,
3336
- "count": 252
3337
  }
3338
  }
3339
  },
3340
  "sub_M2": {
3341
  "full_accuracy": 1.0,
3342
- "n_examples": 100,
 
3343
  "per_subtask": {
3344
  "MD": {
3345
  "accuracy": 1.0,
3346
- "count": 213
3347
  },
3348
  "MB": {
3349
  "accuracy": 1.0,
3350
- "count": 113
3351
  },
3352
  "ME": {
3353
  "accuracy": 1.0,
3354
- "count": 85
3355
  },
3356
  "UB": {
3357
  "accuracy": 1.0,
3358
- "count": 181
3359
  },
3360
  "UD": {
3361
  "accuracy": 1.0,
3362
- "count": 108
3363
  }
3364
  }
3365
  },
3366
  "sub_M3": {
3367
  "full_accuracy": 1.0,
3368
- "n_examples": 100,
 
3369
  "per_subtask": {
3370
  "MD": {
3371
  "accuracy": 1.0,
3372
- "count": 179
3373
  },
3374
  "MB": {
3375
  "accuracy": 1.0,
3376
- "count": 103
3377
  },
3378
  "ME": {
3379
  "accuracy": 1.0,
3380
- "count": 56
3381
  },
3382
  "UB": {
3383
  "accuracy": 1.0,
3384
- "count": 149
3385
  },
3386
  "UD": {
3387
  "accuracy": 1.0,
3388
- "count": 213
3389
  }
3390
  }
3391
  },
3392
  "sub_M4": {
3393
  "full_accuracy": 1.0,
3394
- "n_examples": 100,
 
3395
  "per_subtask": {
3396
  "MD": {
3397
  "accuracy": 1.0,
3398
- "count": 200
3399
  },
3400
  "MB": {
3401
  "accuracy": 1.0,
3402
- "count": 100
3403
  },
3404
  "UB": {
3405
  "accuracy": 1.0,
3406
- "count": 100
3407
  },
3408
  "UD": {
3409
  "accuracy": 1.0,
3410
- "count": 300
3411
  }
3412
  }
3413
  },
3414
  "sub_M5": {
3415
- "full_accuracy": 0.93,
3416
- "n_examples": 100,
 
3417
  "per_subtask": {
3418
  "MD": {
3419
  "accuracy": 1.0,
3420
- "count": 100
3421
  },
3422
  "MB": {
3423
  "accuracy": 1.0,
3424
- "count": 100
3425
  },
3426
  "UB": {
3427
- "accuracy": 0.96,
3428
- "count": 100
3429
  },
3430
  "UD": {
3431
- "accuracy": 0.9925,
3432
- "count": 400
3433
  }
3434
  }
3435
  },
3436
  "sub_random": {
3437
  "full_accuracy": 1.0,
 
3438
  "n_examples": 200,
3439
  "per_subtask": {
3440
  "MD": {
3441
  "accuracy": 1.0,
3442
- "count": 600
3443
  },
3444
  "MB": {
3445
  "accuracy": 1.0,
3446
- "count": 267
3447
  },
3448
  "ME": {
3449
  "accuracy": 1.0,
@@ -3451,85 +3577,89 @@
3451
  },
3452
  "UB": {
3453
  "accuracy": 1.0,
3454
- "count": 439
3455
  },
3456
  "UD": {
3457
  "accuracy": 1.0,
3458
- "count": 41
3459
  }
3460
  }
3461
  },
3462
  "sub_B3": {
3463
  "full_accuracy": 1.0,
3464
- "n_examples": 100,
 
3465
  "per_subtask": {
3466
  "MD": {
3467
  "accuracy": 1.0,
3468
- "count": 300
3469
  },
3470
  "MB": {
3471
  "accuracy": 1.0,
3472
- "count": 100
3473
  },
3474
  "UB": {
3475
  "accuracy": 1.0,
3476
- "count": 197
3477
  },
3478
  "UD": {
3479
  "accuracy": 1.0,
3480
- "count": 103
3481
  }
3482
  }
3483
  },
3484
  "sub_B4": {
3485
  "full_accuracy": 1.0,
3486
- "n_examples": 100,
 
3487
  "per_subtask": {
3488
  "MD": {
3489
  "accuracy": 1.0,
3490
- "count": 200
3491
  },
3492
  "MB": {
3493
  "accuracy": 1.0,
3494
- "count": 100
3495
  },
3496
  "UB": {
3497
  "accuracy": 1.0,
3498
- "count": 247
3499
  },
3500
  "UD": {
3501
  "accuracy": 1.0,
3502
- "count": 153
3503
  }
3504
  }
3505
  },
3506
  "sub_B5": {
3507
  "full_accuracy": 1.0,
3508
- "n_examples": 100,
 
3509
  "per_subtask": {
3510
  "MD": {
3511
  "accuracy": 1.0,
3512
- "count": 100
3513
  },
3514
  "MB": {
3515
  "accuracy": 1.0,
3516
- "count": 100
3517
  },
3518
  "UB": {
3519
  "accuracy": 1.0,
3520
- "count": 298
3521
  },
3522
  "UD": {
3523
  "accuracy": 1.0,
3524
- "count": 202
3525
  }
3526
  }
3527
  }
3528
  },
3529
  "summary": {
3530
- "overall_accuracy": 0.9945833333333334,
3531
- "total_examples": 2400,
3532
- "n_splits": 22
 
3533
  }
3534
  },
3535
  "sorl_overall_accuracy": 0.9945833333333334,
 
2530
  "K": null,
2531
  "mode": "sft",
2532
  "n_digits": 6,
2533
+ "n_per_split": 50
2534
  },
2535
  "splits": {
2536
  "add_S0": {
2537
  "full_accuracy": 1.0,
2538
+ "digit_accuracy": 1.0,
2539
+ "n_examples": 50,
2540
  "per_subtask": {
2541
  "SA": {
2542
  "accuracy": 1.0,
2543
+ "count": 295
2544
  },
2545
  "SS": {
2546
  "accuracy": 1.0,
2547
+ "count": 55
2548
  }
2549
  }
2550
  },
2551
  "add_S1": {
2552
  "full_accuracy": 1.0,
2553
+ "digit_accuracy": 1.0,
2554
+ "n_examples": 50,
2555
  "per_subtask": {
2556
  "SA": {
2557
  "accuracy": 1.0,
2558
+ "count": 126
2559
  },
2560
  "SC": {
2561
  "accuracy": 1.0,
2562
+ "count": 79
2563
  },
2564
  "SS": {
2565
  "accuracy": 1.0,
2566
+ "count": 21
2567
  },
2568
  "UC": {
2569
  "accuracy": 1.0,
2570
+ "count": 124
2571
  }
2572
  }
2573
  },
2574
  "add_S2": {
2575
  "full_accuracy": 1.0,
2576
+ "digit_accuracy": 1.0,
2577
+ "n_examples": 50,
2578
  "per_subtask": {
2579
  "SA": {
2580
  "accuracy": 1.0,
2581
+ "count": 75
2582
  },
2583
  "SC": {
2584
  "accuracy": 1.0,
2585
+ "count": 62
2586
  },
2587
  "SS": {
2588
  "accuracy": 1.0,
2589
+ "count": 39
2590
  },
2591
  "UC": {
2592
  "accuracy": 1.0,
2593
+ "count": 111
2594
  },
2595
  "US": {
2596
  "accuracy": 1.0,
2597
+ "count": 63
2598
  }
2599
  }
2600
  },
2601
  "add_S3": {
2602
+ "full_accuracy": 1.0,
2603
+ "digit_accuracy": 1.0,
2604
+ "n_examples": 50,
2605
  "per_subtask": {
2606
  "SA": {
2607
  "accuracy": 1.0,
2608
+ "count": 60
2609
  },
2610
  "SC": {
2611
  "accuracy": 1.0,
2612
+ "count": 57
2613
  },
2614
  "SS": {
2615
  "accuracy": 1.0,
2616
+ "count": 19
2617
  },
2618
  "UC": {
2619
+ "accuracy": 1.0,
2620
+ "count": 104
2621
  },
2622
  "US": {
2623
  "accuracy": 1.0,
2624
+ "count": 110
2625
  }
2626
  }
2627
  },
2628
  "add_S4": {
2629
+ "full_accuracy": 0.9,
2630
+ "digit_accuracy": 0.9828571428571429,
2631
+ "n_examples": 50,
2632
  "per_subtask": {
2633
  "SA": {
2634
  "accuracy": 1.0,
2635
+ "count": 48
2636
  },
2637
  "SC": {
2638
  "accuracy": 1.0,
2639
+ "count": 52
2640
  },
2641
  "SS": {
2642
  "accuracy": 1.0,
2643
+ "count": 7
2644
  },
2645
  "UC": {
2646
+ "accuracy": 0.9438202247191011,
2647
+ "count": 89
2648
  },
2649
  "US": {
2650
+ "accuracy": 0.9935064935064936,
2651
+ "count": 154
2652
  }
2653
  }
2654
  },
2655
  "add_S5": {
2656
+ "full_accuracy": 0.58,
2657
+ "digit_accuracy": 0.8971428571428571,
2658
+ "n_examples": 50,
2659
  "per_subtask": {
2660
  "SA": {
2661
  "accuracy": 1.0,
2662
+ "count": 50
2663
  },
2664
  "SC": {
2665
  "accuracy": 1.0,
2666
+ "count": 50
2667
  },
2668
  "UC": {
2669
+ "accuracy": 0.72,
2670
+ "count": 50
2671
  },
2672
  "US": {
2673
+ "accuracy": 0.89,
2674
+ "count": 200
2675
  }
2676
  }
2677
  },
2678
  "add_S6": {
2679
+ "full_accuracy": 0.92,
2680
+ "digit_accuracy": 0.9685714285714285,
2681
+ "n_examples": 50,
2682
  "per_subtask": {
2683
  "SC": {
2684
  "accuracy": 1.0,
2685
+ "count": 50
2686
  },
2687
  "UC": {
2688
  "accuracy": 0.94,
2689
+ "count": 50
2690
  },
2691
  "US": {
2692
+ "accuracy": 0.968,
2693
+ "count": 250
2694
  }
2695
  }
2696
  },
2697
  "add_random": {
2698
  "full_accuracy": 1.0,
2699
+ "digit_accuracy": 1.0,
2700
  "n_examples": 200,
2701
  "per_subtask": {
2702
  "SA": {
2703
  "accuracy": 1.0,
2704
+ "count": 431
2705
  },
2706
  "SC": {
2707
  "accuracy": 1.0,
2708
+ "count": 316
2709
  },
2710
  "SS": {
2711
  "accuracy": 1.0,
2712
+ "count": 39
2713
  },
2714
  "UC": {
2715
  "accuracy": 1.0,
2716
+ "count": 560
2717
  },
2718
  "US": {
2719
  "accuracy": 1.0,
2720
+ "count": 54
2721
+ }
2722
+ }
2723
+ },
2724
+ "add_C1": {
2725
+ "full_accuracy": 1.0,
2726
+ "digit_accuracy": 1.0,
2727
+ "n_examples": 50,
2728
+ "per_subtask": {
2729
+ "SA": {
2730
+ "accuracy": 1.0,
2731
+ "count": 250
2732
+ },
2733
+ "SC": {
2734
+ "accuracy": 1.0,
2735
+ "count": 50
2736
+ },
2737
+ "UC": {
2738
+ "accuracy": 1.0,
2739
+ "count": 50
2740
+ }
2741
+ }
2742
+ },
2743
+ "add_C2": {
2744
+ "full_accuracy": 1.0,
2745
+ "digit_accuracy": 1.0,
2746
+ "n_examples": 50,
2747
+ "per_subtask": {
2748
+ "SA": {
2749
+ "accuracy": 1.0,
2750
+ "count": 200
2751
+ },
2752
+ "SC": {
2753
+ "accuracy": 1.0,
2754
+ "count": 50
2755
+ },
2756
+ "UC": {
2757
+ "accuracy": 1.0,
2758
+ "count": 83
2759
+ },
2760
+ "US": {
2761
+ "accuracy": 1.0,
2762
+ "count": 17
2763
  }
2764
  }
2765
  },
2766
  "add_C3": {
2767
+ "full_accuracy": 1.0,
2768
+ "digit_accuracy": 1.0,
2769
+ "n_examples": 50,
2770
  "per_subtask": {
2771
  "SA": {
2772
  "accuracy": 1.0,
2773
+ "count": 150
2774
  },
2775
  "SC": {
2776
  "accuracy": 1.0,
2777
+ "count": 50
2778
  },
2779
  "UC": {
2780
+ "accuracy": 1.0,
2781
+ "count": 100
2782
  },
2783
  "US": {
2784
  "accuracy": 1.0,
2785
+ "count": 50
2786
  }
2787
  }
2788
  },
2789
  "add_C4": {
2790
+ "full_accuracy": 0.9,
2791
+ "digit_accuracy": 0.9857142857142858,
2792
+ "n_examples": 50,
2793
  "per_subtask": {
2794
  "SA": {
2795
+ "accuracy": 1.0,
2796
+ "count": 100
2797
  },
2798
  "SC": {
2799
  "accuracy": 1.0,
2800
+ "count": 50
2801
  },
2802
  "UC": {
2803
+ "accuracy": 0.9621212121212122,
2804
+ "count": 132
2805
  },
2806
  "US": {
2807
+ "accuracy": 1.0,
2808
+ "count": 68
2809
  }
2810
  }
2811
  },
2812
  "add_C5": {
2813
+ "full_accuracy": 0.98,
2814
+ "digit_accuracy": 0.9971428571428571,
2815
+ "n_examples": 50,
2816
  "per_subtask": {
2817
  "SA": {
2818
  "accuracy": 1.0,
2819
+ "count": 50
2820
  },
2821
  "SC": {
2822
  "accuracy": 1.0,
2823
+ "count": 50
2824
  },
2825
  "UC": {
2826
+ "accuracy": 1.0,
2827
+ "count": 146
2828
  },
2829
  "US": {
2830
+ "accuracy": 0.9903846153846154,
2831
+ "count": 104
2832
  }
2833
  }
2834
  },
2835
  "add_C6": {
2836
  "full_accuracy": 1.0,
2837
+ "digit_accuracy": 1.0,
2838
+ "n_examples": 50,
2839
  "per_subtask": {
2840
  "SC": {
2841
  "accuracy": 1.0,
2842
+ "count": 50
2843
  },
2844
  "UC": {
2845
  "accuracy": 1.0,
2846
+ "count": 189
2847
  },
2848
  "US": {
2849
  "accuracy": 1.0,
2850
+ "count": 111
2851
  }
2852
  }
2853
  },
2854
  "sub_M0": {
2855
  "full_accuracy": 1.0,
2856
+ "digit_accuracy": 1.0,
2857
+ "n_examples": 50,
2858
  "per_subtask": {
2859
  "MD": {
2860
  "accuracy": 1.0,
2861
+ "count": 303
2862
  },
2863
  "ME": {
2864
  "accuracy": 1.0,
2865
+ "count": 47
2866
  }
2867
  }
2868
  },
2869
  "sub_M1": {
2870
+ "full_accuracy": 1.0,
2871
+ "digit_accuracy": 1.0,
2872
+ "n_examples": 50,
2873
  "per_subtask": {
2874
  "MD": {
2875
  "accuracy": 1.0,
2876
+ "count": 141
2877
  },
2878
  "MB": {
2879
  "accuracy": 1.0,
2880
+ "count": 72
2881
  },
2882
  "ME": {
2883
  "accuracy": 1.0,
2884
+ "count": 18
2885
  },
2886
  "UB": {
2887
+ "accuracy": 1.0,
2888
+ "count": 119
2889
  }
2890
  }
2891
  },
2892
  "sub_M2": {
2893
  "full_accuracy": 1.0,
2894
+ "digit_accuracy": 1.0,
2895
+ "n_examples": 50,
2896
  "per_subtask": {
2897
  "MD": {
2898
  "accuracy": 1.0,
2899
+ "count": 112
2900
  },
2901
  "MB": {
2902
  "accuracy": 1.0,
2903
+ "count": 53
2904
  },
2905
  "ME": {
2906
  "accuracy": 1.0,
2907
+ "count": 47
2908
  },
2909
  "UB": {
2910
  "accuracy": 1.0,
2911
+ "count": 85
2912
  },
2913
  "UD": {
2914
  "accuracy": 1.0,
2915
+ "count": 53
2916
  }
2917
  }
2918
  },
2919
  "sub_M3": {
2920
  "full_accuracy": 1.0,
2921
+ "digit_accuracy": 1.0,
2922
+ "n_examples": 50,
2923
  "per_subtask": {
2924
  "MD": {
2925
  "accuracy": 1.0,
2926
+ "count": 97
2927
  },
2928
  "MB": {
2929
  "accuracy": 1.0,
2930
+ "count": 51
2931
  },
2932
  "ME": {
2933
  "accuracy": 1.0,
2934
+ "count": 27
2935
  },
2936
  "UB": {
2937
  "accuracy": 1.0,
2938
+ "count": 74
2939
  },
2940
  "UD": {
2941
  "accuracy": 1.0,
2942
+ "count": 101
2943
  }
2944
  }
2945
  },
2946
  "sub_M4": {
2947
+ "full_accuracy": 0.6,
2948
+ "digit_accuracy": 0.9428571428571428,
2949
+ "n_examples": 50,
2950
  "per_subtask": {
2951
  "MD": {
2952
  "accuracy": 1.0,
2953
+ "count": 100
2954
  },
2955
  "MB": {
2956
  "accuracy": 1.0,
2957
+ "count": 50
2958
  },
2959
  "UB": {
2960
+ "accuracy": 0.6,
2961
+ "count": 50
2962
  },
2963
  "UD": {
2964
+ "accuracy": 1.0,
2965
+ "count": 150
2966
  }
2967
  }
2968
  },
2969
  "sub_M5": {
2970
+ "full_accuracy": 0.42,
2971
+ "digit_accuracy": 0.8914285714285715,
2972
+ "n_examples": 50,
2973
  "per_subtask": {
2974
  "MD": {
2975
  "accuracy": 1.0,
2976
+ "count": 50
2977
  },
2978
  "MB": {
2979
  "accuracy": 1.0,
2980
+ "count": 50
2981
  },
2982
  "UB": {
2983
+ "accuracy": 0.7,
2984
+ "count": 50
2985
  },
2986
  "UD": {
2987
+ "accuracy": 0.885,
2988
+ "count": 200
2989
  }
2990
  }
2991
  },
2992
  "sub_random": {
2993
  "full_accuracy": 1.0,
2994
+ "digit_accuracy": 1.0,
2995
  "n_examples": 200,
2996
  "per_subtask": {
2997
  "MD": {
2998
  "accuracy": 1.0,
2999
+ "count": 570
3000
  },
3001
  "MB": {
3002
  "accuracy": 1.0,
3003
+ "count": 277
3004
  },
3005
  "ME": {
3006
  "accuracy": 1.0,
 
3008
  },
3009
  "UB": {
3010
  "accuracy": 1.0,
3011
+ "count": 471
3012
  },
3013
  "UD": {
3014
  "accuracy": 1.0,
3015
+ "count": 29
3016
  }
3017
  }
3018
  },
3019
  "sub_B3": {
3020
+ "full_accuracy": 1.0,
3021
+ "digit_accuracy": 1.0,
3022
+ "n_examples": 50,
3023
  "per_subtask": {
3024
  "MD": {
3025
  "accuracy": 1.0,
3026
+ "count": 150
3027
  },
3028
  "MB": {
3029
  "accuracy": 1.0,
3030
+ "count": 50
3031
  },
3032
  "UB": {
3033
+ "accuracy": 1.0,
3034
+ "count": 101
3035
  },
3036
  "UD": {
3037
  "accuracy": 1.0,
3038
+ "count": 49
3039
  }
3040
  }
3041
  },
3042
  "sub_B4": {
3043
  "full_accuracy": 0.94,
3044
+ "digit_accuracy": 0.9914285714285714,
3045
+ "n_examples": 50,
3046
  "per_subtask": {
3047
  "MD": {
3048
  "accuracy": 1.0,
3049
+ "count": 100
3050
  },
3051
  "MB": {
3052
  "accuracy": 1.0,
3053
+ "count": 50
3054
  },
3055
  "UB": {
3056
+ "accuracy": 0.9752066115702479,
3057
+ "count": 121
3058
  },
3059
  "UD": {
3060
  "accuracy": 1.0,
3061
+ "count": 79
3062
  }
3063
  }
3064
  },
3065
  "sub_B5": {
3066
  "full_accuracy": 0.98,
3067
+ "digit_accuracy": 0.9971428571428571,
3068
+ "n_examples": 50,
3069
  "per_subtask": {
3070
  "MD": {
3071
  "accuracy": 1.0,
3072
+ "count": 50
3073
  },
3074
  "MB": {
3075
  "accuracy": 1.0,
3076
+ "count": 50
3077
  },
3078
  "UB": {
3079
+ "accuracy": 0.993421052631579,
3080
+ "count": 152
3081
  },
3082
  "UD": {
3083
+ "accuracy": 1.0,
3084
+ "count": 98
3085
  }
3086
  }
3087
  }
3088
  },
3089
  "summary": {
3090
+ "overall_accuracy": 0.94,
3091
+ "digit_accuracy": 0.9883809523809524,
3092
+ "total_examples": 1500,
3093
+ "n_splits": 24
3094
  }
3095
  },
3096
  "sorl_eval": {
 
3099
  "K": 1,
3100
  "mode": "sorl",
3101
  "n_digits": 6,
3102
+ "n_per_split": 50
3103
  },
3104
  "splits": {
3105
  "add_S0": {
3106
  "full_accuracy": 1.0,
3107
+ "digit_accuracy": 1.0,
3108
+ "n_examples": 50,
3109
  "per_subtask": {
3110
  "SA": {
3111
  "accuracy": 1.0,
3112
+ "count": 295
3113
  },
3114
  "SS": {
3115
  "accuracy": 1.0,
3116
+ "count": 55
3117
  }
3118
  }
3119
  },
3120
  "add_S1": {
3121
  "full_accuracy": 1.0,
3122
+ "digit_accuracy": 1.0,
3123
+ "n_examples": 50,
3124
  "per_subtask": {
3125
  "SA": {
3126
  "accuracy": 1.0,
3127
+ "count": 126
3128
  },
3129
  "SC": {
3130
  "accuracy": 1.0,
3131
+ "count": 79
3132
  },
3133
  "SS": {
3134
  "accuracy": 1.0,
3135
+ "count": 21
3136
  },
3137
  "UC": {
3138
  "accuracy": 1.0,
3139
+ "count": 124
3140
  }
3141
  }
3142
  },
3143
  "add_S2": {
3144
  "full_accuracy": 1.0,
3145
+ "digit_accuracy": 1.0,
3146
+ "n_examples": 50,
3147
  "per_subtask": {
3148
  "SA": {
3149
  "accuracy": 1.0,
3150
+ "count": 75
3151
  },
3152
  "SC": {
3153
  "accuracy": 1.0,
3154
+ "count": 62
3155
  },
3156
  "SS": {
3157
  "accuracy": 1.0,
3158
+ "count": 39
3159
  },
3160
  "UC": {
3161
  "accuracy": 1.0,
3162
+ "count": 111
3163
  },
3164
  "US": {
3165
  "accuracy": 1.0,
3166
+ "count": 63
3167
  }
3168
  }
3169
  },
3170
  "add_S3": {
3171
  "full_accuracy": 1.0,
3172
+ "digit_accuracy": 1.0,
3173
+ "n_examples": 50,
3174
  "per_subtask": {
3175
  "SA": {
3176
  "accuracy": 1.0,
3177
+ "count": 60
3178
  },
3179
  "SC": {
3180
  "accuracy": 1.0,
3181
+ "count": 57
3182
  },
3183
  "SS": {
3184
  "accuracy": 1.0,
3185
+ "count": 19
3186
  },
3187
  "UC": {
3188
  "accuracy": 1.0,
3189
+ "count": 104
3190
  },
3191
  "US": {
3192
  "accuracy": 1.0,
3193
+ "count": 110
3194
  }
3195
  }
3196
  },
3197
  "add_S4": {
3198
  "full_accuracy": 1.0,
3199
+ "digit_accuracy": 1.0,
3200
+ "n_examples": 50,
3201
  "per_subtask": {
3202
  "SA": {
3203
  "accuracy": 1.0,
3204
+ "count": 48
3205
  },
3206
  "SC": {
3207
  "accuracy": 1.0,
3208
+ "count": 52
3209
  },
3210
  "SS": {
3211
  "accuracy": 1.0,
3212
+ "count": 7
3213
  },
3214
  "UC": {
3215
  "accuracy": 1.0,
3216
+ "count": 89
3217
  },
3218
  "US": {
3219
  "accuracy": 1.0,
3220
+ "count": 154
3221
  }
3222
  }
3223
  },
3224
  "add_S5": {
3225
+ "full_accuracy": 1.0,
3226
+ "digit_accuracy": 1.0,
3227
+ "n_examples": 50,
3228
  "per_subtask": {
3229
  "SA": {
3230
  "accuracy": 1.0,
3231
+ "count": 50
3232
  },
3233
  "SC": {
3234
  "accuracy": 1.0,
3235
+ "count": 50
3236
  },
3237
  "UC": {
3238
+ "accuracy": 1.0,
3239
+ "count": 50
3240
  },
3241
  "US": {
3242
  "accuracy": 1.0,
3243
+ "count": 200
3244
  }
3245
  }
3246
  },
3247
  "add_S6": {
3248
  "full_accuracy": 0.96,
3249
+ "digit_accuracy": 0.9914285714285714,
3250
+ "n_examples": 50,
3251
  "per_subtask": {
3252
  "SC": {
3253
  "accuracy": 1.0,
3254
+ "count": 50
3255
  },
3256
  "UC": {
3257
  "accuracy": 0.96,
3258
+ "count": 50
3259
  },
3260
  "US": {
3261
+ "accuracy": 0.996,
3262
+ "count": 250
3263
  }
3264
  }
3265
  },
3266
  "add_random": {
3267
  "full_accuracy": 1.0,
3268
+ "digit_accuracy": 1.0,
3269
  "n_examples": 200,
3270
  "per_subtask": {
3271
  "SA": {
3272
  "accuracy": 1.0,
3273
+ "count": 431
3274
  },
3275
  "SC": {
3276
  "accuracy": 1.0,
3277
+ "count": 316
3278
  },
3279
  "SS": {
3280
  "accuracy": 1.0,
3281
+ "count": 39
3282
  },
3283
  "UC": {
3284
  "accuracy": 1.0,
3285
+ "count": 560
3286
  },
3287
  "US": {
3288
  "accuracy": 1.0,
3289
+ "count": 54
3290
+ }
3291
+ }
3292
+ },
3293
+ "add_C1": {
3294
+ "full_accuracy": 1.0,
3295
+ "digit_accuracy": 1.0,
3296
+ "n_examples": 50,
3297
+ "per_subtask": {
3298
+ "SA": {
3299
+ "accuracy": 1.0,
3300
+ "count": 250
3301
+ },
3302
+ "SC": {
3303
+ "accuracy": 1.0,
3304
+ "count": 50
3305
+ },
3306
+ "UC": {
3307
+ "accuracy": 1.0,
3308
+ "count": 50
3309
+ }
3310
+ }
3311
+ },
3312
+ "add_C2": {
3313
+ "full_accuracy": 0.98,
3314
+ "digit_accuracy": 0.9971428571428571,
3315
+ "n_examples": 50,
3316
+ "per_subtask": {
3317
+ "SA": {
3318
+ "accuracy": 1.0,
3319
+ "count": 200
3320
+ },
3321
+ "SC": {
3322
+ "accuracy": 1.0,
3323
+ "count": 50
3324
+ },
3325
+ "UC": {
3326
+ "accuracy": 0.9879518072289156,
3327
+ "count": 83
3328
+ },
3329
+ "US": {
3330
+ "accuracy": 1.0,
3331
+ "count": 17
3332
  }
3333
  }
3334
  },
3335
  "add_C3": {
3336
  "full_accuracy": 1.0,
3337
+ "digit_accuracy": 1.0,
3338
+ "n_examples": 50,
3339
  "per_subtask": {
3340
  "SA": {
3341
  "accuracy": 1.0,
3342
+ "count": 150
3343
  },
3344
  "SC": {
3345
  "accuracy": 1.0,
3346
+ "count": 50
3347
  },
3348
  "UC": {
3349
  "accuracy": 1.0,
3350
+ "count": 100
3351
  },
3352
  "US": {
3353
  "accuracy": 1.0,
3354
+ "count": 50
3355
  }
3356
  }
3357
  },
3358
  "add_C4": {
3359
+ "full_accuracy": 1.0,
3360
+ "digit_accuracy": 1.0,
3361
+ "n_examples": 50,
3362
  "per_subtask": {
3363
  "SA": {
3364
  "accuracy": 1.0,
3365
+ "count": 100
3366
  },
3367
  "SC": {
3368
  "accuracy": 1.0,
3369
+ "count": 50
3370
  },
3371
  "UC": {
3372
+ "accuracy": 1.0,
3373
+ "count": 132
3374
  },
3375
  "US": {
3376
  "accuracy": 1.0,
3377
+ "count": 68
3378
  }
3379
  }
3380
  },
3381
  "add_C5": {
3382
  "full_accuracy": 1.0,
3383
+ "digit_accuracy": 1.0,
3384
+ "n_examples": 50,
3385
  "per_subtask": {
3386
  "SA": {
3387
  "accuracy": 1.0,
3388
+ "count": 50
3389
  },
3390
  "SC": {
3391
  "accuracy": 1.0,
3392
+ "count": 50
3393
  },
3394
  "UC": {
3395
  "accuracy": 1.0,
3396
+ "count": 146
3397
  },
3398
  "US": {
3399
  "accuracy": 1.0,
3400
+ "count": 104
3401
  }
3402
  }
3403
  },
3404
  "add_C6": {
3405
  "full_accuracy": 1.0,
3406
+ "digit_accuracy": 1.0,
3407
+ "n_examples": 50,
3408
  "per_subtask": {
3409
  "SC": {
3410
  "accuracy": 1.0,
3411
+ "count": 50
3412
  },
3413
  "UC": {
3414
  "accuracy": 1.0,
3415
+ "count": 189
3416
  },
3417
  "US": {
3418
  "accuracy": 1.0,
3419
+ "count": 111
3420
  }
3421
  }
3422
  },
3423
  "sub_M0": {
3424
  "full_accuracy": 1.0,
3425
+ "digit_accuracy": 1.0,
3426
+ "n_examples": 50,
3427
  "per_subtask": {
3428
  "MD": {
3429
  "accuracy": 1.0,
3430
+ "count": 303
3431
  },
3432
  "ME": {
3433
  "accuracy": 1.0,
3434
+ "count": 47
3435
  }
3436
  }
3437
  },
3438
  "sub_M1": {
3439
  "full_accuracy": 1.0,
3440
+ "digit_accuracy": 1.0,
3441
+ "n_examples": 50,
3442
  "per_subtask": {
3443
  "MD": {
3444
  "accuracy": 1.0,
3445
+ "count": 141
3446
  },
3447
  "MB": {
3448
  "accuracy": 1.0,
3449
+ "count": 72
3450
  },
3451
  "ME": {
3452
  "accuracy": 1.0,
3453
+ "count": 18
3454
  },
3455
  "UB": {
3456
  "accuracy": 1.0,
3457
+ "count": 119
3458
  }
3459
  }
3460
  },
3461
  "sub_M2": {
3462
  "full_accuracy": 1.0,
3463
+ "digit_accuracy": 1.0,
3464
+ "n_examples": 50,
3465
  "per_subtask": {
3466
  "MD": {
3467
  "accuracy": 1.0,
3468
+ "count": 112
3469
  },
3470
  "MB": {
3471
  "accuracy": 1.0,
3472
+ "count": 53
3473
  },
3474
  "ME": {
3475
  "accuracy": 1.0,
3476
+ "count": 47
3477
  },
3478
  "UB": {
3479
  "accuracy": 1.0,
3480
+ "count": 85
3481
  },
3482
  "UD": {
3483
  "accuracy": 1.0,
3484
+ "count": 53
3485
  }
3486
  }
3487
  },
3488
  "sub_M3": {
3489
  "full_accuracy": 1.0,
3490
+ "digit_accuracy": 1.0,
3491
+ "n_examples": 50,
3492
  "per_subtask": {
3493
  "MD": {
3494
  "accuracy": 1.0,
3495
+ "count": 97
3496
  },
3497
  "MB": {
3498
  "accuracy": 1.0,
3499
+ "count": 51
3500
  },
3501
  "ME": {
3502
  "accuracy": 1.0,
3503
+ "count": 27
3504
  },
3505
  "UB": {
3506
  "accuracy": 1.0,
3507
+ "count": 74
3508
  },
3509
  "UD": {
3510
  "accuracy": 1.0,
3511
+ "count": 101
3512
  }
3513
  }
3514
  },
3515
  "sub_M4": {
3516
  "full_accuracy": 1.0,
3517
+ "digit_accuracy": 1.0,
3518
+ "n_examples": 50,
3519
  "per_subtask": {
3520
  "MD": {
3521
  "accuracy": 1.0,
3522
+ "count": 100
3523
  },
3524
  "MB": {
3525
  "accuracy": 1.0,
3526
+ "count": 50
3527
  },
3528
  "UB": {
3529
  "accuracy": 1.0,
3530
+ "count": 50
3531
  },
3532
  "UD": {
3533
  "accuracy": 1.0,
3534
+ "count": 150
3535
  }
3536
  }
3537
  },
3538
  "sub_M5": {
3539
+ "full_accuracy": 0.9,
3540
+ "digit_accuracy": 0.9857142857142858,
3541
+ "n_examples": 50,
3542
  "per_subtask": {
3543
  "MD": {
3544
  "accuracy": 1.0,
3545
+ "count": 50
3546
  },
3547
  "MB": {
3548
  "accuracy": 1.0,
3549
+ "count": 50
3550
  },
3551
  "UB": {
3552
+ "accuracy": 0.9,
3553
+ "count": 50
3554
  },
3555
  "UD": {
3556
+ "accuracy": 1.0,
3557
+ "count": 200
3558
  }
3559
  }
3560
  },
3561
  "sub_random": {
3562
  "full_accuracy": 1.0,
3563
+ "digit_accuracy": 1.0,
3564
  "n_examples": 200,
3565
  "per_subtask": {
3566
  "MD": {
3567
  "accuracy": 1.0,
3568
+ "count": 570
3569
  },
3570
  "MB": {
3571
  "accuracy": 1.0,
3572
+ "count": 277
3573
  },
3574
  "ME": {
3575
  "accuracy": 1.0,
 
3577
  },
3578
  "UB": {
3579
  "accuracy": 1.0,
3580
+ "count": 471
3581
  },
3582
  "UD": {
3583
  "accuracy": 1.0,
3584
+ "count": 29
3585
  }
3586
  }
3587
  },
3588
  "sub_B3": {
3589
  "full_accuracy": 1.0,
3590
+ "digit_accuracy": 1.0,
3591
+ "n_examples": 50,
3592
  "per_subtask": {
3593
  "MD": {
3594
  "accuracy": 1.0,
3595
+ "count": 150
3596
  },
3597
  "MB": {
3598
  "accuracy": 1.0,
3599
+ "count": 50
3600
  },
3601
  "UB": {
3602
  "accuracy": 1.0,
3603
+ "count": 101
3604
  },
3605
  "UD": {
3606
  "accuracy": 1.0,
3607
+ "count": 49
3608
  }
3609
  }
3610
  },
3611
  "sub_B4": {
3612
  "full_accuracy": 1.0,
3613
+ "digit_accuracy": 1.0,
3614
+ "n_examples": 50,
3615
  "per_subtask": {
3616
  "MD": {
3617
  "accuracy": 1.0,
3618
+ "count": 100
3619
  },
3620
  "MB": {
3621
  "accuracy": 1.0,
3622
+ "count": 50
3623
  },
3624
  "UB": {
3625
  "accuracy": 1.0,
3626
+ "count": 121
3627
  },
3628
  "UD": {
3629
  "accuracy": 1.0,
3630
+ "count": 79
3631
  }
3632
  }
3633
  },
3634
  "sub_B5": {
3635
  "full_accuracy": 1.0,
3636
+ "digit_accuracy": 1.0,
3637
+ "n_examples": 50,
3638
  "per_subtask": {
3639
  "MD": {
3640
  "accuracy": 1.0,
3641
+ "count": 50
3642
  },
3643
  "MB": {
3644
  "accuracy": 1.0,
3645
+ "count": 50
3646
  },
3647
  "UB": {
3648
  "accuracy": 1.0,
3649
+ "count": 152
3650
  },
3651
  "UD": {
3652
  "accuracy": 1.0,
3653
+ "count": 98
3654
  }
3655
  }
3656
  }
3657
  },
3658
  "summary": {
3659
+ "overall_accuracy": 0.9946666666666667,
3660
+ "digit_accuracy": 0.9991428571428571,
3661
+ "total_examples": 1500,
3662
+ "n_splits": 24
3663
  }
3664
  },
3665
  "sorl_overall_accuracy": 0.9945833333333334,