amirali1985 commited on
Commit
0c51d15
·
verified ·
1 Parent(s): 79ac4c2

Upload add_sub_sorl_v1_abs30_50K/metrics.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. add_sub_sorl_v1_abs30_50K/metrics.json +445 -315
add_sub_sorl_v1_abs30_50K/metrics.json CHANGED
@@ -2470,502 +2470,567 @@
2470
  "K": null,
2471
  "mode": "sft",
2472
  "n_digits": 6,
2473
- "n_per_split": 100
2474
  },
2475
  "splits": {
2476
  "add_S0": {
2477
- "full_accuracy": 0.95,
2478
- "n_examples": 100,
 
2479
  "per_subtask": {
2480
  "SA": {
2481
- "accuracy": 0.9933884297520661,
2482
- "count": 605
2483
  },
2484
  "SS": {
2485
- "accuracy": 0.9789473684210527,
2486
- "count": 95
2487
  }
2488
  }
2489
  },
2490
  "add_S1": {
2491
- "full_accuracy": 0.87,
2492
- "n_examples": 100,
 
2493
  "per_subtask": {
2494
  "SA": {
2495
- "accuracy": 0.9754901960784313,
2496
- "count": 204
2497
  },
2498
  "SC": {
2499
- "accuracy": 0.9881656804733728,
2500
- "count": 169
2501
  },
2502
  "SS": {
2503
  "accuracy": 1.0,
2504
- "count": 31
2505
  },
2506
  "UC": {
2507
- "accuracy": 0.9763513513513513,
2508
- "count": 296
2509
  }
2510
  }
2511
  },
2512
  "add_S2": {
2513
- "full_accuracy": 0.65,
2514
- "n_examples": 100,
 
2515
  "per_subtask": {
2516
  "SA": {
2517
- "accuracy": 0.9754601226993865,
2518
- "count": 163
2519
  },
2520
  "SC": {
2521
- "accuracy": 0.9615384615384616,
2522
- "count": 130
2523
  },
2524
  "SS": {
2525
- "accuracy": 0.9770114942528736,
2526
- "count": 87
2527
  },
2528
  "UC": {
2529
- "accuracy": 0.8719211822660099,
2530
- "count": 203
2531
  },
2532
  "US": {
2533
- "accuracy": 0.9743589743589743,
2534
- "count": 117
2535
  }
2536
  }
2537
  },
2538
  "add_S3": {
2539
- "full_accuracy": 0.6,
2540
- "n_examples": 100,
 
2541
  "per_subtask": {
2542
  "SA": {
2543
- "accuracy": 1.0,
2544
- "count": 121
2545
  },
2546
  "SC": {
2547
- "accuracy": 0.9834710743801653,
2548
- "count": 121
2549
  },
2550
  "SS": {
2551
- "accuracy": 1.0,
2552
- "count": 49
2553
  },
2554
  "UC": {
2555
- "accuracy": 0.8387096774193549,
2556
- "count": 186
2557
  },
2558
  "US": {
2559
- "accuracy": 0.8654708520179372,
2560
- "count": 223
2561
  }
2562
  }
2563
  },
2564
  "add_S4": {
2565
- "full_accuracy": 0.47,
2566
- "n_examples": 100,
 
2567
  "per_subtask": {
2568
  "SA": {
2569
  "accuracy": 1.0,
2570
- "count": 104
2571
  },
2572
  "SC": {
2573
  "accuracy": 1.0,
2574
- "count": 106
2575
  },
2576
  "SS": {
2577
  "accuracy": 1.0,
2578
- "count": 23
2579
  },
2580
  "UC": {
2581
- "accuracy": 0.75625,
2582
- "count": 160
2583
  },
2584
  "US": {
2585
- "accuracy": 0.7035830618892508,
2586
- "count": 307
2587
  }
2588
  }
2589
  },
2590
  "add_S5": {
2591
- "full_accuracy": 0.35,
2592
- "n_examples": 100,
 
2593
  "per_subtask": {
2594
  "SA": {
2595
  "accuracy": 1.0,
2596
- "count": 100
2597
  },
2598
  "SC": {
2599
  "accuracy": 1.0,
2600
- "count": 100
2601
  },
2602
  "UC": {
2603
- "accuracy": 0.64,
2604
- "count": 100
2605
  },
2606
  "US": {
2607
- "accuracy": 0.6075,
2608
- "count": 400
2609
  }
2610
  }
2611
  },
2612
  "add_S6": {
2613
- "full_accuracy": 0.51,
2614
- "n_examples": 100,
 
2615
  "per_subtask": {
2616
  "SC": {
2617
  "accuracy": 1.0,
2618
- "count": 100
2619
  },
2620
  "UC": {
2621
- "accuracy": 0.51,
2622
- "count": 100
2623
  },
2624
  "US": {
2625
- "accuracy": 0.736,
2626
- "count": 500
2627
  }
2628
  }
2629
  },
2630
  "add_random": {
2631
- "full_accuracy": 0.86,
 
2632
  "n_examples": 200,
2633
  "per_subtask": {
2634
  "SA": {
2635
- "accuracy": 0.9821029082774049,
2636
- "count": 447
2637
  },
2638
  "SC": {
2639
- "accuracy": 0.98125,
2640
- "count": 320
2641
  },
2642
  "SS": {
2643
- "accuracy": 0.9642857142857143,
2644
- "count": 56
2645
  },
2646
  "UC": {
2647
- "accuracy": 0.9735349716446124,
2648
- "count": 529
2649
  },
2650
  "US": {
2651
- "accuracy": 0.8958333333333334,
2652
- "count": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2653
  }
2654
  }
2655
  },
2656
  "add_C3": {
2657
- "full_accuracy": 0.77,
2658
- "n_examples": 100,
 
2659
  "per_subtask": {
2660
  "SA": {
2661
  "accuracy": 1.0,
2662
- "count": 300
2663
  },
2664
  "SC": {
2665
  "accuracy": 1.0,
2666
- "count": 100
2667
  },
2668
  "UC": {
2669
- "accuracy": 0.8911917098445595,
2670
- "count": 193
2671
  },
2672
  "US": {
2673
- "accuracy": 0.897196261682243,
2674
- "count": 107
2675
  }
2676
  }
2677
  },
2678
  "add_C4": {
2679
- "full_accuracy": 0.55,
2680
- "n_examples": 100,
 
2681
  "per_subtask": {
2682
  "SA": {
2683
  "accuracy": 1.0,
2684
- "count": 200
2685
  },
2686
  "SC": {
2687
- "accuracy": 0.99,
2688
- "count": 100
2689
  },
2690
  "UC": {
2691
- "accuracy": 0.859375,
2692
- "count": 256
2693
  },
2694
  "US": {
2695
- "accuracy": 0.8333333333333334,
2696
- "count": 144
2697
  }
2698
  }
2699
  },
2700
  "add_C5": {
2701
- "full_accuracy": 0.57,
2702
- "n_examples": 100,
 
2703
  "per_subtask": {
2704
  "SA": {
2705
  "accuracy": 1.0,
2706
- "count": 100
2707
  },
2708
  "SC": {
2709
  "accuracy": 1.0,
2710
- "count": 100
2711
  },
2712
  "UC": {
2713
- "accuracy": 0.8660130718954249,
2714
- "count": 306
2715
  },
2716
  "US": {
2717
- "accuracy": 0.7938144329896907,
2718
- "count": 194
2719
  }
2720
  }
2721
  },
2722
  "add_C6": {
2723
- "full_accuracy": 0.8,
2724
- "n_examples": 100,
 
2725
  "per_subtask": {
2726
  "SC": {
2727
  "accuracy": 1.0,
2728
- "count": 100
2729
  },
2730
  "UC": {
2731
- "accuracy": 0.9562841530054644,
2732
- "count": 366
2733
  },
2734
  "US": {
2735
- "accuracy": 0.9273504273504274,
2736
- "count": 234
2737
  }
2738
  }
2739
  },
2740
  "sub_M0": {
2741
- "full_accuracy": 0.96,
2742
- "n_examples": 100,
 
2743
  "per_subtask": {
2744
  "MD": {
2745
- "accuracy": 0.9933444259567388,
2746
- "count": 601
2747
  },
2748
  "ME": {
2749
  "accuracy": 1.0,
2750
- "count": 99
2751
  }
2752
  }
2753
  },
2754
  "sub_M1": {
2755
- "full_accuracy": 0.9,
2756
- "n_examples": 100,
 
2757
  "per_subtask": {
2758
  "MD": {
2759
- "accuracy": 0.992831541218638,
2760
- "count": 279
2761
  },
2762
  "MB": {
2763
- "accuracy": 0.9862068965517241,
2764
- "count": 145
2765
  },
2766
  "ME": {
2767
- "accuracy": 0.9583333333333334,
2768
- "count": 24
2769
  },
2770
  "UB": {
2771
- "accuracy": 0.9761904761904762,
2772
- "count": 252
2773
  }
2774
  }
2775
  },
2776
  "sub_M2": {
2777
- "full_accuracy": 0.67,
2778
- "n_examples": 100,
 
2779
  "per_subtask": {
2780
  "MD": {
2781
  "accuracy": 1.0,
2782
- "count": 213
2783
  },
2784
  "MB": {
2785
- "accuracy": 1.0,
2786
- "count": 113
2787
  },
2788
  "ME": {
2789
- "accuracy": 0.9882352941176471,
2790
- "count": 85
2791
  },
2792
  "UB": {
2793
- "accuracy": 0.8232044198895028,
2794
- "count": 181
2795
  },
2796
  "UD": {
2797
- "accuracy": 0.9629629629629629,
2798
- "count": 108
2799
  }
2800
  }
2801
  },
2802
  "sub_M3": {
2803
- "full_accuracy": 0.37,
2804
- "n_examples": 100,
 
2805
  "per_subtask": {
2806
  "MD": {
2807
- "accuracy": 1.0,
2808
- "count": 179
2809
  },
2810
  "MB": {
2811
  "accuracy": 1.0,
2812
- "count": 103
2813
  },
2814
  "ME": {
2815
  "accuracy": 1.0,
2816
- "count": 56
2817
  },
2818
  "UB": {
2819
- "accuracy": 0.6442953020134228,
2820
- "count": 149
2821
  },
2822
  "UD": {
2823
- "accuracy": 0.7511737089201878,
2824
- "count": 213
2825
  }
2826
  }
2827
  },
2828
  "sub_M4": {
2829
- "full_accuracy": 0.26,
2830
- "n_examples": 100,
 
2831
  "per_subtask": {
2832
  "MD": {
2833
  "accuracy": 1.0,
2834
- "count": 200
2835
  },
2836
  "MB": {
2837
- "accuracy": 0.99,
2838
- "count": 100
2839
  },
2840
  "UB": {
2841
- "accuracy": 0.36,
2842
- "count": 100
2843
  },
2844
  "UD": {
2845
  "accuracy": 0.7066666666666667,
2846
- "count": 300
2847
  }
2848
  }
2849
  },
2850
  "sub_M5": {
2851
- "full_accuracy": 0.23,
2852
- "n_examples": 100,
 
2853
  "per_subtask": {
2854
  "MD": {
2855
  "accuracy": 1.0,
2856
- "count": 100
2857
  },
2858
  "MB": {
2859
  "accuracy": 1.0,
2860
- "count": 100
2861
  },
2862
  "UB": {
2863
- "accuracy": 0.33,
2864
- "count": 100
2865
  },
2866
  "UD": {
2867
- "accuracy": 0.695,
2868
- "count": 400
2869
  }
2870
  }
2871
  },
2872
  "sub_random": {
2873
- "full_accuracy": 0.855,
 
2874
  "n_examples": 200,
2875
  "per_subtask": {
2876
  "MD": {
2877
- "accuracy": 0.99,
2878
- "count": 600
2879
  },
2880
  "MB": {
2881
- "accuracy": 0.9812734082397003,
2882
- "count": 267
2883
  },
2884
  "ME": {
2885
- "accuracy": 0.9811320754716981,
2886
  "count": 53
2887
  },
2888
  "UB": {
2889
- "accuracy": 0.958997722095672,
2890
- "count": 439
2891
  },
2892
  "UD": {
2893
- "accuracy": 1.0,
2894
- "count": 41
2895
  }
2896
  }
2897
  },
2898
  "sub_B3": {
2899
- "full_accuracy": 0.56,
2900
- "n_examples": 100,
 
2901
  "per_subtask": {
2902
  "MD": {
2903
- "accuracy": 0.9966666666666667,
2904
- "count": 300
2905
  },
2906
  "MB": {
2907
  "accuracy": 1.0,
2908
- "count": 100
2909
  },
2910
  "UB": {
2911
- "accuracy": 0.7868020304568528,
2912
- "count": 197
2913
  },
2914
  "UD": {
2915
- "accuracy": 0.8349514563106796,
2916
- "count": 103
2917
  }
2918
  }
2919
  },
2920
  "sub_B4": {
2921
- "full_accuracy": 0.51,
2922
- "n_examples": 100,
 
2923
  "per_subtask": {
2924
  "MD": {
2925
  "accuracy": 1.0,
2926
- "count": 200
2927
  },
2928
  "MB": {
2929
  "accuracy": 1.0,
2930
- "count": 100
2931
  },
2932
  "UB": {
2933
- "accuracy": 0.8178137651821862,
2934
- "count": 247
2935
  },
2936
  "UD": {
2937
- "accuracy": 0.7908496732026143,
2938
- "count": 153
2939
  }
2940
  }
2941
  },
2942
  "sub_B5": {
2943
- "full_accuracy": 0.68,
2944
- "n_examples": 100,
 
2945
  "per_subtask": {
2946
  "MD": {
2947
  "accuracy": 1.0,
2948
- "count": 100
2949
  },
2950
  "MB": {
2951
  "accuracy": 1.0,
2952
- "count": 100
2953
  },
2954
  "UB": {
2955
- "accuracy": 0.8959731543624161,
2956
- "count": 298
2957
  },
2958
  "UD": {
2959
- "accuracy": 0.9257425742574258,
2960
- "count": 202
2961
  }
2962
  }
2963
  }
2964
  },
2965
  "summary": {
2966
- "overall_accuracy": 0.6520833333333333,
2967
- "total_examples": 2400,
2968
- "n_splits": 22
 
2969
  }
2970
  },
2971
  "sorl_eval": {
@@ -2974,416 +3039,477 @@
2974
  "K": 4,
2975
  "mode": "sorl",
2976
  "n_digits": 6,
2977
- "n_per_split": 100
2978
  },
2979
  "splits": {
2980
  "add_S0": {
2981
  "full_accuracy": 1.0,
2982
- "n_examples": 100,
 
2983
  "per_subtask": {
2984
  "SA": {
2985
  "accuracy": 1.0,
2986
- "count": 605
2987
  },
2988
  "SS": {
2989
  "accuracy": 1.0,
2990
- "count": 95
2991
  }
2992
  }
2993
  },
2994
  "add_S1": {
2995
  "full_accuracy": 1.0,
2996
- "n_examples": 100,
 
2997
  "per_subtask": {
2998
  "SA": {
2999
  "accuracy": 1.0,
3000
- "count": 204
3001
  },
3002
  "SC": {
3003
  "accuracy": 1.0,
3004
- "count": 169
3005
  },
3006
  "SS": {
3007
  "accuracy": 1.0,
3008
- "count": 31
3009
  },
3010
  "UC": {
3011
  "accuracy": 1.0,
3012
- "count": 296
3013
  }
3014
  }
3015
  },
3016
  "add_S2": {
3017
  "full_accuracy": 1.0,
3018
- "n_examples": 100,
 
3019
  "per_subtask": {
3020
  "SA": {
3021
  "accuracy": 1.0,
3022
- "count": 163
3023
  },
3024
  "SC": {
3025
  "accuracy": 1.0,
3026
- "count": 130
3027
  },
3028
  "SS": {
3029
  "accuracy": 1.0,
3030
- "count": 87
3031
  },
3032
  "UC": {
3033
  "accuracy": 1.0,
3034
- "count": 203
3035
  },
3036
  "US": {
3037
  "accuracy": 1.0,
3038
- "count": 117
3039
  }
3040
  }
3041
  },
3042
  "add_S3": {
3043
  "full_accuracy": 1.0,
3044
- "n_examples": 100,
 
3045
  "per_subtask": {
3046
  "SA": {
3047
  "accuracy": 1.0,
3048
- "count": 121
3049
  },
3050
  "SC": {
3051
  "accuracy": 1.0,
3052
- "count": 121
3053
  },
3054
  "SS": {
3055
  "accuracy": 1.0,
3056
- "count": 49
3057
  },
3058
  "UC": {
3059
  "accuracy": 1.0,
3060
- "count": 186
3061
  },
3062
  "US": {
3063
  "accuracy": 1.0,
3064
- "count": 223
3065
  }
3066
  }
3067
  },
3068
  "add_S4": {
3069
  "full_accuracy": 1.0,
3070
- "n_examples": 100,
 
3071
  "per_subtask": {
3072
  "SA": {
3073
  "accuracy": 1.0,
3074
- "count": 104
3075
  },
3076
  "SC": {
3077
  "accuracy": 1.0,
3078
- "count": 106
3079
  },
3080
  "SS": {
3081
  "accuracy": 1.0,
3082
- "count": 23
3083
  },
3084
  "UC": {
3085
  "accuracy": 1.0,
3086
- "count": 160
3087
  },
3088
  "US": {
3089
  "accuracy": 1.0,
3090
- "count": 307
3091
  }
3092
  }
3093
  },
3094
  "add_S5": {
3095
- "full_accuracy": 0.85,
3096
- "n_examples": 100,
 
3097
  "per_subtask": {
3098
  "SA": {
3099
  "accuracy": 1.0,
3100
- "count": 100
3101
  },
3102
  "SC": {
3103
  "accuracy": 1.0,
3104
- "count": 100
3105
  },
3106
  "UC": {
3107
- "accuracy": 0.85,
3108
- "count": 100
3109
  },
3110
  "US": {
3111
  "accuracy": 1.0,
3112
- "count": 400
3113
  }
3114
  }
3115
  },
3116
  "add_S6": {
3117
  "full_accuracy": 1.0,
3118
- "n_examples": 100,
 
3119
  "per_subtask": {
3120
  "SC": {
3121
  "accuracy": 1.0,
3122
- "count": 100
3123
  },
3124
  "UC": {
3125
  "accuracy": 1.0,
3126
- "count": 100
3127
  },
3128
  "US": {
3129
  "accuracy": 1.0,
3130
- "count": 500
3131
  }
3132
  }
3133
  },
3134
  "add_random": {
3135
  "full_accuracy": 1.0,
 
3136
  "n_examples": 200,
3137
  "per_subtask": {
3138
  "SA": {
3139
  "accuracy": 1.0,
3140
- "count": 447
3141
  },
3142
  "SC": {
3143
  "accuracy": 1.0,
3144
- "count": 320
3145
  },
3146
  "SS": {
3147
  "accuracy": 1.0,
3148
- "count": 56
3149
  },
3150
  "UC": {
3151
  "accuracy": 1.0,
3152
- "count": 529
3153
  },
3154
  "US": {
3155
  "accuracy": 1.0,
3156
- "count": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3157
  }
3158
  }
3159
  },
3160
  "add_C3": {
3161
  "full_accuracy": 1.0,
3162
- "n_examples": 100,
 
3163
  "per_subtask": {
3164
  "SA": {
3165
  "accuracy": 1.0,
3166
- "count": 300
3167
  },
3168
  "SC": {
3169
  "accuracy": 1.0,
3170
- "count": 100
3171
  },
3172
  "UC": {
3173
  "accuracy": 1.0,
3174
- "count": 193
3175
  },
3176
  "US": {
3177
  "accuracy": 1.0,
3178
- "count": 107
3179
  }
3180
  }
3181
  },
3182
  "add_C4": {
3183
  "full_accuracy": 1.0,
3184
- "n_examples": 100,
 
3185
  "per_subtask": {
3186
  "SA": {
3187
  "accuracy": 1.0,
3188
- "count": 200
3189
  },
3190
  "SC": {
3191
  "accuracy": 1.0,
3192
- "count": 100
3193
  },
3194
  "UC": {
3195
  "accuracy": 1.0,
3196
- "count": 256
3197
  },
3198
  "US": {
3199
  "accuracy": 1.0,
3200
- "count": 144
3201
  }
3202
  }
3203
  },
3204
  "add_C5": {
3205
  "full_accuracy": 1.0,
3206
- "n_examples": 100,
 
3207
  "per_subtask": {
3208
  "SA": {
3209
  "accuracy": 1.0,
3210
- "count": 100
3211
  },
3212
  "SC": {
3213
  "accuracy": 1.0,
3214
- "count": 100
3215
  },
3216
  "UC": {
3217
  "accuracy": 1.0,
3218
- "count": 306
3219
  },
3220
  "US": {
3221
  "accuracy": 1.0,
3222
- "count": 194
3223
  }
3224
  }
3225
  },
3226
  "add_C6": {
3227
  "full_accuracy": 1.0,
3228
- "n_examples": 100,
 
3229
  "per_subtask": {
3230
  "SC": {
3231
  "accuracy": 1.0,
3232
- "count": 100
3233
  },
3234
  "UC": {
3235
  "accuracy": 1.0,
3236
- "count": 366
3237
  },
3238
  "US": {
3239
  "accuracy": 1.0,
3240
- "count": 234
3241
  }
3242
  }
3243
  },
3244
  "sub_M0": {
3245
  "full_accuracy": 1.0,
3246
- "n_examples": 100,
 
3247
  "per_subtask": {
3248
  "MD": {
3249
  "accuracy": 1.0,
3250
- "count": 601
3251
  },
3252
  "ME": {
3253
  "accuracy": 1.0,
3254
- "count": 99
3255
  }
3256
  }
3257
  },
3258
  "sub_M1": {
3259
  "full_accuracy": 1.0,
3260
- "n_examples": 100,
 
3261
  "per_subtask": {
3262
  "MD": {
3263
  "accuracy": 1.0,
3264
- "count": 279
3265
  },
3266
  "MB": {
3267
  "accuracy": 1.0,
3268
- "count": 145
3269
  },
3270
  "ME": {
3271
  "accuracy": 1.0,
3272
- "count": 24
3273
  },
3274
  "UB": {
3275
  "accuracy": 1.0,
3276
- "count": 252
3277
  }
3278
  }
3279
  },
3280
  "sub_M2": {
3281
  "full_accuracy": 1.0,
3282
- "n_examples": 100,
 
3283
  "per_subtask": {
3284
  "MD": {
3285
  "accuracy": 1.0,
3286
- "count": 213
3287
  },
3288
  "MB": {
3289
  "accuracy": 1.0,
3290
- "count": 113
3291
  },
3292
  "ME": {
3293
  "accuracy": 1.0,
3294
- "count": 85
3295
  },
3296
  "UB": {
3297
  "accuracy": 1.0,
3298
- "count": 181
3299
  },
3300
  "UD": {
3301
  "accuracy": 1.0,
3302
- "count": 108
3303
  }
3304
  }
3305
  },
3306
  "sub_M3": {
3307
  "full_accuracy": 1.0,
3308
- "n_examples": 100,
 
3309
  "per_subtask": {
3310
  "MD": {
3311
  "accuracy": 1.0,
3312
- "count": 179
3313
  },
3314
  "MB": {
3315
  "accuracy": 1.0,
3316
- "count": 103
3317
  },
3318
  "ME": {
3319
  "accuracy": 1.0,
3320
- "count": 56
3321
  },
3322
  "UB": {
3323
  "accuracy": 1.0,
3324
- "count": 149
3325
  },
3326
  "UD": {
3327
  "accuracy": 1.0,
3328
- "count": 213
3329
  }
3330
  }
3331
  },
3332
  "sub_M4": {
3333
  "full_accuracy": 1.0,
3334
- "n_examples": 100,
 
3335
  "per_subtask": {
3336
  "MD": {
3337
  "accuracy": 1.0,
3338
- "count": 200
3339
  },
3340
  "MB": {
3341
  "accuracy": 1.0,
3342
- "count": 100
3343
  },
3344
  "UB": {
3345
  "accuracy": 1.0,
3346
- "count": 100
3347
  },
3348
  "UD": {
3349
  "accuracy": 1.0,
3350
- "count": 300
3351
  }
3352
  }
3353
  },
3354
  "sub_M5": {
3355
- "full_accuracy": 0.99,
3356
- "n_examples": 100,
 
3357
  "per_subtask": {
3358
  "MD": {
3359
  "accuracy": 1.0,
3360
- "count": 100
3361
  },
3362
  "MB": {
3363
  "accuracy": 1.0,
3364
- "count": 100
3365
  },
3366
  "UB": {
3367
- "accuracy": 0.99,
3368
- "count": 100
3369
  },
3370
  "UD": {
3371
  "accuracy": 1.0,
3372
- "count": 400
3373
  }
3374
  }
3375
  },
3376
  "sub_random": {
3377
  "full_accuracy": 1.0,
 
3378
  "n_examples": 200,
3379
  "per_subtask": {
3380
  "MD": {
3381
  "accuracy": 1.0,
3382
- "count": 600
3383
  },
3384
  "MB": {
3385
  "accuracy": 1.0,
3386
- "count": 267
3387
  },
3388
  "ME": {
3389
  "accuracy": 1.0,
@@ -3391,85 +3517,89 @@
3391
  },
3392
  "UB": {
3393
  "accuracy": 1.0,
3394
- "count": 439
3395
  },
3396
  "UD": {
3397
  "accuracy": 1.0,
3398
- "count": 41
3399
  }
3400
  }
3401
  },
3402
  "sub_B3": {
3403
  "full_accuracy": 1.0,
3404
- "n_examples": 100,
 
3405
  "per_subtask": {
3406
  "MD": {
3407
  "accuracy": 1.0,
3408
- "count": 300
3409
  },
3410
  "MB": {
3411
  "accuracy": 1.0,
3412
- "count": 100
3413
  },
3414
  "UB": {
3415
  "accuracy": 1.0,
3416
- "count": 197
3417
  },
3418
  "UD": {
3419
  "accuracy": 1.0,
3420
- "count": 103
3421
  }
3422
  }
3423
  },
3424
  "sub_B4": {
3425
  "full_accuracy": 1.0,
3426
- "n_examples": 100,
 
3427
  "per_subtask": {
3428
  "MD": {
3429
  "accuracy": 1.0,
3430
- "count": 200
3431
  },
3432
  "MB": {
3433
  "accuracy": 1.0,
3434
- "count": 100
3435
  },
3436
  "UB": {
3437
  "accuracy": 1.0,
3438
- "count": 247
3439
  },
3440
  "UD": {
3441
  "accuracy": 1.0,
3442
- "count": 153
3443
  }
3444
  }
3445
  },
3446
  "sub_B5": {
3447
- "full_accuracy": 0.99,
3448
- "n_examples": 100,
 
3449
  "per_subtask": {
3450
  "MD": {
3451
  "accuracy": 1.0,
3452
- "count": 100
3453
  },
3454
  "MB": {
3455
  "accuracy": 1.0,
3456
- "count": 100
3457
  },
3458
  "UB": {
3459
- "accuracy": 0.9966442953020134,
3460
- "count": 298
3461
  },
3462
  "UD": {
3463
  "accuracy": 1.0,
3464
- "count": 202
3465
  }
3466
  }
3467
  }
3468
  },
3469
  "summary": {
3470
- "overall_accuracy": 0.9929166666666667,
3471
- "total_examples": 2400,
3472
- "n_splits": 22
 
3473
  }
3474
  },
3475
  "sorl_overall_accuracy": 0.9929166666666667,
 
2470
  "K": null,
2471
  "mode": "sft",
2472
  "n_digits": 6,
2473
+ "n_per_split": 50
2474
  },
2475
  "splits": {
2476
  "add_S0": {
2477
+ "full_accuracy": 0.94,
2478
+ "digit_accuracy": 0.9885714285714285,
2479
+ "n_examples": 50,
2480
  "per_subtask": {
2481
  "SA": {
2482
+ "accuracy": 0.9932203389830508,
2483
+ "count": 295
2484
  },
2485
  "SS": {
2486
+ "accuracy": 0.9636363636363636,
2487
+ "count": 55
2488
  }
2489
  }
2490
  },
2491
  "add_S1": {
2492
+ "full_accuracy": 0.84,
2493
+ "digit_accuracy": 0.9771428571428571,
2494
+ "n_examples": 50,
2495
  "per_subtask": {
2496
  "SA": {
2497
+ "accuracy": 0.9761904761904762,
2498
+ "count": 126
2499
  },
2500
  "SC": {
2501
+ "accuracy": 0.9746835443037974,
2502
+ "count": 79
2503
  },
2504
  "SS": {
2505
  "accuracy": 1.0,
2506
+ "count": 21
2507
  },
2508
  "UC": {
2509
+ "accuracy": 0.9758064516129032,
2510
+ "count": 124
2511
  }
2512
  }
2513
  },
2514
  "add_S2": {
2515
+ "full_accuracy": 0.5,
2516
+ "digit_accuracy": 0.9,
2517
+ "n_examples": 50,
2518
  "per_subtask": {
2519
  "SA": {
2520
+ "accuracy": 0.9733333333333334,
2521
+ "count": 75
2522
  },
2523
  "SC": {
2524
+ "accuracy": 0.8870967741935484,
2525
+ "count": 62
2526
  },
2527
  "SS": {
2528
+ "accuracy": 0.8461538461538461,
2529
+ "count": 39
2530
  },
2531
  "UC": {
2532
+ "accuracy": 0.8468468468468469,
2533
+ "count": 111
2534
  },
2535
  "US": {
2536
+ "accuracy": 0.9523809523809523,
2537
+ "count": 63
2538
  }
2539
  }
2540
  },
2541
  "add_S3": {
2542
+ "full_accuracy": 0.54,
2543
+ "digit_accuracy": 0.8971428571428571,
2544
+ "n_examples": 50,
2545
  "per_subtask": {
2546
  "SA": {
2547
+ "accuracy": 0.9833333333333333,
2548
+ "count": 60
2549
  },
2550
  "SC": {
2551
+ "accuracy": 1.0,
2552
+ "count": 57
2553
  },
2554
  "SS": {
2555
+ "accuracy": 0.9473684210526315,
2556
+ "count": 19
2557
  },
2558
  "UC": {
2559
+ "accuracy": 0.8365384615384616,
2560
+ "count": 104
2561
  },
2562
  "US": {
2563
+ "accuracy": 0.8454545454545455,
2564
+ "count": 110
2565
  }
2566
  }
2567
  },
2568
  "add_S4": {
2569
+ "full_accuracy": 0.52,
2570
+ "digit_accuracy": 0.8342857142857143,
2571
+ "n_examples": 50,
2572
  "per_subtask": {
2573
  "SA": {
2574
  "accuracy": 1.0,
2575
+ "count": 48
2576
  },
2577
  "SC": {
2578
  "accuracy": 1.0,
2579
+ "count": 52
2580
  },
2581
  "SS": {
2582
  "accuracy": 1.0,
2583
+ "count": 7
2584
  },
2585
  "UC": {
2586
+ "accuracy": 0.7415730337078652,
2587
+ "count": 89
2588
  },
2589
  "US": {
2590
+ "accuracy": 0.7727272727272727,
2591
+ "count": 154
2592
  }
2593
  }
2594
  },
2595
  "add_S5": {
2596
+ "full_accuracy": 0.26,
2597
+ "digit_accuracy": 0.6914285714285714,
2598
+ "n_examples": 50,
2599
  "per_subtask": {
2600
  "SA": {
2601
  "accuracy": 1.0,
2602
+ "count": 50
2603
  },
2604
  "SC": {
2605
  "accuracy": 1.0,
2606
+ "count": 50
2607
  },
2608
  "UC": {
2609
+ "accuracy": 0.58,
2610
+ "count": 50
2611
  },
2612
  "US": {
2613
+ "accuracy": 0.565,
2614
+ "count": 200
2615
  }
2616
  }
2617
  },
2618
  "add_S6": {
2619
+ "full_accuracy": 0.44,
2620
+ "digit_accuracy": 0.68,
2621
+ "n_examples": 50,
2622
  "per_subtask": {
2623
  "SC": {
2624
  "accuracy": 1.0,
2625
+ "count": 50
2626
  },
2627
  "UC": {
2628
+ "accuracy": 0.44,
2629
+ "count": 50
2630
  },
2631
  "US": {
2632
+ "accuracy": 0.664,
2633
+ "count": 250
2634
  }
2635
  }
2636
  },
2637
  "add_random": {
2638
+ "full_accuracy": 0.88,
2639
+ "digit_accuracy": 0.9792857142857143,
2640
  "n_examples": 200,
2641
  "per_subtask": {
2642
  "SA": {
2643
+ "accuracy": 0.9930394431554525,
2644
+ "count": 431
2645
  },
2646
  "SC": {
2647
+ "accuracy": 0.9778481012658228,
2648
+ "count": 316
2649
  },
2650
  "SS": {
2651
+ "accuracy": 0.9487179487179487,
2652
+ "count": 39
2653
  },
2654
  "UC": {
2655
+ "accuracy": 0.975,
2656
+ "count": 560
2657
  },
2658
  "US": {
2659
+ "accuracy": 0.9444444444444444,
2660
+ "count": 54
2661
+ }
2662
+ }
2663
+ },
2664
+ "add_C1": {
2665
+ "full_accuracy": 0.9,
2666
+ "digit_accuracy": 0.9857142857142858,
2667
+ "n_examples": 50,
2668
+ "per_subtask": {
2669
+ "SA": {
2670
+ "accuracy": 1.0,
2671
+ "count": 250
2672
+ },
2673
+ "SC": {
2674
+ "accuracy": 0.98,
2675
+ "count": 50
2676
+ },
2677
+ "UC": {
2678
+ "accuracy": 0.92,
2679
+ "count": 50
2680
+ }
2681
+ }
2682
+ },
2683
+ "add_C2": {
2684
+ "full_accuracy": 0.9,
2685
+ "digit_accuracy": 0.98,
2686
+ "n_examples": 50,
2687
+ "per_subtask": {
2688
+ "SA": {
2689
+ "accuracy": 0.995,
2690
+ "count": 200
2691
+ },
2692
+ "SC": {
2693
+ "accuracy": 1.0,
2694
+ "count": 50
2695
+ },
2696
+ "UC": {
2697
+ "accuracy": 0.9518072289156626,
2698
+ "count": 83
2699
+ },
2700
+ "US": {
2701
+ "accuracy": 0.8823529411764706,
2702
+ "count": 17
2703
  }
2704
  }
2705
  },
2706
  "add_C3": {
2707
+ "full_accuracy": 0.66,
2708
+ "digit_accuracy": 0.9285714285714286,
2709
+ "n_examples": 50,
2710
  "per_subtask": {
2711
  "SA": {
2712
  "accuracy": 1.0,
2713
+ "count": 150
2714
  },
2715
  "SC": {
2716
  "accuracy": 1.0,
2717
+ "count": 50
2718
  },
2719
  "UC": {
2720
+ "accuracy": 0.86,
2721
+ "count": 100
2722
  },
2723
  "US": {
2724
+ "accuracy": 0.78,
2725
+ "count": 50
2726
  }
2727
  }
2728
  },
2729
  "add_C4": {
2730
+ "full_accuracy": 0.64,
2731
+ "digit_accuracy": 0.94,
2732
+ "n_examples": 50,
2733
  "per_subtask": {
2734
  "SA": {
2735
  "accuracy": 1.0,
2736
+ "count": 100
2737
  },
2738
  "SC": {
2739
+ "accuracy": 1.0,
2740
+ "count": 50
2741
  },
2742
  "UC": {
2743
+ "accuracy": 0.8787878787878788,
2744
+ "count": 132
2745
  },
2746
  "US": {
2747
+ "accuracy": 0.9264705882352942,
2748
+ "count": 68
2749
  }
2750
  }
2751
  },
2752
  "add_C5": {
2753
+ "full_accuracy": 0.64,
2754
+ "digit_accuracy": 0.9142857142857143,
2755
+ "n_examples": 50,
2756
  "per_subtask": {
2757
  "SA": {
2758
  "accuracy": 1.0,
2759
+ "count": 50
2760
  },
2761
  "SC": {
2762
  "accuracy": 1.0,
2763
+ "count": 50
2764
  },
2765
  "UC": {
2766
+ "accuracy": 0.8835616438356164,
2767
+ "count": 146
2768
  },
2769
  "US": {
2770
+ "accuracy": 0.875,
2771
+ "count": 104
2772
  }
2773
  }
2774
  },
2775
  "add_C6": {
2776
+ "full_accuracy": 0.78,
2777
+ "digit_accuracy": 0.94,
2778
+ "n_examples": 50,
2779
  "per_subtask": {
2780
  "SC": {
2781
  "accuracy": 1.0,
2782
+ "count": 50
2783
  },
2784
  "UC": {
2785
+ "accuracy": 0.9470899470899471,
2786
+ "count": 189
2787
  },
2788
  "US": {
2789
+ "accuracy": 0.9009009009009009,
2790
+ "count": 111
2791
  }
2792
  }
2793
  },
2794
  "sub_M0": {
2795
+ "full_accuracy": 0.92,
2796
+ "digit_accuracy": 0.9885714285714285,
2797
+ "n_examples": 50,
2798
  "per_subtask": {
2799
  "MD": {
2800
+ "accuracy": 0.9867986798679867,
2801
+ "count": 303
2802
  },
2803
  "ME": {
2804
  "accuracy": 1.0,
2805
+ "count": 47
2806
  }
2807
  }
2808
  },
2809
  "sub_M1": {
2810
+ "full_accuracy": 0.94,
2811
+ "digit_accuracy": 0.9885714285714285,
2812
+ "n_examples": 50,
2813
  "per_subtask": {
2814
  "MD": {
2815
+ "accuracy": 1.0,
2816
+ "count": 141
2817
  },
2818
  "MB": {
2819
+ "accuracy": 0.9722222222222222,
2820
+ "count": 72
2821
  },
2822
  "ME": {
2823
+ "accuracy": 0.9444444444444444,
2824
+ "count": 18
2825
  },
2826
  "UB": {
2827
+ "accuracy": 0.9915966386554622,
2828
+ "count": 119
2829
  }
2830
  }
2831
  },
2832
  "sub_M2": {
2833
+ "full_accuracy": 0.62,
2834
+ "digit_accuracy": 0.9371428571428572,
2835
+ "n_examples": 50,
2836
  "per_subtask": {
2837
  "MD": {
2838
  "accuracy": 1.0,
2839
+ "count": 112
2840
  },
2841
  "MB": {
2842
+ "accuracy": 0.9622641509433962,
2843
+ "count": 53
2844
  },
2845
  "ME": {
2846
+ "accuracy": 1.0,
2847
+ "count": 47
2848
  },
2849
  "UB": {
2850
+ "accuracy": 0.8,
2851
+ "count": 85
2852
  },
2853
  "UD": {
2854
+ "accuracy": 0.9433962264150944,
2855
+ "count": 53
2856
  }
2857
  }
2858
  },
2859
  "sub_M3": {
2860
+ "full_accuracy": 0.28,
2861
+ "digit_accuracy": 0.8514285714285714,
2862
+ "n_examples": 50,
2863
  "per_subtask": {
2864
  "MD": {
2865
+ "accuracy": 0.9896907216494846,
2866
+ "count": 97
2867
  },
2868
  "MB": {
2869
  "accuracy": 1.0,
2870
+ "count": 51
2871
  },
2872
  "ME": {
2873
  "accuracy": 1.0,
2874
+ "count": 27
2875
  },
2876
  "UB": {
2877
+ "accuracy": 0.5945945945945946,
2878
+ "count": 74
2879
  },
2880
  "UD": {
2881
+ "accuracy": 0.7920792079207921,
2882
+ "count": 101
2883
  }
2884
  }
2885
  },
2886
  "sub_M4": {
2887
+ "full_accuracy": 0.28,
2888
+ "digit_accuracy": 0.78,
2889
+ "n_examples": 50,
2890
  "per_subtask": {
2891
  "MD": {
2892
  "accuracy": 1.0,
2893
+ "count": 100
2894
  },
2895
  "MB": {
2896
+ "accuracy": 1.0,
2897
+ "count": 50
2898
  },
2899
  "UB": {
2900
+ "accuracy": 0.34,
2901
+ "count": 50
2902
  },
2903
  "UD": {
2904
  "accuracy": 0.7066666666666667,
2905
+ "count": 150
2906
  }
2907
  }
2908
  },
2909
  "sub_M5": {
2910
+ "full_accuracy": 0.28,
2911
+ "digit_accuracy": 0.7771428571428571,
2912
+ "n_examples": 50,
2913
  "per_subtask": {
2914
  "MD": {
2915
  "accuracy": 1.0,
2916
+ "count": 50
2917
  },
2918
  "MB": {
2919
  "accuracy": 1.0,
2920
+ "count": 50
2921
  },
2922
  "UB": {
2923
+ "accuracy": 0.36,
2924
+ "count": 50
2925
  },
2926
  "UD": {
2927
+ "accuracy": 0.77,
2928
+ "count": 200
2929
  }
2930
  }
2931
  },
2932
  "sub_random": {
2933
+ "full_accuracy": 0.9,
2934
+ "digit_accuracy": 0.985,
2935
  "n_examples": 200,
2936
  "per_subtask": {
2937
  "MD": {
2938
+ "accuracy": 0.9947368421052631,
2939
+ "count": 570
2940
  },
2941
  "MB": {
2942
+ "accuracy": 0.9819494584837545,
2943
+ "count": 277
2944
  },
2945
  "ME": {
2946
+ "accuracy": 1.0,
2947
  "count": 53
2948
  },
2949
  "UB": {
2950
+ "accuracy": 0.9745222929936306,
2951
+ "count": 471
2952
  },
2953
  "UD": {
2954
+ "accuracy": 0.9655172413793104,
2955
+ "count": 29
2956
  }
2957
  }
2958
  },
2959
  "sub_B3": {
2960
+ "full_accuracy": 0.66,
2961
+ "digit_accuracy": 0.9257142857142857,
2962
+ "n_examples": 50,
2963
  "per_subtask": {
2964
  "MD": {
2965
+ "accuracy": 0.9933333333333333,
2966
+ "count": 150
2967
  },
2968
  "MB": {
2969
  "accuracy": 1.0,
2970
+ "count": 50
2971
  },
2972
  "UB": {
2973
+ "accuracy": 0.8613861386138614,
2974
+ "count": 101
2975
  },
2976
  "UD": {
2977
+ "accuracy": 0.7755102040816326,
2978
+ "count": 49
2979
  }
2980
  }
2981
  },
2982
  "sub_B4": {
2983
+ "full_accuracy": 0.46,
2984
+ "digit_accuracy": 0.8885714285714286,
2985
+ "n_examples": 50,
2986
  "per_subtask": {
2987
  "MD": {
2988
  "accuracy": 1.0,
2989
+ "count": 100
2990
  },
2991
  "MB": {
2992
  "accuracy": 1.0,
2993
+ "count": 50
2994
  },
2995
  "UB": {
2996
+ "accuracy": 0.7768595041322314,
2997
+ "count": 121
2998
  },
2999
  "UD": {
3000
+ "accuracy": 0.8481012658227848,
3001
+ "count": 79
3002
  }
3003
  }
3004
  },
3005
  "sub_B5": {
3006
+ "full_accuracy": 0.62,
3007
+ "digit_accuracy": 0.9314285714285714,
3008
+ "n_examples": 50,
3009
  "per_subtask": {
3010
  "MD": {
3011
  "accuracy": 1.0,
3012
+ "count": 50
3013
  },
3014
  "MB": {
3015
  "accuracy": 1.0,
3016
+ "count": 50
3017
  },
3018
  "UB": {
3019
+ "accuracy": 0.8881578947368421,
3020
+ "count": 152
3021
  },
3022
  "UD": {
3023
+ "accuracy": 0.9285714285714286,
3024
+ "count": 98
3025
  }
3026
  }
3027
  }
3028
  },
3029
  "summary": {
3030
+ "overall_accuracy": 0.6913333333333334,
3031
+ "digit_accuracy": 0.9194285714285715,
3032
+ "total_examples": 1500,
3033
+ "n_splits": 24
3034
  }
3035
  },
3036
  "sorl_eval": {
 
3039
  "K": 4,
3040
  "mode": "sorl",
3041
  "n_digits": 6,
3042
+ "n_per_split": 50
3043
  },
3044
  "splits": {
3045
  "add_S0": {
3046
  "full_accuracy": 1.0,
3047
+ "digit_accuracy": 1.0,
3048
+ "n_examples": 50,
3049
  "per_subtask": {
3050
  "SA": {
3051
  "accuracy": 1.0,
3052
+ "count": 295
3053
  },
3054
  "SS": {
3055
  "accuracy": 1.0,
3056
+ "count": 55
3057
  }
3058
  }
3059
  },
3060
  "add_S1": {
3061
  "full_accuracy": 1.0,
3062
+ "digit_accuracy": 1.0,
3063
+ "n_examples": 50,
3064
  "per_subtask": {
3065
  "SA": {
3066
  "accuracy": 1.0,
3067
+ "count": 126
3068
  },
3069
  "SC": {
3070
  "accuracy": 1.0,
3071
+ "count": 79
3072
  },
3073
  "SS": {
3074
  "accuracy": 1.0,
3075
+ "count": 21
3076
  },
3077
  "UC": {
3078
  "accuracy": 1.0,
3079
+ "count": 124
3080
  }
3081
  }
3082
  },
3083
  "add_S2": {
3084
  "full_accuracy": 1.0,
3085
+ "digit_accuracy": 1.0,
3086
+ "n_examples": 50,
3087
  "per_subtask": {
3088
  "SA": {
3089
  "accuracy": 1.0,
3090
+ "count": 75
3091
  },
3092
  "SC": {
3093
  "accuracy": 1.0,
3094
+ "count": 62
3095
  },
3096
  "SS": {
3097
  "accuracy": 1.0,
3098
+ "count": 39
3099
  },
3100
  "UC": {
3101
  "accuracy": 1.0,
3102
+ "count": 111
3103
  },
3104
  "US": {
3105
  "accuracy": 1.0,
3106
+ "count": 63
3107
  }
3108
  }
3109
  },
3110
  "add_S3": {
3111
  "full_accuracy": 1.0,
3112
+ "digit_accuracy": 1.0,
3113
+ "n_examples": 50,
3114
  "per_subtask": {
3115
  "SA": {
3116
  "accuracy": 1.0,
3117
+ "count": 60
3118
  },
3119
  "SC": {
3120
  "accuracy": 1.0,
3121
+ "count": 57
3122
  },
3123
  "SS": {
3124
  "accuracy": 1.0,
3125
+ "count": 19
3126
  },
3127
  "UC": {
3128
  "accuracy": 1.0,
3129
+ "count": 104
3130
  },
3131
  "US": {
3132
  "accuracy": 1.0,
3133
+ "count": 110
3134
  }
3135
  }
3136
  },
3137
  "add_S4": {
3138
  "full_accuracy": 1.0,
3139
+ "digit_accuracy": 1.0,
3140
+ "n_examples": 50,
3141
  "per_subtask": {
3142
  "SA": {
3143
  "accuracy": 1.0,
3144
+ "count": 48
3145
  },
3146
  "SC": {
3147
  "accuracy": 1.0,
3148
+ "count": 52
3149
  },
3150
  "SS": {
3151
  "accuracy": 1.0,
3152
+ "count": 7
3153
  },
3154
  "UC": {
3155
  "accuracy": 1.0,
3156
+ "count": 89
3157
  },
3158
  "US": {
3159
  "accuracy": 1.0,
3160
+ "count": 154
3161
  }
3162
  }
3163
  },
3164
  "add_S5": {
3165
+ "full_accuracy": 0.78,
3166
+ "digit_accuracy": 0.9685714285714285,
3167
+ "n_examples": 50,
3168
  "per_subtask": {
3169
  "SA": {
3170
  "accuracy": 1.0,
3171
+ "count": 50
3172
  },
3173
  "SC": {
3174
  "accuracy": 1.0,
3175
+ "count": 50
3176
  },
3177
  "UC": {
3178
+ "accuracy": 0.78,
3179
+ "count": 50
3180
  },
3181
  "US": {
3182
  "accuracy": 1.0,
3183
+ "count": 200
3184
  }
3185
  }
3186
  },
3187
  "add_S6": {
3188
  "full_accuracy": 1.0,
3189
+ "digit_accuracy": 1.0,
3190
+ "n_examples": 50,
3191
  "per_subtask": {
3192
  "SC": {
3193
  "accuracy": 1.0,
3194
+ "count": 50
3195
  },
3196
  "UC": {
3197
  "accuracy": 1.0,
3198
+ "count": 50
3199
  },
3200
  "US": {
3201
  "accuracy": 1.0,
3202
+ "count": 250
3203
  }
3204
  }
3205
  },
3206
  "add_random": {
3207
  "full_accuracy": 1.0,
3208
+ "digit_accuracy": 1.0,
3209
  "n_examples": 200,
3210
  "per_subtask": {
3211
  "SA": {
3212
  "accuracy": 1.0,
3213
+ "count": 431
3214
  },
3215
  "SC": {
3216
  "accuracy": 1.0,
3217
+ "count": 316
3218
  },
3219
  "SS": {
3220
  "accuracy": 1.0,
3221
+ "count": 39
3222
  },
3223
  "UC": {
3224
  "accuracy": 1.0,
3225
+ "count": 560
3226
  },
3227
  "US": {
3228
  "accuracy": 1.0,
3229
+ "count": 54
3230
+ }
3231
+ }
3232
+ },
3233
+ "add_C1": {
3234
+ "full_accuracy": 1.0,
3235
+ "digit_accuracy": 1.0,
3236
+ "n_examples": 50,
3237
+ "per_subtask": {
3238
+ "SA": {
3239
+ "accuracy": 1.0,
3240
+ "count": 250
3241
+ },
3242
+ "SC": {
3243
+ "accuracy": 1.0,
3244
+ "count": 50
3245
+ },
3246
+ "UC": {
3247
+ "accuracy": 1.0,
3248
+ "count": 50
3249
+ }
3250
+ }
3251
+ },
3252
+ "add_C2": {
3253
+ "full_accuracy": 1.0,
3254
+ "digit_accuracy": 1.0,
3255
+ "n_examples": 50,
3256
+ "per_subtask": {
3257
+ "SA": {
3258
+ "accuracy": 1.0,
3259
+ "count": 200
3260
+ },
3261
+ "SC": {
3262
+ "accuracy": 1.0,
3263
+ "count": 50
3264
+ },
3265
+ "UC": {
3266
+ "accuracy": 1.0,
3267
+ "count": 83
3268
+ },
3269
+ "US": {
3270
+ "accuracy": 1.0,
3271
+ "count": 17
3272
  }
3273
  }
3274
  },
3275
  "add_C3": {
3276
  "full_accuracy": 1.0,
3277
+ "digit_accuracy": 1.0,
3278
+ "n_examples": 50,
3279
  "per_subtask": {
3280
  "SA": {
3281
  "accuracy": 1.0,
3282
+ "count": 150
3283
  },
3284
  "SC": {
3285
  "accuracy": 1.0,
3286
+ "count": 50
3287
  },
3288
  "UC": {
3289
  "accuracy": 1.0,
3290
+ "count": 100
3291
  },
3292
  "US": {
3293
  "accuracy": 1.0,
3294
+ "count": 50
3295
  }
3296
  }
3297
  },
3298
  "add_C4": {
3299
  "full_accuracy": 1.0,
3300
+ "digit_accuracy": 1.0,
3301
+ "n_examples": 50,
3302
  "per_subtask": {
3303
  "SA": {
3304
  "accuracy": 1.0,
3305
+ "count": 100
3306
  },
3307
  "SC": {
3308
  "accuracy": 1.0,
3309
+ "count": 50
3310
  },
3311
  "UC": {
3312
  "accuracy": 1.0,
3313
+ "count": 132
3314
  },
3315
  "US": {
3316
  "accuracy": 1.0,
3317
+ "count": 68
3318
  }
3319
  }
3320
  },
3321
  "add_C5": {
3322
  "full_accuracy": 1.0,
3323
+ "digit_accuracy": 1.0,
3324
+ "n_examples": 50,
3325
  "per_subtask": {
3326
  "SA": {
3327
  "accuracy": 1.0,
3328
+ "count": 50
3329
  },
3330
  "SC": {
3331
  "accuracy": 1.0,
3332
+ "count": 50
3333
  },
3334
  "UC": {
3335
  "accuracy": 1.0,
3336
+ "count": 146
3337
  },
3338
  "US": {
3339
  "accuracy": 1.0,
3340
+ "count": 104
3341
  }
3342
  }
3343
  },
3344
  "add_C6": {
3345
  "full_accuracy": 1.0,
3346
+ "digit_accuracy": 1.0,
3347
+ "n_examples": 50,
3348
  "per_subtask": {
3349
  "SC": {
3350
  "accuracy": 1.0,
3351
+ "count": 50
3352
  },
3353
  "UC": {
3354
  "accuracy": 1.0,
3355
+ "count": 189
3356
  },
3357
  "US": {
3358
  "accuracy": 1.0,
3359
+ "count": 111
3360
  }
3361
  }
3362
  },
3363
  "sub_M0": {
3364
  "full_accuracy": 1.0,
3365
+ "digit_accuracy": 1.0,
3366
+ "n_examples": 50,
3367
  "per_subtask": {
3368
  "MD": {
3369
  "accuracy": 1.0,
3370
+ "count": 303
3371
  },
3372
  "ME": {
3373
  "accuracy": 1.0,
3374
+ "count": 47
3375
  }
3376
  }
3377
  },
3378
  "sub_M1": {
3379
  "full_accuracy": 1.0,
3380
+ "digit_accuracy": 1.0,
3381
+ "n_examples": 50,
3382
  "per_subtask": {
3383
  "MD": {
3384
  "accuracy": 1.0,
3385
+ "count": 141
3386
  },
3387
  "MB": {
3388
  "accuracy": 1.0,
3389
+ "count": 72
3390
  },
3391
  "ME": {
3392
  "accuracy": 1.0,
3393
+ "count": 18
3394
  },
3395
  "UB": {
3396
  "accuracy": 1.0,
3397
+ "count": 119
3398
  }
3399
  }
3400
  },
3401
  "sub_M2": {
3402
  "full_accuracy": 1.0,
3403
+ "digit_accuracy": 1.0,
3404
+ "n_examples": 50,
3405
  "per_subtask": {
3406
  "MD": {
3407
  "accuracy": 1.0,
3408
+ "count": 112
3409
  },
3410
  "MB": {
3411
  "accuracy": 1.0,
3412
+ "count": 53
3413
  },
3414
  "ME": {
3415
  "accuracy": 1.0,
3416
+ "count": 47
3417
  },
3418
  "UB": {
3419
  "accuracy": 1.0,
3420
+ "count": 85
3421
  },
3422
  "UD": {
3423
  "accuracy": 1.0,
3424
+ "count": 53
3425
  }
3426
  }
3427
  },
3428
  "sub_M3": {
3429
  "full_accuracy": 1.0,
3430
+ "digit_accuracy": 1.0,
3431
+ "n_examples": 50,
3432
  "per_subtask": {
3433
  "MD": {
3434
  "accuracy": 1.0,
3435
+ "count": 97
3436
  },
3437
  "MB": {
3438
  "accuracy": 1.0,
3439
+ "count": 51
3440
  },
3441
  "ME": {
3442
  "accuracy": 1.0,
3443
+ "count": 27
3444
  },
3445
  "UB": {
3446
  "accuracy": 1.0,
3447
+ "count": 74
3448
  },
3449
  "UD": {
3450
  "accuracy": 1.0,
3451
+ "count": 101
3452
  }
3453
  }
3454
  },
3455
  "sub_M4": {
3456
  "full_accuracy": 1.0,
3457
+ "digit_accuracy": 1.0,
3458
+ "n_examples": 50,
3459
  "per_subtask": {
3460
  "MD": {
3461
  "accuracy": 1.0,
3462
+ "count": 100
3463
  },
3464
  "MB": {
3465
  "accuracy": 1.0,
3466
+ "count": 50
3467
  },
3468
  "UB": {
3469
  "accuracy": 1.0,
3470
+ "count": 50
3471
  },
3472
  "UD": {
3473
  "accuracy": 1.0,
3474
+ "count": 150
3475
  }
3476
  }
3477
  },
3478
  "sub_M5": {
3479
+ "full_accuracy": 0.98,
3480
+ "digit_accuracy": 0.9971428571428571,
3481
+ "n_examples": 50,
3482
  "per_subtask": {
3483
  "MD": {
3484
  "accuracy": 1.0,
3485
+ "count": 50
3486
  },
3487
  "MB": {
3488
  "accuracy": 1.0,
3489
+ "count": 50
3490
  },
3491
  "UB": {
3492
+ "accuracy": 0.98,
3493
+ "count": 50
3494
  },
3495
  "UD": {
3496
  "accuracy": 1.0,
3497
+ "count": 200
3498
  }
3499
  }
3500
  },
3501
  "sub_random": {
3502
  "full_accuracy": 1.0,
3503
+ "digit_accuracy": 1.0,
3504
  "n_examples": 200,
3505
  "per_subtask": {
3506
  "MD": {
3507
  "accuracy": 1.0,
3508
+ "count": 570
3509
  },
3510
  "MB": {
3511
  "accuracy": 1.0,
3512
+ "count": 277
3513
  },
3514
  "ME": {
3515
  "accuracy": 1.0,
 
3517
  },
3518
  "UB": {
3519
  "accuracy": 1.0,
3520
+ "count": 471
3521
  },
3522
  "UD": {
3523
  "accuracy": 1.0,
3524
+ "count": 29
3525
  }
3526
  }
3527
  },
3528
  "sub_B3": {
3529
  "full_accuracy": 1.0,
3530
+ "digit_accuracy": 1.0,
3531
+ "n_examples": 50,
3532
  "per_subtask": {
3533
  "MD": {
3534
  "accuracy": 1.0,
3535
+ "count": 150
3536
  },
3537
  "MB": {
3538
  "accuracy": 1.0,
3539
+ "count": 50
3540
  },
3541
  "UB": {
3542
  "accuracy": 1.0,
3543
+ "count": 101
3544
  },
3545
  "UD": {
3546
  "accuracy": 1.0,
3547
+ "count": 49
3548
  }
3549
  }
3550
  },
3551
  "sub_B4": {
3552
  "full_accuracy": 1.0,
3553
+ "digit_accuracy": 1.0,
3554
+ "n_examples": 50,
3555
  "per_subtask": {
3556
  "MD": {
3557
  "accuracy": 1.0,
3558
+ "count": 100
3559
  },
3560
  "MB": {
3561
  "accuracy": 1.0,
3562
+ "count": 50
3563
  },
3564
  "UB": {
3565
  "accuracy": 1.0,
3566
+ "count": 121
3567
  },
3568
  "UD": {
3569
  "accuracy": 1.0,
3570
+ "count": 79
3571
  }
3572
  }
3573
  },
3574
  "sub_B5": {
3575
+ "full_accuracy": 1.0,
3576
+ "digit_accuracy": 1.0,
3577
+ "n_examples": 50,
3578
  "per_subtask": {
3579
  "MD": {
3580
  "accuracy": 1.0,
3581
+ "count": 50
3582
  },
3583
  "MB": {
3584
  "accuracy": 1.0,
3585
+ "count": 50
3586
  },
3587
  "UB": {
3588
+ "accuracy": 1.0,
3589
+ "count": 152
3590
  },
3591
  "UD": {
3592
  "accuracy": 1.0,
3593
+ "count": 98
3594
  }
3595
  }
3596
  }
3597
  },
3598
  "summary": {
3599
+ "overall_accuracy": 0.992,
3600
+ "digit_accuracy": 0.9987619047619047,
3601
+ "total_examples": 1500,
3602
+ "n_splits": 24
3603
  }
3604
  },
3605
  "sorl_overall_accuracy": 0.9929166666666667,