amirali1985 commited on
Commit
b697f79
·
verified ·
1 Parent(s): 88582f0

Upload add_sub_sorl_v1_abs10_K1_25K_1L3H510d/metrics.json with huggingface_hub

Browse files
add_sub_sorl_v1_abs10_K1_25K_1L3H510d/metrics.json CHANGED
@@ -1190,502 +1190,567 @@
1190
  "K": null,
1191
  "mode": "sft",
1192
  "n_digits": 6,
1193
- "n_per_split": 100
1194
  },
1195
  "splits": {
1196
  "add_S0": {
1197
- "full_accuracy": 0.91,
1198
- "n_examples": 100,
 
1199
  "per_subtask": {
1200
  "SA": {
1201
- "accuracy": 0.9851239669421488,
1202
- "count": 605
1203
  },
1204
  "SS": {
1205
  "accuracy": 1.0,
1206
- "count": 95
1207
  }
1208
  }
1209
  },
1210
  "add_S1": {
1211
- "full_accuracy": 0.86,
1212
- "n_examples": 100,
 
1213
  "per_subtask": {
1214
  "SA": {
1215
- "accuracy": 0.9852941176470589,
1216
- "count": 204
1217
  },
1218
  "SC": {
1219
- "accuracy": 0.9822485207100592,
1220
- "count": 169
1221
  },
1222
  "SS": {
1223
- "accuracy": 1.0,
1224
- "count": 31
1225
  },
1226
  "UC": {
1227
- "accuracy": 0.9695945945945946,
1228
- "count": 296
1229
  }
1230
  }
1231
  },
1232
  "add_S2": {
1233
- "full_accuracy": 0.77,
1234
- "n_examples": 100,
 
1235
  "per_subtask": {
1236
  "SA": {
1237
- "accuracy": 1.0,
1238
- "count": 163
1239
  },
1240
  "SC": {
1241
- "accuracy": 0.9615384615384616,
1242
- "count": 130
1243
  },
1244
  "SS": {
1245
- "accuracy": 0.9770114942528736,
1246
- "count": 87
1247
  },
1248
  "UC": {
1249
- "accuracy": 0.9310344827586207,
1250
- "count": 203
1251
  },
1252
  "US": {
1253
- "accuracy": 0.9743589743589743,
1254
- "count": 117
1255
  }
1256
  }
1257
  },
1258
  "add_S3": {
1259
- "full_accuracy": 0.53,
1260
- "n_examples": 100,
 
1261
  "per_subtask": {
1262
  "SA": {
1263
- "accuracy": 0.9834710743801653,
1264
- "count": 121
1265
  },
1266
  "SC": {
1267
- "accuracy": 0.9669421487603306,
1268
- "count": 121
1269
  },
1270
  "SS": {
1271
  "accuracy": 1.0,
1272
- "count": 49
1273
  },
1274
  "UC": {
1275
- "accuracy": 0.8064516129032258,
1276
- "count": 186
1277
  },
1278
  "US": {
1279
- "accuracy": 0.9551569506726457,
1280
- "count": 223
1281
  }
1282
  }
1283
  },
1284
  "add_S4": {
1285
- "full_accuracy": 0.4,
1286
- "n_examples": 100,
 
1287
  "per_subtask": {
1288
  "SA": {
1289
  "accuracy": 1.0,
1290
- "count": 104
1291
  },
1292
  "SC": {
1293
- "accuracy": 0.9716981132075472,
1294
- "count": 106
1295
  },
1296
  "SS": {
1297
  "accuracy": 1.0,
1298
- "count": 23
1299
  },
1300
  "UC": {
1301
- "accuracy": 0.8125,
1302
- "count": 160
1303
  },
1304
  "US": {
1305
- "accuracy": 0.8045602605863192,
1306
- "count": 307
1307
  }
1308
  }
1309
  },
1310
  "add_S5": {
1311
- "full_accuracy": 0.27,
1312
- "n_examples": 100,
 
1313
  "per_subtask": {
1314
  "SA": {
1315
  "accuracy": 1.0,
1316
- "count": 100
1317
  },
1318
  "SC": {
1319
  "accuracy": 1.0,
1320
- "count": 100
1321
  },
1322
  "UC": {
1323
- "accuracy": 0.58,
1324
- "count": 100
1325
  },
1326
  "US": {
1327
- "accuracy": 0.6425,
1328
- "count": 400
1329
  }
1330
  }
1331
  },
1332
  "add_S6": {
1333
- "full_accuracy": 0.61,
1334
- "n_examples": 100,
 
1335
  "per_subtask": {
1336
  "SC": {
1337
  "accuracy": 1.0,
1338
- "count": 100
1339
  },
1340
  "UC": {
1341
- "accuracy": 0.74,
1342
- "count": 100
1343
  },
1344
  "US": {
1345
- "accuracy": 0.766,
1346
- "count": 500
1347
  }
1348
  }
1349
  },
1350
  "add_random": {
1351
- "full_accuracy": 0.865,
 
1352
  "n_examples": 200,
1353
  "per_subtask": {
1354
  "SA": {
1355
- "accuracy": 0.9910514541387024,
1356
- "count": 447
1357
  },
1358
  "SC": {
1359
- "accuracy": 0.975,
1360
- "count": 320
1361
  },
1362
  "SS": {
1363
- "accuracy": 0.9821428571428571,
1364
- "count": 56
1365
  },
1366
  "UC": {
1367
- "accuracy": 0.9754253308128544,
1368
- "count": 529
1369
  },
1370
  "US": {
1371
- "accuracy": 0.9583333333333334,
1372
- "count": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1373
  }
1374
  }
1375
  },
1376
  "add_C3": {
1377
- "full_accuracy": 0.61,
1378
- "n_examples": 100,
 
1379
  "per_subtask": {
1380
  "SA": {
1381
  "accuracy": 1.0,
1382
- "count": 300
1383
  },
1384
  "SC": {
1385
  "accuracy": 1.0,
1386
- "count": 100
1387
  },
1388
  "UC": {
1389
- "accuracy": 0.8082901554404145,
1390
- "count": 193
1391
  },
1392
  "US": {
1393
- "accuracy": 0.9158878504672897,
1394
- "count": 107
1395
  }
1396
  }
1397
  },
1398
  "add_C4": {
1399
- "full_accuracy": 0.65,
1400
- "n_examples": 100,
 
1401
  "per_subtask": {
1402
  "SA": {
1403
  "accuracy": 1.0,
1404
- "count": 200
1405
  },
1406
  "SC": {
1407
  "accuracy": 1.0,
1408
- "count": 100
1409
  },
1410
  "UC": {
1411
- "accuracy": 0.88671875,
1412
- "count": 256
1413
  },
1414
  "US": {
1415
- "accuracy": 0.8888888888888888,
1416
- "count": 144
1417
  }
1418
  }
1419
  },
1420
  "add_C5": {
1421
- "full_accuracy": 0.51,
1422
- "n_examples": 100,
 
1423
  "per_subtask": {
1424
  "SA": {
1425
  "accuracy": 1.0,
1426
- "count": 100
1427
  },
1428
  "SC": {
1429
  "accuracy": 1.0,
1430
- "count": 100
1431
  },
1432
  "UC": {
1433
- "accuracy": 0.8562091503267973,
1434
- "count": 306
1435
  },
1436
  "US": {
1437
- "accuracy": 0.9278350515463918,
1438
- "count": 194
1439
  }
1440
  }
1441
  },
1442
  "add_C6": {
1443
- "full_accuracy": 0.68,
1444
- "n_examples": 100,
 
1445
  "per_subtask": {
1446
  "SC": {
1447
  "accuracy": 1.0,
1448
- "count": 100
1449
  },
1450
  "UC": {
1451
- "accuracy": 0.9153005464480874,
1452
- "count": 366
1453
  },
1454
  "US": {
1455
- "accuracy": 0.9700854700854701,
1456
- "count": 234
1457
  }
1458
  }
1459
  },
1460
  "sub_M0": {
1461
- "full_accuracy": 0.88,
1462
- "n_examples": 100,
 
1463
  "per_subtask": {
1464
  "MD": {
1465
- "accuracy": 0.9866888519134775,
1466
- "count": 601
1467
  },
1468
  "ME": {
1469
- "accuracy": 0.9595959595959596,
1470
- "count": 99
1471
  }
1472
  }
1473
  },
1474
  "sub_M1": {
1475
- "full_accuracy": 0.78,
1476
- "n_examples": 100,
 
1477
  "per_subtask": {
1478
  "MD": {
1479
- "accuracy": 0.985663082437276,
1480
- "count": 279
1481
  },
1482
  "MB": {
1483
- "accuracy": 0.9586206896551724,
1484
- "count": 145
1485
  },
1486
  "ME": {
1487
- "accuracy": 0.9583333333333334,
1488
- "count": 24
1489
  },
1490
  "UB": {
1491
- "accuracy": 0.9484126984126984,
1492
- "count": 252
1493
  }
1494
  }
1495
  },
1496
  "sub_M2": {
1497
- "full_accuracy": 0.35,
1498
- "n_examples": 100,
 
1499
  "per_subtask": {
1500
  "MD": {
1501
- "accuracy": 0.9765258215962441,
1502
- "count": 213
1503
  },
1504
  "MB": {
1505
- "accuracy": 0.9734513274336283,
1506
- "count": 113
1507
  },
1508
  "ME": {
1509
- "accuracy": 0.9764705882352941,
1510
- "count": 85
1511
  },
1512
  "UB": {
1513
- "accuracy": 0.6629834254143646,
1514
- "count": 181
1515
  },
1516
  "UD": {
1517
- "accuracy": 0.9074074074074074,
1518
- "count": 108
1519
  }
1520
  }
1521
  },
1522
  "sub_M3": {
1523
- "full_accuracy": 0.11,
1524
- "n_examples": 100,
 
1525
  "per_subtask": {
1526
  "MD": {
1527
- "accuracy": 0.994413407821229,
1528
- "count": 179
1529
  },
1530
  "MB": {
1531
- "accuracy": 0.970873786407767,
1532
- "count": 103
1533
  },
1534
  "ME": {
1535
  "accuracy": 1.0,
1536
- "count": 56
1537
  },
1538
  "UB": {
1539
- "accuracy": 0.5167785234899329,
1540
- "count": 149
1541
  },
1542
  "UD": {
1543
- "accuracy": 0.6384976525821596,
1544
- "count": 213
1545
  }
1546
  }
1547
  },
1548
  "sub_M4": {
1549
- "full_accuracy": 0.09,
1550
- "n_examples": 100,
 
1551
  "per_subtask": {
1552
  "MD": {
1553
  "accuracy": 1.0,
1554
- "count": 200
1555
  },
1556
  "MB": {
1557
- "accuracy": 0.99,
1558
- "count": 100
1559
  },
1560
  "UB": {
1561
- "accuracy": 0.34,
1562
- "count": 100
1563
  },
1564
  "UD": {
1565
- "accuracy": 0.4,
1566
- "count": 300
1567
  }
1568
  }
1569
  },
1570
  "sub_M5": {
1571
- "full_accuracy": 0.0,
1572
- "n_examples": 100,
 
1573
  "per_subtask": {
1574
  "MD": {
1575
  "accuracy": 1.0,
1576
- "count": 100
1577
  },
1578
  "MB": {
1579
  "accuracy": 1.0,
1580
- "count": 100
1581
  },
1582
  "UB": {
1583
- "accuracy": 0.24,
1584
- "count": 100
1585
  },
1586
  "UD": {
1587
- "accuracy": 0.22,
1588
- "count": 400
1589
  }
1590
  }
1591
  },
1592
  "sub_random": {
1593
- "full_accuracy": 0.705,
 
1594
  "n_examples": 200,
1595
  "per_subtask": {
1596
  "MD": {
1597
- "accuracy": 0.985,
1598
- "count": 600
1599
  },
1600
  "MB": {
1601
- "accuracy": 0.9662921348314607,
1602
- "count": 267
1603
  },
1604
  "ME": {
1605
  "accuracy": 1.0,
1606
  "count": 53
1607
  },
1608
  "UB": {
1609
- "accuracy": 0.9020501138952164,
1610
- "count": 439
1611
  },
1612
  "UD": {
1613
- "accuracy": 0.975609756097561,
1614
- "count": 41
1615
  }
1616
  }
1617
  },
1618
  "sub_B3": {
1619
- "full_accuracy": 0.38,
1620
- "n_examples": 100,
 
1621
  "per_subtask": {
1622
  "MD": {
1623
- "accuracy": 0.99,
1624
- "count": 300
1625
  },
1626
  "MB": {
1627
  "accuracy": 1.0,
1628
- "count": 100
1629
  },
1630
  "UB": {
1631
- "accuracy": 0.7055837563451777,
1632
- "count": 197
1633
  },
1634
  "UD": {
1635
- "accuracy": 0.6893203883495146,
1636
- "count": 103
1637
  }
1638
  }
1639
  },
1640
  "sub_B4": {
1641
- "full_accuracy": 0.27,
1642
- "n_examples": 100,
 
1643
  "per_subtask": {
1644
  "MD": {
1645
  "accuracy": 1.0,
1646
- "count": 200
1647
  },
1648
  "MB": {
1649
- "accuracy": 0.99,
1650
- "count": 100
1651
  },
1652
  "UB": {
1653
- "accuracy": 0.6882591093117408,
1654
- "count": 247
1655
  },
1656
  "UD": {
1657
- "accuracy": 0.6405228758169934,
1658
- "count": 153
1659
  }
1660
  }
1661
  },
1662
  "sub_B5": {
1663
- "full_accuracy": 0.2,
1664
- "n_examples": 100,
 
1665
  "per_subtask": {
1666
  "MD": {
1667
  "accuracy": 1.0,
1668
- "count": 100
1669
  },
1670
  "MB": {
1671
  "accuracy": 1.0,
1672
- "count": 100
1673
  },
1674
  "UB": {
1675
- "accuracy": 0.7248322147651006,
1676
- "count": 298
1677
  },
1678
  "UD": {
1679
- "accuracy": 0.6287128712871287,
1680
- "count": 202
1681
  }
1682
  }
1683
  }
1684
  },
1685
  "summary": {
1686
- "overall_accuracy": 0.5416666666666666,
1687
- "total_examples": 2400,
1688
- "n_splits": 22
 
1689
  }
1690
  },
1691
  "sorl_eval": {
@@ -1694,502 +1759,567 @@
1694
  "K": 1,
1695
  "mode": "sorl",
1696
  "n_digits": 6,
1697
- "n_per_split": 100
1698
  },
1699
  "splits": {
1700
  "add_S0": {
1701
- "full_accuracy": 0.96,
1702
- "n_examples": 100,
 
1703
  "per_subtask": {
1704
  "SA": {
1705
- "accuracy": 0.9933884297520661,
1706
- "count": 605
1707
  },
1708
  "SS": {
1709
  "accuracy": 1.0,
1710
- "count": 95
1711
  }
1712
  }
1713
  },
1714
  "add_S1": {
1715
  "full_accuracy": 0.9,
1716
- "n_examples": 100,
 
1717
  "per_subtask": {
1718
  "SA": {
1719
- "accuracy": 0.9852941176470589,
1720
- "count": 204
1721
  },
1722
  "SC": {
1723
- "accuracy": 0.9881656804733728,
1724
- "count": 169
1725
  },
1726
  "SS": {
1727
  "accuracy": 1.0,
1728
- "count": 31
1729
  },
1730
  "UC": {
1731
- "accuracy": 0.9831081081081081,
1732
- "count": 296
1733
  }
1734
  }
1735
  },
1736
  "add_S2": {
1737
- "full_accuracy": 0.69,
1738
- "n_examples": 100,
 
1739
  "per_subtask": {
1740
  "SA": {
1741
- "accuracy": 0.9877300613496932,
1742
- "count": 163
1743
  },
1744
  "SC": {
1745
- "accuracy": 0.9384615384615385,
1746
- "count": 130
1747
  },
1748
  "SS": {
1749
- "accuracy": 0.9310344827586207,
1750
- "count": 87
1751
  },
1752
  "UC": {
1753
- "accuracy": 0.9064039408866995,
1754
- "count": 203
1755
  },
1756
  "US": {
1757
- "accuracy": 0.9914529914529915,
1758
- "count": 117
1759
  }
1760
  }
1761
  },
1762
  "add_S3": {
1763
- "full_accuracy": 0.57,
1764
- "n_examples": 100,
 
1765
  "per_subtask": {
1766
  "SA": {
1767
- "accuracy": 1.0,
1768
- "count": 121
1769
  },
1770
  "SC": {
1771
- "accuracy": 0.9752066115702479,
1772
- "count": 121
1773
  },
1774
  "SS": {
1775
- "accuracy": 0.9795918367346939,
1776
- "count": 49
1777
  },
1778
  "UC": {
1779
- "accuracy": 0.8279569892473119,
1780
- "count": 186
1781
  },
1782
  "US": {
1783
- "accuracy": 0.8923766816143498,
1784
- "count": 223
1785
  }
1786
  }
1787
  },
1788
  "add_S4": {
1789
- "full_accuracy": 0.58,
1790
- "n_examples": 100,
 
1791
  "per_subtask": {
1792
  "SA": {
1793
  "accuracy": 1.0,
1794
- "count": 104
1795
  },
1796
  "SC": {
1797
- "accuracy": 0.9905660377358491,
1798
- "count": 106
1799
  },
1800
  "SS": {
1801
  "accuracy": 1.0,
1802
- "count": 23
1803
  },
1804
  "UC": {
1805
- "accuracy": 0.825,
1806
- "count": 160
1807
  },
1808
  "US": {
1809
- "accuracy": 0.8306188925081434,
1810
- "count": 307
1811
  }
1812
  }
1813
  },
1814
  "add_S5": {
1815
- "full_accuracy": 0.37,
1816
- "n_examples": 100,
 
1817
  "per_subtask": {
1818
  "SA": {
1819
  "accuracy": 1.0,
1820
- "count": 100
1821
  },
1822
  "SC": {
1823
  "accuracy": 1.0,
1824
- "count": 100
1825
  },
1826
  "UC": {
1827
- "accuracy": 0.54,
1828
- "count": 100
1829
  },
1830
  "US": {
1831
- "accuracy": 0.6425,
1832
- "count": 400
1833
  }
1834
  }
1835
  },
1836
  "add_S6": {
1837
- "full_accuracy": 0.61,
1838
- "n_examples": 100,
 
1839
  "per_subtask": {
1840
  "SC": {
1841
  "accuracy": 1.0,
1842
- "count": 100
1843
  },
1844
  "UC": {
1845
- "accuracy": 0.69,
1846
- "count": 100
1847
  },
1848
  "US": {
1849
- "accuracy": 0.77,
1850
- "count": 500
1851
  }
1852
  }
1853
  },
1854
  "add_random": {
1855
- "full_accuracy": 0.925,
 
1856
  "n_examples": 200,
1857
  "per_subtask": {
1858
  "SA": {
1859
- "accuracy": 0.9932885906040269,
1860
- "count": 447
1861
  },
1862
  "SC": {
1863
- "accuracy": 0.9875,
1864
- "count": 320
1865
  },
1866
  "SS": {
1867
- "accuracy": 0.9821428571428571,
1868
- "count": 56
1869
  },
1870
  "UC": {
1871
- "accuracy": 0.9848771266540642,
1872
- "count": 529
1873
  },
1874
  "US": {
 
 
 
 
 
 
 
 
 
 
 
1875
  "accuracy": 1.0,
1876
- "count": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1877
  }
1878
  }
1879
  },
1880
  "add_C3": {
1881
- "full_accuracy": 0.62,
1882
- "n_examples": 100,
 
1883
  "per_subtask": {
1884
  "SA": {
1885
  "accuracy": 1.0,
1886
- "count": 300
1887
  },
1888
  "SC": {
1889
  "accuracy": 1.0,
1890
- "count": 100
1891
  },
1892
  "UC": {
1893
- "accuracy": 0.8186528497409327,
1894
- "count": 193
1895
  },
1896
  "US": {
1897
- "accuracy": 0.8785046728971962,
1898
- "count": 107
1899
  }
1900
  }
1901
  },
1902
  "add_C4": {
1903
- "full_accuracy": 0.61,
1904
- "n_examples": 100,
 
1905
  "per_subtask": {
1906
  "SA": {
1907
  "accuracy": 1.0,
1908
- "count": 200
1909
  },
1910
  "SC": {
1911
  "accuracy": 1.0,
1912
- "count": 100
1913
  },
1914
  "UC": {
1915
- "accuracy": 0.87890625,
1916
- "count": 256
1917
  },
1918
  "US": {
1919
- "accuracy": 0.8611111111111112,
1920
- "count": 144
1921
  }
1922
  }
1923
  },
1924
  "add_C5": {
1925
- "full_accuracy": 0.59,
1926
- "n_examples": 100,
 
1927
  "per_subtask": {
1928
  "SA": {
1929
  "accuracy": 1.0,
1930
- "count": 100
1931
  },
1932
  "SC": {
1933
  "accuracy": 1.0,
1934
- "count": 100
1935
  },
1936
  "UC": {
1937
- "accuracy": 0.8823529411764706,
1938
- "count": 306
1939
  },
1940
  "US": {
1941
- "accuracy": 0.8969072164948454,
1942
- "count": 194
1943
  }
1944
  }
1945
  },
1946
  "add_C6": {
1947
- "full_accuracy": 0.69,
1948
- "n_examples": 100,
 
1949
  "per_subtask": {
1950
  "SC": {
1951
  "accuracy": 1.0,
1952
- "count": 100
1953
  },
1954
  "UC": {
1955
- "accuracy": 0.912568306010929,
1956
- "count": 366
1957
  },
1958
  "US": {
1959
- "accuracy": 0.9444444444444444,
1960
- "count": 234
1961
  }
1962
  }
1963
  },
1964
  "sub_M0": {
1965
- "full_accuracy": 0.89,
1966
- "n_examples": 100,
 
1967
  "per_subtask": {
1968
  "MD": {
1969
- "accuracy": 0.9850249584026622,
1970
- "count": 601
1971
  },
1972
  "ME": {
1973
- "accuracy": 0.9696969696969697,
1974
- "count": 99
1975
  }
1976
  }
1977
  },
1978
  "sub_M1": {
1979
  "full_accuracy": 0.82,
1980
- "n_examples": 100,
 
1981
  "per_subtask": {
1982
  "MD": {
1983
- "accuracy": 0.982078853046595,
1984
- "count": 279
1985
  },
1986
  "MB": {
1987
- "accuracy": 0.9793103448275862,
1988
- "count": 145
1989
  },
1990
  "ME": {
1991
- "accuracy": 0.9583333333333334,
1992
- "count": 24
1993
  },
1994
  "UB": {
1995
- "accuracy": 0.9603174603174603,
1996
- "count": 252
1997
  }
1998
  }
1999
  },
2000
  "sub_M2": {
2001
- "full_accuracy": 0.33,
2002
- "n_examples": 100,
 
2003
  "per_subtask": {
2004
  "MD": {
2005
- "accuracy": 0.971830985915493,
2006
- "count": 213
2007
  },
2008
  "MB": {
2009
- "accuracy": 0.9380530973451328,
2010
- "count": 113
2011
  },
2012
  "ME": {
2013
- "accuracy": 0.9764705882352941,
2014
- "count": 85
2015
  },
2016
  "UB": {
2017
- "accuracy": 0.6464088397790055,
2018
- "count": 181
2019
  },
2020
  "UD": {
2021
- "accuracy": 0.9444444444444444,
2022
- "count": 108
2023
  }
2024
  }
2025
  },
2026
  "sub_M3": {
2027
  "full_accuracy": 0.22,
2028
- "n_examples": 100,
 
2029
  "per_subtask": {
2030
  "MD": {
2031
- "accuracy": 0.9832402234636871,
2032
- "count": 179
2033
  },
2034
  "MB": {
2035
- "accuracy": 0.9805825242718447,
2036
- "count": 103
2037
  },
2038
  "ME": {
2039
- "accuracy": 1.0,
2040
- "count": 56
2041
  },
2042
  "UB": {
2043
- "accuracy": 0.5704697986577181,
2044
- "count": 149
2045
  },
2046
  "UD": {
2047
- "accuracy": 0.7136150234741784,
2048
- "count": 213
2049
  }
2050
  }
2051
  },
2052
  "sub_M4": {
2053
- "full_accuracy": 0.2,
2054
- "n_examples": 100,
 
2055
  "per_subtask": {
2056
  "MD": {
2057
- "accuracy": 0.99,
2058
- "count": 200
2059
  },
2060
  "MB": {
2061
  "accuracy": 1.0,
2062
- "count": 100
2063
  },
2064
  "UB": {
2065
- "accuracy": 0.49,
2066
- "count": 100
2067
  },
2068
  "UD": {
2069
- "accuracy": 0.49,
2070
- "count": 300
2071
  }
2072
  }
2073
  },
2074
  "sub_M5": {
2075
- "full_accuracy": 0.1,
2076
- "n_examples": 100,
 
2077
  "per_subtask": {
2078
  "MD": {
2079
  "accuracy": 1.0,
2080
- "count": 100
2081
  },
2082
  "MB": {
2083
  "accuracy": 1.0,
2084
- "count": 100
2085
  },
2086
  "UB": {
2087
- "accuracy": 0.4,
2088
- "count": 100
2089
  },
2090
  "UD": {
2091
- "accuracy": 0.33,
2092
- "count": 400
2093
  }
2094
  }
2095
  },
2096
  "sub_random": {
2097
- "full_accuracy": 0.735,
 
2098
  "n_examples": 200,
2099
  "per_subtask": {
2100
  "MD": {
2101
- "accuracy": 0.9766666666666667,
2102
- "count": 600
2103
  },
2104
  "MB": {
2105
- "accuracy": 0.9588014981273408,
2106
- "count": 267
2107
  },
2108
  "ME": {
2109
  "accuracy": 1.0,
2110
  "count": 53
2111
  },
2112
  "UB": {
2113
- "accuracy": 0.9225512528473804,
2114
- "count": 439
2115
  },
2116
  "UD": {
2117
- "accuracy": 0.975609756097561,
2118
- "count": 41
2119
  }
2120
  }
2121
  },
2122
  "sub_B3": {
2123
- "full_accuracy": 0.44,
2124
- "n_examples": 100,
 
2125
  "per_subtask": {
2126
  "MD": {
2127
- "accuracy": 0.98,
2128
- "count": 300
2129
  },
2130
  "MB": {
2131
- "accuracy": 0.99,
2132
- "count": 100
2133
  },
2134
  "UB": {
2135
- "accuracy": 0.7360406091370558,
2136
- "count": 197
2137
  },
2138
  "UD": {
2139
- "accuracy": 0.7766990291262136,
2140
- "count": 103
2141
  }
2142
  }
2143
  },
2144
  "sub_B4": {
2145
- "full_accuracy": 0.39,
2146
- "n_examples": 100,
 
2147
  "per_subtask": {
2148
  "MD": {
2149
  "accuracy": 1.0,
2150
- "count": 200
2151
  },
2152
  "MB": {
2153
- "accuracy": 0.99,
2154
- "count": 100
2155
  },
2156
  "UB": {
2157
- "accuracy": 0.7530364372469636,
2158
- "count": 247
2159
  },
2160
  "UD": {
2161
- "accuracy": 0.7058823529411765,
2162
- "count": 153
2163
  }
2164
  }
2165
  },
2166
  "sub_B5": {
2167
- "full_accuracy": 0.27,
2168
- "n_examples": 100,
 
2169
  "per_subtask": {
2170
  "MD": {
2171
  "accuracy": 1.0,
2172
- "count": 100
2173
  },
2174
  "MB": {
2175
  "accuracy": 1.0,
2176
- "count": 100
2177
  },
2178
  "UB": {
2179
- "accuracy": 0.7583892617449665,
2180
- "count": 298
2181
  },
2182
  "UD": {
2183
- "accuracy": 0.7128712871287128,
2184
- "count": 202
2185
  }
2186
  }
2187
  }
2188
  },
2189
  "summary": {
2190
- "overall_accuracy": 0.5895833333333333,
2191
- "total_examples": 2400,
2192
- "n_splits": 22
 
2193
  }
2194
  },
2195
  "sorl_overall_accuracy": 0.5895833333333333,
 
1190
  "K": null,
1191
  "mode": "sft",
1192
  "n_digits": 6,
1193
+ "n_per_split": 50
1194
  },
1195
  "splits": {
1196
  "add_S0": {
1197
+ "full_accuracy": 0.88,
1198
+ "digit_accuracy": 0.9828571428571429,
1199
+ "n_examples": 50,
1200
  "per_subtask": {
1201
  "SA": {
1202
+ "accuracy": 0.9796610169491525,
1203
+ "count": 295
1204
  },
1205
  "SS": {
1206
  "accuracy": 1.0,
1207
+ "count": 55
1208
  }
1209
  }
1210
  },
1211
  "add_S1": {
1212
+ "full_accuracy": 0.82,
1213
+ "digit_accuracy": 0.9714285714285714,
1214
+ "n_examples": 50,
1215
  "per_subtask": {
1216
  "SA": {
1217
+ "accuracy": 0.9920634920634921,
1218
+ "count": 126
1219
  },
1220
  "SC": {
1221
+ "accuracy": 0.9873417721518988,
1222
+ "count": 79
1223
  },
1224
  "SS": {
1225
+ "accuracy": 0.9523809523809523,
1226
+ "count": 21
1227
  },
1228
  "UC": {
1229
+ "accuracy": 0.9435483870967742,
1230
+ "count": 124
1231
  }
1232
  }
1233
  },
1234
  "add_S2": {
1235
+ "full_accuracy": 0.66,
1236
+ "digit_accuracy": 0.9371428571428572,
1237
+ "n_examples": 50,
1238
  "per_subtask": {
1239
  "SA": {
1240
+ "accuracy": 0.9733333333333334,
1241
+ "count": 75
1242
  },
1243
  "SC": {
1244
+ "accuracy": 0.9516129032258065,
1245
+ "count": 62
1246
  },
1247
  "SS": {
1248
+ "accuracy": 0.9230769230769231,
1249
+ "count": 39
1250
  },
1251
  "UC": {
1252
+ "accuracy": 0.8828828828828829,
1253
+ "count": 111
1254
  },
1255
  "US": {
1256
+ "accuracy": 0.9841269841269841,
1257
+ "count": 63
1258
  }
1259
  }
1260
  },
1261
  "add_S3": {
1262
+ "full_accuracy": 0.58,
1263
+ "digit_accuracy": 0.9285714285714286,
1264
+ "n_examples": 50,
1265
  "per_subtask": {
1266
  "SA": {
1267
+ "accuracy": 1.0,
1268
+ "count": 60
1269
  },
1270
  "SC": {
1271
+ "accuracy": 0.9649122807017544,
1272
+ "count": 57
1273
  },
1274
  "SS": {
1275
  "accuracy": 1.0,
1276
+ "count": 19
1277
  },
1278
  "UC": {
1279
+ "accuracy": 0.8076923076923077,
1280
+ "count": 104
1281
  },
1282
  "US": {
1283
+ "accuracy": 0.9727272727272728,
1284
+ "count": 110
1285
  }
1286
  }
1287
  },
1288
  "add_S4": {
1289
+ "full_accuracy": 0.46,
1290
+ "digit_accuracy": 0.8628571428571429,
1291
+ "n_examples": 50,
1292
  "per_subtask": {
1293
  "SA": {
1294
  "accuracy": 1.0,
1295
+ "count": 48
1296
  },
1297
  "SC": {
1298
+ "accuracy": 1.0,
1299
+ "count": 52
1300
  },
1301
  "SS": {
1302
  "accuracy": 1.0,
1303
+ "count": 7
1304
  },
1305
  "UC": {
1306
+ "accuracy": 0.7528089887640449,
1307
+ "count": 89
1308
  },
1309
  "US": {
1310
+ "accuracy": 0.8311688311688312,
1311
+ "count": 154
1312
  }
1313
  }
1314
  },
1315
  "add_S5": {
1316
+ "full_accuracy": 0.28,
1317
+ "digit_accuracy": 0.6857142857142857,
1318
+ "n_examples": 50,
1319
  "per_subtask": {
1320
  "SA": {
1321
  "accuracy": 1.0,
1322
+ "count": 50
1323
  },
1324
  "SC": {
1325
  "accuracy": 1.0,
1326
+ "count": 50
1327
  },
1328
  "UC": {
1329
+ "accuracy": 0.56,
1330
+ "count": 50
1331
  },
1332
  "US": {
1333
+ "accuracy": 0.56,
1334
+ "count": 200
1335
  }
1336
  }
1337
  },
1338
  "add_S6": {
1339
+ "full_accuracy": 0.64,
1340
+ "digit_accuracy": 0.7914285714285715,
1341
+ "n_examples": 50,
1342
  "per_subtask": {
1343
  "SC": {
1344
  "accuracy": 1.0,
1345
+ "count": 50
1346
  },
1347
  "UC": {
1348
+ "accuracy": 0.76,
1349
+ "count": 50
1350
  },
1351
  "US": {
1352
+ "accuracy": 0.756,
1353
+ "count": 250
1354
  }
1355
  }
1356
  },
1357
  "add_random": {
1358
+ "full_accuracy": 0.85,
1359
+ "digit_accuracy": 0.9764285714285714,
1360
  "n_examples": 200,
1361
  "per_subtask": {
1362
  "SA": {
1363
+ "accuracy": 0.988399071925754,
1364
+ "count": 431
1365
  },
1366
  "SC": {
1367
+ "accuracy": 0.9810126582278481,
1368
+ "count": 316
1369
  },
1370
  "SS": {
1371
+ "accuracy": 0.9743589743589743,
1372
+ "count": 39
1373
  },
1374
  "UC": {
1375
+ "accuracy": 0.9625,
1376
+ "count": 560
1377
  },
1378
  "US": {
1379
+ "accuracy": 1.0,
1380
+ "count": 54
1381
+ }
1382
+ }
1383
+ },
1384
+ "add_C1": {
1385
+ "full_accuracy": 0.84,
1386
+ "digit_accuracy": 0.9771428571428571,
1387
+ "n_examples": 50,
1388
+ "per_subtask": {
1389
+ "SA": {
1390
+ "accuracy": 1.0,
1391
+ "count": 250
1392
+ },
1393
+ "SC": {
1394
+ "accuracy": 1.0,
1395
+ "count": 50
1396
+ },
1397
+ "UC": {
1398
+ "accuracy": 0.84,
1399
+ "count": 50
1400
+ }
1401
+ }
1402
+ },
1403
+ "add_C2": {
1404
+ "full_accuracy": 0.84,
1405
+ "digit_accuracy": 0.9657142857142857,
1406
+ "n_examples": 50,
1407
+ "per_subtask": {
1408
+ "SA": {
1409
+ "accuracy": 1.0,
1410
+ "count": 200
1411
+ },
1412
+ "SC": {
1413
+ "accuracy": 1.0,
1414
+ "count": 50
1415
+ },
1416
+ "UC": {
1417
+ "accuracy": 0.9156626506024096,
1418
+ "count": 83
1419
+ },
1420
+ "US": {
1421
+ "accuracy": 0.7058823529411765,
1422
+ "count": 17
1423
  }
1424
  }
1425
  },
1426
  "add_C3": {
1427
+ "full_accuracy": 0.56,
1428
+ "digit_accuracy": 0.9257142857142857,
1429
+ "n_examples": 50,
1430
  "per_subtask": {
1431
  "SA": {
1432
  "accuracy": 1.0,
1433
+ "count": 150
1434
  },
1435
  "SC": {
1436
  "accuracy": 1.0,
1437
+ "count": 50
1438
  },
1439
  "UC": {
1440
+ "accuracy": 0.85,
1441
+ "count": 100
1442
  },
1443
  "US": {
1444
+ "accuracy": 0.78,
1445
+ "count": 50
1446
  }
1447
  }
1448
  },
1449
  "add_C4": {
1450
+ "full_accuracy": 0.7,
1451
+ "digit_accuracy": 0.9514285714285714,
1452
+ "n_examples": 50,
1453
  "per_subtask": {
1454
  "SA": {
1455
  "accuracy": 1.0,
1456
+ "count": 100
1457
  },
1458
  "SC": {
1459
  "accuracy": 1.0,
1460
+ "count": 50
1461
  },
1462
  "UC": {
1463
+ "accuracy": 0.9090909090909091,
1464
+ "count": 132
1465
  },
1466
  "US": {
1467
+ "accuracy": 0.9264705882352942,
1468
+ "count": 68
1469
  }
1470
  }
1471
  },
1472
  "add_C5": {
1473
+ "full_accuracy": 0.6,
1474
+ "digit_accuracy": 0.9228571428571428,
1475
+ "n_examples": 50,
1476
  "per_subtask": {
1477
  "SA": {
1478
  "accuracy": 1.0,
1479
+ "count": 50
1480
  },
1481
  "SC": {
1482
  "accuracy": 1.0,
1483
+ "count": 50
1484
  },
1485
  "UC": {
1486
+ "accuracy": 0.9041095890410958,
1487
+ "count": 146
1488
  },
1489
  "US": {
1490
+ "accuracy": 0.875,
1491
+ "count": 104
1492
  }
1493
  }
1494
  },
1495
  "add_C6": {
1496
+ "full_accuracy": 0.56,
1497
+ "digit_accuracy": 0.9142857142857143,
1498
+ "n_examples": 50,
1499
  "per_subtask": {
1500
  "SC": {
1501
  "accuracy": 1.0,
1502
+ "count": 50
1503
  },
1504
  "UC": {
1505
+ "accuracy": 0.8994708994708994,
1506
+ "count": 189
1507
  },
1508
  "US": {
1509
+ "accuracy": 0.9009009009009009,
1510
+ "count": 111
1511
  }
1512
  }
1513
  },
1514
  "sub_M0": {
1515
+ "full_accuracy": 0.86,
1516
+ "digit_accuracy": 0.98,
1517
+ "n_examples": 50,
1518
  "per_subtask": {
1519
  "MD": {
1520
+ "accuracy": 0.976897689768977,
1521
+ "count": 303
1522
  },
1523
  "ME": {
1524
+ "accuracy": 1.0,
1525
+ "count": 47
1526
  }
1527
  }
1528
  },
1529
  "sub_M1": {
1530
+ "full_accuracy": 0.72,
1531
+ "digit_accuracy": 0.9571428571428572,
1532
+ "n_examples": 50,
1533
  "per_subtask": {
1534
  "MD": {
1535
+ "accuracy": 1.0,
1536
+ "count": 141
1537
  },
1538
  "MB": {
1539
+ "accuracy": 0.9722222222222222,
1540
+ "count": 72
1541
  },
1542
  "ME": {
1543
+ "accuracy": 0.8888888888888888,
1544
+ "count": 18
1545
  },
1546
  "UB": {
1547
+ "accuracy": 0.907563025210084,
1548
+ "count": 119
1549
  }
1550
  }
1551
  },
1552
  "sub_M2": {
1553
+ "full_accuracy": 0.3,
1554
+ "digit_accuracy": 0.8714285714285714,
1555
+ "n_examples": 50,
1556
  "per_subtask": {
1557
  "MD": {
1558
+ "accuracy": 0.9732142857142857,
1559
+ "count": 112
1560
  },
1561
  "MB": {
1562
+ "accuracy": 0.9056603773584906,
1563
+ "count": 53
1564
  },
1565
  "ME": {
1566
+ "accuracy": 0.9574468085106383,
1567
+ "count": 47
1568
  },
1569
  "UB": {
1570
+ "accuracy": 0.6470588235294118,
1571
+ "count": 85
1572
  },
1573
  "UD": {
1574
+ "accuracy": 0.9056603773584906,
1575
+ "count": 53
1576
  }
1577
  }
1578
  },
1579
  "sub_M3": {
1580
+ "full_accuracy": 0.2,
1581
+ "digit_accuracy": 0.8,
1582
+ "n_examples": 50,
1583
  "per_subtask": {
1584
  "MD": {
1585
+ "accuracy": 0.9896907216494846,
1586
+ "count": 97
1587
  },
1588
  "MB": {
1589
+ "accuracy": 0.9607843137254902,
1590
+ "count": 51
1591
  },
1592
  "ME": {
1593
  "accuracy": 1.0,
1594
+ "count": 27
1595
  },
1596
  "UB": {
1597
+ "accuracy": 0.5405405405405406,
1598
+ "count": 74
1599
  },
1600
  "UD": {
1601
+ "accuracy": 0.6732673267326733,
1602
+ "count": 101
1603
  }
1604
  }
1605
  },
1606
  "sub_M4": {
1607
+ "full_accuracy": 0.0,
1608
+ "digit_accuracy": 0.6428571428571429,
1609
+ "n_examples": 50,
1610
  "per_subtask": {
1611
  "MD": {
1612
  "accuracy": 1.0,
1613
+ "count": 100
1614
  },
1615
  "MB": {
1616
+ "accuracy": 1.0,
1617
+ "count": 50
1618
  },
1619
  "UB": {
1620
+ "accuracy": 0.26,
1621
+ "count": 50
1622
  },
1623
  "UD": {
1624
+ "accuracy": 0.41333333333333333,
1625
+ "count": 150
1626
  }
1627
  }
1628
  },
1629
  "sub_M5": {
1630
+ "full_accuracy": 0.02,
1631
+ "digit_accuracy": 0.44285714285714284,
1632
+ "n_examples": 50,
1633
  "per_subtask": {
1634
  "MD": {
1635
  "accuracy": 1.0,
1636
+ "count": 50
1637
  },
1638
  "MB": {
1639
  "accuracy": 1.0,
1640
+ "count": 50
1641
  },
1642
  "UB": {
1643
+ "accuracy": 0.32,
1644
+ "count": 50
1645
  },
1646
  "UD": {
1647
+ "accuracy": 0.195,
1648
+ "count": 200
1649
  }
1650
  }
1651
  },
1652
  "sub_random": {
1653
+ "full_accuracy": 0.765,
1654
+ "digit_accuracy": 0.9628571428571429,
1655
  "n_examples": 200,
1656
  "per_subtask": {
1657
  "MD": {
1658
+ "accuracy": 0.9894736842105263,
1659
+ "count": 570
1660
  },
1661
  "MB": {
1662
+ "accuracy": 0.9566787003610109,
1663
+ "count": 277
1664
  },
1665
  "ME": {
1666
  "accuracy": 1.0,
1667
  "count": 53
1668
  },
1669
  "UB": {
1670
+ "accuracy": 0.9299363057324841,
1671
+ "count": 471
1672
  },
1673
  "UD": {
1674
+ "accuracy": 0.9655172413793104,
1675
+ "count": 29
1676
  }
1677
  }
1678
  },
1679
  "sub_B3": {
1680
+ "full_accuracy": 0.48,
1681
+ "digit_accuracy": 0.8971428571428571,
1682
+ "n_examples": 50,
1683
  "per_subtask": {
1684
  "MD": {
1685
+ "accuracy": 1.0,
1686
+ "count": 150
1687
  },
1688
  "MB": {
1689
  "accuracy": 1.0,
1690
+ "count": 50
1691
  },
1692
  "UB": {
1693
+ "accuracy": 0.7623762376237624,
1694
+ "count": 101
1695
  },
1696
  "UD": {
1697
+ "accuracy": 0.7551020408163265,
1698
+ "count": 49
1699
  }
1700
  }
1701
  },
1702
  "sub_B4": {
1703
+ "full_accuracy": 0.24,
1704
+ "digit_accuracy": 0.82,
1705
+ "n_examples": 50,
1706
  "per_subtask": {
1707
  "MD": {
1708
  "accuracy": 1.0,
1709
+ "count": 100
1710
  },
1711
  "MB": {
1712
+ "accuracy": 1.0,
1713
+ "count": 50
1714
  },
1715
  "UB": {
1716
+ "accuracy": 0.6942148760330579,
1717
+ "count": 121
1718
  },
1719
  "UD": {
1720
+ "accuracy": 0.6708860759493671,
1721
+ "count": 79
1722
  }
1723
  }
1724
  },
1725
  "sub_B5": {
1726
+ "full_accuracy": 0.14,
1727
+ "digit_accuracy": 0.7714285714285715,
1728
+ "n_examples": 50,
1729
  "per_subtask": {
1730
  "MD": {
1731
  "accuracy": 1.0,
1732
+ "count": 50
1733
  },
1734
  "MB": {
1735
  "accuracy": 1.0,
1736
+ "count": 50
1737
  },
1738
  "UB": {
1739
+ "accuracy": 0.6907894736842105,
1740
+ "count": 152
1741
  },
1742
  "UD": {
1743
+ "accuracy": 0.6632653061224489,
1744
+ "count": 98
1745
  }
1746
  }
1747
  }
1748
  },
1749
  "summary": {
1750
+ "overall_accuracy": 0.594,
1751
+ "digit_accuracy": 0.8919047619047619,
1752
+ "total_examples": 1500,
1753
+ "n_splits": 24
1754
  }
1755
  },
1756
  "sorl_eval": {
 
1759
  "K": 1,
1760
  "mode": "sorl",
1761
  "n_digits": 6,
1762
+ "n_per_split": 50
1763
  },
1764
  "splits": {
1765
  "add_S0": {
1766
+ "full_accuracy": 0.94,
1767
+ "digit_accuracy": 0.9914285714285714,
1768
+ "n_examples": 50,
1769
  "per_subtask": {
1770
  "SA": {
1771
+ "accuracy": 0.9898305084745763,
1772
+ "count": 295
1773
  },
1774
  "SS": {
1775
  "accuracy": 1.0,
1776
+ "count": 55
1777
  }
1778
  }
1779
  },
1780
  "add_S1": {
1781
  "full_accuracy": 0.9,
1782
+ "digit_accuracy": 0.9857142857142858,
1783
+ "n_examples": 50,
1784
  "per_subtask": {
1785
  "SA": {
1786
+ "accuracy": 0.9920634920634921,
1787
+ "count": 126
1788
  },
1789
  "SC": {
1790
+ "accuracy": 0.9620253164556962,
1791
+ "count": 79
1792
  },
1793
  "SS": {
1794
  "accuracy": 1.0,
1795
+ "count": 21
1796
  },
1797
  "UC": {
1798
+ "accuracy": 0.9919354838709677,
1799
+ "count": 124
1800
  }
1801
  }
1802
  },
1803
  "add_S2": {
1804
+ "full_accuracy": 0.7,
1805
+ "digit_accuracy": 0.9457142857142857,
1806
+ "n_examples": 50,
1807
  "per_subtask": {
1808
  "SA": {
1809
+ "accuracy": 0.9866666666666667,
1810
+ "count": 75
1811
  },
1812
  "SC": {
1813
+ "accuracy": 0.9193548387096774,
1814
+ "count": 62
1815
  },
1816
  "SS": {
1817
+ "accuracy": 0.8974358974358975,
1818
+ "count": 39
1819
  },
1820
  "UC": {
1821
+ "accuracy": 0.918918918918919,
1822
+ "count": 111
1823
  },
1824
  "US": {
1825
+ "accuracy": 1.0,
1826
+ "count": 63
1827
  }
1828
  }
1829
  },
1830
  "add_S3": {
1831
+ "full_accuracy": 0.6,
1832
+ "digit_accuracy": 0.9285714285714286,
1833
+ "n_examples": 50,
1834
  "per_subtask": {
1835
  "SA": {
1836
+ "accuracy": 0.9833333333333333,
1837
+ "count": 60
1838
  },
1839
  "SC": {
1840
+ "accuracy": 0.9824561403508771,
1841
+ "count": 57
1842
  },
1843
  "SS": {
1844
+ "accuracy": 0.9473684210526315,
1845
+ "count": 19
1846
  },
1847
  "UC": {
1848
+ "accuracy": 0.8269230769230769,
1849
+ "count": 104
1850
  },
1851
  "US": {
1852
+ "accuracy": 0.9636363636363636,
1853
+ "count": 110
1854
  }
1855
  }
1856
  },
1857
  "add_S4": {
1858
+ "full_accuracy": 0.68,
1859
+ "digit_accuracy": 0.8971428571428571,
1860
+ "n_examples": 50,
1861
  "per_subtask": {
1862
  "SA": {
1863
  "accuracy": 1.0,
1864
+ "count": 48
1865
  },
1866
  "SC": {
1867
+ "accuracy": 1.0,
1868
+ "count": 52
1869
  },
1870
  "SS": {
1871
  "accuracy": 1.0,
1872
+ "count": 7
1873
  },
1874
  "UC": {
1875
+ "accuracy": 0.8651685393258427,
1876
+ "count": 89
1877
  },
1878
  "US": {
1879
+ "accuracy": 0.8441558441558441,
1880
+ "count": 154
1881
  }
1882
  }
1883
  },
1884
  "add_S5": {
1885
+ "full_accuracy": 0.4,
1886
+ "digit_accuracy": 0.7514285714285714,
1887
+ "n_examples": 50,
1888
  "per_subtask": {
1889
  "SA": {
1890
  "accuracy": 1.0,
1891
+ "count": 50
1892
  },
1893
  "SC": {
1894
  "accuracy": 1.0,
1895
+ "count": 50
1896
  },
1897
  "UC": {
1898
+ "accuracy": 0.62,
1899
+ "count": 50
1900
  },
1901
  "US": {
1902
+ "accuracy": 0.66,
1903
+ "count": 200
1904
  }
1905
  }
1906
  },
1907
  "add_S6": {
1908
+ "full_accuracy": 0.48,
1909
+ "digit_accuracy": 0.76,
1910
+ "n_examples": 50,
1911
  "per_subtask": {
1912
  "SC": {
1913
  "accuracy": 1.0,
1914
+ "count": 50
1915
  },
1916
  "UC": {
1917
+ "accuracy": 0.64,
1918
+ "count": 50
1919
  },
1920
  "US": {
1921
+ "accuracy": 0.736,
1922
+ "count": 250
1923
  }
1924
  }
1925
  },
1926
  "add_random": {
1927
+ "full_accuracy": 0.9,
1928
+ "digit_accuracy": 0.9842857142857143,
1929
  "n_examples": 200,
1930
  "per_subtask": {
1931
  "SA": {
1932
+ "accuracy": 0.9953596287703016,
1933
+ "count": 431
1934
  },
1935
  "SC": {
1936
+ "accuracy": 0.990506329113924,
1937
+ "count": 316
1938
  },
1939
  "SS": {
1940
+ "accuracy": 0.9743589743589743,
1941
+ "count": 39
1942
  },
1943
  "UC": {
1944
+ "accuracy": 0.9732142857142857,
1945
+ "count": 560
1946
  },
1947
  "US": {
1948
+ "accuracy": 0.9814814814814815,
1949
+ "count": 54
1950
+ }
1951
+ }
1952
+ },
1953
+ "add_C1": {
1954
+ "full_accuracy": 0.96,
1955
+ "digit_accuracy": 0.9942857142857143,
1956
+ "n_examples": 50,
1957
+ "per_subtask": {
1958
+ "SA": {
1959
  "accuracy": 1.0,
1960
+ "count": 250
1961
+ },
1962
+ "SC": {
1963
+ "accuracy": 1.0,
1964
+ "count": 50
1965
+ },
1966
+ "UC": {
1967
+ "accuracy": 0.96,
1968
+ "count": 50
1969
+ }
1970
+ }
1971
+ },
1972
+ "add_C2": {
1973
+ "full_accuracy": 0.9,
1974
+ "digit_accuracy": 0.98,
1975
+ "n_examples": 50,
1976
+ "per_subtask": {
1977
+ "SA": {
1978
+ "accuracy": 1.0,
1979
+ "count": 200
1980
+ },
1981
+ "SC": {
1982
+ "accuracy": 1.0,
1983
+ "count": 50
1984
+ },
1985
+ "UC": {
1986
+ "accuracy": 0.963855421686747,
1987
+ "count": 83
1988
+ },
1989
+ "US": {
1990
+ "accuracy": 0.7647058823529411,
1991
+ "count": 17
1992
  }
1993
  }
1994
  },
1995
  "add_C3": {
1996
+ "full_accuracy": 0.58,
1997
+ "digit_accuracy": 0.9142857142857143,
1998
+ "n_examples": 50,
1999
  "per_subtask": {
2000
  "SA": {
2001
  "accuracy": 1.0,
2002
+ "count": 150
2003
  },
2004
  "SC": {
2005
  "accuracy": 1.0,
2006
+ "count": 50
2007
  },
2008
  "UC": {
2009
+ "accuracy": 0.85,
2010
+ "count": 100
2011
  },
2012
  "US": {
2013
+ "accuracy": 0.7,
2014
+ "count": 50
2015
  }
2016
  }
2017
  },
2018
  "add_C4": {
2019
+ "full_accuracy": 0.6,
2020
+ "digit_accuracy": 0.9285714285714286,
2021
+ "n_examples": 50,
2022
  "per_subtask": {
2023
  "SA": {
2024
  "accuracy": 1.0,
2025
+ "count": 100
2026
  },
2027
  "SC": {
2028
  "accuracy": 1.0,
2029
+ "count": 50
2030
  },
2031
  "UC": {
2032
+ "accuracy": 0.8712121212121212,
2033
+ "count": 132
2034
  },
2035
  "US": {
2036
+ "accuracy": 0.8823529411764706,
2037
+ "count": 68
2038
  }
2039
  }
2040
  },
2041
  "add_C5": {
2042
+ "full_accuracy": 0.62,
2043
+ "digit_accuracy": 0.9085714285714286,
2044
+ "n_examples": 50,
2045
  "per_subtask": {
2046
  "SA": {
2047
  "accuracy": 1.0,
2048
+ "count": 50
2049
  },
2050
  "SC": {
2051
  "accuracy": 1.0,
2052
+ "count": 50
2053
  },
2054
  "UC": {
2055
+ "accuracy": 0.8698630136986302,
2056
+ "count": 146
2057
  },
2058
  "US": {
2059
+ "accuracy": 0.875,
2060
+ "count": 104
2061
  }
2062
  }
2063
  },
2064
  "add_C6": {
2065
+ "full_accuracy": 0.62,
2066
+ "digit_accuracy": 0.9171428571428571,
2067
+ "n_examples": 50,
2068
  "per_subtask": {
2069
  "SC": {
2070
  "accuracy": 1.0,
2071
+ "count": 50
2072
  },
2073
  "UC": {
2074
+ "accuracy": 0.8994708994708994,
2075
+ "count": 189
2076
  },
2077
  "US": {
2078
+ "accuracy": 0.9099099099099099,
2079
+ "count": 111
2080
  }
2081
  }
2082
  },
2083
  "sub_M0": {
2084
+ "full_accuracy": 0.76,
2085
+ "digit_accuracy": 0.96,
2086
+ "n_examples": 50,
2087
  "per_subtask": {
2088
  "MD": {
2089
+ "accuracy": 0.9570957095709571,
2090
+ "count": 303
2091
  },
2092
  "ME": {
2093
+ "accuracy": 0.9787234042553191,
2094
+ "count": 47
2095
  }
2096
  }
2097
  },
2098
  "sub_M1": {
2099
  "full_accuracy": 0.82,
2100
+ "digit_accuracy": 0.9657142857142857,
2101
+ "n_examples": 50,
2102
  "per_subtask": {
2103
  "MD": {
2104
+ "accuracy": 0.9858156028368794,
2105
+ "count": 141
2106
  },
2107
  "MB": {
2108
+ "accuracy": 0.9861111111111112,
2109
+ "count": 72
2110
  },
2111
  "ME": {
2112
+ "accuracy": 0.8888888888888888,
2113
+ "count": 18
2114
  },
2115
  "UB": {
2116
+ "accuracy": 0.9411764705882353,
2117
+ "count": 119
2118
  }
2119
  }
2120
  },
2121
  "sub_M2": {
2122
+ "full_accuracy": 0.32,
2123
+ "digit_accuracy": 0.8828571428571429,
2124
+ "n_examples": 50,
2125
  "per_subtask": {
2126
  "MD": {
2127
+ "accuracy": 0.9464285714285714,
2128
+ "count": 112
2129
  },
2130
  "MB": {
2131
+ "accuracy": 0.9433962264150944,
2132
+ "count": 53
2133
  },
2134
  "ME": {
2135
+ "accuracy": 0.9787234042553191,
2136
+ "count": 47
2137
  },
2138
  "UB": {
2139
+ "accuracy": 0.6705882352941176,
2140
+ "count": 85
2141
  },
2142
  "UD": {
2143
+ "accuracy": 0.9433962264150944,
2144
+ "count": 53
2145
  }
2146
  }
2147
  },
2148
  "sub_M3": {
2149
  "full_accuracy": 0.22,
2150
+ "digit_accuracy": 0.7885714285714286,
2151
+ "n_examples": 50,
2152
  "per_subtask": {
2153
  "MD": {
2154
+ "accuracy": 0.9896907216494846,
2155
+ "count": 97
2156
  },
2157
  "MB": {
2158
+ "accuracy": 0.9607843137254902,
2159
+ "count": 51
2160
  },
2161
  "ME": {
2162
+ "accuracy": 0.8888888888888888,
2163
+ "count": 27
2164
  },
2165
  "UB": {
2166
+ "accuracy": 0.5405405405405406,
2167
+ "count": 74
2168
  },
2169
  "UD": {
2170
+ "accuracy": 0.6633663366336634,
2171
+ "count": 101
2172
  }
2173
  }
2174
  },
2175
  "sub_M4": {
2176
+ "full_accuracy": 0.08,
2177
+ "digit_accuracy": 0.6914285714285714,
2178
+ "n_examples": 50,
2179
  "per_subtask": {
2180
  "MD": {
2181
+ "accuracy": 1.0,
2182
+ "count": 100
2183
  },
2184
  "MB": {
2185
  "accuracy": 1.0,
2186
+ "count": 50
2187
  },
2188
  "UB": {
2189
+ "accuracy": 0.44,
2190
+ "count": 50
2191
  },
2192
  "UD": {
2193
+ "accuracy": 0.4666666666666667,
2194
+ "count": 150
2195
  }
2196
  }
2197
  },
2198
  "sub_M5": {
2199
+ "full_accuracy": 0.24,
2200
+ "digit_accuracy": 0.6028571428571429,
2201
+ "n_examples": 50,
2202
  "per_subtask": {
2203
  "MD": {
2204
  "accuracy": 1.0,
2205
+ "count": 50
2206
  },
2207
  "MB": {
2208
  "accuracy": 1.0,
2209
+ "count": 50
2210
  },
2211
  "UB": {
2212
+ "accuracy": 0.56,
2213
+ "count": 50
2214
  },
2215
  "UD": {
2216
+ "accuracy": 0.415,
2217
+ "count": 200
2218
  }
2219
  }
2220
  },
2221
  "sub_random": {
2222
+ "full_accuracy": 0.825,
2223
+ "digit_accuracy": 0.9735714285714285,
2224
  "n_examples": 200,
2225
  "per_subtask": {
2226
  "MD": {
2227
+ "accuracy": 0.9894736842105263,
2228
+ "count": 570
2229
  },
2230
  "MB": {
2231
+ "accuracy": 0.9819494584837545,
2232
+ "count": 277
2233
  },
2234
  "ME": {
2235
  "accuracy": 1.0,
2236
  "count": 53
2237
  },
2238
  "UB": {
2239
+ "accuracy": 0.9490445859872612,
2240
+ "count": 471
2241
  },
2242
  "UD": {
2243
+ "accuracy": 0.9310344827586207,
2244
+ "count": 29
2245
  }
2246
  }
2247
  },
2248
  "sub_B3": {
2249
+ "full_accuracy": 0.46,
2250
+ "digit_accuracy": 0.9,
2251
+ "n_examples": 50,
2252
  "per_subtask": {
2253
  "MD": {
2254
+ "accuracy": 0.9933333333333333,
2255
+ "count": 150
2256
  },
2257
  "MB": {
2258
+ "accuracy": 1.0,
2259
+ "count": 50
2260
  },
2261
  "UB": {
2262
+ "accuracy": 0.7722772277227723,
2263
+ "count": 101
2264
  },
2265
  "UD": {
2266
+ "accuracy": 0.7755102040816326,
2267
+ "count": 49
2268
  }
2269
  }
2270
  },
2271
  "sub_B4": {
2272
+ "full_accuracy": 0.32,
2273
+ "digit_accuracy": 0.8285714285714286,
2274
+ "n_examples": 50,
2275
  "per_subtask": {
2276
  "MD": {
2277
  "accuracy": 1.0,
2278
+ "count": 100
2279
  },
2280
  "MB": {
2281
+ "accuracy": 0.98,
2282
+ "count": 50
2283
  },
2284
  "UB": {
2285
+ "accuracy": 0.6859504132231405,
2286
+ "count": 121
2287
  },
2288
  "UD": {
2289
+ "accuracy": 0.7341772151898734,
2290
+ "count": 79
2291
  }
2292
  }
2293
  },
2294
  "sub_B5": {
2295
+ "full_accuracy": 0.18,
2296
+ "digit_accuracy": 0.7657142857142857,
2297
+ "n_examples": 50,
2298
  "per_subtask": {
2299
  "MD": {
2300
  "accuracy": 1.0,
2301
+ "count": 50
2302
  },
2303
  "MB": {
2304
  "accuracy": 1.0,
2305
+ "count": 50
2306
  },
2307
  "UB": {
2308
+ "accuracy": 0.6578947368421053,
2309
+ "count": 152
2310
  },
2311
  "UD": {
2312
+ "accuracy": 0.6938775510204082,
2313
+ "count": 98
2314
  }
2315
  }
2316
  }
2317
  },
2318
  "summary": {
2319
+ "overall_accuracy": 0.642,
2320
+ "digit_accuracy": 0.9039047619047619,
2321
+ "total_examples": 1500,
2322
+ "n_splits": 24
2323
  }
2324
  },
2325
  "sorl_overall_accuracy": 0.5895833333333333,